Blame


1 3448adb0 2022-11-02 op /* See LICENSE file for copyright and license details. */
2 3448adb0 2022-11-02 op #include <stddef.h>
3 3448adb0 2022-11-02 op #include <stdint.h>
4 3448adb0 2022-11-02 op #include <stdio.h>
5 3448adb0 2022-11-02 op #include <string.h>
6 3448adb0 2022-11-02 op
7 3448adb0 2022-11-02 op #include "../grapheme.h"
8 3448adb0 2022-11-02 op #include "util.h"
9 3448adb0 2022-11-02 op
/*
 * Test vectors for the UTF-8 encoder. Each entry pairs an input
 * codepoint with the byte sequence, and its length, that encoding it
 * is expected to produce. The entry order is significant because
 * failures are reported by table index.
 */
static const struct {
	uint_least32_t cp; /* codepoint handed to the encoder */
	char *exp_arr;     /* bytes the encoder is expected to write */
	size_t exp_len;    /* number of bytes in exp_arr */
} enc_test[] = {
	/* UTF-16 surrogate half: invalid, expected as U+FFFD (EF BF BD) */
	{ .cp = UINT32_C(0xD800),
	  .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
	  .exp_len = 3 },

	/* not representable in UTF-16: invalid, expected as U+FFFD */
	{ .cp = UINT32_C(0x110000),
	  .exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
	  .exp_len = 3 },

	/* 1-byte sequence */
	{ .cp = UINT32_C(0x01),
	  .exp_arr = (char *)(unsigned char[]){ 0x01 },
	  .exp_len = 1 },

	/* 2-byte sequence */
	{ .cp = UINT32_C(0xFF),
	  .exp_arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
	  .exp_len = 2 },

	/* 3-byte sequence */
	{ .cp = UINT32_C(0xFFF),
	  .exp_arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
	  .exp_len = 3 },

	/* 4-byte sequence */
	{ .cp = UINT32_C(0xFFFFF),
	  .exp_arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
	  .exp_len = 4 },
};
52 3448adb0 2022-11-02 op
53 3448adb0 2022-11-02 op int
54 3448adb0 2022-11-02 op main(int argc, char *argv[])
55 3448adb0 2022-11-02 op {
56 3448adb0 2022-11-02 op size_t i, j, failed;
57 3448adb0 2022-11-02 op
58 3448adb0 2022-11-02 op (void)argc;
59 3448adb0 2022-11-02 op
60 3448adb0 2022-11-02 op /* UTF-8 encoder test */
61 3448adb0 2022-11-02 op for (i = 0, failed = 0; i < LEN(enc_test); i++) {
62 3448adb0 2022-11-02 op char arr[4];
63 3448adb0 2022-11-02 op size_t len;
64 3448adb0 2022-11-02 op
65 3448adb0 2022-11-02 op len = grapheme_encode_utf8(enc_test[i].cp, arr, LEN(arr));
66 3448adb0 2022-11-02 op
67 3448adb0 2022-11-02 op if (len != enc_test[i].exp_len ||
68 3448adb0 2022-11-02 op memcmp(arr, enc_test[i].exp_arr, len)) {
69 3448adb0 2022-11-02 op fprintf(stderr, "%s, Failed test %zu: "
70 3448adb0 2022-11-02 op "Expected (", argv[0], i);
71 3448adb0 2022-11-02 op for (j = 0; j < enc_test[i].exp_len; j++) {
72 3448adb0 2022-11-02 op fprintf(stderr, "0x%x",
73 3448adb0 2022-11-02 op enc_test[i].exp_arr[j]);
74 3448adb0 2022-11-02 op if (j + 1 < enc_test[i].exp_len) {
75 3448adb0 2022-11-02 op fprintf(stderr, " ");
76 3448adb0 2022-11-02 op }
77 3448adb0 2022-11-02 op }
78 3448adb0 2022-11-02 op fprintf(stderr, "), but got (");
79 3448adb0 2022-11-02 op for (j = 0; j < len; j++) {
80 3448adb0 2022-11-02 op fprintf(stderr, "0x%x", arr[j]);
81 3448adb0 2022-11-02 op if (j + 1 < len) {
82 3448adb0 2022-11-02 op fprintf(stderr, " ");
83 3448adb0 2022-11-02 op }
84 3448adb0 2022-11-02 op }
85 3448adb0 2022-11-02 op fprintf(stderr, ").\n");
86 3448adb0 2022-11-02 op failed++;
87 3448adb0 2022-11-02 op }
88 3448adb0 2022-11-02 op }
89 3448adb0 2022-11-02 op printf("%s: %zu/%zu unit tests passed.\n", argv[0],
90 3448adb0 2022-11-02 op LEN(enc_test) - failed, LEN(enc_test));
91 3448adb0 2022-11-02 op
92 3448adb0 2022-11-02 op return (failed > 0) ? 1 : 0;
93 3448adb0 2022-11-02 op }