/* See LICENSE file for copyright and license details. */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "../grapheme.h"
#include "util.h"
/*
 * Test vectors for the UTF-8 encoder: each entry pairs an input
 * codepoint with the byte sequence (and its length) the encoder is
 * expected to produce. The two invalid codepoints are expected to
 * yield EF BF BD, the UTF-8 encoding of U+FFFD.
 */
static const struct {
	uint_least32_t cp; /* input codepoint */
	char *exp_arr;     /* expected UTF-8 byte sequence */
	size_t exp_len;    /* expected length of UTF-8 sequence */
} enc_test[] = {
	{
		/* invalid codepoint (UTF-16 surrogate half) */
		.cp = UINT32_C(0xD800),
		.exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
		.exp_len = 3,
	},
	{
		/* invalid codepoint (UTF-16-unrepresentable) */
		.cp = UINT32_C(0x110000),
		.exp_arr = (char *)(unsigned char[]){ 0xEF, 0xBF, 0xBD },
		.exp_len = 3,
	},
	{
		/* codepoint encoded to a 1-byte sequence */
		.cp = UINT32_C(0x01),
		.exp_arr = (char *)(unsigned char[]){ 0x01 },
		.exp_len = 1,
	},
	{
		/* codepoint encoded to a 2-byte sequence */
		.cp = UINT32_C(0xFF),
		.exp_arr = (char *)(unsigned char[]){ 0xC3, 0xBF },
		.exp_len = 2,
	},
	{
		/* codepoint encoded to a 3-byte sequence */
		.cp = UINT32_C(0xFFF),
		.exp_arr = (char *)(unsigned char[]){ 0xE0, 0xBF, 0xBF },
		.exp_len = 3,
	},
	{
		/* codepoint encoded to a 4-byte sequence */
		.cp = UINT32_C(0xFFFFF),
		.exp_arr = (char *)(unsigned char[]){ 0xF3, 0xBF, 0xBF, 0xBF },
		.exp_len = 4,
	},
};
54 3448adb0 2022-11-02 op main(int argc, char *argv[])
56 3448adb0 2022-11-02 op size_t i, j, failed;
60 3448adb0 2022-11-02 op /* UTF-8 encoder test */
61 3448adb0 2022-11-02 op for (i = 0, failed = 0; i < LEN(enc_test); i++) {
65 3448adb0 2022-11-02 op len = grapheme_encode_utf8(enc_test[i].cp, arr, LEN(arr));
67 3448adb0 2022-11-02 op if (len != enc_test[i].exp_len ||
68 3448adb0 2022-11-02 op memcmp(arr, enc_test[i].exp_arr, len)) {
69 3448adb0 2022-11-02 op fprintf(stderr, "%s, Failed test %zu: "
70 3448adb0 2022-11-02 op "Expected (", argv[0], i);
71 3448adb0 2022-11-02 op for (j = 0; j < enc_test[i].exp_len; j++) {
72 3448adb0 2022-11-02 op fprintf(stderr, "0x%x",
73 3448adb0 2022-11-02 op enc_test[i].exp_arr[j]);
74 3448adb0 2022-11-02 op if (j + 1 < enc_test[i].exp_len) {
75 3448adb0 2022-11-02 op fprintf(stderr, " ");
78 3448adb0 2022-11-02 op fprintf(stderr, "), but got (");
79 3448adb0 2022-11-02 op for (j = 0; j < len; j++) {
80 3448adb0 2022-11-02 op fprintf(stderr, "0x%x", arr[j]);
81 3448adb0 2022-11-02 op if (j + 1 < len) {
82 3448adb0 2022-11-02 op fprintf(stderr, " ");
85 3448adb0 2022-11-02 op fprintf(stderr, ").\n");
89 3448adb0 2022-11-02 op printf("%s: %zu/%zu unit tests passed.\n", argv[0],
90 3448adb0 2022-11-02 op LEN(enc_test) - failed, LEN(enc_test));
92 3448adb0 2022-11-02 op return (failed > 0) ? 1 : 0;