1 3448adb0 2022-11-02 op /* See LICENSE file for copyright and license details. */
2 3448adb0 2022-11-02 op #include <stdio.h>
3 3448adb0 2022-11-02 op #include <stdlib.h>
4 3448adb0 2022-11-02 op #include <string.h>
6 3448adb0 2022-11-02 op #include "util.h"
/*
 * Paths of the data files that the entries of word_break_property name
 * in their .file member.
 */
8 3448adb0 2022-11-02 op #define FILE_EMOJI "data/emoji-data.txt"
9 3448adb0 2022-11-02 op #define FILE_WORD "data/WordBreakProperty.txt"
/*
 * Property table handed to properties_generate_break_property() in main
 * and searched by .enumname in handle_conflict().
 *
 * Each entry sets .enumname and, where shown, .file (FILE_WORD or
 * FILE_EMOJI) and .ucdname. The .ucdname strings are presumably the
 * property names as spelled in the corresponding data file -- TODO
 * confirm against util.h / the data files.
 *
 * NOTE(review): this listing carries per-line annotation prefixes and its
 * embedded line numbers skip values, so the entry braces, the closing of
 * the array and possibly some members (e.g. .file of the entries with a
 * NULL .ucdname) are not shown here. Confirm against the original file
 * before editing.
 */
11 3448adb0 2022-11-02 op static const struct property_spec word_break_property[] = {
/* No .ucdname; presumably the default class -- TODO confirm. */
13 3448adb0 2022-11-02 op .enumname = "OTHER",
15 3448adb0 2022-11-02 op .ucdname = NULL,
18 3448adb0 2022-11-02 op .enumname = "ALETTER",
19 3448adb0 2022-11-02 op .file = FILE_WORD,
20 3448adb0 2022-11-02 op .ucdname = "ALetter",
/*
 * No .ucdname: handle_conflict() looks this entry up by name when the
 * two conflicting properties are ALETTER and EXTENDED_PICTOGRAPHIC.
 */
23 3448adb0 2022-11-02 op .enumname = "BOTH_ALETTER_EXTPICT",
25 3448adb0 2022-11-02 op .ucdname = NULL,
28 3448adb0 2022-11-02 op .enumname = "CR",
29 3448adb0 2022-11-02 op .file = FILE_WORD,
30 3448adb0 2022-11-02 op .ucdname = "CR",
33 3448adb0 2022-11-02 op .enumname = "DOUBLE_QUOTE",
34 3448adb0 2022-11-02 op .file = FILE_WORD,
35 3448adb0 2022-11-02 op .ucdname = "Double_Quote",
38 3448adb0 2022-11-02 op .enumname = "EXTEND",
39 3448adb0 2022-11-02 op .file = FILE_WORD,
40 3448adb0 2022-11-02 op .ucdname = "Extend",
/* The only entry shown here whose .file is FILE_EMOJI. */
43 3448adb0 2022-11-02 op .enumname = "EXTENDED_PICTOGRAPHIC",
44 3448adb0 2022-11-02 op .file = FILE_EMOJI,
45 3448adb0 2022-11-02 op .ucdname = "Extended_Pictographic",
48 3448adb0 2022-11-02 op .enumname = "EXTENDNUMLET",
49 3448adb0 2022-11-02 op .file = FILE_WORD,
50 3448adb0 2022-11-02 op .ucdname = "ExtendNumLet",
53 3448adb0 2022-11-02 op .enumname = "FORMAT",
54 3448adb0 2022-11-02 op .file = FILE_WORD,
55 3448adb0 2022-11-02 op .ucdname = "Format",
58 3448adb0 2022-11-02 op .enumname = "HEBREW_LETTER",
59 3448adb0 2022-11-02 op .file = FILE_WORD,
60 3448adb0 2022-11-02 op .ucdname = "Hebrew_Letter",
63 3448adb0 2022-11-02 op .enumname = "KATAKANA",
64 3448adb0 2022-11-02 op .file = FILE_WORD,
65 3448adb0 2022-11-02 op .ucdname = "Katakana",
68 3448adb0 2022-11-02 op .enumname = "LF",
69 3448adb0 2022-11-02 op .file = FILE_WORD,
70 3448adb0 2022-11-02 op .ucdname = "LF",
73 3448adb0 2022-11-02 op .enumname = "MIDLETTER",
74 3448adb0 2022-11-02 op .file = FILE_WORD,
75 3448adb0 2022-11-02 op .ucdname = "MidLetter",
78 3448adb0 2022-11-02 op .enumname = "MIDNUM",
79 3448adb0 2022-11-02 op .file = FILE_WORD,
80 3448adb0 2022-11-02 op .ucdname = "MidNum",
83 3448adb0 2022-11-02 op .enumname = "MIDNUMLET",
84 3448adb0 2022-11-02 op .file = FILE_WORD,
85 3448adb0 2022-11-02 op .ucdname = "MidNumLet",
88 3448adb0 2022-11-02 op .enumname = "NEWLINE",
89 3448adb0 2022-11-02 op .file = FILE_WORD,
90 3448adb0 2022-11-02 op .ucdname = "Newline",
93 3448adb0 2022-11-02 op .enumname = "NUMERIC",
94 3448adb0 2022-11-02 op .file = FILE_WORD,
95 3448adb0 2022-11-02 op .ucdname = "Numeric",
98 3448adb0 2022-11-02 op .enumname = "REGIONAL_INDICATOR",
99 3448adb0 2022-11-02 op .file = FILE_WORD,
100 3448adb0 2022-11-02 op .ucdname = "Regional_Indicator",
103 3448adb0 2022-11-02 op .enumname = "SINGLE_QUOTE",
104 3448adb0 2022-11-02 op .file = FILE_WORD,
105 3448adb0 2022-11-02 op .ucdname = "Single_Quote",
108 3448adb0 2022-11-02 op .enumname = "WSEGSPACE",
109 3448adb0 2022-11-02 op .file = FILE_WORD,
110 3448adb0 2022-11-02 op .ucdname = "WSegSpace",
113 3448adb0 2022-11-02 op .enumname = "ZWJ",
114 3448adb0 2022-11-02 op .file = FILE_WORD,
115 3448adb0 2022-11-02 op .ucdname = "ZWJ",
/*
 * Conflict callback passed to properties_generate_break_property() in
 * main.
 *
 * cp:           code point; not referenced on any line shown here.
 * prop1, prop2: indices into word_break_property.
 *
 * When one of the two indexed entries is ALETTER and the other is
 * EXTENDED_PICTOGRAPHIC (in either order), word_break_property is scanned
 * for the entry named BOTH_ALETTER_EXTPICT and its index ends up in
 * result, which is returned. Any other pair prints
 * "Cannot handle conflict." to stderr.
 *
 * NOTE(review): the embedded line numbers skip values, so the braces, the
 * loop exit on a match and whatever follows each fprintf (presumably a
 * program exit -- TODO confirm) are not shown in this listing.
 */
119 3448adb0 2022-11-02 op static uint_least8_t
120 3448adb0 2022-11-02 op handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
122 3448adb0 2022-11-02 op uint_least8_t result;
/* Accept the ALETTER / EXTENDED_PICTOGRAPHIC pair in both orders. */
126 3448adb0 2022-11-02 op if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") &&
127 3448adb0 2022-11-02 op !strcmp(word_break_property[prop2].enumname, "EXTENDED_PICTOGRAPHIC")) ||
128 3448adb0 2022-11-02 op (!strcmp(word_break_property[prop1].enumname, "EXTENDED_PICTOGRAPHIC") &&
129 3448adb0 2022-11-02 op !strcmp(word_break_property[prop2].enumname, "ALETTER"))) {
/* Find the combined entry by name rather than by a hard-coded index. */
130 3448adb0 2022-11-02 op for (result = 0; result < LEN(word_break_property); result++) {
131 3448adb0 2022-11-02 op if (!strcmp(word_break_property[result].enumname,
132 3448adb0 2022-11-02 op "BOTH_ALETTER_EXTPICT")) {
/* Reaching the table length means no entry carried that name. */
136 3448adb0 2022-11-02 op if (result == LEN(word_break_property)) {
137 3448adb0 2022-11-02 op fprintf(stderr, "handle_conflict: Internal error.\n");
/* Any other combination of properties is reported as unhandled. */
141 3448adb0 2022-11-02 op fprintf(stderr, "handle_conflict: Cannot handle conflict.\n");
145 3448adb0 2022-11-02 op return result;
149 3448adb0 2022-11-02 op main(int argc, char *argv[])
153 3448adb0 2022-11-02 op properties_generate_break_property(word_break_property,
154 3448adb0 2022-11-02 op LEN(word_break_property),
155 3448adb0 2022-11-02 op handle_conflict, NULL, "word_break",