Blame


1 3448adb0 2022-11-02 op /* See LICENSE file for copyright and license details. */
2 3448adb0 2022-11-02 op #include <stdio.h>
3 3448adb0 2022-11-02 op #include <stdlib.h>
4 3448adb0 2022-11-02 op #include <string.h>
5 3448adb0 2022-11-02 op
6 3448adb0 2022-11-02 op #include "util.h"
7 3448adb0 2022-11-02 op
8 3448adb0 2022-11-02 op #define FILE_EMOJI "data/emoji-data.txt"
9 3448adb0 2022-11-02 op #define FILE_WORD "data/WordBreakProperty.txt"
10 3448adb0 2022-11-02 op
11 3448adb0 2022-11-02 op static const struct property_spec word_break_property[] = {
12 3448adb0 2022-11-02 op {
13 3448adb0 2022-11-02 op .enumname = "OTHER",
14 3448adb0 2022-11-02 op .file = NULL,
15 3448adb0 2022-11-02 op .ucdname = NULL,
16 3448adb0 2022-11-02 op },
17 3448adb0 2022-11-02 op {
18 3448adb0 2022-11-02 op .enumname = "ALETTER",
19 3448adb0 2022-11-02 op .file = FILE_WORD,
20 3448adb0 2022-11-02 op .ucdname = "ALetter",
21 3448adb0 2022-11-02 op },
22 3448adb0 2022-11-02 op {
23 3448adb0 2022-11-02 op .enumname = "BOTH_ALETTER_EXTPICT",
24 3448adb0 2022-11-02 op .file = NULL,
25 3448adb0 2022-11-02 op .ucdname = NULL,
26 3448adb0 2022-11-02 op },
27 3448adb0 2022-11-02 op {
28 3448adb0 2022-11-02 op .enumname = "CR",
29 3448adb0 2022-11-02 op .file = FILE_WORD,
30 3448adb0 2022-11-02 op .ucdname = "CR",
31 3448adb0 2022-11-02 op },
32 3448adb0 2022-11-02 op {
33 3448adb0 2022-11-02 op .enumname = "DOUBLE_QUOTE",
34 3448adb0 2022-11-02 op .file = FILE_WORD,
35 3448adb0 2022-11-02 op .ucdname = "Double_Quote",
36 3448adb0 2022-11-02 op },
37 3448adb0 2022-11-02 op {
38 3448adb0 2022-11-02 op .enumname = "EXTEND",
39 3448adb0 2022-11-02 op .file = FILE_WORD,
40 3448adb0 2022-11-02 op .ucdname = "Extend",
41 3448adb0 2022-11-02 op },
42 3448adb0 2022-11-02 op {
43 3448adb0 2022-11-02 op .enumname = "EXTENDED_PICTOGRAPHIC",
44 3448adb0 2022-11-02 op .file = FILE_EMOJI,
45 3448adb0 2022-11-02 op .ucdname = "Extended_Pictographic",
46 3448adb0 2022-11-02 op },
47 3448adb0 2022-11-02 op {
48 3448adb0 2022-11-02 op .enumname = "EXTENDNUMLET",
49 3448adb0 2022-11-02 op .file = FILE_WORD,
50 3448adb0 2022-11-02 op .ucdname = "ExtendNumLet",
51 3448adb0 2022-11-02 op },
52 3448adb0 2022-11-02 op {
53 3448adb0 2022-11-02 op .enumname = "FORMAT",
54 3448adb0 2022-11-02 op .file = FILE_WORD,
55 3448adb0 2022-11-02 op .ucdname = "Format",
56 3448adb0 2022-11-02 op },
57 3448adb0 2022-11-02 op {
58 3448adb0 2022-11-02 op .enumname = "HEBREW_LETTER",
59 3448adb0 2022-11-02 op .file = FILE_WORD,
60 3448adb0 2022-11-02 op .ucdname = "Hebrew_Letter",
61 3448adb0 2022-11-02 op },
62 3448adb0 2022-11-02 op {
63 3448adb0 2022-11-02 op .enumname = "KATAKANA",
64 3448adb0 2022-11-02 op .file = FILE_WORD,
65 3448adb0 2022-11-02 op .ucdname = "Katakana",
66 3448adb0 2022-11-02 op },
67 3448adb0 2022-11-02 op {
68 3448adb0 2022-11-02 op .enumname = "LF",
69 3448adb0 2022-11-02 op .file = FILE_WORD,
70 3448adb0 2022-11-02 op .ucdname = "LF",
71 3448adb0 2022-11-02 op },
72 3448adb0 2022-11-02 op {
73 3448adb0 2022-11-02 op .enumname = "MIDLETTER",
74 3448adb0 2022-11-02 op .file = FILE_WORD,
75 3448adb0 2022-11-02 op .ucdname = "MidLetter",
76 3448adb0 2022-11-02 op },
77 3448adb0 2022-11-02 op {
78 3448adb0 2022-11-02 op .enumname = "MIDNUM",
79 3448adb0 2022-11-02 op .file = FILE_WORD,
80 3448adb0 2022-11-02 op .ucdname = "MidNum",
81 3448adb0 2022-11-02 op },
82 3448adb0 2022-11-02 op {
83 3448adb0 2022-11-02 op .enumname = "MIDNUMLET",
84 3448adb0 2022-11-02 op .file = FILE_WORD,
85 3448adb0 2022-11-02 op .ucdname = "MidNumLet",
86 3448adb0 2022-11-02 op },
87 3448adb0 2022-11-02 op {
88 3448adb0 2022-11-02 op .enumname = "NEWLINE",
89 3448adb0 2022-11-02 op .file = FILE_WORD,
90 3448adb0 2022-11-02 op .ucdname = "Newline",
91 3448adb0 2022-11-02 op },
92 3448adb0 2022-11-02 op {
93 3448adb0 2022-11-02 op .enumname = "NUMERIC",
94 3448adb0 2022-11-02 op .file = FILE_WORD,
95 3448adb0 2022-11-02 op .ucdname = "Numeric",
96 3448adb0 2022-11-02 op },
97 3448adb0 2022-11-02 op {
98 3448adb0 2022-11-02 op .enumname = "REGIONAL_INDICATOR",
99 3448adb0 2022-11-02 op .file = FILE_WORD,
100 3448adb0 2022-11-02 op .ucdname = "Regional_Indicator",
101 3448adb0 2022-11-02 op },
102 3448adb0 2022-11-02 op {
103 3448adb0 2022-11-02 op .enumname = "SINGLE_QUOTE",
104 3448adb0 2022-11-02 op .file = FILE_WORD,
105 3448adb0 2022-11-02 op .ucdname = "Single_Quote",
106 3448adb0 2022-11-02 op },
107 3448adb0 2022-11-02 op {
108 3448adb0 2022-11-02 op .enumname = "WSEGSPACE",
109 3448adb0 2022-11-02 op .file = FILE_WORD,
110 3448adb0 2022-11-02 op .ucdname = "WSegSpace",
111 3448adb0 2022-11-02 op },
112 3448adb0 2022-11-02 op {
113 3448adb0 2022-11-02 op .enumname = "ZWJ",
114 3448adb0 2022-11-02 op .file = FILE_WORD,
115 3448adb0 2022-11-02 op .ucdname = "ZWJ",
116 3448adb0 2022-11-02 op },
117 3448adb0 2022-11-02 op };
118 3448adb0 2022-11-02 op
119 3448adb0 2022-11-02 op static uint_least8_t
120 3448adb0 2022-11-02 op handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
121 3448adb0 2022-11-02 op {
122 3448adb0 2022-11-02 op uint_least8_t result;
123 3448adb0 2022-11-02 op
124 3448adb0 2022-11-02 op (void)cp;
125 3448adb0 2022-11-02 op
126 3448adb0 2022-11-02 op if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") &&
127 3448adb0 2022-11-02 op !strcmp(word_break_property[prop2].enumname, "EXTENDED_PICTOGRAPHIC")) ||
128 3448adb0 2022-11-02 op (!strcmp(word_break_property[prop1].enumname, "EXTENDED_PICTOGRAPHIC") &&
129 3448adb0 2022-11-02 op !strcmp(word_break_property[prop2].enumname, "ALETTER"))) {
130 3448adb0 2022-11-02 op for (result = 0; result < LEN(word_break_property); result++) {
131 3448adb0 2022-11-02 op if (!strcmp(word_break_property[result].enumname,
132 3448adb0 2022-11-02 op "BOTH_ALETTER_EXTPICT")) {
133 3448adb0 2022-11-02 op break;
134 3448adb0 2022-11-02 op }
135 3448adb0 2022-11-02 op }
136 3448adb0 2022-11-02 op if (result == LEN(word_break_property)) {
137 3448adb0 2022-11-02 op fprintf(stderr, "handle_conflict: Internal error.\n");
138 3448adb0 2022-11-02 op exit(1);
139 3448adb0 2022-11-02 op }
140 3448adb0 2022-11-02 op } else {
141 3448adb0 2022-11-02 op fprintf(stderr, "handle_conflict: Cannot handle conflict.\n");
142 3448adb0 2022-11-02 op exit(1);
143 3448adb0 2022-11-02 op }
144 3448adb0 2022-11-02 op
145 3448adb0 2022-11-02 op return result;
146 3448adb0 2022-11-02 op }
147 3448adb0 2022-11-02 op
148 3448adb0 2022-11-02 op int
149 3448adb0 2022-11-02 op main(int argc, char *argv[])
150 3448adb0 2022-11-02 op {
151 3448adb0 2022-11-02 op (void)argc;
152 3448adb0 2022-11-02 op
153 3448adb0 2022-11-02 op properties_generate_break_property(word_break_property,
154 3448adb0 2022-11-02 op LEN(word_break_property),
155 3448adb0 2022-11-02 op handle_conflict, NULL, "word_break",
156 3448adb0 2022-11-02 op argv[0]);
157 3448adb0 2022-11-02 op
158 3448adb0 2022-11-02 op return 0;
159 3448adb0 2022-11-02 op }