Blame


1 3448adb0 2022-11-02 op /* See LICENSE file for copyright and license details. */
2 3448adb0 2022-11-02 op #include <stdio.h>
3 3448adb0 2022-11-02 op #include <stdlib.h>
4 3448adb0 2022-11-02 op #include <string.h>
5 3448adb0 2022-11-02 op
6 3448adb0 2022-11-02 op #include "util.h"
7 3448adb0 2022-11-02 op
/* Paths of the data files referred to by the property table below. */
#define FILE_EAW   "data/EastAsianWidth.txt"
#define FILE_EMOJI "data/emoji-data.txt"
#define FILE_LINE  "data/LineBreak.txt"
11 3448adb0 2022-11-02 op
12 3448adb0 2022-11-02 op static const struct property_spec line_break_property[] = {
13 3448adb0 2022-11-02 op {
14 3448adb0 2022-11-02 op .enumname = "AL",
15 3448adb0 2022-11-02 op .file = FILE_LINE,
16 3448adb0 2022-11-02 op .ucdname = "AL",
17 3448adb0 2022-11-02 op },
18 3448adb0 2022-11-02 op /*
19 3448adb0 2022-11-02 op * Both extended pictographic and cn are large classes,
20 3448adb0 2022-11-02 op * but we are only interested in their intersection for LB30b,
21 3448adb0 2022-11-02 op * so we have the following two temporary classes. At first
22 3448adb0 2022-11-02 op * the extpict-class is filled, then the cn-class, which leads
23 3448adb0 2022-11-02 op * to conflicts (that we handle by putting them in the "proper"
24 3448adb0 2022-11-02 op * class BOTH_CN_EXTPICT). We make use of the fact that there
25 3448adb0 2022-11-02 op * is no intersection between AL and Cn.
26 3448adb0 2022-11-02 op *
27 3448adb0 2022-11-02 op * Any consecutive conflicts are permitted to overwrite
28 3448adb0 2022-11-02 op * TMP_EXTENDED_PICTOGRAPHIC and TMP_CN, because we don't need
29 3448adb0 2022-11-02 op * them, and in the final postprocessing we "reset" all
30 3448adb0 2022-11-02 op * remaining matches (that then didn't fit any of the other
31 3448adb0 2022-11-02 op * classes) to the generic class AL.
32 3448adb0 2022-11-02 op */
33 3448adb0 2022-11-02 op {
34 3448adb0 2022-11-02 op .enumname = "TMP_CN",
35 3448adb0 2022-11-02 op .file = FILE_LINE,
36 3448adb0 2022-11-02 op .ucdname = "Cn",
37 3448adb0 2022-11-02 op },
38 3448adb0 2022-11-02 op {
39 3448adb0 2022-11-02 op .enumname = "TMP_EXTENDED_PICTOGRAPHIC",
40 3448adb0 2022-11-02 op .file = FILE_EMOJI,
41 3448adb0 2022-11-02 op .ucdname = "Extended_Pictographic",
42 3448adb0 2022-11-02 op },
43 3448adb0 2022-11-02 op /* end of special block */
44 3448adb0 2022-11-02 op {
45 3448adb0 2022-11-02 op .enumname = "B2",
46 3448adb0 2022-11-02 op .file = FILE_LINE,
47 3448adb0 2022-11-02 op .ucdname = "B2",
48 3448adb0 2022-11-02 op },
49 3448adb0 2022-11-02 op {
50 3448adb0 2022-11-02 op .enumname = "BA",
51 3448adb0 2022-11-02 op .file = FILE_LINE,
52 3448adb0 2022-11-02 op .ucdname = "BA",
53 3448adb0 2022-11-02 op },
54 3448adb0 2022-11-02 op {
55 3448adb0 2022-11-02 op .enumname = "BB",
56 3448adb0 2022-11-02 op .file = FILE_LINE,
57 3448adb0 2022-11-02 op .ucdname = "BB",
58 3448adb0 2022-11-02 op },
59 3448adb0 2022-11-02 op {
60 3448adb0 2022-11-02 op .enumname = "BK",
61 3448adb0 2022-11-02 op .file = FILE_LINE,
62 3448adb0 2022-11-02 op .ucdname = "BK",
63 3448adb0 2022-11-02 op },
64 3448adb0 2022-11-02 op {
65 3448adb0 2022-11-02 op .enumname = "BOTH_CN_EXTPICT",
66 3448adb0 2022-11-02 op .file = NULL,
67 3448adb0 2022-11-02 op .ucdname = NULL,
68 3448adb0 2022-11-02 op },
69 3448adb0 2022-11-02 op {
70 3448adb0 2022-11-02 op .enumname = "CB",
71 3448adb0 2022-11-02 op .file = FILE_LINE,
72 3448adb0 2022-11-02 op .ucdname = "CB",
73 3448adb0 2022-11-02 op },
74 3448adb0 2022-11-02 op {
75 3448adb0 2022-11-02 op .enumname = "CL",
76 3448adb0 2022-11-02 op .file = FILE_LINE,
77 3448adb0 2022-11-02 op .ucdname = "CL",
78 3448adb0 2022-11-02 op },
79 3448adb0 2022-11-02 op {
80 3448adb0 2022-11-02 op .enumname = "CM",
81 3448adb0 2022-11-02 op .file = FILE_LINE,
82 3448adb0 2022-11-02 op .ucdname = "CM",
83 3448adb0 2022-11-02 op },
84 3448adb0 2022-11-02 op {
85 3448adb0 2022-11-02 op .enumname = "CP_WITHOUT_EAW_HWF",
86 3448adb0 2022-11-02 op .file = FILE_LINE,
87 3448adb0 2022-11-02 op .ucdname = "CP",
88 3448adb0 2022-11-02 op },
89 3448adb0 2022-11-02 op {
90 3448adb0 2022-11-02 op .enumname = "CP_WITH_EAW_HWF",
91 3448adb0 2022-11-02 op .file = NULL,
92 3448adb0 2022-11-02 op .ucdname = NULL,
93 3448adb0 2022-11-02 op },
94 3448adb0 2022-11-02 op {
95 3448adb0 2022-11-02 op .enumname = "CR",
96 3448adb0 2022-11-02 op .file = FILE_LINE,
97 3448adb0 2022-11-02 op .ucdname = "CR",
98 3448adb0 2022-11-02 op },
99 3448adb0 2022-11-02 op {
100 3448adb0 2022-11-02 op .enumname = "EB",
101 3448adb0 2022-11-02 op .file = FILE_LINE,
102 3448adb0 2022-11-02 op .ucdname = "EB",
103 3448adb0 2022-11-02 op },
104 3448adb0 2022-11-02 op {
105 3448adb0 2022-11-02 op .enumname = "EM",
106 3448adb0 2022-11-02 op .file = FILE_LINE,
107 3448adb0 2022-11-02 op .ucdname = "EM",
108 3448adb0 2022-11-02 op },
109 3448adb0 2022-11-02 op {
110 3448adb0 2022-11-02 op .enumname = "EX",
111 3448adb0 2022-11-02 op .file = FILE_LINE,
112 3448adb0 2022-11-02 op .ucdname = "EX",
113 3448adb0 2022-11-02 op },
114 3448adb0 2022-11-02 op {
115 3448adb0 2022-11-02 op .enumname = "GL",
116 3448adb0 2022-11-02 op .file = FILE_LINE,
117 3448adb0 2022-11-02 op .ucdname = "GL",
118 3448adb0 2022-11-02 op },
119 3448adb0 2022-11-02 op {
120 3448adb0 2022-11-02 op .enumname = "H2",
121 3448adb0 2022-11-02 op .file = FILE_LINE,
122 3448adb0 2022-11-02 op .ucdname = "H2",
123 3448adb0 2022-11-02 op },
124 3448adb0 2022-11-02 op {
125 3448adb0 2022-11-02 op .enumname = "H3",
126 3448adb0 2022-11-02 op .file = FILE_LINE,
127 3448adb0 2022-11-02 op .ucdname = "H3",
128 3448adb0 2022-11-02 op },
129 3448adb0 2022-11-02 op {
130 3448adb0 2022-11-02 op .enumname = "HL",
131 3448adb0 2022-11-02 op .file = FILE_LINE,
132 3448adb0 2022-11-02 op .ucdname = "HL",
133 3448adb0 2022-11-02 op },
134 3448adb0 2022-11-02 op {
135 3448adb0 2022-11-02 op .enumname = "HY",
136 3448adb0 2022-11-02 op .file = FILE_LINE,
137 3448adb0 2022-11-02 op .ucdname = "HY",
138 3448adb0 2022-11-02 op },
139 3448adb0 2022-11-02 op {
140 3448adb0 2022-11-02 op .enumname = "ID",
141 3448adb0 2022-11-02 op .file = FILE_LINE,
142 3448adb0 2022-11-02 op .ucdname = "ID",
143 3448adb0 2022-11-02 op },
144 3448adb0 2022-11-02 op {
145 3448adb0 2022-11-02 op .enumname = "IN",
146 3448adb0 2022-11-02 op .file = FILE_LINE,
147 3448adb0 2022-11-02 op .ucdname = "IN",
148 3448adb0 2022-11-02 op },
149 3448adb0 2022-11-02 op {
150 3448adb0 2022-11-02 op .enumname = "IS",
151 3448adb0 2022-11-02 op .file = FILE_LINE,
152 3448adb0 2022-11-02 op .ucdname = "IS",
153 3448adb0 2022-11-02 op },
154 3448adb0 2022-11-02 op {
155 3448adb0 2022-11-02 op .enumname = "JL",
156 3448adb0 2022-11-02 op .file = FILE_LINE,
157 3448adb0 2022-11-02 op .ucdname = "JL",
158 3448adb0 2022-11-02 op },
159 3448adb0 2022-11-02 op {
160 3448adb0 2022-11-02 op .enumname = "JT",
161 3448adb0 2022-11-02 op .file = FILE_LINE,
162 3448adb0 2022-11-02 op .ucdname = "JT",
163 3448adb0 2022-11-02 op },
164 3448adb0 2022-11-02 op {
165 3448adb0 2022-11-02 op .enumname = "JV",
166 3448adb0 2022-11-02 op .file = FILE_LINE,
167 3448adb0 2022-11-02 op .ucdname = "JV",
168 3448adb0 2022-11-02 op },
169 3448adb0 2022-11-02 op {
170 3448adb0 2022-11-02 op .enumname = "LF",
171 3448adb0 2022-11-02 op .file = FILE_LINE,
172 3448adb0 2022-11-02 op .ucdname = "LF",
173 3448adb0 2022-11-02 op },
174 3448adb0 2022-11-02 op {
175 3448adb0 2022-11-02 op .enumname = "NL",
176 3448adb0 2022-11-02 op .file = FILE_LINE,
177 3448adb0 2022-11-02 op .ucdname = "NL",
178 3448adb0 2022-11-02 op },
179 3448adb0 2022-11-02 op {
180 3448adb0 2022-11-02 op .enumname = "NS",
181 3448adb0 2022-11-02 op .file = FILE_LINE,
182 3448adb0 2022-11-02 op .ucdname = "NS",
183 3448adb0 2022-11-02 op },
184 3448adb0 2022-11-02 op {
185 3448adb0 2022-11-02 op .enumname = "NU",
186 3448adb0 2022-11-02 op .file = FILE_LINE,
187 3448adb0 2022-11-02 op .ucdname = "NU",
188 3448adb0 2022-11-02 op },
189 3448adb0 2022-11-02 op {
190 3448adb0 2022-11-02 op .enumname = "OP_WITHOUT_EAW_HWF",
191 3448adb0 2022-11-02 op .file = FILE_LINE,
192 3448adb0 2022-11-02 op .ucdname = "OP",
193 3448adb0 2022-11-02 op },
194 3448adb0 2022-11-02 op {
195 3448adb0 2022-11-02 op .enumname = "OP_WITH_EAW_HWF",
196 3448adb0 2022-11-02 op .file = NULL,
197 3448adb0 2022-11-02 op .ucdname = NULL,
198 3448adb0 2022-11-02 op },
199 3448adb0 2022-11-02 op {
200 3448adb0 2022-11-02 op .enumname = "PO",
201 3448adb0 2022-11-02 op .file = FILE_LINE,
202 3448adb0 2022-11-02 op .ucdname = "PO",
203 3448adb0 2022-11-02 op },
204 3448adb0 2022-11-02 op {
205 3448adb0 2022-11-02 op .enumname = "PR",
206 3448adb0 2022-11-02 op .file = FILE_LINE,
207 3448adb0 2022-11-02 op .ucdname = "PR",
208 3448adb0 2022-11-02 op },
209 3448adb0 2022-11-02 op {
210 3448adb0 2022-11-02 op .enumname = "QU",
211 3448adb0 2022-11-02 op .file = FILE_LINE,
212 3448adb0 2022-11-02 op .ucdname = "QU",
213 3448adb0 2022-11-02 op },
214 3448adb0 2022-11-02 op {
215 3448adb0 2022-11-02 op .enumname = "RI",
216 3448adb0 2022-11-02 op .file = FILE_LINE,
217 3448adb0 2022-11-02 op .ucdname = "RI",
218 3448adb0 2022-11-02 op },
219 3448adb0 2022-11-02 op {
220 3448adb0 2022-11-02 op .enumname = "SP",
221 3448adb0 2022-11-02 op .file = FILE_LINE,
222 3448adb0 2022-11-02 op .ucdname = "SP",
223 3448adb0 2022-11-02 op },
224 3448adb0 2022-11-02 op {
225 3448adb0 2022-11-02 op .enumname = "SY",
226 3448adb0 2022-11-02 op .file = FILE_LINE,
227 3448adb0 2022-11-02 op .ucdname = "SY",
228 3448adb0 2022-11-02 op },
229 3448adb0 2022-11-02 op {
230 3448adb0 2022-11-02 op .enumname = "WJ",
231 3448adb0 2022-11-02 op .file = FILE_LINE,
232 3448adb0 2022-11-02 op .ucdname = "WJ",
233 3448adb0 2022-11-02 op },
234 3448adb0 2022-11-02 op {
235 3448adb0 2022-11-02 op .enumname = "ZW",
236 3448adb0 2022-11-02 op .file = FILE_LINE,
237 3448adb0 2022-11-02 op .ucdname = "ZW",
238 3448adb0 2022-11-02 op },
239 3448adb0 2022-11-02 op {
240 3448adb0 2022-11-02 op .enumname = "ZWJ",
241 3448adb0 2022-11-02 op .file = FILE_LINE,
242 3448adb0 2022-11-02 op .ucdname = "ZWJ",
243 3448adb0 2022-11-02 op },
244 3448adb0 2022-11-02 op {
245 3448adb0 2022-11-02 op .enumname = "TMP_AI",
246 3448adb0 2022-11-02 op .file = FILE_LINE,
247 3448adb0 2022-11-02 op .ucdname = "AI",
248 3448adb0 2022-11-02 op },
249 3448adb0 2022-11-02 op {
250 3448adb0 2022-11-02 op .enumname = "TMP_CJ",
251 3448adb0 2022-11-02 op .file = FILE_LINE,
252 3448adb0 2022-11-02 op .ucdname = "CJ",
253 3448adb0 2022-11-02 op },
254 3448adb0 2022-11-02 op {
255 3448adb0 2022-11-02 op .enumname = "TMP_XX",
256 3448adb0 2022-11-02 op .file = NULL,
257 3448adb0 2022-11-02 op .ucdname = NULL,
258 3448adb0 2022-11-02 op },
259 3448adb0 2022-11-02 op {
260 3448adb0 2022-11-02 op .enumname = "TMP_MN",
261 3448adb0 2022-11-02 op .file = FILE_LINE,
262 3448adb0 2022-11-02 op .ucdname = "Mn",
263 3448adb0 2022-11-02 op },
264 3448adb0 2022-11-02 op {
265 3448adb0 2022-11-02 op .enumname = "TMP_MC",
266 3448adb0 2022-11-02 op .file = FILE_LINE,
267 3448adb0 2022-11-02 op .ucdname = "Mc",
268 3448adb0 2022-11-02 op },
269 3448adb0 2022-11-02 op {
270 3448adb0 2022-11-02 op .enumname = "TMP_SA_WITHOUT_MN_OR_MC",
271 3448adb0 2022-11-02 op .file = FILE_LINE,
272 3448adb0 2022-11-02 op .ucdname = "SA",
273 3448adb0 2022-11-02 op },
274 3448adb0 2022-11-02 op {
275 3448adb0 2022-11-02 op .enumname = "TMP_SA_WITH_MN_OR_MC",
276 3448adb0 2022-11-02 op .file = FILE_LINE,
277 3448adb0 2022-11-02 op .ucdname = "SA",
278 3448adb0 2022-11-02 op },
279 3448adb0 2022-11-02 op {
280 3448adb0 2022-11-02 op .enumname = "TMP_SG",
281 3448adb0 2022-11-02 op .file = FILE_LINE,
282 3448adb0 2022-11-02 op .ucdname = "SG",
283 3448adb0 2022-11-02 op },
284 3448adb0 2022-11-02 op {
285 3448adb0 2022-11-02 op .enumname = "TMP_EAW_H",
286 3448adb0 2022-11-02 op .file = FILE_EAW,
287 3448adb0 2022-11-02 op .ucdname = "H",
288 3448adb0 2022-11-02 op },
289 3448adb0 2022-11-02 op {
290 3448adb0 2022-11-02 op .enumname = "TMP_EAW_W",
291 3448adb0 2022-11-02 op .file = FILE_EAW,
292 3448adb0 2022-11-02 op .ucdname = "W",
293 3448adb0 2022-11-02 op },
294 3448adb0 2022-11-02 op {
295 3448adb0 2022-11-02 op .enumname = "TMP_EAW_F",
296 3448adb0 2022-11-02 op .file = FILE_EAW,
297 3448adb0 2022-11-02 op .ucdname = "F",
298 3448adb0 2022-11-02 op },
299 3448adb0 2022-11-02 op };
300 3448adb0 2022-11-02 op
301 3448adb0 2022-11-02 op static uint_least8_t
302 3448adb0 2022-11-02 op handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
303 3448adb0 2022-11-02 op {
304 3448adb0 2022-11-02 op uint_least8_t result = prop2;
305 3448adb0 2022-11-02 op char *target = NULL;
306 3448adb0 2022-11-02 op
307 3448adb0 2022-11-02 op (void)cp;
308 3448adb0 2022-11-02 op
309 3448adb0 2022-11-02 op if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
310 3448adb0 2022-11-02 op !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
311 3448adb0 2022-11-02 op !strcmp(line_break_property[prop1].enumname, "TMP_EAW_F")) ||
312 3448adb0 2022-11-02 op (!strcmp(line_break_property[prop2].enumname, "TMP_EAW_H") ||
313 3448adb0 2022-11-02 op !strcmp(line_break_property[prop2].enumname, "TMP_EAW_W") ||
314 3448adb0 2022-11-02 op !strcmp(line_break_property[prop2].enumname, "TMP_EAW_F"))) {
315 3448adb0 2022-11-02 op if (!strcmp(line_break_property[prop1].enumname, "CP_WITHOUT_EAW_HWF") ||
316 3448adb0 2022-11-02 op !strcmp(line_break_property[prop2].enumname, "CP_WITHOUT_EAW_HWF")) {
317 3448adb0 2022-11-02 op target = "CP_WITH_EAW_HWF";
318 3448adb0 2022-11-02 op } else if (!strcmp(line_break_property[prop1].enumname, "OP_WITHOUT_EAW_HWF") ||
319 3448adb0 2022-11-02 op !strcmp(line_break_property[prop2].enumname, "OP_WITHOUT_EAW_HWF")) {
320 3448adb0 2022-11-02 op target = "OP_WITH_EAW_HWF";
321 3448adb0 2022-11-02 op } else {
322 3448adb0 2022-11-02 op /* ignore EAW for the rest */
323 3448adb0 2022-11-02 op if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
324 3448adb0 2022-11-02 op !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
325 3448adb0 2022-11-02 op !strcmp(line_break_property[prop1].enumname, "TMP_EAW_F"))) {
326 3448adb0 2022-11-02 op result = prop2;
327 3448adb0 2022-11-02 op } else {
328 3448adb0 2022-11-02 op result = prop1;
329 3448adb0 2022-11-02 op }
330 3448adb0 2022-11-02 op }
331 3448adb0 2022-11-02 op } else if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
332 3448adb0 2022-11-02 op !strcmp(line_break_property[prop1].enumname, "TMP_MC")) ||
333 3448adb0 2022-11-02 op (!strcmp(line_break_property[prop2].enumname, "TMP_MN") ||
334 3448adb0 2022-11-02 op !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) {
335 3448adb0 2022-11-02 op if (!strcmp(line_break_property[prop1].enumname, "SA_WITHOUT_MN_OR_MC") ||
336 3448adb0 2022-11-02 op !strcmp(line_break_property[prop2].enumname, "SA_WITHOUT_MN_OR_MC")) {
337 3448adb0 2022-11-02 op target = "SA_WITH_MN_OR_MC";
338 3448adb0 2022-11-02 op } else {
339 3448adb0 2022-11-02 op /* ignore Mn and Mc for the rest */
340 3448adb0 2022-11-02 op if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
341 3448adb0 2022-11-02 op !strcmp(line_break_property[prop1].enumname, "TMP_MC"))) {
342 3448adb0 2022-11-02 op result = prop2;
343 3448adb0 2022-11-02 op } else {
344 3448adb0 2022-11-02 op result = prop1;
345 3448adb0 2022-11-02 op }
346 3448adb0 2022-11-02 op }
347 3448adb0 2022-11-02 op } else if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
348 3448adb0 2022-11-02 op !strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
349 3448adb0 2022-11-02 op if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
350 3448adb0 2022-11-02 op !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
351 3448adb0 2022-11-02 op target = "BOTH_CN_EXTPICT";
352 3448adb0 2022-11-02 op } else {
353 3448adb0 2022-11-02 op /* ignore Cn for all the other properties */
354 3448adb0 2022-11-02 op if (!strcmp(line_break_property[prop1].enumname, "TMP_CN")) {
355 3448adb0 2022-11-02 op result = prop2;
356 3448adb0 2022-11-02 op } else {
357 3448adb0 2022-11-02 op result = prop1;
358 3448adb0 2022-11-02 op }
359 3448adb0 2022-11-02 op }
360 3448adb0 2022-11-02 op } else if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
361 3448adb0 2022-11-02 op !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
362 3448adb0 2022-11-02 op if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
363 3448adb0 2022-11-02 op !strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
364 3448adb0 2022-11-02 op target = "BOTH_CN_EXTPICT";
365 3448adb0 2022-11-02 op } else {
366 3448adb0 2022-11-02 op /* ignore Extended_Pictographic for all the other properties */
367 3448adb0 2022-11-02 op if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
368 3448adb0 2022-11-02 op result = prop2;
369 3448adb0 2022-11-02 op } else {
370 3448adb0 2022-11-02 op result = prop1;
371 3448adb0 2022-11-02 op }
372 3448adb0 2022-11-02 op }
373 3448adb0 2022-11-02 op } else {
374 3448adb0 2022-11-02 op fprintf(stderr, "handle_conflict: Cannot handle conflict %s <- %s.\n",
375 3448adb0 2022-11-02 op line_break_property[prop1].enumname, line_break_property[prop2].enumname);
376 3448adb0 2022-11-02 op exit(1);
377 3448adb0 2022-11-02 op }
378 3448adb0 2022-11-02 op
379 3448adb0 2022-11-02 op if (target) {
380 3448adb0 2022-11-02 op for (result = 0; result < LEN(line_break_property); result++) {
381 3448adb0 2022-11-02 op if (!strcmp(line_break_property[result].enumname,
382 3448adb0 2022-11-02 op target)) {
383 3448adb0 2022-11-02 op break;
384 3448adb0 2022-11-02 op }
385 3448adb0 2022-11-02 op }
386 3448adb0 2022-11-02 op if (result == LEN(line_break_property)) {
387 3448adb0 2022-11-02 op fprintf(stderr, "handle_conflict: Internal error.\n");
388 3448adb0 2022-11-02 op exit(1);
389 3448adb0 2022-11-02 op }
390 3448adb0 2022-11-02 op }
391 3448adb0 2022-11-02 op
392 3448adb0 2022-11-02 op return result;
393 3448adb0 2022-11-02 op }
394 3448adb0 2022-11-02 op
395 3448adb0 2022-11-02 op static uint_least8_t
396 3448adb0 2022-11-02 op post_process(uint_least8_t prop)
397 3448adb0 2022-11-02 op {
398 3448adb0 2022-11-02 op const char *target = NULL;
399 3448adb0 2022-11-02 op uint_least8_t result;
400 3448adb0 2022-11-02 op
401 3448adb0 2022-11-02 op /* LB1 */
402 3448adb0 2022-11-02 op if (!strcmp(line_break_property[prop].enumname, "TMP_AI") ||
403 3448adb0 2022-11-02 op !strcmp(line_break_property[prop].enumname, "TMP_SG") ||
404 3448adb0 2022-11-02 op !strcmp(line_break_property[prop].enumname, "TMP_XX")) {
405 3448adb0 2022-11-02 op /* map AI, SG and XX to AL */
406 3448adb0 2022-11-02 op target = "AL";
407 3448adb0 2022-11-02 op } else if (!strcmp(line_break_property[prop].enumname, "TMP_SA_WITH_MN_OR_MC")) {
408 3448adb0 2022-11-02 op /* map SA (with General_Category Mn or Mc) to CM */
409 3448adb0 2022-11-02 op target = "CM";
410 3448adb0 2022-11-02 op } else if (!strcmp(line_break_property[prop].enumname, "TMP_SA_WITHOUT_MN_OR_MC")) {
411 3448adb0 2022-11-02 op /* map SA (without General_Category Mn or Mc) to AL */
412 3448adb0 2022-11-02 op target = "AL";
413 3448adb0 2022-11-02 op } else if (!strcmp(line_break_property[prop].enumname, "TMP_CJ")) {
414 3448adb0 2022-11-02 op /* map CJ to NS */
415 3448adb0 2022-11-02 op target = "NS";
416 3448adb0 2022-11-02 op } else if (!strcmp(line_break_property[prop].enumname, "TMP_CN") ||
417 3448adb0 2022-11-02 op !strcmp(line_break_property[prop].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
418 3448adb0 2022-11-02 op !strcmp(line_break_property[prop].enumname, "TMP_MN") ||
419 3448adb0 2022-11-02 op !strcmp(line_break_property[prop].enumname, "TMP_MC") ||
420 3448adb0 2022-11-02 op !strcmp(line_break_property[prop].enumname, "TMP_EAW_H") ||
421 3448adb0 2022-11-02 op !strcmp(line_break_property[prop].enumname, "TMP_EAW_W") ||
422 3448adb0 2022-11-02 op !strcmp(line_break_property[prop].enumname, "TMP_EAW_F")) {
423 3448adb0 2022-11-02 op /* map all the temporary classes "residue" to AL */
424 3448adb0 2022-11-02 op target = "AL";
425 3448adb0 2022-11-02 op }
426 3448adb0 2022-11-02 op
427 3448adb0 2022-11-02 op if (target) {
428 3448adb0 2022-11-02 op for (result = 0; result < LEN(line_break_property); result++) {
429 3448adb0 2022-11-02 op if (!strcmp(line_break_property[result].enumname,
430 3448adb0 2022-11-02 op target)) {
431 3448adb0 2022-11-02 op break;
432 3448adb0 2022-11-02 op }
433 3448adb0 2022-11-02 op }
434 3448adb0 2022-11-02 op if (result == LEN(line_break_property)) {
435 3448adb0 2022-11-02 op fprintf(stderr, "handle_conflict: Internal error.\n");
436 3448adb0 2022-11-02 op exit(1);
437 3448adb0 2022-11-02 op }
438 3448adb0 2022-11-02 op
439 3448adb0 2022-11-02 op return result;
440 3448adb0 2022-11-02 op } else {
441 3448adb0 2022-11-02 op return prop;
442 3448adb0 2022-11-02 op }
443 3448adb0 2022-11-02 op }
444 3448adb0 2022-11-02 op
445 3448adb0 2022-11-02 op int
446 3448adb0 2022-11-02 op main(int argc, char *argv[])
447 3448adb0 2022-11-02 op {
448 3448adb0 2022-11-02 op (void)argc;
449 3448adb0 2022-11-02 op
450 3448adb0 2022-11-02 op properties_generate_break_property(line_break_property,
451 3448adb0 2022-11-02 op LEN(line_break_property),
452 3448adb0 2022-11-02 op handle_conflict, post_process,
453 3448adb0 2022-11-02 op "line_break", argv[0]);
454 3448adb0 2022-11-02 op
455 3448adb0 2022-11-02 op return 0;
456 3448adb0 2022-11-02 op }