1 /* See LICENSE file for copyright and license details. */
/*
 * Paths of the data files this generator works with (East Asian
 * width, emoji data and line-break classes).
 * NOTE(review): presumably referenced from the .file fields of the
 * line_break_property table - confirm.
 */
8 #define FILE_EAW "data/EastAsianWidth.txt"
9 #define FILE_EMOJI "data/emoji-data.txt"
10 #define FILE_LINE "data/LineBreak.txt"
/*
 * Table of line-break property classes. main() hands it, together
 * with its length, to properties_generate_break_property().
 *
 * handle_conflict() and post_process() identify classes purely by
 * comparing the enumname strings of the entries, so every name used
 * in those functions has to match an enumname in this table exactly.
 * The TMP_-prefixed entries listed here are all tested for in
 * post_process().
 */
12 static const struct property_spec line_break_property[] = {
19 * Both extended pictographic and cn are large classes,
20 * but we are only interested in their intersection for LB30b,
21 * so we have the following two temporary classes. At first
22 * the extpict-class is filled, then the cn-class, which leads
23 * to conflicts (that we handle by putting them in the "proper"
24 * class BOTH_CN_EXTPICT). We make use of the fact that there
25 * is no intersection between AL and Cn.
27 * Any consecutive conflicts are permitted to overwrite
28 * TMP_EXTENDED_PICTOGRAPHIC and TMP_CN, because we don't need
29 * them, and in the final postprocessing we "reset" all
30 * remaining matches (that then didn't fit any of the other
31 * classes) to the generic class AL.
39 .enumname = "TMP_EXTENDED_PICTOGRAPHIC",
41 .ucdname = "Extended_Pictographic",
43 /* end of special block */
65 .enumname = "BOTH_CN_EXTPICT",
85 .enumname = "CP_WITHOUT_EAW_HWF",
90 .enumname = "CP_WITH_EAW_HWF",
190 .enumname = "OP_WITHOUT_EAW_HWF",
195 .enumname = "OP_WITH_EAW_HWF",
245 .enumname = "TMP_AI",
250 .enumname = "TMP_CJ",
255 .enumname = "TMP_XX",
260 .enumname = "TMP_MN",
265 .enumname = "TMP_MC",
270 .enumname = "TMP_SA_WITHOUT_MN_OR_MC",
275 .enumname = "TMP_SA_WITH_MN_OR_MC",
280 .enumname = "TMP_SG",
285 .enumname = "TMP_EAW_H",
290 .enumname = "TMP_EAW_W",
295 .enumname = "TMP_EAW_F",
302 handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
304 uint_least8_t result = prop2;
/*
 * Conflict resolver handed to properties_generate_break_property()
 * in main(). prop1 and prop2 are indices into line_break_property;
 * every decision below is made by comparing the enumname strings of
 * those two entries. result starts out as prop2.
 *
 * First case: at least one side is a temporary East-Asian-width
 * class (TMP_EAW_H, TMP_EAW_W or TMP_EAW_F).
 */
309 if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
310 !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
311 !strcmp(line_break_property[prop1].enumname, "TMP_EAW_F")) ||
312 (!strcmp(line_break_property[prop2].enumname, "TMP_EAW_H") ||
313 !strcmp(line_break_property[prop2].enumname, "TMP_EAW_W") ||
314 !strcmp(line_break_property[prop2].enumname, "TMP_EAW_F"))) {
/*
 * a CP or OP class without EAW H/W/F is upgraded to its
 * counterpart with EAW H/W/F
 */
315 if (!strcmp(line_break_property[prop1].enumname, "CP_WITHOUT_EAW_HWF") ||
316 !strcmp(line_break_property[prop2].enumname, "CP_WITHOUT_EAW_HWF")) {
317 target = "CP_WITH_EAW_HWF";
318 } else if (!strcmp(line_break_property[prop1].enumname, "OP_WITHOUT_EAW_HWF") ||
319 !strcmp(line_break_property[prop2].enumname, "OP_WITHOUT_EAW_HWF")) {
320 target = "OP_WITH_EAW_HWF";
322 /* ignore EAW for the rest */
323 if ((!strcmp(line_break_property[prop1].enumname, "TMP_EAW_H") ||
324 !strcmp(line_break_property[prop1].enumname, "TMP_EAW_W") ||
325 !strcmp(line_break_property[prop1].enumname, "TMP_EAW_F"))) {
331 } else if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
332 !strcmp(line_break_property[prop1].enumname, "TMP_MC")) ||
333 (!strcmp(line_break_property[prop2].enumname, "TMP_MN") ||
334 !strcmp(line_break_property[prop2].enumname, "TMP_MC"))) {
/*
 * Second case: at least one side is TMP_MN or TMP_MC.
 *
 * NOTE(review): the table names the SA classes
 * "TMP_SA_WITHOUT_MN_OR_MC" and "TMP_SA_WITH_MN_OR_MC", but the
 * strings used here lack the "TMP_" prefix. As written, the
 * comparison below can never match, and the target name would not
 * be found by the lookup at the end of this function. This looks
 * like a bug - confirm and align the names with the table.
 */
335 if (!strcmp(line_break_property[prop1].enumname, "SA_WITHOUT_MN_OR_MC") ||
336 !strcmp(line_break_property[prop2].enumname, "SA_WITHOUT_MN_OR_MC")) {
337 target = "SA_WITH_MN_OR_MC";
339 /* ignore Mn and Mc for the rest */
340 if ((!strcmp(line_break_property[prop1].enumname, "TMP_MN") ||
341 !strcmp(line_break_property[prop1].enumname, "TMP_MC"))) {
347 } else if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
348 !strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
/*
 * Third case: one side is TMP_CN. If the other side is
 * TMP_EXTENDED_PICTOGRAPHIC the code point belongs to the
 * intersection class BOTH_CN_EXTPICT.
 */
349 if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
350 !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
351 target = "BOTH_CN_EXTPICT";
353 /* ignore Cn for all the other properties */
354 if (!strcmp(line_break_property[prop1].enumname, "TMP_CN")) {
360 } else if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
361 !strcmp(line_break_property[prop2].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
/*
 * Fourth case: one side is TMP_EXTENDED_PICTOGRAPHIC.
 *
 * NOTE(review): this branch is only reached when the TMP_CN test
 * of the preceding else-if was false for both prop1 and prop2, so
 * the TMP_CN check below appears to be unreachable - confirm.
 */
362 if (!strcmp(line_break_property[prop1].enumname, "TMP_CN") ||
363 !strcmp(line_break_property[prop2].enumname, "TMP_CN")) {
364 target = "BOTH_CN_EXTPICT";
366 /* ignore Extended_Pictographic for all the other properties */
367 if (!strcmp(line_break_property[prop1].enumname, "TMP_EXTENDED_PICTOGRAPHIC")) {
/* report the pair of class names that could not be resolved */
374 fprintf(stderr, "handle_conflict: Cannot handle conflict %s <- %s.\n",
375 line_break_property[prop1].enumname, line_break_property[prop2].enumname);
/*
 * linear search of the table by enumname; result ends up as the
 * index of the matching entry, or as LEN(line_break_property) when
 * the loop runs to completion
 */
380 for (result = 0; result < LEN(line_break_property); result++) {
381 if (!strcmp(line_break_property[result].enumname,
/* the loop ran off the end of the table: no entry matched */
386 if (result == LEN(line_break_property)) {
387 fprintf(stderr, "handle_conflict: Internal error.\n");
396 post_process(uint_least8_t prop)
398 const char *target = NULL;
399 uint_least8_t result;
/*
 * Post-processing callback handed to
 * properties_generate_break_property() in main(). prop is an index
 * into line_break_property; the chain below classifies it by the
 * enumname of that entry, covering the temporary (TMP_-prefixed)
 * classes.
 */
402 if (!strcmp(line_break_property[prop].enumname, "TMP_AI") ||
403 !strcmp(line_break_property[prop].enumname, "TMP_SG") ||
404 !strcmp(line_break_property[prop].enumname, "TMP_XX")) {
405 /* map AI, SG and XX to AL */
407 } else if (!strcmp(line_break_property[prop].enumname, "TMP_SA_WITH_MN_OR_MC")) {
408 /* map SA (with General_Category Mn or Mc) to CM */
410 } else if (!strcmp(line_break_property[prop].enumname, "TMP_SA_WITHOUT_MN_OR_MC")) {
411 /* map SA (without General_Category Mn or Mc) to AL */
413 } else if (!strcmp(line_break_property[prop].enumname, "TMP_CJ")) {
416 } else if (!strcmp(line_break_property[prop].enumname, "TMP_CN") ||
417 !strcmp(line_break_property[prop].enumname, "TMP_EXTENDED_PICTOGRAPHIC") ||
418 !strcmp(line_break_property[prop].enumname, "TMP_MN") ||
419 !strcmp(line_break_property[prop].enumname, "TMP_MC") ||
420 !strcmp(line_break_property[prop].enumname, "TMP_EAW_H") ||
421 !strcmp(line_break_property[prop].enumname, "TMP_EAW_W") ||
422 !strcmp(line_break_property[prop].enumname, "TMP_EAW_F")) {
423 /* map all the temporary classes "residue" to AL */
/*
 * linear search of the table by enumname; result ends up as the
 * index of the matching entry, or as LEN(line_break_property) when
 * the loop runs to completion
 */
428 for (result = 0; result < LEN(line_break_property); result++) {
429 if (!strcmp(line_break_property[result].enumname,
434 if (result == LEN(line_break_property)) {
/*
 * NOTE(review): this message is emitted from post_process() but is
 * prefixed "handle_conflict:" - likely a copy-paste leftover that
 * makes the failure harder to locate; confirm and correct.
 */
435 fprintf(stderr, "handle_conflict: Internal error.\n");
446 main(int argc, char *argv[])
/*
 * Run the generator: pass the property table and its length, the
 * two callbacks handle_conflict() and post_process(), the name
 * "line_break" and argv[0] to properties_generate_break_property().
 */
450 properties_generate_break_property(line_break_property,
451 LEN(line_break_property),
452 handle_conflict, post_process,
453 "line_break", argv[0]);