/* See LICENSE file for copyright and license details. */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "util.h"
/* Data files referenced by the .file members of the property table below. */
#define FILE_EMOJI "data/emoji-data.txt"
#define FILE_WORD  "data/WordBreakProperty.txt"
11 static const struct property_spec word_break_property[] = {
12 {
13 .enumname = "OTHER",
14 .file = NULL,
15 .ucdname = NULL,
16 },
17 {
18 .enumname = "ALETTER",
19 .file = FILE_WORD,
20 .ucdname = "ALetter",
21 },
22 {
23 .enumname = "BOTH_ALETTER_EXTPICT",
24 .file = NULL,
25 .ucdname = NULL,
26 },
27 {
28 .enumname = "CR",
29 .file = FILE_WORD,
30 .ucdname = "CR",
31 },
32 {
33 .enumname = "DOUBLE_QUOTE",
34 .file = FILE_WORD,
35 .ucdname = "Double_Quote",
36 },
37 {
38 .enumname = "EXTEND",
39 .file = FILE_WORD,
40 .ucdname = "Extend",
41 },
42 {
43 .enumname = "EXTENDED_PICTOGRAPHIC",
44 .file = FILE_EMOJI,
45 .ucdname = "Extended_Pictographic",
46 },
47 {
48 .enumname = "EXTENDNUMLET",
49 .file = FILE_WORD,
50 .ucdname = "ExtendNumLet",
51 },
52 {
53 .enumname = "FORMAT",
54 .file = FILE_WORD,
55 .ucdname = "Format",
56 },
57 {
58 .enumname = "HEBREW_LETTER",
59 .file = FILE_WORD,
60 .ucdname = "Hebrew_Letter",
61 },
62 {
63 .enumname = "KATAKANA",
64 .file = FILE_WORD,
65 .ucdname = "Katakana",
66 },
67 {
68 .enumname = "LF",
69 .file = FILE_WORD,
70 .ucdname = "LF",
71 },
72 {
73 .enumname = "MIDLETTER",
74 .file = FILE_WORD,
75 .ucdname = "MidLetter",
76 },
77 {
78 .enumname = "MIDNUM",
79 .file = FILE_WORD,
80 .ucdname = "MidNum",
81 },
82 {
83 .enumname = "MIDNUMLET",
84 .file = FILE_WORD,
85 .ucdname = "MidNumLet",
86 },
87 {
88 .enumname = "NEWLINE",
89 .file = FILE_WORD,
90 .ucdname = "Newline",
91 },
92 {
93 .enumname = "NUMERIC",
94 .file = FILE_WORD,
95 .ucdname = "Numeric",
96 },
97 {
98 .enumname = "REGIONAL_INDICATOR",
99 .file = FILE_WORD,
100 .ucdname = "Regional_Indicator",
101 },
103 .enumname = "SINGLE_QUOTE",
104 .file = FILE_WORD,
105 .ucdname = "Single_Quote",
106 },
108 .enumname = "WSEGSPACE",
109 .file = FILE_WORD,
110 .ucdname = "WSegSpace",
111 },
113 .enumname = "ZWJ",
114 .file = FILE_WORD,
115 .ucdname = "ZWJ",
116 },
117 };
119 static uint_least8_t
120 handle_conflict(uint_least32_t cp, uint_least8_t prop1, uint_least8_t prop2)
122 uint_least8_t result;
124 (void)cp;
126 if ((!strcmp(word_break_property[prop1].enumname, "ALETTER") &&
127 !strcmp(word_break_property[prop2].enumname, "EXTENDED_PICTOGRAPHIC")) ||
128 (!strcmp(word_break_property[prop1].enumname, "EXTENDED_PICTOGRAPHIC") &&
129 !strcmp(word_break_property[prop2].enumname, "ALETTER"))) {
130 for (result = 0; result < LEN(word_break_property); result++) {
131 if (!strcmp(word_break_property[result].enumname,
132 "BOTH_ALETTER_EXTPICT")) {
133 break;
136 if (result == LEN(word_break_property)) {
137 fprintf(stderr, "handle_conflict: Internal error.\n");
138 exit(1);
140 } else {
141 fprintf(stderr, "handle_conflict: Cannot handle conflict.\n");
142 exit(1);
145 return result;
148 int
149 main(int argc, char *argv[])
151 (void)argc;
153 properties_generate_break_property(word_break_property,
154 LEN(word_break_property),
155 handle_conflict, NULL, "word_break",
156 argv[0]);
158 return 0;