Blame


1 3448adb0 2022-11-02 op /* See LICENSE file for copyright and license details. */
2 3448adb0 2022-11-02 op #include <stdbool.h>
3 3448adb0 2022-11-02 op #include <ctype.h>
4 3448adb0 2022-11-02 op #include <errno.h>
5 3448adb0 2022-11-02 op #include <inttypes.h>
6 3448adb0 2022-11-02 op #include <stdbool.h>
7 3448adb0 2022-11-02 op #include <stddef.h>
8 3448adb0 2022-11-02 op #include <stdint.h>
9 3448adb0 2022-11-02 op #include <stdlib.h>
10 3448adb0 2022-11-02 op #include <stdio.h>
11 3448adb0 2022-11-02 op #include <string.h>
12 3448adb0 2022-11-02 op
13 3448adb0 2022-11-02 op #include "util.h"
14 3448adb0 2022-11-02 op
/* inclusive codepoint interval as filled in by range_parse() */
struct range {
	uint_least32_t lower; /* first codepoint of the interval */
	uint_least32_t upper; /* last codepoint of the interval */
};
19 3448adb0 2022-11-02 op
/*
 * state that reaches properties_callback() through the opaque
 * payload pointer of parse_file_with_callback()
 */
struct properties_payload {
	/* per-codepoint property table the set_value hook works on */
	struct properties *prop;

	/* property specifications and their number */
	const struct property_spec *spec;
	uint_least8_t speclen;

	/*
	 * hook invoked with (payload, codepoint, specification index)
	 * for each matching codepoint; a non-zero return is fatal
	 */
	int (*set_value)(struct properties_payload *, uint_least32_t,
	                 int_least64_t);

	/*
	 * optional hook (may be NULL) that takes the codepoint and two
	 * competing property values and returns the one to keep
	 */
	uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
	                                 uint_least8_t);
};
27 3448adb0 2022-11-02 op
/*
 * state that reaches break_test_callback() through the opaque
 * payload pointer of parse_file_with_callback()
 */
struct break_test_payload {
	struct break_test **test; /* address of the growing test array */
	size_t *testlen;          /* address of its element count */
};
33 3448adb0 2022-11-02 op
/*
 * resize p so that it can hold len elements of size bytes each
 *
 * returns NULL with errno set to ENOMEM if len * size does not fit
 * into a size_t, otherwise the result of the realloc()-call
 */
static void *
reallocate_array(void *p, size_t len, size_t size)
{
	const int overflows = (len != 0) && (SIZE_MAX / len < size);

	if (!overflows) {
		return realloc(p, len * size);
	}

	errno = ENOMEM;

	return NULL;
}
44 3448adb0 2022-11-02 op
/*
 * parse the first len bytes of str (which need not be NUL-terminated
 * there) as a hexadecimal codepoint and store the result in *cp
 *
 * returns 0 on success and 1, after printing a diagnostic to stderr,
 * if the input is empty, longer than 6 digits, contains a character
 * that is not a hexadecimal digit or exceeds 0x10FFFF
 */
int
hextocp(const char *str, size_t len, uint_least32_t *cp)
{
	size_t i;
	uint_least32_t digit;

	/* an empty string is not a number, do not silently turn it into 0 */
	if (len == 0) {
		fprintf(stderr, "hextocp: Empty input.\n");
		return 1;
	}

	/* the maximum valid codepoint is 0x10FFFF */
	if (len > 6) {
		fprintf(stderr, "hextocp: '%.*s' is too long.\n",
		        (int)len, str);
		return 1;
	}

	for (i = 0, *cp = 0; i < len; i++) {
		if (str[i] >= '0' && str[i] <= '9') {
			digit = (uint_least32_t)(str[i] - '0');
		} else if (str[i] >= 'a' && str[i] <= 'f') {
			digit = (uint_least32_t)(str[i] - 'a') + 10;
		} else if (str[i] >= 'A' && str[i] <= 'F') {
			digit = (uint_least32_t)(str[i] - 'A') + 10;
		} else {
			fprintf(stderr, "hextocp: '%.*s' is not hexadecimal.\n",
			        (int)len, str);
			return 1;
		}

		/* at most 6 digits (24 bits), so nothing is shifted out */
		*cp = (*cp << 4) | digit;
	}

	if (*cp > UINT32_C(0x10FFFF)) {
		fprintf(stderr, "hextocp: '%.*s' is too large.\n",
		        (int)len, str);
		return 1;
	}

	return 0;
}
87 3448adb0 2022-11-02 op
88 3448adb0 2022-11-02 op static int
89 3448adb0 2022-11-02 op range_parse(const char *str, struct range *range)
90 3448adb0 2022-11-02 op {
91 3448adb0 2022-11-02 op char *p;
92 3448adb0 2022-11-02 op
93 3448adb0 2022-11-02 op if ((p = strstr(str, "..")) == NULL) {
94 3448adb0 2022-11-02 op /* input has the form "XXXXXX" */
95 3448adb0 2022-11-02 op if (hextocp(str, strlen(str), &range->lower)) {
96 3448adb0 2022-11-02 op return 1;
97 3448adb0 2022-11-02 op }
98 3448adb0 2022-11-02 op range->upper = range->lower;
99 3448adb0 2022-11-02 op } else {
100 3448adb0 2022-11-02 op /* input has the form "XXXXXX..XXXXXX" */
101 3448adb0 2022-11-02 op if (hextocp(str, (size_t)(p - str), &range->lower) ||
102 3448adb0 2022-11-02 op hextocp(p + 2, strlen(p + 2), &range->upper)) {
103 3448adb0 2022-11-02 op return 1;
104 3448adb0 2022-11-02 op }
105 3448adb0 2022-11-02 op }
106 3448adb0 2022-11-02 op
107 3448adb0 2022-11-02 op return 0;
108 3448adb0 2022-11-02 op }
109 3448adb0 2022-11-02 op
/*
 * read the file fname line by line and, for every line that is neither
 * empty nor starts with '#', split it into ';'-separated fields
 * (leading and trailing spaces removed) and an optional comment (the
 * text after the first '#', leading spaces removed, NULL if absent)
 * and pass them to callback together with fname and payload
 *
 * the field- and comment-pointers refer to buffers that are reused for
 * the next line, so the callback has to copy what it wants to keep
 *
 * a failure to open or read the file, an allocation failure or a
 * non-zero return value of callback prints a diagnostic to stderr and
 * terminates the program with exit(1)
 */
void
parse_file_with_callback(const char *fname, int (*callback)(const char *,
                         char **, size_t, char *, void *), void *payload)
{
	FILE *fp;
	char *line = NULL, **field = NULL, *comment;
	size_t linebufsize = 0, i, fieldbufsize = 0, j, nfields, start;
	ssize_t len;

	/* open file */
	if (!(fp = fopen(fname, "r"))) {
		fprintf(stderr, "parse_file_with_callback: fopen '%s': %s.\n",
		        fname, strerror(errno));
		exit(1);
	}

	while ((len = getline(&line, &linebufsize, fp)) >= 0) {
		/* remove trailing newline */
		if (len > 0 && line[len - 1] == '\n') {
			line[len - 1] = '\0';
			len--;
		}

		/* skip empty lines and comment lines */
		if (len == 0 || line[0] == '#') {
			continue;
		}

		/* tokenize line into fields */
		for (i = 0, nfields = 0, comment = NULL; i < (size_t)len; i++) {
			/* skip leading whitespace */
			while (line[i] == ' ') {
				i++;
			}

			/* nothing before this index belongs to the field */
			start = i;

			/* check if we crashed into the comment */
			if (line[i] != '#') {
				/* extend field buffer, if necessary */
				if (++nfields > fieldbufsize) {
					if ((field = realloc(field, nfields *
					     sizeof(*field))) == NULL) {
						fprintf(stderr, "parse_file_with_"
						        "callback: realloc: %s.\n",
						        strerror(errno));
						exit(1);
					}
					fieldbufsize = nfields;
				}

				/* set current position as field start */
				field[nfields - 1] = &line[i];

				/* continue until we reach ';' or '#' or end */
				while (line[i] != ';' && line[i] != '#' &&
				       line[i] != '\0') {
					i++;
				}
			}

			if (line[i] == '#') {
				/* set comment-variable for later */
				comment = &line[i + 1];
			}

			/*
			 * go back over trailing whitespace and terminate
			 * the field there, but never in front of the field
			 * start: otherwise an empty field preceded by
			 * blanks would keep its delimiter and a line made
			 * up of blanks would be read before its beginning
			 */
			for (j = i; j > start && line[j - 1] == ' '; j--)
				;
			line[j] = '\0';

			/* if comment is set, we are done */
			if (comment != NULL) {
				break;
			}
		}

		/* skip leading whitespace in comment */
		while (comment != NULL && comment[0] == ' ') {
			comment++;
		}

		/* call callback function */
		if (callback(fname, field, nfields, comment, payload)) {
			fprintf(stderr, "parse_file_with_callback: "
			        "Malformed input.\n");
			exit(1);
		}
	}

	/* getline() also ends the loop on a read error, not only on EOF */
	if (ferror(fp)) {
		fprintf(stderr, "parse_file_with_callback: getline '%s': %s.\n",
		        fname, strerror(errno));
		exit(1);
	}

	free(line);
	free(field);
	fclose(fp);
}
205 3448adb0 2022-11-02 op
206 3448adb0 2022-11-02 op static int
207 3448adb0 2022-11-02 op properties_callback(const char *file, char **field, size_t nfields,
208 3448adb0 2022-11-02 op char *comment, void *payload)
209 3448adb0 2022-11-02 op {
210 3448adb0 2022-11-02 op /* prop always has the length 0x110000 */
211 3448adb0 2022-11-02 op struct properties_payload *p = (struct properties_payload *)payload;
212 3448adb0 2022-11-02 op struct range r;
213 3448adb0 2022-11-02 op uint_least8_t i;
214 3448adb0 2022-11-02 op uint_least32_t cp;
215 3448adb0 2022-11-02 op
216 3448adb0 2022-11-02 op (void)comment;
217 3448adb0 2022-11-02 op
218 3448adb0 2022-11-02 op if (nfields < 2) {
219 3448adb0 2022-11-02 op return 1;
220 3448adb0 2022-11-02 op }
221 3448adb0 2022-11-02 op
222 3448adb0 2022-11-02 op for (i = 0; i < p->speclen; i++) {
223 3448adb0 2022-11-02 op /* identify fitting file and identifier */
224 3448adb0 2022-11-02 op if (p->spec[i].file &&
225 3448adb0 2022-11-02 op !strcmp(p->spec[i].file, file) &&
226 3448adb0 2022-11-02 op (!strcmp(p->spec[i].ucdname, field[1]) ||
227 3448adb0 2022-11-02 op (comment != NULL && !strncmp(p->spec[i].ucdname, comment, strlen(p->spec[i].ucdname)) &&
228 3448adb0 2022-11-02 op comment[strlen(p->spec[i].ucdname)] == ' '))) {
229 3448adb0 2022-11-02 op /* parse range in first field */
230 3448adb0 2022-11-02 op if (range_parse(field[0], &r)) {
231 3448adb0 2022-11-02 op return 1;
232 3448adb0 2022-11-02 op }
233 3448adb0 2022-11-02 op
234 3448adb0 2022-11-02 op /* apply to all codepoints in the range */
235 3448adb0 2022-11-02 op for (cp = r.lower; cp <= r.upper; cp++) {
236 3448adb0 2022-11-02 op if (p->set_value(payload, cp, i)) {
237 3448adb0 2022-11-02 op exit(1);
238 3448adb0 2022-11-02 op }
239 3448adb0 2022-11-02 op }
240 3448adb0 2022-11-02 op break;
241 3448adb0 2022-11-02 op }
242 3448adb0 2022-11-02 op }
243 3448adb0 2022-11-02 op
244 3448adb0 2022-11-02 op return 0;
245 3448adb0 2022-11-02 op }
246 3448adb0 2022-11-02 op
247 3448adb0 2022-11-02 op void
248 3448adb0 2022-11-02 op properties_compress(const struct properties *prop,
249 3448adb0 2022-11-02 op struct properties_compressed *comp)
250 3448adb0 2022-11-02 op {
251 3448adb0 2022-11-02 op uint_least32_t cp, i;
252 3448adb0 2022-11-02 op
253 3448adb0 2022-11-02 op /* initialization */
254 3448adb0 2022-11-02 op if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * sizeof(*(comp->offset))))) {
255 3448adb0 2022-11-02 op fprintf(stderr, "malloc: %s\n", strerror(errno));
256 3448adb0 2022-11-02 op exit(1);
257 3448adb0 2022-11-02 op }
258 3448adb0 2022-11-02 op comp->data = NULL;
259 3448adb0 2022-11-02 op comp->datalen = 0;
260 3448adb0 2022-11-02 op
261 3448adb0 2022-11-02 op for (cp = 0; cp < UINT32_C(0x110000); cp++) {
262 3448adb0 2022-11-02 op for (i = 0; i < comp->datalen; i++) {
263 3448adb0 2022-11-02 op if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*prop))) {
264 3448adb0 2022-11-02 op /* found a match! */
265 3448adb0 2022-11-02 op comp->offset[cp] = i;
266 3448adb0 2022-11-02 op break;
267 3448adb0 2022-11-02 op }
268 3448adb0 2022-11-02 op }
269 3448adb0 2022-11-02 op if (i == comp->datalen) {
270 3448adb0 2022-11-02 op /*
271 3448adb0 2022-11-02 op * found no matching properties-struct, so
272 3448adb0 2022-11-02 op * add current properties to data and add the
273 3448adb0 2022-11-02 op * offset in the offset-table
274 3448adb0 2022-11-02 op */
275 3448adb0 2022-11-02 op if (!(comp->data = reallocate_array(comp->data,
276 3448adb0 2022-11-02 op ++(comp->datalen),
277 3448adb0 2022-11-02 op sizeof(*(comp->data))))) {
278 3448adb0 2022-11-02 op fprintf(stderr, "reallocate_array: %s\n",
279 3448adb0 2022-11-02 op strerror(errno));
280 3448adb0 2022-11-02 op exit(1);
281 3448adb0 2022-11-02 op }
282 3448adb0 2022-11-02 op memcpy(&(comp->data[comp->datalen - 1]), &(prop[cp]),
283 3448adb0 2022-11-02 op sizeof(*prop));
284 3448adb0 2022-11-02 op comp->offset[cp] = comp->datalen - 1;
285 3448adb0 2022-11-02 op }
286 3448adb0 2022-11-02 op }
287 3448adb0 2022-11-02 op }
288 3448adb0 2022-11-02 op
289 3448adb0 2022-11-02 op double
290 3448adb0 2022-11-02 op properties_get_major_minor(const struct properties_compressed *comp,
291 3448adb0 2022-11-02 op struct properties_major_minor *mm)
292 3448adb0 2022-11-02 op {
293 3448adb0 2022-11-02 op size_t i, j, compression_count = 0;
294 3448adb0 2022-11-02 op
295 3448adb0 2022-11-02 op /*
296 3448adb0 2022-11-02 op * we currently have an array comp->offset which maps the
297 3448adb0 2022-11-02 op * codepoints 0..0x110000 to offsets into comp->data.
298 3448adb0 2022-11-02 op * To improve cache-locality instead and allow a bit of
299 3448adb0 2022-11-02 op * compressing, instead of directly mapping a codepoint
300 3448adb0 2022-11-02 op * 0xAAAABB with comp->offset, we generate two arrays major
301 3448adb0 2022-11-02 op * and minor such that
302 3448adb0 2022-11-02 op * comp->offset(0xAAAABB) == minor[major[0xAAAA] + 0xBB]
303 3448adb0 2022-11-02 op * This yields a major-array of length 2^16 and a minor array
304 3448adb0 2022-11-02 op * of variable length depending on how many common subsequences
305 3448adb0 2022-11-02 op * can be filtered out.
306 3448adb0 2022-11-02 op */
307 3448adb0 2022-11-02 op
308 3448adb0 2022-11-02 op /* initialize */
309 3448adb0 2022-11-02 op if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))) {
310 3448adb0 2022-11-02 op fprintf(stderr, "malloc: %s\n", strerror(errno));
311 3448adb0 2022-11-02 op exit(1);
312 3448adb0 2022-11-02 op }
313 3448adb0 2022-11-02 op mm->minor = NULL;
314 3448adb0 2022-11-02 op mm->minorlen = 0;
315 3448adb0 2022-11-02 op
316 3448adb0 2022-11-02 op for (i = 0; i < (size_t)0x1100; i++) {
317 3448adb0 2022-11-02 op /*
318 3448adb0 2022-11-02 op * we now look at the cp-range (i << 8)..(i << 8 + 0xFF)
319 3448adb0 2022-11-02 op * and check if its corresponding offset-data already
320 3448adb0 2022-11-02 op * exists in minor (because then we just point there
321 3448adb0 2022-11-02 op * and need less storage)
322 3448adb0 2022-11-02 op */
323 3448adb0 2022-11-02 op for (j = 0; j + 0xFF < mm->minorlen; j++) {
324 3448adb0 2022-11-02 op if (!memcmp(&(comp->offset[i << 8]),
325 3448adb0 2022-11-02 op &(mm->minor[j]),
326 3448adb0 2022-11-02 op sizeof(*(comp->offset)) * 0x100)) {
327 3448adb0 2022-11-02 op break;
328 3448adb0 2022-11-02 op }
329 3448adb0 2022-11-02 op }
330 3448adb0 2022-11-02 op if (j + 0xFF < mm->minorlen) {
331 3448adb0 2022-11-02 op /* found an index */
332 3448adb0 2022-11-02 op compression_count++;
333 3448adb0 2022-11-02 op mm->major[i] = j;
334 3448adb0 2022-11-02 op } else {
335 3448adb0 2022-11-02 op /*
336 3448adb0 2022-11-02 op * add "new" sequence to minor and point to it
337 3448adb0 2022-11-02 op * in major
338 3448adb0 2022-11-02 op */
339 3448adb0 2022-11-02 op mm->minorlen += 0x100;
340 3448adb0 2022-11-02 op if (!(mm->minor = reallocate_array(mm->minor,
341 3448adb0 2022-11-02 op mm->minorlen,
342 3448adb0 2022-11-02 op sizeof(*(mm->minor))))) {
343 3448adb0 2022-11-02 op fprintf(stderr, "reallocate_array: %s\n",
344 3448adb0 2022-11-02 op strerror(errno));
345 3448adb0 2022-11-02 op exit(1);
346 3448adb0 2022-11-02 op }
347 3448adb0 2022-11-02 op memcpy(&(mm->minor[mm->minorlen - 0x100]),
348 3448adb0 2022-11-02 op &(comp->offset[i << 8]),
349 3448adb0 2022-11-02 op sizeof(*(mm->minor)) * 0x100);
350 3448adb0 2022-11-02 op mm->major[i] = mm->minorlen - 0x100;
351 3448adb0 2022-11-02 op }
352 3448adb0 2022-11-02 op }
353 3448adb0 2022-11-02 op
354 3448adb0 2022-11-02 op /* return compression ratio */
355 3448adb0 2022-11-02 op return (double)compression_count / 0x1100 * 100;
356 3448adb0 2022-11-02 op }
357 3448adb0 2022-11-02 op
/*
 * print the datalen values of data to stdout as the definition of a
 * static const array called name, eight values per line; the element
 * type is the first of uint_least{8,16,32,64}_t whose maximum is not
 * exceeded by the largest value
 */
void
properties_print_lookup_table(char *name, size_t *data, size_t datalen)
{
	const char *type, *sep;
	size_t i, maxval = 0;

	/* the largest value determines the element type */
	for (i = 0; i < datalen; i++) {
		if (maxval < data[i]) {
			maxval = data[i];
		}
	}

	if (maxval <= UINT_LEAST8_MAX) {
		type = "uint_least8_t";
	} else if (maxval <= UINT_LEAST16_MAX) {
		type = "uint_least16_t";
	} else if (maxval <= UINT_LEAST32_MAX) {
		type = "uint_least32_t";
	} else {
		type = "uint_least64_t";
	}

	printf("static const %s %s[] = {\n\t", type, name);
	for (i = 0; i < datalen; i++) {
		if (i + 1 == datalen) {
			/* last value, no trailing comma */
			sep = "\n";
		} else if ((i + 1) % 8 == 0) {
			/* line is full */
			sep = ",\n\t";
		} else {
			sep = ", ";
		}
		printf("%zu%s", data[i], sep);
	}
	printf("};\n");
}
389 3448adb0 2022-11-02 op
/*
 * print the definition of a static const array called name with the
 * element type given in type to stdout, eight values per line; the
 * i-th value is what get_value returns for payload and offset[i]
 */
void
properties_print_derived_lookup_table(char *name, char *type, size_t *offset, size_t offsetlen,
                                      int_least64_t (*get_value)(const struct properties *,
                                      size_t), const void *payload)
{
	const char *sep;
	int_least64_t value;
	size_t i;

	printf("static const %s %s[] = {\n\t", type, name);
	for (i = 0; i < offsetlen; i++) {
		value = get_value(payload, offset[i]);

		if (i + 1 == offsetlen) {
			/* last value, no trailing comma */
			sep = "\n";
		} else if ((i + 1) % 8 == 0) {
			/* line is full */
			sep = ",\n\t";
		} else {
			sep = ", ";
		}
		printf("%" PRIiLEAST64 "%s", value, sep);
	}
	printf("};\n");
}
411 3448adb0 2022-11-02 op
412 3448adb0 2022-11-02 op static void
413 3448adb0 2022-11-02 op properties_print_enum(const struct property_spec *spec, size_t speclen,
414 3448adb0 2022-11-02 op const char *enumname, const char *enumprefix)
415 3448adb0 2022-11-02 op {
416 3448adb0 2022-11-02 op size_t i;
417 3448adb0 2022-11-02 op
418 3448adb0 2022-11-02 op printf("enum %s {\n", enumname);
419 3448adb0 2022-11-02 op for (i = 0; i < speclen; i++) {
420 3448adb0 2022-11-02 op printf("\t%s_%s,\n", enumprefix, spec[i].enumname);
421 3448adb0 2022-11-02 op }
422 3448adb0 2022-11-02 op printf("\tNUM_%sS,\n};\n\n", enumprefix);
423 3448adb0 2022-11-02 op }
424 3448adb0 2022-11-02 op
425 3448adb0 2022-11-02 op static int
426 3448adb0 2022-11-02 op set_value_bp(struct properties_payload *payload, uint_least32_t cp,
427 3448adb0 2022-11-02 op int_least64_t value)
428 3448adb0 2022-11-02 op {
429 3448adb0 2022-11-02 op if (payload->prop[cp].property != 0) {
430 3448adb0 2022-11-02 op if (payload->handle_conflict == NULL) {
431 3448adb0 2022-11-02 op fprintf(stderr, "set_value_bp: "
432 3448adb0 2022-11-02 op "Unhandled character break property "
433 3448adb0 2022-11-02 op "overwrite for 0x%06X (%s <- %s).\n",
434 3448adb0 2022-11-02 op cp, payload->spec[payload->prop[cp].
435 3448adb0 2022-11-02 op property].enumname,
436 3448adb0 2022-11-02 op payload->spec[value].enumname);
437 3448adb0 2022-11-02 op return 1;
438 3448adb0 2022-11-02 op } else {
439 3448adb0 2022-11-02 op value = payload->handle_conflict(cp,
440 3448adb0 2022-11-02 op (uint_least8_t)payload->prop[cp].property,
441 3448adb0 2022-11-02 op (uint_least8_t)value);
442 3448adb0 2022-11-02 op }
443 3448adb0 2022-11-02 op }
444 3448adb0 2022-11-02 op payload->prop[cp].property = value;
445 3448adb0 2022-11-02 op
446 3448adb0 2022-11-02 op return 0;
447 3448adb0 2022-11-02 op }
448 3448adb0 2022-11-02 op
449 3448adb0 2022-11-02 op static int_least64_t
450 3448adb0 2022-11-02 op get_value_bp(const struct properties *prop, size_t offset)
451 3448adb0 2022-11-02 op {
452 3448adb0 2022-11-02 op return (uint_least8_t)prop[offset].property;
453 3448adb0 2022-11-02 op }
454 3448adb0 2022-11-02 op
455 3448adb0 2022-11-02 op void
456 3448adb0 2022-11-02 op properties_generate_break_property(const struct property_spec *spec,
457 3448adb0 2022-11-02 op uint_least8_t speclen,
458 3448adb0 2022-11-02 op uint_least8_t (*handle_conflict)(
459 3448adb0 2022-11-02 op uint_least32_t, uint_least8_t,
460 3448adb0 2022-11-02 op uint_least8_t), uint_least8_t
461 3448adb0 2022-11-02 op (*post_process)(uint_least8_t),
462 3448adb0 2022-11-02 op const char *prefix, const char *argv0)
463 3448adb0 2022-11-02 op {
464 3448adb0 2022-11-02 op struct properties_compressed comp;
465 3448adb0 2022-11-02 op struct properties_major_minor mm;
466 3448adb0 2022-11-02 op struct properties_payload payload;
467 3448adb0 2022-11-02 op struct properties *prop;
468 3448adb0 2022-11-02 op size_t i, j, prefixlen = strlen(prefix);
469 3448adb0 2022-11-02 op char buf1[64], prefix_uc[64], buf2[64], buf3[64], buf4[64];
470 3448adb0 2022-11-02 op
471 3448adb0 2022-11-02 op /* allocate property buffer for all 0x110000 codepoints */
472 3448adb0 2022-11-02 op if (!(prop = calloc(UINT32_C(0x110000), sizeof(*prop)))) {
473 3448adb0 2022-11-02 op fprintf(stderr, "calloc: %s\n", strerror(errno));
474 3448adb0 2022-11-02 op exit(1);
475 3448adb0 2022-11-02 op }
476 3448adb0 2022-11-02 op
477 3448adb0 2022-11-02 op /* generate data */
478 3448adb0 2022-11-02 op payload.prop = prop;
479 3448adb0 2022-11-02 op payload.spec = spec;
480 3448adb0 2022-11-02 op payload.speclen = speclen;
481 3448adb0 2022-11-02 op payload.set_value = set_value_bp;
482 3448adb0 2022-11-02 op payload.handle_conflict = handle_conflict;
483 3448adb0 2022-11-02 op
484 3448adb0 2022-11-02 op /* parse each file exactly once and ignore NULL-fields */
485 3448adb0 2022-11-02 op for (i = 0; i < speclen; i++) {
486 3448adb0 2022-11-02 op for (j = 0; j < i; j++) {
487 3448adb0 2022-11-02 op if (spec[i].file && spec[j].file &&
488 3448adb0 2022-11-02 op !strcmp(spec[i].file, spec[j].file)) {
489 3448adb0 2022-11-02 op /* file has already been parsed */
490 3448adb0 2022-11-02 op break;
491 3448adb0 2022-11-02 op }
492 3448adb0 2022-11-02 op }
493 3448adb0 2022-11-02 op if (i == j && spec[i].file) {
494 3448adb0 2022-11-02 op /* file has not been processed yet */
495 3448adb0 2022-11-02 op parse_file_with_callback(spec[i].file,
496 3448adb0 2022-11-02 op properties_callback,
497 3448adb0 2022-11-02 op &payload);
498 3448adb0 2022-11-02 op }
499 3448adb0 2022-11-02 op }
500 3448adb0 2022-11-02 op
501 3448adb0 2022-11-02 op /* post-processing */
502 3448adb0 2022-11-02 op if (post_process != NULL) {
503 3448adb0 2022-11-02 op for (i = 0; i < UINT32_C(0x110000); i++) {
504 3448adb0 2022-11-02 op payload.prop[i].property =
505 3448adb0 2022-11-02 op post_process((uint_least8_t)payload.prop[i].property);
506 3448adb0 2022-11-02 op }
507 3448adb0 2022-11-02 op }
508 3448adb0 2022-11-02 op
509 3448adb0 2022-11-02 op /* compress data */
510 3448adb0 2022-11-02 op printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n", argv0);
511 3448adb0 2022-11-02 op properties_compress(prop, &comp);
512 3448adb0 2022-11-02 op
513 3448adb0 2022-11-02 op fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0,
514 3448adb0 2022-11-02 op prefix, properties_get_major_minor(&comp, &mm));
515 3448adb0 2022-11-02 op
516 3448adb0 2022-11-02 op /* prepare names */
517 3448adb0 2022-11-02 op if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= LEN(buf1)) {
518 3448adb0 2022-11-02 op fprintf(stderr, "snprintf: String truncated.\n");
519 3448adb0 2022-11-02 op exit(1);
520 3448adb0 2022-11-02 op }
521 3448adb0 2022-11-02 op if (LEN(prefix_uc) + 1 < prefixlen) {
522 3448adb0 2022-11-02 op fprintf(stderr, "snprintf: Buffer too small.\n");
523 3448adb0 2022-11-02 op exit(1);
524 3448adb0 2022-11-02 op }
525 3448adb0 2022-11-02 op for (i = 0; i < prefixlen; i++) {
526 3448adb0 2022-11-02 op prefix_uc[i] = (char)toupper(prefix[i]);
527 3448adb0 2022-11-02 op }
528 3448adb0 2022-11-02 op prefix_uc[prefixlen] = '\0';
529 3448adb0 2022-11-02 op if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= LEN(buf2) ||
530 3448adb0 2022-11-02 op (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= LEN(buf3) ||
531 3448adb0 2022-11-02 op (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= LEN(buf4)) {
532 3448adb0 2022-11-02 op fprintf(stderr, "snprintf: String truncated.\n");
533 3448adb0 2022-11-02 op exit(1);
534 3448adb0 2022-11-02 op }
535 3448adb0 2022-11-02 op
536 3448adb0 2022-11-02 op /* print data */
537 3448adb0 2022-11-02 op properties_print_enum(spec, speclen, buf1, buf2);
538 3448adb0 2022-11-02 op properties_print_lookup_table(buf3, mm.major, 0x1100);
539 3448adb0 2022-11-02 op printf("\n");
540 3448adb0 2022-11-02 op properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor, mm.minorlen,
541 3448adb0 2022-11-02 op get_value_bp, comp.data);
542 3448adb0 2022-11-02 op
543 3448adb0 2022-11-02 op /* free data */
544 3448adb0 2022-11-02 op free(prop);
545 3448adb0 2022-11-02 op free(comp.data);
546 3448adb0 2022-11-02 op free(comp.offset);
547 3448adb0 2022-11-02 op free(mm.major);
548 3448adb0 2022-11-02 op free(mm.minor);
549 3448adb0 2022-11-02 op }
550 3448adb0 2022-11-02 op
551 3448adb0 2022-11-02 op static int
552 3448adb0 2022-11-02 op break_test_callback(const char *fname, char **field, size_t nfields,
553 3448adb0 2022-11-02 op char *comment, void *payload)
554 3448adb0 2022-11-02 op {
555 3448adb0 2022-11-02 op struct break_test *t,
556 3448adb0 2022-11-02 op **test = ((struct break_test_payload *)payload)->test;
557 3448adb0 2022-11-02 op size_t i, *testlen = ((struct break_test_payload *)payload)->testlen;
558 3448adb0 2022-11-02 op char *token;
559 3448adb0 2022-11-02 op
560 3448adb0 2022-11-02 op (void)fname;
561 3448adb0 2022-11-02 op
562 3448adb0 2022-11-02 op if (nfields < 1) {
563 3448adb0 2022-11-02 op return 1;
564 3448adb0 2022-11-02 op }
565 3448adb0 2022-11-02 op
566 3448adb0 2022-11-02 op /* append new testcase and initialize with zeroes */
567 3448adb0 2022-11-02 op if ((*test = realloc(*test, ++(*testlen) * sizeof(**test))) == NULL) {
568 3448adb0 2022-11-02 op fprintf(stderr, "break_test_callback: realloc: %s.\n",
569 3448adb0 2022-11-02 op strerror(errno));
570 3448adb0 2022-11-02 op return 1;
571 3448adb0 2022-11-02 op }
572 3448adb0 2022-11-02 op t = &(*test)[*testlen - 1];
573 3448adb0 2022-11-02 op memset(t, 0, sizeof(*t));
574 3448adb0 2022-11-02 op
575 3448adb0 2022-11-02 op /* parse testcase "<÷|×> <cp> <÷|×> ... <cp> <÷|×>" */
576 3448adb0 2022-11-02 op for (token = strtok(field[0], " "), i = 0; token != NULL; i++,
577 3448adb0 2022-11-02 op token = strtok(NULL, " ")) {
578 3448adb0 2022-11-02 op if (i % 2 == 0) {
579 3448adb0 2022-11-02 op /* delimiter or start of sequence */
580 3448adb0 2022-11-02 op if (i == 0 || !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */
581 3448adb0 2022-11-02 op /*
582 3448adb0 2022-11-02 op * '÷' indicates a breakpoint,
583 3448adb0 2022-11-02 op * the current length is done; allocate
584 3448adb0 2022-11-02 op * a new length field and set it to 0
585 3448adb0 2022-11-02 op */
586 3448adb0 2022-11-02 op if ((t->len = realloc(t->len,
587 3448adb0 2022-11-02 op ++t->lenlen * sizeof(*t->len))) == NULL) {
588 3448adb0 2022-11-02 op fprintf(stderr, "break_test_"
589 3448adb0 2022-11-02 op "callback: realloc: %s.\n",
590 3448adb0 2022-11-02 op strerror(errno));
591 3448adb0 2022-11-02 op return 1;
592 3448adb0 2022-11-02 op }
593 3448adb0 2022-11-02 op t->len[t->lenlen - 1] = 0;
594 3448adb0 2022-11-02 op } else if (!strncmp(token, "\xC3\x97", 2)) { /* UTF-8 */
595 3448adb0 2022-11-02 op /*
596 3448adb0 2022-11-02 op * '×' indicates a non-breakpoint, do nothing
597 3448adb0 2022-11-02 op */
598 3448adb0 2022-11-02 op } else {
599 3448adb0 2022-11-02 op fprintf(stderr, "break_test_callback: "
600 3448adb0 2022-11-02 op "Malformed delimiter '%s'.\n", token);
601 3448adb0 2022-11-02 op return 1;
602 3448adb0 2022-11-02 op }
603 3448adb0 2022-11-02 op } else {
604 3448adb0 2022-11-02 op /* add codepoint to cp-array */
605 3448adb0 2022-11-02 op if ((t->cp = realloc(t->cp, ++t->cplen *
606 3448adb0 2022-11-02 op sizeof(*t->cp))) == NULL) {
607 3448adb0 2022-11-02 op fprintf(stderr, "break_test_callback: "
608 3448adb0 2022-11-02 op "realloc: %s.\n", strerror(errno));
609 3448adb0 2022-11-02 op return 1;
610 3448adb0 2022-11-02 op }
611 3448adb0 2022-11-02 op if (hextocp(token, strlen(token), &t->cp[t->cplen - 1])) {
612 3448adb0 2022-11-02 op return 1;
613 3448adb0 2022-11-02 op }
614 3448adb0 2022-11-02 op if (t->lenlen > 0) {
615 3448adb0 2022-11-02 op t->len[t->lenlen - 1]++;
616 3448adb0 2022-11-02 op }
617 3448adb0 2022-11-02 op }
618 3448adb0 2022-11-02 op }
619 3448adb0 2022-11-02 op if (t->len[t->lenlen - 1] == 0) {
620 3448adb0 2022-11-02 op /*
621 3448adb0 2022-11-02 op * we allocated one more length than we needed because
622 3448adb0 2022-11-02 op * the breakpoint was at the end
623 3448adb0 2022-11-02 op */
624 3448adb0 2022-11-02 op t->lenlen--;
625 3448adb0 2022-11-02 op }
626 3448adb0 2022-11-02 op
627 3448adb0 2022-11-02 op /* store comment */
628 3448adb0 2022-11-02 op if (((*test)[*testlen - 1].descr = strdup(comment)) == NULL) {
629 3448adb0 2022-11-02 op fprintf(stderr, "break_test_callback: strdup: %s.\n",
630 3448adb0 2022-11-02 op strerror(errno));
631 3448adb0 2022-11-02 op return 1;
632 3448adb0 2022-11-02 op }
633 3448adb0 2022-11-02 op
634 3448adb0 2022-11-02 op return 0;
635 3448adb0 2022-11-02 op }
636 3448adb0 2022-11-02 op
637 3448adb0 2022-11-02 op void
638 3448adb0 2022-11-02 op break_test_list_parse(char *fname, struct break_test **test,
639 3448adb0 2022-11-02 op size_t *testlen)
640 3448adb0 2022-11-02 op {
641 3448adb0 2022-11-02 op struct break_test_payload pl = {
642 3448adb0 2022-11-02 op .test = test,
643 3448adb0 2022-11-02 op .testlen = testlen,
644 3448adb0 2022-11-02 op };
645 3448adb0 2022-11-02 op *test = NULL;
646 3448adb0 2022-11-02 op *testlen = 0;
647 3448adb0 2022-11-02 op
648 3448adb0 2022-11-02 op parse_file_with_callback(fname, break_test_callback, &pl);
649 3448adb0 2022-11-02 op }
650 3448adb0 2022-11-02 op
651 3448adb0 2022-11-02 op void
652 3448adb0 2022-11-02 op break_test_list_print(const struct break_test *test, size_t testlen,
653 3448adb0 2022-11-02 op const char *identifier, const char *progname)
654 3448adb0 2022-11-02 op {
655 3448adb0 2022-11-02 op size_t i, j;
656 3448adb0 2022-11-02 op
657 3448adb0 2022-11-02 op printf("/* Automatically generated by %s */\n"
658 3448adb0 2022-11-02 op "#include <stdint.h>\n#include <stddef.h>\n\n"
659 3448adb0 2022-11-02 op "#include \"../gen/types.h\"\n\n", progname);
660 3448adb0 2022-11-02 op
661 3448adb0 2022-11-02 op printf("static const struct break_test %s[] = {\n", identifier);
662 3448adb0 2022-11-02 op for (i = 0; i < testlen; i++) {
663 3448adb0 2022-11-02 op printf("\t{\n");
664 3448adb0 2022-11-02 op
665 3448adb0 2022-11-02 op printf("\t\t.cp = (uint_least32_t[]){");
666 3448adb0 2022-11-02 op for (j = 0; j < test[i].cplen; j++) {
667 3448adb0 2022-11-02 op printf(" UINT32_C(0x%06X)", test[i].cp[j]);
668 3448adb0 2022-11-02 op if (j + 1 < test[i].cplen) {
669 3448adb0 2022-11-02 op putchar(',');
670 3448adb0 2022-11-02 op }
671 3448adb0 2022-11-02 op }
672 3448adb0 2022-11-02 op printf(" },\n");
673 3448adb0 2022-11-02 op printf("\t\t.cplen = %zu,\n", test[i].cplen);
674 3448adb0 2022-11-02 op
675 3448adb0 2022-11-02 op printf("\t\t.len = (size_t[]){");
676 3448adb0 2022-11-02 op for (j = 0; j < test[i].lenlen; j++) {
677 3448adb0 2022-11-02 op printf(" %zu", test[i].len[j]);
678 3448adb0 2022-11-02 op if (j + 1 < test[i].lenlen) {
679 3448adb0 2022-11-02 op putchar(',');
680 3448adb0 2022-11-02 op }
681 3448adb0 2022-11-02 op }
682 3448adb0 2022-11-02 op printf(" },\n");
683 3448adb0 2022-11-02 op printf("\t\t.lenlen = %zu,\n", test[i].lenlen);
684 3448adb0 2022-11-02 op
685 3448adb0 2022-11-02 op printf("\t\t.descr = \"%s\",\n", test[i].descr);
686 3448adb0 2022-11-02 op
687 3448adb0 2022-11-02 op printf("\t},\n");
688 3448adb0 2022-11-02 op }
689 3448adb0 2022-11-02 op printf("};\n");
690 3448adb0 2022-11-02 op }
691 3448adb0 2022-11-02 op
692 3448adb0 2022-11-02 op void
693 3448adb0 2022-11-02 op break_test_list_free(struct break_test *test, size_t testlen)
694 3448adb0 2022-11-02 op {
695 3448adb0 2022-11-02 op size_t i;
696 3448adb0 2022-11-02 op
697 3448adb0 2022-11-02 op for (i = 0; i < testlen; i++) {
698 3448adb0 2022-11-02 op free(test[i].cp);
699 3448adb0 2022-11-02 op free(test[i].len);
700 3448adb0 2022-11-02 op free(test[i].descr);
701 3448adb0 2022-11-02 op }
702 3448adb0 2022-11-02 op
703 3448adb0 2022-11-02 op free(test);
704 3448adb0 2022-11-02 op }