1 3448adb0 2022-11-02 op /* See LICENSE file for copyright and license details. */
2 3448adb0 2022-11-02 op #include <stdbool.h>
3 3448adb0 2022-11-02 op #include <ctype.h>
4 3448adb0 2022-11-02 op #include <errno.h>
5 3448adb0 2022-11-02 op #include <inttypes.h>
6 3448adb0 2022-11-02 op #include <stdbool.h>
7 3448adb0 2022-11-02 op #include <stddef.h>
8 3448adb0 2022-11-02 op #include <stdint.h>
9 3448adb0 2022-11-02 op #include <stdlib.h>
10 3448adb0 2022-11-02 op #include <stdio.h>
11 3448adb0 2022-11-02 op #include <string.h>
13 3448adb0 2022-11-02 op #include "util.h"
/* inclusive range of codepoints [lower, upper] */
struct range {
	uint_least32_t lower;
	uint_least32_t upper;
};
/*
 * state handed to properties_callback() through the void-pointer
 * payload of parse_file_with_callback()
 */
struct properties_payload {
	/* per-codepoint property table that set_value writes into */
	struct properties *prop;
	/* property specifications and their count */
	const struct property_spec *spec;
	uint_least8_t speclen;
	/* stores a value for a codepoint; a non-zero return is an error */
	int (*set_value)(struct properties_payload *, uint_least32_t,
	                 int_least64_t);
	/* resolver for conflicting assignments; may be NULL */
	uint_least8_t (*handle_conflict)(uint_least32_t, uint_least8_t,
	                                 uint_least8_t);
};
/* output locations filled in by break_test_callback() */
struct break_test_payload
{
	/* growing array of parsed testcases */
	struct break_test **test;
	/* number of testcases stored in *test */
	size_t *testlen;
};
/*
 * Resize the allocation p so that it holds len elements of size bytes
 * each. Returns NULL with errno set to ENOMEM when len * size would
 * overflow size_t; otherwise returns whatever realloc() returns.
 */
static void *
reallocate_array(void *p, size_t len, size_t size)
{
	if (len > 0 && size > SIZE_MAX / len) {
		errno = ENOMEM;
		return NULL;
	}

	return realloc(p, len * size);
}
/*
 * Parse the first len bytes of str as a hexadecimal codepoint and
 * store the result in *cp. Returns 0 on success. On failure a
 * diagnostic is printed to stderr and 1 is returned; *cp must then
 * not be relied upon. Empty input, more than six digits,
 * non-hexadecimal characters and values above 0x10FFFF are rejected.
 */
static int
hextocp(const char *str, size_t len, uint_least32_t *cp)
{
	size_t i;
	int off;
	char relative;

	/* an empty string would otherwise silently parse as U+0000 */
	if (len == 0) {
		fprintf(stderr, "hextocp: Empty codepoint.\n");
		return 1;
	}

	/* the maximum valid codepoint is 0x10FFFF */
	if (len > 6) {
		fprintf(stderr, "hextocp: '%.*s' is too long.\n",
		        (int)len, str);
		return 1;
	}

	for (i = 0, *cp = 0; i < len; i++) {
		if (str[i] >= '0' && str[i] <= '9') {
			relative = '0';
			off = 0;
		} else if (str[i] >= 'a' && str[i] <= 'f') {
			relative = 'a';
			off = 10;
		} else if (str[i] >= 'A' && str[i] <= 'F') {
			relative = 'A';
			off = 10;
		} else {
			fprintf(stderr, "hextocp: '%.*s' is not hexadecimal.\n",
			        (int)len, str);
			return 1;
		}

		/* digit i carries the weight 16^(len - i - 1) */
		*cp += ((uint_least32_t)1 << (4 * (len - i - 1))) *
		       (uint_least32_t)(str[i] - relative + off);
	}

	if (*cp > UINT32_C(0x10FFFF)) {
		fprintf(stderr, "hextocp: '%.*s' is too large.\n",
		        (int)len, str);
		return 1;
	}

	return 0;
}
89 3448adb0 2022-11-02 op range_parse(const char *str, struct range *range)
93 3448adb0 2022-11-02 op if ((p = strstr(str, "..")) == NULL) {
94 3448adb0 2022-11-02 op /* input has the form "XXXXXX" */
95 3448adb0 2022-11-02 op if (hextocp(str, strlen(str), &range->lower)) {
98 3448adb0 2022-11-02 op range->upper = range->lower;
100 3448adb0 2022-11-02 op /* input has the form "XXXXXX..XXXXXX" */
101 3448adb0 2022-11-02 op if (hextocp(str, (size_t)(p - str), &range->lower) ||
102 3448adb0 2022-11-02 op hextocp(p + 2, strlen(p + 2), &range->upper)) {
/*
 * Read the file fname line by line, split every non-empty,
 * non-comment line into ';'-separated fields and hand them to
 * callback together with fname, the number of fields, the text
 * following a '#' on that line (NULL if there is none) and payload.
 *
 * Fields and the comment have surrounding spaces stripped. They point
 * into a line buffer that is reused for the next line, so the callback
 * must copy whatever it wants to keep. The process exits with status 1
 * if the file cannot be opened or read, if memory runs out, or if the
 * callback returns non-zero.
 */
void
parse_file_with_callback(const char *fname, int (*callback)(const char *,
                         char **, size_t, char *, void *), void *payload)
{
	FILE *fp;
	char *line = NULL, **field = NULL, *comment;
	size_t linebufsize = 0, i, fieldbufsize = 0, j, nfields;
	ssize_t len;

	/* open file */
	if (!(fp = fopen(fname, "r"))) {
		fprintf(stderr, "parse_file_with_callback: fopen '%s': %s.\n",
		        fname, strerror(errno));
		exit(1);
	}

	while ((len = getline(&line, &linebufsize, fp)) >= 0) {
		/* remove trailing newline */
		if (len > 0 && line[len - 1] == '\n') {
			line[len - 1] = '\0';
			len--;
		}

		/* skip empty lines and comment lines */
		if (len == 0 || line[0] == '#') {
			continue;
		}

		/* tokenize line into fields */
		for (i = 0, nfields = 0, comment = NULL; i < (size_t)len; i++) {
			/* skip leading whitespace */
			while (line[i] == ' ') {
				i++;
			}

			/* check if we crashed into the comment */
			if (line[i] != '#') {
				/* extend field buffer, if necessary */
				if (++nfields > fieldbufsize) {
					if ((field = realloc(field, nfields *
					     sizeof(*field))) == NULL) {
						fprintf(stderr, "parse_file_with_"
						        "callback: realloc: %s.\n",
						        strerror(errno));
						exit(1);
					}
					fieldbufsize = nfields;
				}

				/* set current position as field start */
				field[nfields - 1] = &line[i];

				/* continue until we reach ';' or '#' or end */
				while (line[i] != ';' && line[i] != '#' &&
				       line[i] != '\0') {
					i++;
				}
			}

			if (line[i] == '#') {
				/* set comment-variable for later */
				comment = &line[i + 1];
			}

			/*
			 * go back whitespace and terminate field there;
			 * j never drops below 0, so a delimiter preceded
			 * only by spaces cannot index before the buffer
			 */
			for (j = i; j > 0 && line[j - 1] == ' '; j--)
				;
			line[j] = '\0';

			/* if comment is set, we are done */
			if (comment != NULL) {
				break;
			}
		}

		/* skip leading whitespace in comment */
		while (comment != NULL && comment[0] == ' ') {
			comment++;
		}

		/* call callback function */
		if (callback(fname, field, nfields, comment, payload)) {
			fprintf(stderr, "parse_file_with_callback: "
			        "Malformed input.\n");
			exit(1);
		}
	}

	/* getline() returns -1 for both end-of-file and read errors */
	if (ferror(fp)) {
		fprintf(stderr, "parse_file_with_callback: getline '%s': %s.\n",
		        fname, strerror(errno));
		exit(1);
	}

	free(line);
	free(field);
	fclose(fp);
}
207 3448adb0 2022-11-02 op properties_callback(const char *file, char **field, size_t nfields,
208 3448adb0 2022-11-02 op char *comment, void *payload)
210 3448adb0 2022-11-02 op /* prop always has the length 0x110000 */
211 3448adb0 2022-11-02 op struct properties_payload *p = (struct properties_payload *)payload;
212 3448adb0 2022-11-02 op struct range r;
213 3448adb0 2022-11-02 op uint_least8_t i;
214 3448adb0 2022-11-02 op uint_least32_t cp;
216 3448adb0 2022-11-02 op (void)comment;
218 3448adb0 2022-11-02 op if (nfields < 2) {
222 3448adb0 2022-11-02 op for (i = 0; i < p->speclen; i++) {
223 3448adb0 2022-11-02 op /* identify fitting file and identifier */
224 3448adb0 2022-11-02 op if (p->spec[i].file &&
225 3448adb0 2022-11-02 op !strcmp(p->spec[i].file, file) &&
226 3448adb0 2022-11-02 op (!strcmp(p->spec[i].ucdname, field[1]) ||
227 3448adb0 2022-11-02 op (comment != NULL && !strncmp(p->spec[i].ucdname, comment, strlen(p->spec[i].ucdname)) &&
228 3448adb0 2022-11-02 op comment[strlen(p->spec[i].ucdname)] == ' '))) {
229 3448adb0 2022-11-02 op /* parse range in first field */
230 3448adb0 2022-11-02 op if (range_parse(field[0], &r)) {
234 3448adb0 2022-11-02 op /* apply to all codepoints in the range */
235 3448adb0 2022-11-02 op for (cp = r.lower; cp <= r.upper; cp++) {
236 3448adb0 2022-11-02 op if (p->set_value(payload, cp, i)) {
248 3448adb0 2022-11-02 op properties_compress(const struct properties *prop,
249 3448adb0 2022-11-02 op struct properties_compressed *comp)
251 3448adb0 2022-11-02 op uint_least32_t cp, i;
253 3448adb0 2022-11-02 op /* initialization */
254 3448adb0 2022-11-02 op if (!(comp->offset = malloc((size_t)UINT32_C(0x110000) * sizeof(*(comp->offset))))) {
255 3448adb0 2022-11-02 op fprintf(stderr, "malloc: %s\n", strerror(errno));
258 3448adb0 2022-11-02 op comp->data = NULL;
259 3448adb0 2022-11-02 op comp->datalen = 0;
261 3448adb0 2022-11-02 op for (cp = 0; cp < UINT32_C(0x110000); cp++) {
262 3448adb0 2022-11-02 op for (i = 0; i < comp->datalen; i++) {
263 3448adb0 2022-11-02 op if (!memcmp(&(prop[cp]), &(comp->data[i]), sizeof(*prop))) {
264 3448adb0 2022-11-02 op /* found a match! */
265 3448adb0 2022-11-02 op comp->offset[cp] = i;
269 3448adb0 2022-11-02 op if (i == comp->datalen) {
271 3448adb0 2022-11-02 op * found no matching properties-struct, so
272 3448adb0 2022-11-02 op * add current properties to data and add the
273 3448adb0 2022-11-02 op * offset in the offset-table
275 3448adb0 2022-11-02 op if (!(comp->data = reallocate_array(comp->data,
276 3448adb0 2022-11-02 op ++(comp->datalen),
277 3448adb0 2022-11-02 op sizeof(*(comp->data))))) {
278 3448adb0 2022-11-02 op fprintf(stderr, "reallocate_array: %s\n",
279 3448adb0 2022-11-02 op strerror(errno));
282 3448adb0 2022-11-02 op memcpy(&(comp->data[comp->datalen - 1]), &(prop[cp]),
283 3448adb0 2022-11-02 op sizeof(*prop));
284 3448adb0 2022-11-02 op comp->offset[cp] = comp->datalen - 1;
290 3448adb0 2022-11-02 op properties_get_major_minor(const struct properties_compressed *comp,
291 3448adb0 2022-11-02 op struct properties_major_minor *mm)
293 3448adb0 2022-11-02 op size_t i, j, compression_count = 0;
296 3448adb0 2022-11-02 op * we currently have an array comp->offset which maps the
297 3448adb0 2022-11-02 op * codepoints 0..0x110000 to offsets into comp->data.
298 3448adb0 2022-11-02 op * To improve cache-locality instead and allow a bit of
299 3448adb0 2022-11-02 op * compressing, instead of directly mapping a codepoint
300 3448adb0 2022-11-02 op * 0xAAAABB with comp->offset, we generate two arrays major
301 3448adb0 2022-11-02 op * and minor such that
302 3448adb0 2022-11-02 op * comp->offset(0xAAAABB) == minor[major[0xAAAA] + 0xBB]
303 3448adb0 2022-11-02 op * This yields a major-array of length 2^16 and a minor array
304 3448adb0 2022-11-02 op * of variable length depending on how many common subsequences
305 3448adb0 2022-11-02 op * can be filtered out.
308 3448adb0 2022-11-02 op /* initialize */
309 3448adb0 2022-11-02 op if (!(mm->major = malloc((size_t)0x1100 * sizeof(*(mm->major))))) {
310 3448adb0 2022-11-02 op fprintf(stderr, "malloc: %s\n", strerror(errno));
313 3448adb0 2022-11-02 op mm->minor = NULL;
314 3448adb0 2022-11-02 op mm->minorlen = 0;
316 3448adb0 2022-11-02 op for (i = 0; i < (size_t)0x1100; i++) {
318 3448adb0 2022-11-02 op * we now look at the cp-range (i << 8)..(i << 8 + 0xFF)
319 3448adb0 2022-11-02 op * and check if its corresponding offset-data already
320 3448adb0 2022-11-02 op * exists in minor (because then we just point there
321 3448adb0 2022-11-02 op * and need less storage)
323 3448adb0 2022-11-02 op for (j = 0; j + 0xFF < mm->minorlen; j++) {
324 3448adb0 2022-11-02 op if (!memcmp(&(comp->offset[i << 8]),
325 3448adb0 2022-11-02 op &(mm->minor[j]),
326 3448adb0 2022-11-02 op sizeof(*(comp->offset)) * 0x100)) {
330 3448adb0 2022-11-02 op if (j + 0xFF < mm->minorlen) {
331 3448adb0 2022-11-02 op /* found an index */
332 3448adb0 2022-11-02 op compression_count++;
333 3448adb0 2022-11-02 op mm->major[i] = j;
336 3448adb0 2022-11-02 op * add "new" sequence to minor and point to it
339 3448adb0 2022-11-02 op mm->minorlen += 0x100;
340 3448adb0 2022-11-02 op if (!(mm->minor = reallocate_array(mm->minor,
341 3448adb0 2022-11-02 op mm->minorlen,
342 3448adb0 2022-11-02 op sizeof(*(mm->minor))))) {
343 3448adb0 2022-11-02 op fprintf(stderr, "reallocate_array: %s\n",
344 3448adb0 2022-11-02 op strerror(errno));
347 3448adb0 2022-11-02 op memcpy(&(mm->minor[mm->minorlen - 0x100]),
348 3448adb0 2022-11-02 op &(comp->offset[i << 8]),
349 3448adb0 2022-11-02 op sizeof(*(mm->minor)) * 0x100);
350 3448adb0 2022-11-02 op mm->major[i] = mm->minorlen - 0x100;
354 3448adb0 2022-11-02 op /* return compression ratio */
355 3448adb0 2022-11-02 op return (double)compression_count / 0x1100 * 100;
/*
 * Print the datalen values in data to stdout as the C definition of a
 * static const array called name, eight values per line. The element
 * type is the narrowest of uint_least{8,16,32,64}_t that can hold the
 * largest value in data.
 */
void
properties_print_lookup_table(char *name, size_t *data, size_t datalen)
{
	const char *type;
	size_t i, maxval;

	for (i = 0, maxval = 0; i < datalen; i++) {
		if (data[i] > maxval) {
			maxval = data[i];
		}
	}

	type = (maxval <= UINT_LEAST8_MAX)  ? "uint_least8_t"  :
	       (maxval <= UINT_LEAST16_MAX) ? "uint_least16_t" :
	       (maxval <= UINT_LEAST32_MAX) ? "uint_least32_t" :
	                                      "uint_least64_t";

	printf("static const %s %s[] = {\n\t", type, name);
	for (i = 0; i < datalen; i++) {
		printf("%zu", data[i]);
		if (i + 1 == datalen) {
			printf("\n");
		} else if ((i + 1) % 8 != 0) {
			printf(", ");
		} else {
			printf(",\n\t");
		}
	}
	printf("};\n");
}
/*
 * Print a static const array called name with element type type to
 * stdout, eight values per line. Element i is the result of
 * get_value(payload, offset[i]).
 */
void
properties_print_derived_lookup_table(char *name, char *type, size_t *offset, size_t offsetlen,
                                      int_least64_t (*get_value)(const struct properties *,
                                      size_t), const void *payload)
{
	size_t i;

	printf("static const %s %s[] = {\n\t", type, name);
	for (i = 0; i < offsetlen; i++) {
		printf("%"PRIiLEAST64, get_value(payload, offset[i]));
		if (i + 1 == offsetlen) {
			printf("\n");
		} else if ((i + 1) % 8 != 0) {
			printf(", ");
		} else {
			printf(",\n\t");
		}
	}
	printf("};\n");
}
413 3448adb0 2022-11-02 op properties_print_enum(const struct property_spec *spec, size_t speclen,
414 3448adb0 2022-11-02 op const char *enumname, const char *enumprefix)
418 3448adb0 2022-11-02 op printf("enum %s {\n", enumname);
419 3448adb0 2022-11-02 op for (i = 0; i < speclen; i++) {
420 3448adb0 2022-11-02 op printf("\t%s_%s,\n", enumprefix, spec[i].enumname);
422 3448adb0 2022-11-02 op printf("\tNUM_%sS,\n};\n\n", enumprefix);
426 3448adb0 2022-11-02 op set_value_bp(struct properties_payload *payload, uint_least32_t cp,
427 3448adb0 2022-11-02 op int_least64_t value)
429 3448adb0 2022-11-02 op if (payload->prop[cp].property != 0) {
430 3448adb0 2022-11-02 op if (payload->handle_conflict == NULL) {
431 3448adb0 2022-11-02 op fprintf(stderr, "set_value_bp: "
432 3448adb0 2022-11-02 op "Unhandled character break property "
433 3448adb0 2022-11-02 op "overwrite for 0x%06X (%s <- %s).\n",
434 3448adb0 2022-11-02 op cp, payload->spec[payload->prop[cp].
435 3448adb0 2022-11-02 op property].enumname,
436 3448adb0 2022-11-02 op payload->spec[value].enumname);
439 3448adb0 2022-11-02 op value = payload->handle_conflict(cp,
440 3448adb0 2022-11-02 op (uint_least8_t)payload->prop[cp].property,
441 3448adb0 2022-11-02 op (uint_least8_t)value);
444 3448adb0 2022-11-02 op payload->prop[cp].property = value;
449 3448adb0 2022-11-02 op static int_least64_t
450 3448adb0 2022-11-02 op get_value_bp(const struct properties *prop, size_t offset)
452 3448adb0 2022-11-02 op return (uint_least8_t)prop[offset].property;
456 3448adb0 2022-11-02 op properties_generate_break_property(const struct property_spec *spec,
457 3448adb0 2022-11-02 op uint_least8_t speclen,
458 3448adb0 2022-11-02 op uint_least8_t (*handle_conflict)(
459 3448adb0 2022-11-02 op uint_least32_t, uint_least8_t,
460 3448adb0 2022-11-02 op uint_least8_t), uint_least8_t
461 3448adb0 2022-11-02 op (*post_process)(uint_least8_t),
462 3448adb0 2022-11-02 op const char *prefix, const char *argv0)
464 3448adb0 2022-11-02 op struct properties_compressed comp;
465 3448adb0 2022-11-02 op struct properties_major_minor mm;
466 3448adb0 2022-11-02 op struct properties_payload payload;
467 3448adb0 2022-11-02 op struct properties *prop;
468 3448adb0 2022-11-02 op size_t i, j, prefixlen = strlen(prefix);
469 3448adb0 2022-11-02 op char buf1[64], prefix_uc[64], buf2[64], buf3[64], buf4[64];
471 3448adb0 2022-11-02 op /* allocate property buffer for all 0x110000 codepoints */
472 3448adb0 2022-11-02 op if (!(prop = calloc(UINT32_C(0x110000), sizeof(*prop)))) {
473 3448adb0 2022-11-02 op fprintf(stderr, "calloc: %s\n", strerror(errno));
477 3448adb0 2022-11-02 op /* generate data */
478 3448adb0 2022-11-02 op payload.prop = prop;
479 3448adb0 2022-11-02 op payload.spec = spec;
480 3448adb0 2022-11-02 op payload.speclen = speclen;
481 3448adb0 2022-11-02 op payload.set_value = set_value_bp;
482 3448adb0 2022-11-02 op payload.handle_conflict = handle_conflict;
484 3448adb0 2022-11-02 op /* parse each file exactly once and ignore NULL-fields */
485 3448adb0 2022-11-02 op for (i = 0; i < speclen; i++) {
486 3448adb0 2022-11-02 op for (j = 0; j < i; j++) {
487 3448adb0 2022-11-02 op if (spec[i].file && spec[j].file &&
488 3448adb0 2022-11-02 op !strcmp(spec[i].file, spec[j].file)) {
489 3448adb0 2022-11-02 op /* file has already been parsed */
493 3448adb0 2022-11-02 op if (i == j && spec[i].file) {
494 3448adb0 2022-11-02 op /* file has not been processed yet */
495 3448adb0 2022-11-02 op parse_file_with_callback(spec[i].file,
496 3448adb0 2022-11-02 op properties_callback,
501 3448adb0 2022-11-02 op /* post-processing */
502 3448adb0 2022-11-02 op if (post_process != NULL) {
503 3448adb0 2022-11-02 op for (i = 0; i < UINT32_C(0x110000); i++) {
504 3448adb0 2022-11-02 op payload.prop[i].property =
505 3448adb0 2022-11-02 op post_process((uint_least8_t)payload.prop[i].property);
509 3448adb0 2022-11-02 op /* compress data */
510 3448adb0 2022-11-02 op printf("/* Automatically generated by %s */\n#include <stdint.h>\n\n", argv0);
511 3448adb0 2022-11-02 op properties_compress(prop, &comp);
513 3448adb0 2022-11-02 op fprintf(stderr, "%s: %s-LUT compression-ratio: %.2f%%\n", argv0,
514 3448adb0 2022-11-02 op prefix, properties_get_major_minor(&comp, &mm));
516 3448adb0 2022-11-02 op /* prepare names */
517 3448adb0 2022-11-02 op if ((size_t)snprintf(buf1, LEN(buf1), "%s_property", prefix) >= LEN(buf1)) {
518 3448adb0 2022-11-02 op fprintf(stderr, "snprintf: String truncated.\n");
521 3448adb0 2022-11-02 op if (LEN(prefix_uc) + 1 < prefixlen) {
522 3448adb0 2022-11-02 op fprintf(stderr, "snprintf: Buffer too small.\n");
525 3448adb0 2022-11-02 op for (i = 0; i < prefixlen; i++) {
526 3448adb0 2022-11-02 op prefix_uc[i] = (char)toupper(prefix[i]);
528 3448adb0 2022-11-02 op prefix_uc[prefixlen] = '\0';
529 3448adb0 2022-11-02 op if ((size_t)snprintf(buf2, LEN(buf2), "%s_PROP", prefix_uc) >= LEN(buf2) ||
530 3448adb0 2022-11-02 op (size_t)snprintf(buf3, LEN(buf3), "%s_major", prefix) >= LEN(buf3) ||
531 3448adb0 2022-11-02 op (size_t)snprintf(buf4, LEN(buf4), "%s_minor", prefix) >= LEN(buf4)) {
532 3448adb0 2022-11-02 op fprintf(stderr, "snprintf: String truncated.\n");
536 3448adb0 2022-11-02 op /* print data */
537 3448adb0 2022-11-02 op properties_print_enum(spec, speclen, buf1, buf2);
538 3448adb0 2022-11-02 op properties_print_lookup_table(buf3, mm.major, 0x1100);
539 3448adb0 2022-11-02 op printf("\n");
540 3448adb0 2022-11-02 op properties_print_derived_lookup_table(buf4, "uint_least8_t", mm.minor, mm.minorlen,
541 3448adb0 2022-11-02 op get_value_bp, comp.data);
543 3448adb0 2022-11-02 op /* free data */
545 3448adb0 2022-11-02 op free(comp.data);
546 3448adb0 2022-11-02 op free(comp.offset);
547 3448adb0 2022-11-02 op free(mm.major);
548 3448adb0 2022-11-02 op free(mm.minor);
552 3448adb0 2022-11-02 op break_test_callback(const char *fname, char **field, size_t nfields,
553 3448adb0 2022-11-02 op char *comment, void *payload)
555 3448adb0 2022-11-02 op struct break_test *t,
556 3448adb0 2022-11-02 op **test = ((struct break_test_payload *)payload)->test;
557 3448adb0 2022-11-02 op size_t i, *testlen = ((struct break_test_payload *)payload)->testlen;
562 3448adb0 2022-11-02 op if (nfields < 1) {
566 3448adb0 2022-11-02 op /* append new testcase and initialize with zeroes */
567 3448adb0 2022-11-02 op if ((*test = realloc(*test, ++(*testlen) * sizeof(**test))) == NULL) {
568 3448adb0 2022-11-02 op fprintf(stderr, "break_test_callback: realloc: %s.\n",
569 3448adb0 2022-11-02 op strerror(errno));
572 3448adb0 2022-11-02 op t = &(*test)[*testlen - 1];
573 3448adb0 2022-11-02 op memset(t, 0, sizeof(*t));
575 3448adb0 2022-11-02 op /* parse testcase "<÷|×> <cp> <÷|×> ... <cp> <÷|×>" */
576 3448adb0 2022-11-02 op for (token = strtok(field[0], " "), i = 0; token != NULL; i++,
577 3448adb0 2022-11-02 op token = strtok(NULL, " ")) {
578 3448adb0 2022-11-02 op if (i % 2 == 0) {
579 3448adb0 2022-11-02 op /* delimiter or start of sequence */
580 3448adb0 2022-11-02 op if (i == 0 || !strncmp(token, "\xC3\xB7", 2)) { /* UTF-8 */
582 3448adb0 2022-11-02 op * '÷' indicates a breakpoint,
583 3448adb0 2022-11-02 op * the current length is done; allocate
584 3448adb0 2022-11-02 op * a new length field and set it to 0
586 3448adb0 2022-11-02 op if ((t->len = realloc(t->len,
587 3448adb0 2022-11-02 op ++t->lenlen * sizeof(*t->len))) == NULL) {
588 3448adb0 2022-11-02 op fprintf(stderr, "break_test_"
589 3448adb0 2022-11-02 op "callback: realloc: %s.\n",
590 3448adb0 2022-11-02 op strerror(errno));
593 3448adb0 2022-11-02 op t->len[t->lenlen - 1] = 0;
594 3448adb0 2022-11-02 op } else if (!strncmp(token, "\xC3\x97", 2)) { /* UTF-8 */
596 3448adb0 2022-11-02 op * '×' indicates a non-breakpoint, do nothing
599 3448adb0 2022-11-02 op fprintf(stderr, "break_test_callback: "
600 3448adb0 2022-11-02 op "Malformed delimiter '%s'.\n", token);
604 3448adb0 2022-11-02 op /* add codepoint to cp-array */
605 3448adb0 2022-11-02 op if ((t->cp = realloc(t->cp, ++t->cplen *
606 3448adb0 2022-11-02 op sizeof(*t->cp))) == NULL) {
607 3448adb0 2022-11-02 op fprintf(stderr, "break_test_callback: "
608 3448adb0 2022-11-02 op "realloc: %s.\n", strerror(errno));
611 3448adb0 2022-11-02 op if (hextocp(token, strlen(token), &t->cp[t->cplen - 1])) {
614 3448adb0 2022-11-02 op if (t->lenlen > 0) {
615 3448adb0 2022-11-02 op t->len[t->lenlen - 1]++;
619 3448adb0 2022-11-02 op if (t->len[t->lenlen - 1] == 0) {
621 3448adb0 2022-11-02 op * we allocated one more length than we needed because
622 3448adb0 2022-11-02 op * the breakpoint was at the end
627 3448adb0 2022-11-02 op /* store comment */
628 3448adb0 2022-11-02 op if (((*test)[*testlen - 1].descr = strdup(comment)) == NULL) {
629 3448adb0 2022-11-02 op fprintf(stderr, "break_test_callback: strdup: %s.\n",
630 3448adb0 2022-11-02 op strerror(errno));
638 3448adb0 2022-11-02 op break_test_list_parse(char *fname, struct break_test **test,
639 3448adb0 2022-11-02 op size_t *testlen)
641 3448adb0 2022-11-02 op struct break_test_payload pl = {
642 3448adb0 2022-11-02 op .test = test,
643 3448adb0 2022-11-02 op .testlen = testlen,
645 3448adb0 2022-11-02 op *test = NULL;
646 3448adb0 2022-11-02 op *testlen = 0;
648 3448adb0 2022-11-02 op parse_file_with_callback(fname, break_test_callback, &pl);
652 3448adb0 2022-11-02 op break_test_list_print(const struct break_test *test, size_t testlen,
653 3448adb0 2022-11-02 op const char *identifier, const char *progname)
657 3448adb0 2022-11-02 op printf("/* Automatically generated by %s */\n"
658 3448adb0 2022-11-02 op "#include <stdint.h>\n#include <stddef.h>\n\n"
659 3448adb0 2022-11-02 op "#include \"../gen/types.h\"\n\n", progname);
661 3448adb0 2022-11-02 op printf("static const struct break_test %s[] = {\n", identifier);
662 3448adb0 2022-11-02 op for (i = 0; i < testlen; i++) {
663 3448adb0 2022-11-02 op printf("\t{\n");
665 3448adb0 2022-11-02 op printf("\t\t.cp = (uint_least32_t[]){");
666 3448adb0 2022-11-02 op for (j = 0; j < test[i].cplen; j++) {
667 3448adb0 2022-11-02 op printf(" UINT32_C(0x%06X)", test[i].cp[j]);
668 3448adb0 2022-11-02 op if (j + 1 < test[i].cplen) {
669 3448adb0 2022-11-02 op putchar(',');
672 3448adb0 2022-11-02 op printf(" },\n");
673 3448adb0 2022-11-02 op printf("\t\t.cplen = %zu,\n", test[i].cplen);
675 3448adb0 2022-11-02 op printf("\t\t.len = (size_t[]){");
676 3448adb0 2022-11-02 op for (j = 0; j < test[i].lenlen; j++) {
677 3448adb0 2022-11-02 op printf(" %zu", test[i].len[j]);
678 3448adb0 2022-11-02 op if (j + 1 < test[i].lenlen) {
679 3448adb0 2022-11-02 op putchar(',');
682 3448adb0 2022-11-02 op printf(" },\n");
683 3448adb0 2022-11-02 op printf("\t\t.lenlen = %zu,\n", test[i].lenlen);
685 3448adb0 2022-11-02 op printf("\t\t.descr = \"%s\",\n", test[i].descr);
687 3448adb0 2022-11-02 op printf("\t},\n");
689 3448adb0 2022-11-02 op printf("};\n");
693 3448adb0 2022-11-02 op break_test_list_free(struct break_test *test, size_t testlen)
697 3448adb0 2022-11-02 op for (i = 0; i < testlen; i++) {
698 3448adb0 2022-11-02 op free(test[i].cp);
699 3448adb0 2022-11-02 op free(test[i].len);
700 3448adb0 2022-11-02 op free(test[i].descr);