Blob


1 /* See LICENSE file for copyright and license details. */
2 #include <errno.h>
3 #include <math.h>
4 #include <stdint.h>
5 #include <stdio.h>
6 #include <stdlib.h>
7 #include <string.h>
9 #include "../grapheme.h"
10 #include "../gen/character-test.h"
11 #include "util.h"
13 #include <utf8proc.h>
/* iteration count handed to run_benchmark() for each benchmarked decoder */
#define NUM_ITERATIONS 100000
17 struct utf8_benchmark_payload {
18 char *buf;
19 utf8proc_uint8_t *buf_utf8proc;
20 size_t buflen;
21 };
23 void
24 libgrapheme(const void *payload)
25 {
26 const struct utf8_benchmark_payload *p = payload;
27 uint_least32_t cp;
28 size_t ret, off;
30 for (off = 0; off < p->buflen; off += ret) {
31 if ((ret = grapheme_decode_utf8(p->buf + off,
32 p->buflen - off, &cp)) >
33 (p->buflen - off)) {
34 break;
35 }
36 (void)cp;
37 }
38 }
40 void
41 libutf8proc(const void *payload)
42 {
43 const struct utf8_benchmark_payload *p = payload;
44 utf8proc_int32_t cp;
45 utf8proc_ssize_t ret;
46 size_t off;
48 for (off = 0; off < p->buflen; off += (size_t)ret) {
49 if ((ret = utf8proc_iterate(p->buf_utf8proc + off,
50 (utf8proc_ssize_t)(p->buflen - off),
51 &cp)) < 0) {
52 break;
53 }
54 (void)cp;
55 }
56 }
58 int
59 main(int argc, char *argv[])
60 {
61 struct utf8_benchmark_payload p;
62 size_t i;
63 double baseline = (double)NAN;
65 (void)argc;
67 p.buf = generate_utf8_test_buffer(character_break_test,
68 LEN(character_break_test),
69 &(p.buflen));
71 /* convert cp-buffer to stupid custom libutf8proc-uint8-type */
72 if ((p.buf_utf8proc = malloc(p.buflen)) == NULL) {
73 fprintf(stderr, "malloc: %s\n", strerror(errno));
74 exit(1);
75 }
76 for (i = 0; i < p.buflen; i++) {
77 /*
78 * even if char is larger than 8 bit, it will only have
79 * any of the first 8 bits set (by construction).
80 */
81 p.buf_utf8proc[i] = (utf8proc_uint8_t)p.buf[i];
82 }
84 printf("%s\n", argv[0]);
85 run_benchmark(libgrapheme, &p, "libgrapheme ", NULL,
86 "byte", &baseline, NUM_ITERATIONS, p.buflen);
87 run_benchmark(libutf8proc, &p, "libutf8proc ",
88 "but unsafe (does not detect overlong encodings)",
89 "byte", &baseline, NUM_ITERATIONS, p.buflen);
91 free(p.buf);
92 free(p.buf_utf8proc);
94 return 0;
95 }