Blame


1 3448adb0 2022-11-02 op /* See LICENSE file for copyright and license details. */
2 3448adb0 2022-11-02 op #include <limits.h>
3 3448adb0 2022-11-02 op #include <stdbool.h>
4 3448adb0 2022-11-02 op #include <stddef.h>
5 3448adb0 2022-11-02 op #include <stdint.h>
6 3448adb0 2022-11-02 op
7 3448adb0 2022-11-02 op #include "../gen/types.h"
8 3448adb0 2022-11-02 op #include "../grapheme.h"
9 3448adb0 2022-11-02 op #include "util.h"
10 3448adb0 2022-11-02 op
11 3448adb0 2022-11-02 op void
12 3448adb0 2022-11-02 op herodotus_reader_init(HERODOTUS_READER *r, enum herodotus_type type,
13 3448adb0 2022-11-02 op const void *src, size_t srclen)
14 3448adb0 2022-11-02 op {
15 3448adb0 2022-11-02 op size_t i;
16 3448adb0 2022-11-02 op
17 3448adb0 2022-11-02 op r->type = type;
18 3448adb0 2022-11-02 op r->src = src;
19 3448adb0 2022-11-02 op r->srclen = srclen;
20 3448adb0 2022-11-02 op r->off = 0;
21 3448adb0 2022-11-02 op r->terminated_by_null = false;
22 3448adb0 2022-11-02 op
23 3448adb0 2022-11-02 op for (i = 0; i < LEN(r->soft_limit); i++) {
24 3448adb0 2022-11-02 op r->soft_limit[i] = SIZE_MAX;
25 3448adb0 2022-11-02 op }
26 3448adb0 2022-11-02 op }
27 3448adb0 2022-11-02 op
28 3448adb0 2022-11-02 op void
29 3448adb0 2022-11-02 op herodotus_reader_copy(const HERODOTUS_READER *src, HERODOTUS_READER *dest)
30 3448adb0 2022-11-02 op {
31 3448adb0 2022-11-02 op size_t i;
32 3448adb0 2022-11-02 op
33 3448adb0 2022-11-02 op /*
34 3448adb0 2022-11-02 op * we copy such that we have a "fresh" start and build on the
35 3448adb0 2022-11-02 op * fact that src->soft_limit[i] for any i and src->srclen are
36 3448adb0 2022-11-02 op * always larger or equal to src->off
37 3448adb0 2022-11-02 op */
38 3448adb0 2022-11-02 op dest->type = src->type;
39 3448adb0 2022-11-02 op if (src->type == HERODOTUS_TYPE_CODEPOINT) {
40 3448adb0 2022-11-02 op dest->src = (src->src == NULL) ? NULL :
41 3448adb0 2022-11-02 op ((const uint_least32_t *)(src->src)) + src->off;
42 3448adb0 2022-11-02 op } else { /* src->type == HERODOTUS_TYPE_UTF8 */
43 3448adb0 2022-11-02 op dest->src = (src->src == NULL) ? NULL :
44 3448adb0 2022-11-02 op ((const char *)(src->src)) + src->off;
45 3448adb0 2022-11-02 op }
46 3448adb0 2022-11-02 op if (src->srclen == SIZE_MAX) {
47 3448adb0 2022-11-02 op dest->srclen = SIZE_MAX;
48 3448adb0 2022-11-02 op } else {
49 3448adb0 2022-11-02 op dest->srclen = (src->off < src->srclen) ? src->srclen - src->off : 0;
50 3448adb0 2022-11-02 op }
51 3448adb0 2022-11-02 op dest->off = 0;
52 3448adb0 2022-11-02 op dest->terminated_by_null = src->terminated_by_null;
53 3448adb0 2022-11-02 op
54 3448adb0 2022-11-02 op for (i = 0; i < LEN(src->soft_limit); i++) {
55 3448adb0 2022-11-02 op if (src->soft_limit[i] == SIZE_MAX) {
56 3448adb0 2022-11-02 op dest->soft_limit[i] = SIZE_MAX;
57 3448adb0 2022-11-02 op } else {
58 3448adb0 2022-11-02 op /*
59 3448adb0 2022-11-02 op * if we have a degenerate case where the offset is
60 3448adb0 2022-11-02 op * higher than the soft-limit, we simply clamp the
61 3448adb0 2022-11-02 op * soft-limit to zero given we can't decide here
62 3448adb0 2022-11-02 op * to release the limit and, instead, we just
63 3448adb0 2022-11-02 op * prevent any more reads
64 3448adb0 2022-11-02 op */
65 3448adb0 2022-11-02 op dest->soft_limit[i] = (src->off < src->soft_limit[i]) ?
66 3448adb0 2022-11-02 op src->soft_limit[i] - src->off : 0;
67 3448adb0 2022-11-02 op }
68 3448adb0 2022-11-02 op }
69 3448adb0 2022-11-02 op }
70 3448adb0 2022-11-02 op
71 3448adb0 2022-11-02 op void
72 3448adb0 2022-11-02 op herodotus_reader_push_advance_limit(HERODOTUS_READER *r, size_t count)
73 3448adb0 2022-11-02 op {
74 3448adb0 2022-11-02 op size_t i;
75 3448adb0 2022-11-02 op
76 3448adb0 2022-11-02 op for (i = LEN(r->soft_limit) - 1; i >= 1; i--) {
77 3448adb0 2022-11-02 op r->soft_limit[i] = r->soft_limit[i - 1];
78 3448adb0 2022-11-02 op }
79 3448adb0 2022-11-02 op r->soft_limit[0] = r->off + count;
80 3448adb0 2022-11-02 op }
81 3448adb0 2022-11-02 op
82 3448adb0 2022-11-02 op void
83 3448adb0 2022-11-02 op herodotus_reader_pop_limit(HERODOTUS_READER *r)
84 3448adb0 2022-11-02 op {
85 3448adb0 2022-11-02 op size_t i;
86 3448adb0 2022-11-02 op
87 3448adb0 2022-11-02 op for (i = 0; i < LEN(r->soft_limit) - 1; i++) {
88 3448adb0 2022-11-02 op r->soft_limit[i] = r->soft_limit[i + 1];
89 3448adb0 2022-11-02 op }
90 3448adb0 2022-11-02 op r->soft_limit[LEN(r->soft_limit) - 1] = SIZE_MAX;
91 3448adb0 2022-11-02 op }
92 3448adb0 2022-11-02 op
93 3448adb0 2022-11-02 op size_t
94 3448adb0 2022-11-02 op herodotus_reader_next_word_break(const HERODOTUS_READER *r)
95 3448adb0 2022-11-02 op {
96 3448adb0 2022-11-02 op if (r->type == HERODOTUS_TYPE_CODEPOINT) {
97 3448adb0 2022-11-02 op return grapheme_next_word_break(
98 3448adb0 2022-11-02 op (const uint_least32_t *)(r->src) + r->off,
99 3448adb0 2022-11-02 op MIN(r->srclen, r->soft_limit[0]) - r->off);
100 3448adb0 2022-11-02 op } else { /* r->type == HERODOTUS_TYPE_UTF8 */
101 3448adb0 2022-11-02 op return grapheme_next_word_break_utf8(
102 3448adb0 2022-11-02 op (const char *)(r->src) + r->off,
103 3448adb0 2022-11-02 op MIN(r->srclen, r->soft_limit[0]) - r->off);
104 3448adb0 2022-11-02 op }
105 3448adb0 2022-11-02 op }
106 3448adb0 2022-11-02 op
107 3448adb0 2022-11-02 op size_t
108 3448adb0 2022-11-02 op herodotus_reader_next_codepoint_break(const HERODOTUS_READER *r)
109 3448adb0 2022-11-02 op {
110 3448adb0 2022-11-02 op if (r->type == HERODOTUS_TYPE_CODEPOINT) {
111 3448adb0 2022-11-02 op return (r->off < MIN(r->srclen, r->soft_limit[0])) ? 1 : 0;
112 3448adb0 2022-11-02 op } else { /* r->type == HERODOTUS_TYPE_UTF8 */
113 3448adb0 2022-11-02 op return grapheme_decode_utf8(
114 3448adb0 2022-11-02 op (const char *)(r->src) + r->off,
115 3448adb0 2022-11-02 op MIN(r->srclen, r->soft_limit[0]) - r->off, NULL);
116 3448adb0 2022-11-02 op }
117 3448adb0 2022-11-02 op }
118 3448adb0 2022-11-02 op
119 3448adb0 2022-11-02 op size_t
120 3448adb0 2022-11-02 op herodotus_reader_number_read(const HERODOTUS_READER *r)
121 3448adb0 2022-11-02 op {
122 3448adb0 2022-11-02 op return r->off;
123 3448adb0 2022-11-02 op }
124 3448adb0 2022-11-02 op
125 3448adb0 2022-11-02 op enum herodotus_status
126 3448adb0 2022-11-02 op herodotus_read_codepoint(HERODOTUS_READER *r, bool advance, uint_least32_t *cp)
127 3448adb0 2022-11-02 op {
128 3448adb0 2022-11-02 op size_t ret;
129 3448adb0 2022-11-02 op
130 3448adb0 2022-11-02 op if (r->terminated_by_null || r->off >= r->srclen || r->src == NULL) {
131 3448adb0 2022-11-02 op *cp = GRAPHEME_INVALID_CODEPOINT;
132 3448adb0 2022-11-02 op return HERODOTUS_STATUS_END_OF_BUFFER;
133 3448adb0 2022-11-02 op }
134 3448adb0 2022-11-02 op
135 3448adb0 2022-11-02 op if (r->off >= r->soft_limit[0]) {
136 3448adb0 2022-11-02 op *cp = GRAPHEME_INVALID_CODEPOINT;
137 3448adb0 2022-11-02 op return HERODOTUS_STATUS_SOFT_LIMIT_REACHED;
138 3448adb0 2022-11-02 op }
139 3448adb0 2022-11-02 op
140 3448adb0 2022-11-02 op if (r->type == HERODOTUS_TYPE_CODEPOINT) {
141 3448adb0 2022-11-02 op *cp = ((const uint_least32_t *)(r->src))[r->off];
142 3448adb0 2022-11-02 op ret = 1;
143 3448adb0 2022-11-02 op } else { /* r->type == HERODOTUS_TYPE_UTF8 */
144 3448adb0 2022-11-02 op ret = grapheme_decode_utf8((const char *)r->src + r->off,
145 3448adb0 2022-11-02 op MIN(r->srclen, r->soft_limit[0]) -
146 3448adb0 2022-11-02 op r->off, cp);
147 3448adb0 2022-11-02 op }
148 3448adb0 2022-11-02 op
149 3448adb0 2022-11-02 op if (unlikely(r->srclen == SIZE_MAX && *cp == 0)) {
150 3448adb0 2022-11-02 op /*
151 3448adb0 2022-11-02 op * We encountered a null-codepoint. Don't increment
152 3448adb0 2022-11-02 op * offset and return as if the buffer had ended here all
153 3448adb0 2022-11-02 op * along
154 3448adb0 2022-11-02 op */
155 3448adb0 2022-11-02 op r->terminated_by_null = true;
156 3448adb0 2022-11-02 op return HERODOTUS_STATUS_END_OF_BUFFER;
157 3448adb0 2022-11-02 op }
158 3448adb0 2022-11-02 op
159 3448adb0 2022-11-02 op if (r->off + ret > MIN(r->srclen, r->soft_limit[0])) {
160 3448adb0 2022-11-02 op /*
161 3448adb0 2022-11-02 op * we want more than we have; instead of returning
162 3448adb0 2022-11-02 op * garbage we terminate here.
163 3448adb0 2022-11-02 op */
164 3448adb0 2022-11-02 op return HERODOTUS_STATUS_END_OF_BUFFER;
165 3448adb0 2022-11-02 op }
166 3448adb0 2022-11-02 op
167 3448adb0 2022-11-02 op /*
168 3448adb0 2022-11-02 op * Increase offset which we now know won't surpass the limits,
169 3448adb0 2022-11-02 op * unless we got told otherwise
170 3448adb0 2022-11-02 op */
171 3448adb0 2022-11-02 op if (advance) {
172 3448adb0 2022-11-02 op r->off += ret;
173 3448adb0 2022-11-02 op }
174 3448adb0 2022-11-02 op
175 3448adb0 2022-11-02 op return HERODOTUS_STATUS_SUCCESS;
176 3448adb0 2022-11-02 op }
177 3448adb0 2022-11-02 op
178 3448adb0 2022-11-02 op void
179 3448adb0 2022-11-02 op herodotus_writer_init(HERODOTUS_WRITER *w, enum herodotus_type type,
180 3448adb0 2022-11-02 op void *dest, size_t destlen)
181 3448adb0 2022-11-02 op {
182 3448adb0 2022-11-02 op w->type = type;
183 3448adb0 2022-11-02 op w->dest = dest;
184 3448adb0 2022-11-02 op w->destlen = destlen;
185 3448adb0 2022-11-02 op w->off = 0;
186 3448adb0 2022-11-02 op w->first_unwritable_offset = SIZE_MAX;
187 3448adb0 2022-11-02 op }
188 3448adb0 2022-11-02 op
189 3448adb0 2022-11-02 op void
190 3448adb0 2022-11-02 op herodotus_writer_nul_terminate(HERODOTUS_WRITER *w)
191 3448adb0 2022-11-02 op {
192 3448adb0 2022-11-02 op if (w->dest == NULL) {
193 3448adb0 2022-11-02 op return;
194 3448adb0 2022-11-02 op }
195 3448adb0 2022-11-02 op
196 3448adb0 2022-11-02 op if (w->off < w->destlen) {
197 3448adb0 2022-11-02 op /* We still have space in the buffer. Simply use it */
198 3448adb0 2022-11-02 op if (w->type == HERODOTUS_TYPE_CODEPOINT) {
199 3448adb0 2022-11-02 op ((uint_least32_t *)(w->dest))[w->off] = 0;
200 3448adb0 2022-11-02 op } else { /* w->type == HERODOTUS_TYPE_UTF8 */
201 3448adb0 2022-11-02 op ((char *)(w->dest))[w->off] = '\0';
202 3448adb0 2022-11-02 op }
203 3448adb0 2022-11-02 op } else if (w->first_unwritable_offset < w->destlen) {
204 3448adb0 2022-11-02 op /*
205 3448adb0 2022-11-02 op * There is no more space in the buffer. However,
206 3448adb0 2022-11-02 op * we have noted down the first offset we couldn't
207 3448adb0 2022-11-02 op * use to write into the buffer and it's smaller than
208 3448adb0 2022-11-02 op * destlen. Thus we bailed writing into the
209 3448adb0 2022-11-02 op * destination when a multibyte-codepoint couldn't be
210 3448adb0 2022-11-02 op * written. So the last "real" byte might be at
211 3448adb0 2022-11-02 op * destlen-4, destlen-3, destlen-2 or destlen-1
212 3448adb0 2022-11-02 op * (the last case meaning truncation).
213 3448adb0 2022-11-02 op */
214 3448adb0 2022-11-02 op if (w->type == HERODOTUS_TYPE_CODEPOINT) {
215 3448adb0 2022-11-02 op ((uint_least32_t *)(w->dest))
216 3448adb0 2022-11-02 op [w->first_unwritable_offset] = 0;
217 3448adb0 2022-11-02 op } else { /* w->type == HERODOTUS_TYPE_UTF8 */
218 3448adb0 2022-11-02 op ((char *)(w->dest))[w->first_unwritable_offset] = '\0';
219 3448adb0 2022-11-02 op }
220 3448adb0 2022-11-02 op } else if (w->destlen > 0) {
221 3448adb0 2022-11-02 op /*
222 3448adb0 2022-11-02 op * In this case, there is no more space in the buffer and
223 3448adb0 2022-11-02 op * the last unwritable offset is larger than
224 3448adb0 2022-11-02 op * or equal to the destination buffer length. This means
225 3448adb0 2022-11-02 op * that we are forced to simply write into the last
226 3448adb0 2022-11-02 op * byte.
227 3448adb0 2022-11-02 op */
228 3448adb0 2022-11-02 op if (w->type == HERODOTUS_TYPE_CODEPOINT) {
229 3448adb0 2022-11-02 op ((uint_least32_t *)(w->dest))
230 3448adb0 2022-11-02 op [w->destlen - 1] = 0;
231 3448adb0 2022-11-02 op } else { /* w->type == HERODOTUS_TYPE_UTF8 */
232 3448adb0 2022-11-02 op ((char *)(w->dest))[w->destlen - 1] = '\0';
233 3448adb0 2022-11-02 op }
234 3448adb0 2022-11-02 op }
235 3448adb0 2022-11-02 op
236 3448adb0 2022-11-02 op /* w->off is not incremented in any case */
237 3448adb0 2022-11-02 op }
238 3448adb0 2022-11-02 op
239 3448adb0 2022-11-02 op size_t
240 3448adb0 2022-11-02 op herodotus_writer_number_written(const HERODOTUS_WRITER *w)
241 3448adb0 2022-11-02 op {
242 3448adb0 2022-11-02 op return w->off;
243 3448adb0 2022-11-02 op }
244 3448adb0 2022-11-02 op
245 3448adb0 2022-11-02 op void
246 3448adb0 2022-11-02 op herodotus_write_codepoint(HERODOTUS_WRITER *w, uint_least32_t cp)
247 3448adb0 2022-11-02 op {
248 3448adb0 2022-11-02 op size_t ret;
249 3448adb0 2022-11-02 op
250 3448adb0 2022-11-02 op /*
251 3448adb0 2022-11-02 op * This function will always faithfully say how many codepoints
252 3448adb0 2022-11-02 op * were written, even if the buffer ends. This is used to enable
253 3448adb0 2022-11-02 op * truncation detection.
254 3448adb0 2022-11-02 op */
255 3448adb0 2022-11-02 op if (w->type == HERODOTUS_TYPE_CODEPOINT) {
256 3448adb0 2022-11-02 op if (w->dest != NULL && w->off < w->destlen) {
257 3448adb0 2022-11-02 op ((uint_least32_t *)(w->dest))[w->off] = cp;
258 3448adb0 2022-11-02 op }
259 3448adb0 2022-11-02 op
260 3448adb0 2022-11-02 op w->off += 1;
261 3448adb0 2022-11-02 op } else { /* w->type == HERODOTUS_TYPE_UTF8 */
262 3448adb0 2022-11-02 op /*
263 3448adb0 2022-11-02 op * First determine how many bytes we need to encode the
264 3448adb0 2022-11-02 op * codepoint
265 3448adb0 2022-11-02 op */
266 3448adb0 2022-11-02 op ret = grapheme_encode_utf8(cp, NULL, 0);
267 3448adb0 2022-11-02 op
268 3448adb0 2022-11-02 op if (w->dest != NULL && w->off + ret < w->destlen) {
269 3448adb0 2022-11-02 op /* we still have enough room in the buffer */
270 3448adb0 2022-11-02 op grapheme_encode_utf8(cp, (char *)(w->dest) +
271 3448adb0 2022-11-02 op w->off, w->destlen - w->off);
272 3448adb0 2022-11-02 op } else if (w->first_unwritable_offset == SIZE_MAX) {
273 3448adb0 2022-11-02 op /*
274 3448adb0 2022-11-02 op * the first unwritable offset has not been
275 3448adb0 2022-11-02 op * noted down, so this is the first time we can't
276 3448adb0 2022-11-02 op * write (completely) to an offset
277 3448adb0 2022-11-02 op */
278 3448adb0 2022-11-02 op w->first_unwritable_offset = w->off;
279 3448adb0 2022-11-02 op }
280 3448adb0 2022-11-02 op
281 3448adb0 2022-11-02 op w->off += ret;
282 3448adb0 2022-11-02 op }
283 3448adb0 2022-11-02 op }
284 3448adb0 2022-11-02 op
285 3448adb0 2022-11-02 op void
286 3448adb0 2022-11-02 op proper_init(const HERODOTUS_READER *r, void *state, uint_least8_t no_prop,
287 3448adb0 2022-11-02 op uint_least8_t (*get_break_prop)(uint_least32_t),
288 3448adb0 2022-11-02 op bool (*is_skippable_prop)(uint_least8_t),
289 3448adb0 2022-11-02 op void (*skip_shift_callback)(uint_least8_t, void *),
290 3448adb0 2022-11-02 op struct proper *p)
291 3448adb0 2022-11-02 op {
292 3448adb0 2022-11-02 op uint_least8_t prop;
293 3448adb0 2022-11-02 op uint_least32_t cp;
294 3448adb0 2022-11-02 op size_t i;
295 3448adb0 2022-11-02 op
296 3448adb0 2022-11-02 op /* set internal variables */
297 3448adb0 2022-11-02 op p->state = state;
298 3448adb0 2022-11-02 op p->no_prop = no_prop;
299 3448adb0 2022-11-02 op p->get_break_prop = get_break_prop;
300 3448adb0 2022-11-02 op p->is_skippable_prop = is_skippable_prop;
301 3448adb0 2022-11-02 op p->skip_shift_callback = skip_shift_callback;
302 3448adb0 2022-11-02 op
303 3448adb0 2022-11-02 op /*
304 3448adb0 2022-11-02 op * Initialize mid-reader, which is basically just there
305 3448adb0 2022-11-02 op * to reflect the current position of the viewing-line
306 3448adb0 2022-11-02 op */
307 3448adb0 2022-11-02 op herodotus_reader_copy(r, &(p->mid_reader));
308 3448adb0 2022-11-02 op
309 3448adb0 2022-11-02 op /*
310 3448adb0 2022-11-02 op * In the initialization, we simply (try to) fill in next_prop.
311 3448adb0 2022-11-02 op * If we cannot read in more (due to the buffer ending), we
312 3448adb0 2022-11-02 op * fill in the prop as invalid
313 3448adb0 2022-11-02 op */
314 3448adb0 2022-11-02 op
315 3448adb0 2022-11-02 op /*
316 3448adb0 2022-11-02 op * initialize the previous properties to have no property
317 3448adb0 2022-11-02 op * (given we are at the start of the buffer)
318 3448adb0 2022-11-02 op */
319 3448adb0 2022-11-02 op p->raw.prev_prop[1] = p->raw.prev_prop[0] = p->no_prop;
320 3448adb0 2022-11-02 op p->skip.prev_prop[1] = p->skip.prev_prop[0] = p->no_prop;
321 3448adb0 2022-11-02 op
322 3448adb0 2022-11-02 op /*
323 3448adb0 2022-11-02 op * initialize the next properties
324 3448adb0 2022-11-02 op */
325 3448adb0 2022-11-02 op
326 3448adb0 2022-11-02 op /* initialize the raw reader */
327 3448adb0 2022-11-02 op herodotus_reader_copy(r, &(p->raw_reader));
328 3448adb0 2022-11-02 op
329 3448adb0 2022-11-02 op /* fill in the two next raw properties (after no-initialization) */
330 3448adb0 2022-11-02 op p->raw.next_prop[0] = p->raw.next_prop[1] = p->no_prop;
331 3448adb0 2022-11-02 op for (i = 0; i < 2 && herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
332 3448adb0 2022-11-02 op HERODOTUS_STATUS_SUCCESS; ) {
333 3448adb0 2022-11-02 op p->raw.next_prop[i++] = p->get_break_prop(cp);
334 3448adb0 2022-11-02 op }
335 3448adb0 2022-11-02 op
336 3448adb0 2022-11-02 op /* initialize the skip reader */
337 3448adb0 2022-11-02 op herodotus_reader_copy(r, &(p->skip_reader));
338 3448adb0 2022-11-02 op
339 3448adb0 2022-11-02 op /* fill in the two next skip properties (after no-initialization) */
340 3448adb0 2022-11-02 op p->skip.next_prop[0] = p->skip.next_prop[1] = p->no_prop;
341 3448adb0 2022-11-02 op for (i = 0; i < 2 && herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
342 3448adb0 2022-11-02 op HERODOTUS_STATUS_SUCCESS; ) {
343 3448adb0 2022-11-02 op prop = p->get_break_prop(cp);
344 3448adb0 2022-11-02 op if (!p->is_skippable_prop(prop)) {
345 3448adb0 2022-11-02 op p->skip.next_prop[i++] = prop;
346 3448adb0 2022-11-02 op }
347 3448adb0 2022-11-02 op }
348 3448adb0 2022-11-02 op }
349 3448adb0 2022-11-02 op
350 3448adb0 2022-11-02 op int
351 3448adb0 2022-11-02 op proper_advance(struct proper *p)
352 3448adb0 2022-11-02 op {
353 3448adb0 2022-11-02 op uint_least8_t prop;
354 3448adb0 2022-11-02 op uint_least32_t cp;
355 3448adb0 2022-11-02 op
356 3448adb0 2022-11-02 op /* read in next "raw" property */
357 3448adb0 2022-11-02 op if (herodotus_read_codepoint(&(p->raw_reader), true, &cp) ==
358 3448adb0 2022-11-02 op HERODOTUS_STATUS_SUCCESS) {
359 3448adb0 2022-11-02 op prop = p->get_break_prop(cp);
360 3448adb0 2022-11-02 op } else {
361 3448adb0 2022-11-02 op prop = p->no_prop;
362 3448adb0 2022-11-02 op }
363 3448adb0 2022-11-02 op
364 3448adb0 2022-11-02 op /*
365 3448adb0 2022-11-02 op * do a shift-in, unless we find that the property that is to
366 3448adb0 2022-11-02 op * be moved past the "raw-viewing-line" (this property is stored
367 3448adb0 2022-11-02 op * in p->raw.next_prop[0]) is a no_prop, indicating that
368 3448adb0 2022-11-02 op * we are at the end of the buffer.
369 3448adb0 2022-11-02 op */
370 3448adb0 2022-11-02 op if (p->raw.next_prop[0] == p->no_prop) {
371 3448adb0 2022-11-02 op return 1;
372 3448adb0 2022-11-02 op }
373 3448adb0 2022-11-02 op
374 3448adb0 2022-11-02 op /* shift in the properties */
375 3448adb0 2022-11-02 op p->raw.prev_prop[1] = p->raw.prev_prop[0];
376 3448adb0 2022-11-02 op p->raw.prev_prop[0] = p->raw.next_prop[0];
377 3448adb0 2022-11-02 op p->raw.next_prop[0] = p->raw.next_prop[1];
378 3448adb0 2022-11-02 op p->raw.next_prop[1] = prop;
379 3448adb0 2022-11-02 op
380 3448adb0 2022-11-02 op /* advance the middle reader viewing-line */
381 3448adb0 2022-11-02 op (void)herodotus_read_codepoint(&(p->mid_reader), true, &cp);
382 3448adb0 2022-11-02 op
383 3448adb0 2022-11-02 op /* check skippability-property */
384 3448adb0 2022-11-02 op if (!p->is_skippable_prop(p->raw.prev_prop[0])) {
385 3448adb0 2022-11-02 op /*
386 3448adb0 2022-11-02 op * the property that has moved past the "raw-viewing-line"
387 3448adb0 2022-11-02 op * (this property is now (after the raw-shift) stored in
388 3448adb0 2022-11-02 op * p->raw.prev_prop[0] and guaranteed not to be a no-prop,
389 3448adb0 2022-11-02 op * guaranteeing that we won't shift a no-prop past the
390 3448adb0 2022-11-02 op * "viewing-line" in the skip-properties) is not a skippable
391 3448adb0 2022-11-02 op * property, thus we need to shift the skip property as well.
392 3448adb0 2022-11-02 op */
393 3448adb0 2022-11-02 op p->skip.prev_prop[1] = p->skip.prev_prop[0];
394 3448adb0 2022-11-02 op p->skip.prev_prop[0] = p->skip.next_prop[0];
395 3448adb0 2022-11-02 op p->skip.next_prop[0] = p->skip.next_prop[1];
396 3448adb0 2022-11-02 op
397 3448adb0 2022-11-02 op /*
398 3448adb0 2022-11-02 op * call the skip-shift-callback on the property that
399 3448adb0 2022-11-02 op * passed the skip-viewing-line (this property is now
400 3448adb0 2022-11-02 op * stored in p->skip.prev_prop[0]).
401 3448adb0 2022-11-02 op */
402 3448adb0 2022-11-02 op p->skip_shift_callback(p->skip.prev_prop[0], p->state);
403 3448adb0 2022-11-02 op
404 3448adb0 2022-11-02 op /* determine the next shift property */
405 3448adb0 2022-11-02 op p->skip.next_prop[1] = p->no_prop;
406 3448adb0 2022-11-02 op while (herodotus_read_codepoint(&(p->skip_reader), true, &cp) ==
407 3448adb0 2022-11-02 op HERODOTUS_STATUS_SUCCESS) {
408 3448adb0 2022-11-02 op prop = p->get_break_prop(cp);
409 3448adb0 2022-11-02 op if (!p->is_skippable_prop(prop)) {
410 3448adb0 2022-11-02 op p->skip.next_prop[1] = prop;
411 3448adb0 2022-11-02 op break;
412 3448adb0 2022-11-02 op }
413 3448adb0 2022-11-02 op }
414 3448adb0 2022-11-02 op }
415 3448adb0 2022-11-02 op
416 3448adb0 2022-11-02 op return 0;
417 3448adb0 2022-11-02 op }