1 3448adb0 2022-11-02 op /* See LICENSE file for copyright and license details. */
2 3448adb0 2022-11-02 op #include <stdbool.h>
3 3448adb0 2022-11-02 op #include <stddef.h>
5 3448adb0 2022-11-02 op #include "../gen/line.h"
6 3448adb0 2022-11-02 op #include "../grapheme.h"
7 3448adb0 2022-11-02 op #include "util.h"
/*
 * Look up the line break property of the codepoint cp.
 *
 * Values up to 0x10FFFF are resolved through a two-stage table:
 * line_break_major is indexed with cp >> 8 and the result is added
 * to the low byte (cp & 0xff) to form the index into
 * line_break_minor. The other return statement yields
 * LINE_BREAK_PROP_AL.
 *
 * NOTE(review): the tables presumably come from ../gen/line.h -- confirm.
 * NOTE(review): this listing omits some original lines (such as the
 * braces around the body), so it is not a complete definition as shown.
 */
9 3448adb0 2022-11-02 op static inline enum line_break_property
10 3448adb0 2022-11-02 op get_break_prop(uint_least32_t cp)
12 3448adb0 2022-11-02 op if (likely(cp <= UINT32_C(0x10FFFF))) {
13 3448adb0 2022-11-02 op return (enum line_break_property)
14 3448adb0 2022-11-02 op line_break_minor[line_break_major[cp >> 8] + (cp & 0xff)];
16 3448adb0 2022-11-02 op return LINE_BREAK_PROP_AL;
/*
 * Walk the input behind reader r pair by pair and evaluate the line
 * breaking rules of UAX #14, including the tailoring of LB25 that
 * the comments below attribute to example 7.
 *
 * In every iteration cp0_prop is the property of the codepoint on the
 * left of the candidate position and cp1_prop the property of the
 * codepoint on its right.
 *
 * State carried from one iteration to the next:
 *   last_non_cm_or_zwj_prop     last cp0_prop that was neither CM nor ZWJ
 *   last_non_sp_prop            last cp0_prop that was not SP
 *   last_non_sp_cm_or_zwj_prop  last value of last_non_cm_or_zwj_prop
 *                               that was not SP
 *   lb21a_flag                  true when the value being replaced in
 *                               last_non_cm_or_zwj_prop was HL
 *   ri_even                     toggled on every RI seen on the left
 *   lb25_level                  position (0 to 3) within the pattern
 *                               NU (NU | SY | IS)* (CL | CP)?
 *
 * The value returned is herodotus_reader_number_read(r); presumably
 * the amount of input consumed up to the break -- verify against the
 * reader API.
 *
 * NOTE(review): this listing omits many original lines (braces and
 * the statements inside most of the rule checks), so the action taken
 * by each rule cannot be read off the text below.
 */
21 3448adb0 2022-11-02 op next_line_break(HERODOTUS_READER *r)
23 3448adb0 2022-11-02 op HERODOTUS_READER tmp;
24 3448adb0 2022-11-02 op enum line_break_property cp0_prop, cp1_prop, last_non_cm_or_zwj_prop,
25 3448adb0 2022-11-02 op last_non_sp_prop, last_non_sp_cm_or_zwj_prop;
26 3448adb0 2022-11-02 op uint_least32_t cp;
27 3448adb0 2022-11-02 op uint_least8_t lb25_level = 0;
28 3448adb0 2022-11-02 op bool lb21a_flag = false, ri_even = true;
31 3448adb0 2022-11-02 op * Apply line breaking algorithm (UAX #14), see
32 3448adb0 2022-11-02 op * https://unicode.org/reports/tr14/#Algorithm and tailoring
33 3448adb0 2022-11-02 op * https://unicode.org/reports/tr14/#Examples (example 7),
34 3448adb0 2022-11-02 op * given the automatic test-cases implement this example for
35 3448adb0 2022-11-02 op * better number handling.
40 3448adb0 2022-11-02 op * Initialize the different properties such that we have
41 3448adb0 2022-11-02 op * a good state after the state-update in the loop
43 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop = LINE_BREAK_PROP_AL; /* according to LB10 */
44 3448adb0 2022-11-02 op last_non_sp_prop = last_non_sp_cm_or_zwj_prop = NUM_LINE_BREAK_PROPS;
/*
 * The init and step clauses read with the flag set to true and seed
 * or shift cp0_prop; the loop condition reads with the flag set to
 * false and supplies the codepoint for cp1_prop. Presumably true
 * advances the reader and false only peeks -- TODO confirm against
 * herodotus_read_codepoint.
 */
46 3448adb0 2022-11-02 op for (herodotus_read_codepoint(r, true, &cp), cp0_prop = get_break_prop(cp);
47 3448adb0 2022-11-02 op herodotus_read_codepoint(r, false, &cp) == HERODOTUS_STATUS_SUCCESS;
48 3448adb0 2022-11-02 op herodotus_read_codepoint(r, true, &cp), cp0_prop = cp1_prop) {
49 3448adb0 2022-11-02 op /* get property of the right codepoint */
50 3448adb0 2022-11-02 op cp1_prop = get_break_prop(cp);
52 3448adb0 2022-11-02 op /* update retention-states */
55 3448adb0 2022-11-02 op * store the last observed non-CM-or-ZWJ-property for
56 3448adb0 2022-11-02 op * LB9 and following.
58 3448adb0 2022-11-02 op if (cp0_prop != LINE_BREAK_PROP_CM &&
59 3448adb0 2022-11-02 op cp0_prop != LINE_BREAK_PROP_ZWJ) {
61 3448adb0 2022-11-02 op * check if the property we are overwriting now is an
62 3448adb0 2022-11-02 op * HL. If so, we set the LB21a-flag which depends on this
65 3448adb0 2022-11-02 op lb21a_flag = (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL);
67 3448adb0 2022-11-02 op /* check regional indicator state */
68 3448adb0 2022-11-02 op if (cp0_prop == LINE_BREAK_PROP_RI) {
70 3448adb0 2022-11-02 op * The property we just shifted in is
71 3448adb0 2022-11-02 op * a regional indicator, increasing the
72 3448adb0 2022-11-02 op * number of consecutive RIs on the left
73 3448adb0 2022-11-02 op * side of the breakpoint by one, changing
74 3448adb0 2022-11-02 op * the oddness.
77 3448adb0 2022-11-02 op ri_even = !ri_even;
80 3448adb0 2022-11-02 op * We saw no regional indicator, so the
81 3448adb0 2022-11-02 op * number of consecutive RIs on the left
82 3448adb0 2022-11-02 op * side of the breakpoint is zero, which
83 3448adb0 2022-11-02 op * is an even number.
86 3448adb0 2022-11-02 op ri_even = true;
90 3448adb0 2022-11-02 op * Here comes a bit of magic. The tailored rule
91 3448adb0 2022-11-02 op * LB25 (using example 7) has a very complicated
92 3448adb0 2022-11-02 op * left-hand-side-rule of the form
94 3448adb0 2022-11-02 op * NU (NU | SY | IS)* (CL | CP)?
96 3448adb0 2022-11-02 op * but instead of backtracking, we keep the state
97 3448adb0 2022-11-02 op * as some kind of "power level" in the variable
101 3448adb0 2022-11-02 op * that goes from 0 to 3
103 3448adb0 2022-11-02 op * 0: we are not in the sequence
104 3448adb0 2022-11-02 op * 1: we have one NU to the left of the middle
106 3448adb0 2022-11-02 op * 2: we have one NU and one or more (NU | SY | IS)
107 3448adb0 2022-11-02 op * to the left of the middle spot
108 3448adb0 2022-11-02 op * 3: we have one NU, zero or more (NU | SY | IS)
109 3448adb0 2022-11-02 op * and one (CL | CP) to the left of the middle
112 3448adb0 2022-11-02 op if ((lb25_level == 0 ||
113 3448adb0 2022-11-02 op lb25_level == 1) &&
114 3448adb0 2022-11-02 op cp0_prop == LINE_BREAK_PROP_NU) {
115 3448adb0 2022-11-02 op /* sequence has begun */
116 3448adb0 2022-11-02 op lb25_level = 1;
117 3448adb0 2022-11-02 op } else if ((lb25_level == 1 || lb25_level == 2) &&
118 3448adb0 2022-11-02 op (cp0_prop == LINE_BREAK_PROP_NU ||
119 3448adb0 2022-11-02 op cp0_prop == LINE_BREAK_PROP_SY ||
120 3448adb0 2022-11-02 op cp0_prop == LINE_BREAK_PROP_IS)) {
121 3448adb0 2022-11-02 op /* (NU | SY | IS) sequence begins or continued */
122 3448adb0 2022-11-02 op lb25_level = 2;
123 3448adb0 2022-11-02 op } else if ((lb25_level == 1 || lb25_level == 2) &&
124 3448adb0 2022-11-02 op (cp0_prop == LINE_BREAK_PROP_CL ||
125 3448adb0 2022-11-02 op cp0_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
126 3448adb0 2022-11-02 op cp0_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
127 3448adb0 2022-11-02 op /* CL or CP at the end of the sequence */
128 3448adb0 2022-11-02 op lb25_level = 3;
130 3448adb0 2022-11-02 op /* sequence broke */
131 3448adb0 2022-11-02 op lb25_level = 0;
134 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop = cp0_prop;
138 3448adb0 2022-11-02 op * store the last observed non-SP-property for LB8, LB14,
139 3448adb0 2022-11-02 op * LB15, LB16 and LB17. LB8 gets its own unskipped property,
140 3448adb0 2022-11-02 op * whereas the others build on top of the CM-ZWJ-skipped
141 3448adb0 2022-11-02 op * properties as they come after LB9
143 3448adb0 2022-11-02 op if (cp0_prop != LINE_BREAK_PROP_SP) {
144 3448adb0 2022-11-02 op last_non_sp_prop = cp0_prop;
146 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop != LINE_BREAK_PROP_SP) {
147 3448adb0 2022-11-02 op last_non_sp_cm_or_zwj_prop = last_non_cm_or_zwj_prop;
150 3448adb0 2022-11-02 op /* apply the algorithm */
153 3448adb0 2022-11-02 op if (cp0_prop == LINE_BREAK_PROP_BK) {
158 3448adb0 2022-11-02 op if (cp0_prop == LINE_BREAK_PROP_CR &&
159 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_LF) {
162 3448adb0 2022-11-02 op if (cp0_prop == LINE_BREAK_PROP_CR ||
163 3448adb0 2022-11-02 op cp0_prop == LINE_BREAK_PROP_LF ||
164 3448adb0 2022-11-02 op cp0_prop == LINE_BREAK_PROP_NL) {
169 3448adb0 2022-11-02 op if (cp1_prop == LINE_BREAK_PROP_BK ||
170 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_CR ||
171 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_LF ||
172 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_NL) {
177 3448adb0 2022-11-02 op if (cp1_prop == LINE_BREAK_PROP_SP ||
178 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_ZW) {
183 3448adb0 2022-11-02 op if (last_non_sp_prop == LINE_BREAK_PROP_ZW) {
188 3448adb0 2022-11-02 op if (cp0_prop == LINE_BREAK_PROP_ZWJ) {
193 3448adb0 2022-11-02 op if ((cp0_prop != LINE_BREAK_PROP_BK &&
194 3448adb0 2022-11-02 op cp0_prop != LINE_BREAK_PROP_CR &&
195 3448adb0 2022-11-02 op cp0_prop != LINE_BREAK_PROP_LF &&
196 3448adb0 2022-11-02 op cp0_prop != LINE_BREAK_PROP_NL &&
197 3448adb0 2022-11-02 op cp0_prop != LINE_BREAK_PROP_SP &&
198 3448adb0 2022-11-02 op cp0_prop != LINE_BREAK_PROP_ZW) &&
199 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_CM ||
200 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_ZWJ)) {
202 3448adb0 2022-11-02 op * given we skip them, we don't break in such
208 3448adb0 2022-11-02 op /* LB10 is baked into the following rules */
211 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_WJ ||
212 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_WJ) {
217 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_GL) {
222 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop != LINE_BREAK_PROP_SP &&
223 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop != LINE_BREAK_PROP_BA &&
224 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop != LINE_BREAK_PROP_HY) &&
225 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_GL) {
229 3448adb0 2022-11-02 op /* LB13 (affected by tailoring for LB25, see example 7) */
230 3448adb0 2022-11-02 op if (cp1_prop == LINE_BREAK_PROP_EX ||
231 3448adb0 2022-11-02 op (last_non_cm_or_zwj_prop != LINE_BREAK_PROP_NU &&
232 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_CL ||
233 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
234 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF ||
235 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_IS ||
236 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_SY))) {
241 3448adb0 2022-11-02 op if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
242 3448adb0 2022-11-02 op last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF) {
247 3448adb0 2022-11-02 op if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_QU &&
248 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
249 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF)) {
254 3448adb0 2022-11-02 op if ((last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CL ||
255 3448adb0 2022-11-02 op last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
256 3448adb0 2022-11-02 op last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF) &&
257 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_NS) {
262 3448adb0 2022-11-02 op if (last_non_sp_cm_or_zwj_prop == LINE_BREAK_PROP_B2 &&
263 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_B2) {
268 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_SP) {
273 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_QU ||
274 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_QU) {
279 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CB ||
280 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_CB) {
285 3448adb0 2022-11-02 op if (cp1_prop == LINE_BREAK_PROP_BA ||
286 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_HY ||
287 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_NS ||
288 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BB) {
293 3448adb0 2022-11-02 op if (lb21a_flag &&
294 3448adb0 2022-11-02 op (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY ||
295 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BA)) {
300 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_SY &&
301 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_HL) {
306 3448adb0 2022-11-02 op if (cp1_prop == LINE_BREAK_PROP_IN) {
311 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
312 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
313 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_NU) {
316 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU &&
317 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_AL ||
318 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_HL)) {
323 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR &&
324 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_ID ||
325 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_EB ||
326 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_EM)) {
329 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_ID ||
330 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_EB ||
331 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_EM) &&
332 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_PO) {
337 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR ||
338 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO) &&
339 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_AL ||
340 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_HL)) {
343 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
344 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
345 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_PR ||
346 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_PO)) {
350 3448adb0 2022-11-02 op /* LB25 (tailored with example 7) */
351 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR ||
352 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PO)) {
353 3448adb0 2022-11-02 op if (cp1_prop == LINE_BREAK_PROP_NU) {
357 3448adb0 2022-11-02 op /* this stupid rule is the reason why we cannot
358 3448adb0 2022-11-02 op * simply have a stateful break-detection between
359 3448adb0 2022-11-02 op * two adjacent codepoints as we have it with
360 3448adb0 2022-11-02 op * characters.
362 3448adb0 2022-11-02 op herodotus_reader_copy(r, &tmp);
363 3448adb0 2022-11-02 op herodotus_read_codepoint(&tmp, true, &cp);
364 3448adb0 2022-11-02 op if (herodotus_read_codepoint(&tmp, true, &cp) ==
365 3448adb0 2022-11-02 op HERODOTUS_STATUS_SUCCESS &&
366 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
367 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF ||
368 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_HY)) {
369 3448adb0 2022-11-02 op if (get_break_prop(cp) == LINE_BREAK_PROP_NU) {
374 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF ||
375 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_OP_WITH_EAW_HWF ||
376 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HY) &&
377 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_NU) {
380 3448adb0 2022-11-02 op if (lb25_level == 1 &&
381 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_NU ||
382 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_SY ||
383 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_IS)) {
386 3448adb0 2022-11-02 op if ((lb25_level == 1 || lb25_level == 2) &&
387 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_NU ||
388 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_SY ||
389 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_IS ||
390 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_CL ||
391 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF ||
392 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_CP_WITH_EAW_HWF)) {
395 3448adb0 2022-11-02 op if ((lb25_level == 1 || lb25_level == 2 || lb25_level == 3) &&
396 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_PO ||
397 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_PR)) {
402 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JL &&
403 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_JL ||
404 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_JV ||
405 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_H2 ||
406 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_H3)) {
409 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JV ||
410 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H2) &&
411 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_JV ||
412 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_JT)) {
415 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JT ||
416 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H3) &&
417 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_JT) {
422 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JL ||
423 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JV ||
424 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_JT ||
425 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H2 ||
426 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_H3) &&
427 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_PO) {
430 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_PR &&
431 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_JL ||
432 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_JV ||
433 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_JT ||
434 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_H2 ||
435 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_H3)) {
440 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
441 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL) &&
442 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_AL ||
443 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_HL)) {
448 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_IS &&
449 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_AL ||
450 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_HL)) {
455 3448adb0 2022-11-02 op if ((last_non_cm_or_zwj_prop == LINE_BREAK_PROP_AL ||
456 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_HL ||
457 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_NU) &&
458 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_OP_WITHOUT_EAW_HWF) {
461 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_CP_WITHOUT_EAW_HWF &&
462 3448adb0 2022-11-02 op (cp1_prop == LINE_BREAK_PROP_AL ||
463 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_HL ||
464 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_NU)) {
469 3448adb0 2022-11-02 op if (!ri_even &&
470 3448adb0 2022-11-02 op last_non_cm_or_zwj_prop == LINE_BREAK_PROP_RI &&
471 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_RI) {
476 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_EB &&
477 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_EM) {
480 3448adb0 2022-11-02 op if (last_non_cm_or_zwj_prop == LINE_BREAK_PROP_BOTH_CN_EXTPICT &&
481 3448adb0 2022-11-02 op cp1_prop == LINE_BREAK_PROP_EM) {
489 3448adb0 2022-11-02 op return herodotus_reader_number_read(r);
// Entry point for input given as an array of codepoints: str points
// to the array and len is its length.
// Sets up a reader of type HERODOTUS_TYPE_CODEPOINT over the input
// and returns whatever next_line_break() yields for it.
// NOTE(review): the return type line and the braces are not part of
// this listing.
493 3448adb0 2022-11-02 op grapheme_next_line_break(const uint_least32_t *str, size_t len)
495 3448adb0 2022-11-02 op HERODOTUS_READER r;
497 3448adb0 2022-11-02 op herodotus_reader_init(&r, HERODOTUS_TYPE_CODEPOINT, str, len);
499 3448adb0 2022-11-02 op return next_line_break(&r);
// Entry point for input given as a char buffer: str points to the
// buffer and len is its length.
// Sets up a reader of type HERODOTUS_TYPE_UTF8 over the input and
// returns whatever next_line_break() yields for it.
// NOTE(review): the return type line and the braces are not part of
// this listing.
503 3448adb0 2022-11-02 op grapheme_next_line_break_utf8(const char *str, size_t len)
505 3448adb0 2022-11-02 op HERODOTUS_READER r;
507 3448adb0 2022-11-02 op herodotus_reader_init(&r, HERODOTUS_TYPE_UTF8, str, len);
509 3448adb0 2022-11-02 op return next_line_break(&r);