Blob


# Derive the naming pieces for the requested variant: the unit in which
# offsets are expressed, the suffix of the function documented here and
# the suffix of its counterpart for the other input representation.
case "$ENCODING" in
utf8)
	UNIT="byte"
	SUFFIX="_utf8"
	ANTISUFFIX=""
	;;
*)
	UNIT="codepoint"
	SUFFIX=""
	ANTISUFFIX="_utf8"
	;;
esac
# Emit the page header, NAME, SYNOPSIS, DESCRIPTION and RETURN VALUES
# sections. The heredoc delimiter is unquoted, so every parameter
# expansion and command substitution below is evaluated at generation
# time; the inline conditionals select the wording for the UTF-8 or
# codepoint variant and for the break type.
#
# The tr(1) character classes are quoted so the shell cannot glob-expand
# them against file names in the working directory, and ${REALTYPE} is
# passed to printf as an argument rather than spliced into the format.
cat << EOF
.Dd ${MAN_DATE}
.Dt GRAPHEME_NEXT_$(printf "%s_break%s" "$TYPE" "$SUFFIX" | tr '[:lower:]' '[:upper:]') 3
.Os suckless.org
.Sh NAME
.Nm grapheme_next_${TYPE}_break${SUFFIX}
.Nd determine ${UNIT}-offset to next ${REALTYPE} break
.Sh SYNOPSIS
.In grapheme.h
.Ft size_t
.Fn grapheme_next_${TYPE}_break${SUFFIX} "const $(if [ "$ENCODING" = "utf8" ]; then printf "char"; else printf "uint_least32_t"; fi) *str" "size_t len"
.Sh DESCRIPTION
The
.Fn grapheme_next_${TYPE}_break${SUFFIX}
function computes the offset (in ${UNIT}s) to the next ${REALTYPE}
break (see
.Xr libgrapheme 7 )
in the $(if [ "$ENCODING" = "utf8" ]; then printf "UTF-8-encoded string"; else printf "codepoint array"; fi)
.Va str
of length
.Va len .$(if [ "$TYPE" != "line" ]; then printf "\nIf a %s begins at\n.Va str\nthis offset is equal to the length of said %s." "$REALTYPE" "$REALTYPE"; fi)
.Pp
If
.Va len
is set to
.Dv SIZE_MAX
(stdint.h is already included by grapheme.h) the string
.Va str
is interpreted to be NUL-terminated and processing stops when
a $(if [ "$ENCODING" = "utf8" ]; then printf "NUL-byte"; else printf "codepoint with the value 0"; fi) is encountered.
.Pp
For $(if [ "$ENCODING" != "utf8" ]; then printf "UTF-8-encoded"; else printf "non-UTF-8"; fi) input
data$(if [ "$TYPE" = "character" ] && [ "$ENCODING" = "utf8" ]; then printf "\n.Xr grapheme_is_character_break 3 and"; fi)
.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3
can be used instead.
.Sh RETURN VALUES
The
.Fn grapheme_next_${TYPE}_break${SUFFIX}
function returns the offset (in ${UNIT}s) to the next ${REALTYPE}
break in
.Va str
or 0 if
.Va str
is
.Dv NULL .
EOF
# The EXAMPLES section is emitted only for the UTF-8 variant.
#
# Inside the unquoted heredoc every "\\\\" is reduced by the shell to
# "\\", which roff in turn renders as a single backslash, so the page
# shows ordinary C escape sequences such as \xC3 and \n.
#
# The C program is laid out with indentation and blank lines because
# ".Bd -literal" reproduces the text verbatim; the string literal is
# bound to a pointer-to-const since it is never modified.
if [ "$ENCODING" = "utf8" ]; then
cat << EOF
.Sh EXAMPLES
.Bd -literal
/* cc (-static) -o example example.c -lgrapheme */
#include <grapheme.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* UTF-8 encoded input */
	const char *s = "T\\\\xC3\\\\xABst \\\\xF0\\\\x9F\\\\x91\\\\xA8\\\\xE2\\\\x80\\\\x8D\\\\xF0"
	                "\\\\x9F\\\\x91\\\\xA9\\\\xE2\\\\x80\\\\x8D\\\\xF0\\\\x9F\\\\x91\\\\xA6 \\\\xF0"
	                "\\\\x9F\\\\x87\\\\xBA\\\\xF0\\\\x9F\\\\x87\\\\xB8 \\\\xE0\\\\xA4\\\\xA8\\\\xE0"
	                "\\\\xA5\\\\x80 \\\\xE0\\\\xAE\\\\xA8\\\\xE0\\\\xAE\\\\xBF!";
	size_t ret, len, off;

	printf("Input: \\\\"%s\\\\"\\\\n", s);

	/* print each ${REALTYPE} with byte-length */
	printf("${REALTYPE}s in NUL-delimited input:\\\\n");
	for (off = 0; s[off] != '\\\\0'; off += ret) {
		ret = grapheme_next_${TYPE}_break_utf8(s + off, SIZE_MAX);
		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
	}
	printf("\\\\n");

	/* do the same, but this time string is length-delimited */
	len = 17;
	printf("${REALTYPE}s in input delimited to %zu bytes:\\\\n", len);
	for (off = 0; off < len; off += ret) {
		ret = grapheme_next_${TYPE}_break_utf8(s + off, len - off);
		printf("%2zu bytes | %.*s\\\\n", ret, (int)ret, s + off);
	}

	return 0;
}
.Ed
EOF
fi
# Closing sections: SEE ALSO, STANDARDS and AUTHORS. For the character
# break type in the non-UTF-8 variant an extra cross reference to
# grapheme_is_character_break(3) is prepended to the SEE ALSO list.
cat << EOF
.Sh SEE ALSO$([ "$TYPE" = "character" ] && [ "$ENCODING" != "utf8" ] && printf "\n.Xr grapheme_is_character_break 3 ,")
.Xr grapheme_next_${TYPE}_break${ANTISUFFIX} 3 ,
.Xr libgrapheme 7
.Sh STANDARDS
.Fn grapheme_next_${TYPE}_break${SUFFIX}
is compliant with the Unicode ${UNICODE_VERSION} specification.
.Sh AUTHORS
.An Laslo Hunhold Aq Mt dev@frign.de
EOF