3 .Dt GRAPHEME_DECODE_UTF8 3
6 .Nm grapheme_decode_utf8
7 .Nd decode first codepoint in UTF-8-encoded string
11 .Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
14 .Fn grapheme_decode_utf8
15 function decodes the first codepoint in the UTF-8-encoded string
19 If the UTF-8 sequence is invalid (overlong encoding, unexpected byte,
20 string ends unexpectedly, empty string, etc.) the decoding is stopped
21 at the last processed byte and the decoded codepoint is set to
22 .Dv GRAPHEME_INVALID_CODEPOINT .
28 the decoded codepoint is stored in the memory pointed to by
31 Since NUL has a unique 1-byte representation, it is safe to operate on
32 NUL-terminated strings by setting
36 (stdint.h is already included by grapheme.h) and terminating when
43 .Fn grapheme_decode_utf8
44 function returns the number of processed bytes and 0 if
51 If the string ends unexpectedly in a multibyte sequence, the desired
52 length (that is larger than
57 /* cc (-static) -o example example.c -lgrapheme */
63 print_cps(const char *str, size_t len)
68 for (off = 0; off < len; off += ret) {
69 if ((ret = grapheme_decode_utf8(str + off,
70 len - off, &cp)) > (len - off)) {
72 * string ended unexpectedly in the middle of a
73 * multibyte sequence and we have the choice
74 * here to possibly expand str by ret - len + off
75 * bytes to get a full sequence, but we just
76 * bail out in this case.
80 printf("%"PRIxLEAST32"\\\\n", cp);
85 print_cps_nul_terminated(const char *str)
90 for (off = 0; (ret = grapheme_decode_utf8(str + off,
91 SIZE_MAX, &cp)) > 0 &&
92 cp != 0; off += ret) {
93 printf("%"PRIxLEAST32"\\\\n", cp);
98 .Xr grapheme_encode_utf8 3 ,
101 .An Laslo Hunhold Aq Mt dev@frign.de