2 3448adb0 2022-11-02 op .Dd ${MAN_DATE}
3 3448adb0 2022-11-02 op .Dt GRAPHEME_DECODE_UTF8 3
4 3448adb0 2022-11-02 op .Os suckless.org
6 3448adb0 2022-11-02 op .Nm grapheme_decode_utf8
7 3448adb0 2022-11-02 op .Nd decode first codepoint in UTF-8-encoded string
11 3448adb0 2022-11-02 op .Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
12 3448adb0 2022-11-02 op .Sh DESCRIPTION
14 3448adb0 2022-11-02 op .Fn grapheme_decode_utf8
15 3448adb0 2022-11-02 op function decodes the first codepoint in the UTF-8-encoded string
19 3448adb0 2022-11-02 op If the UTF-8 sequence is invalid (overlong encoding, unexpected byte,
20 3448adb0 2022-11-02 op string ends unexpectedly, empty string, etc.) the decoding is stopped
21 3448adb0 2022-11-02 op at the last processed byte and the decoded codepoint is set to
22 3448adb0 2022-11-02 op .Dv GRAPHEME_INVALID_CODEPOINT .
28 3448adb0 2022-11-02 op the decoded codepoint is stored in the memory pointed to by
31 3448adb0 2022-11-02 op Given NUL has a unique 1-byte representation, it is safe to operate on
32 3448adb0 2022-11-02 op NUL-terminated strings by setting
36 3448adb0 2022-11-02 op (stdint.h is already included by grapheme.h) and terminating when
40 3448adb0 2022-11-02 op for an example).
41 3448adb0 2022-11-02 op .Sh RETURN VALUES
43 3448adb0 2022-11-02 op .Fn grapheme_decode_utf8
44 3448adb0 2022-11-02 op function returns the number of processed bytes and 0 if
51 3448adb0 2022-11-02 op If the string ends unexpectedly in a multibyte sequence, the desired
52 3448adb0 2022-11-02 op length (that is larger than
57 3448adb0 2022-11-02 op /* cc (-static) -o example example.c -lgrapheme */
58 3448adb0 2022-11-02 op #include <grapheme.h>
59 3448adb0 2022-11-02 op #include <inttypes.h>
60 3448adb0 2022-11-02 op #include <stdio.h>
63 3448adb0 2022-11-02 op print_cps(const char *str, size_t len)
65 3448adb0 2022-11-02 op size_t ret, off;
66 3448adb0 2022-11-02 op uint_least32_t cp;
68 3448adb0 2022-11-02 op for (off = 0; off < len; off += ret) {
69 3448adb0 2022-11-02 op if ((ret = grapheme_decode_utf8(str + off,
70 3448adb0 2022-11-02 op len - off, &cp)) > (len - off)) {
72 3448adb0 2022-11-02 op * string ended unexpectedly in the middle of a
73 3448adb0 2022-11-02 op * multibyte sequence and we have the choice
74 3448adb0 2022-11-02 op * here to possibly expand str by ret - len + off
75 3448adb0 2022-11-02 op * bytes to get a full sequence, but we just
76 3448adb0 2022-11-02 op * bail out in this case.
80 3448adb0 2022-11-02 op printf("%"PRIxLEAST32"\\\\n", cp);
85 3448adb0 2022-11-02 op print_cps_nul_terminated(const char *str)
87 3448adb0 2022-11-02 op size_t ret, off;
88 3448adb0 2022-11-02 op uint_least32_t cp;
90 3448adb0 2022-11-02 op for (off = 0; (ret = grapheme_decode_utf8(str + off,
91 3448adb0 2022-11-02 op SIZE_MAX, &cp)) > 0 &&
92 3448adb0 2022-11-02 op cp != 0; off += ret) {
93 3448adb0 2022-11-02 op printf("%"PRIxLEAST32"\\\\n", cp);
98 3448adb0 2022-11-02 op .Xr grapheme_encode_utf8 3 ,
99 3448adb0 2022-11-02 op .Xr libgrapheme 7
101 3448adb0 2022-11-02 op .An Laslo Hunhold Aq Mt dev@frign.de