Blob


1 cat << EOF
2 .Dd ${MAN_DATE}
3 .Dt GRAPHEME_DECODE_UTF8 3
4 .Os suckless.org
5 .Sh NAME
6 .Nm grapheme_decode_utf8
7 .Nd decode first codepoint in UTF-8-encoded string
8 .Sh SYNOPSIS
9 .In grapheme.h
10 .Ft size_t
11 .Fn grapheme_decode_utf8 "const char *str" "size_t len" "uint_least32_t *cp"
12 .Sh DESCRIPTION
13 The
14 .Fn grapheme_decode_utf8
15 function decodes the first codepoint in the UTF-8-encoded string
16 .Va str
17 of length
18 .Va len .
19 If the UTF-8 sequence is invalid (overlong encoding, unexpected byte,
20 string ends unexpectedly, empty string, etc.), decoding stops
21 at the last processed byte and the decoded codepoint is set to
22 .Dv GRAPHEME_INVALID_CODEPOINT .
23 .Pp
24 If
25 .Va cp
26 is not
27 .Dv NULL
28 the decoded codepoint is stored in the memory pointed to by
29 .Va cp .
30 .Pp
31 Given that NUL has a unique 1-byte representation, it is safe to operate on
32 NUL-terminated strings by setting
33 .Va len
34 to
35 .Dv SIZE_MAX
36 (stdint.h is already included by grapheme.h) and terminating when
37 .Va cp
38 is 0 (see
39 .Sx EXAMPLES
40 for an example).
41 .Sh RETURN VALUES
42 The
43 .Fn grapheme_decode_utf8
44 function returns the number of processed bytes, or 0 if
45 .Va str
46 is
47 .Dv NULL
48 or
49 .Va len
50 is 0.
51 If the string ends unexpectedly in a multibyte sequence, the desired
52 length (that is larger than
53 .Va len )
54 is returned.
55 .Sh EXAMPLES
56 .Bd -literal
57 /* cc (-static) -o example example.c -lgrapheme */
58 #include <grapheme.h>
59 #include <inttypes.h>
60 #include <stdio.h>
62 void
63 print_cps(const char *str, size_t len)
64 {
65 size_t ret, off;
66 uint_least32_t cp;
68 for (off = 0; off < len; off += ret) {
69 if ((ret = grapheme_decode_utf8(str + off,
70 len - off, &cp)) > (len - off)) {
71 /*
72 * string ended unexpectedly in the middle of a
73 * multibyte sequence and we have the choice
74 * here to possibly expand str by ret - len + off
75 * bytes to get a full sequence, but we just
76 * bail out in this case.
77 */
78 break;
79 }
80 printf("%"PRIxLEAST32"\\\\n", cp);
81 }
82 }
84 void
85 print_cps_nul_terminated(const char *str)
86 {
87 size_t ret, off;
88 uint_least32_t cp;
90 for (off = 0; (ret = grapheme_decode_utf8(str + off,
91 SIZE_MAX, &cp)) > 0 &&
92 cp != 0; off += ret) {
93 printf("%"PRIxLEAST32"\\\\n", cp);
94 }
95 }
96 .Ed
97 .Sh SEE ALSO
98 .Xr grapheme_encode_utf8 3 ,
99 .Xr libgrapheme 7
100 .Sh AUTHORS
101 .An Laslo Hunhold Aq Mt dev@frign.de
102 EOF