Blame


1 3448adb0 2022-11-02 op cat << EOF
2 3448adb0 2022-11-02 op .Dd ${MAN_DATE}
3 3448adb0 2022-11-02 op .Dt GRAPHEME_ENCODE_UTF8 3
4 3448adb0 2022-11-02 op .Os suckless.org
5 3448adb0 2022-11-02 op .Sh NAME
6 3448adb0 2022-11-02 op .Nm grapheme_encode_utf8
7 3448adb0 2022-11-02 op .Nd encode codepoint into UTF-8 string
8 3448adb0 2022-11-02 op .Sh SYNOPSIS
9 3448adb0 2022-11-02 op .In grapheme.h
10 3448adb0 2022-11-02 op .Ft size_t
11 3448adb0 2022-11-02 op .Fn grapheme_encode_utf8 "uint_least32_t cp" "char *str" "size_t len"
12 3448adb0 2022-11-02 op .Sh DESCRIPTION
13 3448adb0 2022-11-02 op The
14 3448adb0 2022-11-02 op .Fn grapheme_encode_utf8
15 3448adb0 2022-11-02 op function encodes the codepoint
16 3448adb0 2022-11-02 op .Va cp
17 3448adb0 2022-11-02 op into a UTF-8 string.
18 3448adb0 2022-11-02 op If
19 3448adb0 2022-11-02 op .Va str
20 3448adb0 2022-11-02 op is not
21 3448adb0 2022-11-02 op .Dv NULL
22 3448adb0 2022-11-02 op and
23 3448adb0 2022-11-02 op .Va len
24 3448adb0 2022-11-02 op is large enough, it writes the UTF-8 string to the memory pointed to by
25 3448adb0 2022-11-02 op .Va str .
26 3448adb0 2022-11-02 op Otherwise no data is written.
27 3448adb0 2022-11-02 op .Sh RETURN VALUES
28 3448adb0 2022-11-02 op The
29 3448adb0 2022-11-02 op .Fn grapheme_encode_utf8
30 3448adb0 2022-11-02 op function returns the length (in bytes) of the UTF-8 string resulting
31 3448adb0 2022-11-02 op from encoding
32 3448adb0 2022-11-02 op .Va cp ,
33 3448adb0 2022-11-02 op even if
34 3448adb0 2022-11-02 op .Va len
35 3448adb0 2022-11-02 op is not large enough or
36 3448adb0 2022-11-02 op .Va str
37 3448adb0 2022-11-02 op is
38 3448adb0 2022-11-02 op .Dv NULL .
39 3448adb0 2022-11-02 op .Sh EXAMPLES
40 3448adb0 2022-11-02 op .Bd -literal
41 3448adb0 2022-11-02 op /* cc (-static) -o example example.c -lgrapheme */
42 3448adb0 2022-11-02 op #include <grapheme.h>
43 3448adb0 2022-11-02 op #include <stddef.h>
44 3448adb0 2022-11-02 op #include <stdlib.h>
45 3448adb0 2022-11-02 op
/*
 * Encode the cplen codepoints in cp as UTF-8 into str, which offers
 * len bytes of space. Encoding stops at the first codepoint that does
 * not fit into the remaining space. Returns the combined encoded length
 * (in bytes) of the codepoints that did fit.
 */
size_t
cps_to_utf8(const uint_least32_t *cp, size_t cplen, char *str, size_t len)
{
	size_t pos = 0, written = 0, need;

	while (pos < cplen) {
		need = grapheme_encode_utf8(cp[pos], str + written,
		                            len - written);
		if (need > len - written) {
			/* remaining space cannot hold this codepoint */
			break;
		}

		written += need;
		pos++;
	}

	return written;
}
61 3448adb0 2022-11-02 op
/*
 * Return the number of bytes needed to encode the cplen codepoints in
 * cp as UTF-8. Nothing is written anywhere.
 */
size_t
cps_bytelen(const uint_least32_t *cp, size_t cplen)
{
	size_t total = 0, pos = 0;

	while (pos < cplen) {
		/* with a NULL buffer only the encoded length is reported */
		total += grapheme_encode_utf8(cp[pos++], NULL, 0);
	}

	return total;
}
73 3448adb0 2022-11-02 op
/*
 * Encode the cplen codepoints in cp into a newly allocated,
 * NUL-terminated UTF-8 string. Returns NULL if the allocation fails.
 * The returned buffer comes from malloc(), so the caller has to
 * free() it.
 */
char *
cps_to_utf8_alloc(const uint_least32_t *cp, size_t cplen)
{
	char *str;
	size_t len, off;

	len = cps_bytelen(cp, cplen);

	/* reserve one byte beyond the encoded data for the terminator */
	if (!(str = malloc(len + 1))) {
		return NULL;
	}

	/* off is at most len, so the terminator stays inside the buffer */
	off = cps_to_utf8(cp, cplen, str, len);
	str[off] = '\\\\0';

	return str;
}
97 3448adb0 2022-11-02 op .Ed
98 3448adb0 2022-11-02 op .Sh SEE ALSO
99 3448adb0 2022-11-02 op .Xr grapheme_decode_utf8 3 ,
100 3448adb0 2022-11-02 op .Xr libgrapheme 7
101 3448adb0 2022-11-02 op .Sh AUTHORS
102 3448adb0 2022-11-02 op .An Laslo Hunhold Aq Mt dev@frign.de
103 3448adb0 2022-11-02 op EOF