Blame


1 91c13e54 2004-02-29 devnull /*
2 91c13e54 2004-02-29 devnull * The authors of this software are Rob Pike and Ken Thompson.
3 91c13e54 2004-02-29 devnull * Copyright (c) 2002 by Lucent Technologies.
4 91c13e54 2004-02-29 devnull * Permission to use, copy, modify, and distribute this software for any
5 91c13e54 2004-02-29 devnull * purpose without fee is hereby granted, provided that this entire notice
6 91c13e54 2004-02-29 devnull * is included in all copies of any software which is or includes a copy
7 91c13e54 2004-02-29 devnull * or modification of this software and in all copies of the supporting
8 91c13e54 2004-02-29 devnull * documentation for such software.
9 91c13e54 2004-02-29 devnull * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
10 37488654 2004-12-29 devnull * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
11 37488654 2004-12-29 devnull * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
12 91c13e54 2004-02-29 devnull * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
13 91c13e54 2004-02-29 devnull */
14 91c13e54 2004-02-29 devnull #include <stdarg.h>
15 91c13e54 2004-02-29 devnull #include <string.h>
16 e5aa96ac 2004-12-26 devnull #include "plan9.h"
17 91c13e54 2004-02-29 devnull #include "utf.h"
18 91c13e54 2004-02-29 devnull
19 91c13e54 2004-02-29 devnull enum
20 91c13e54 2004-02-29 devnull {
21 91c13e54 2004-02-29 devnull Bit1 = 7,
22 91c13e54 2004-02-29 devnull Bitx = 6,
23 91c13e54 2004-02-29 devnull Bit2 = 5,
24 91c13e54 2004-02-29 devnull Bit3 = 4,
25 91c13e54 2004-02-29 devnull Bit4 = 3,
26 0cadb430 2009-09-11 russcox Bit5 = 2,
27 91c13e54 2004-02-29 devnull
28 91c13e54 2004-02-29 devnull T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
29 91c13e54 2004-02-29 devnull Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
30 91c13e54 2004-02-29 devnull T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
31 91c13e54 2004-02-29 devnull T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
32 91c13e54 2004-02-29 devnull T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
33 0cadb430 2009-09-11 russcox T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
34 91c13e54 2004-02-29 devnull
35 0cadb430 2009-09-11 russcox Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
36 0cadb430 2009-09-11 russcox Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
37 0cadb430 2009-09-11 russcox Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
38 0cadb430 2009-09-11 russcox Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */
39 91c13e54 2004-02-29 devnull
40 91c13e54 2004-02-29 devnull Maskx = (1<<Bitx)-1, /* 0011 1111 */
41 91c13e54 2004-02-29 devnull Testx = Maskx ^ 0xFF, /* 1100 0000 */
42 91c13e54 2004-02-29 devnull
43 cbeb0b26 2006-04-01 devnull Bad = Runeerror
44 91c13e54 2004-02-29 devnull };
45 91c13e54 2004-02-29 devnull
46 91c13e54 2004-02-29 devnull int
47 91c13e54 2004-02-29 devnull chartorune(Rune *rune, char *str)
48 91c13e54 2004-02-29 devnull {
49 0cadb430 2009-09-11 russcox int c, c1, c2, c3;
50 91c13e54 2004-02-29 devnull long l;
51 91c13e54 2004-02-29 devnull
52 91c13e54 2004-02-29 devnull /*
53 91c13e54 2004-02-29 devnull * one character sequence
54 91c13e54 2004-02-29 devnull * 00000-0007F => T1
55 91c13e54 2004-02-29 devnull */
56 91c13e54 2004-02-29 devnull c = *(uchar*)str;
57 91c13e54 2004-02-29 devnull if(c < Tx) {
58 91c13e54 2004-02-29 devnull *rune = c;
59 91c13e54 2004-02-29 devnull return 1;
60 91c13e54 2004-02-29 devnull }
61 91c13e54 2004-02-29 devnull
62 91c13e54 2004-02-29 devnull /*
63 91c13e54 2004-02-29 devnull * two character sequence
64 91c13e54 2004-02-29 devnull * 0080-07FF => T2 Tx
65 91c13e54 2004-02-29 devnull */
66 91c13e54 2004-02-29 devnull c1 = *(uchar*)(str+1) ^ Tx;
67 91c13e54 2004-02-29 devnull if(c1 & Testx)
68 91c13e54 2004-02-29 devnull goto bad;
69 91c13e54 2004-02-29 devnull if(c < T3) {
70 91c13e54 2004-02-29 devnull if(c < T2)
71 91c13e54 2004-02-29 devnull goto bad;
72 91c13e54 2004-02-29 devnull l = ((c << Bitx) | c1) & Rune2;
73 91c13e54 2004-02-29 devnull if(l <= Rune1)
74 91c13e54 2004-02-29 devnull goto bad;
75 91c13e54 2004-02-29 devnull *rune = l;
76 91c13e54 2004-02-29 devnull return 2;
77 91c13e54 2004-02-29 devnull }
78 91c13e54 2004-02-29 devnull
79 91c13e54 2004-02-29 devnull /*
80 91c13e54 2004-02-29 devnull * three character sequence
81 91c13e54 2004-02-29 devnull * 0800-FFFF => T3 Tx Tx
82 91c13e54 2004-02-29 devnull */
83 91c13e54 2004-02-29 devnull c2 = *(uchar*)(str+2) ^ Tx;
84 91c13e54 2004-02-29 devnull if(c2 & Testx)
85 91c13e54 2004-02-29 devnull goto bad;
86 91c13e54 2004-02-29 devnull if(c < T4) {
87 91c13e54 2004-02-29 devnull l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
88 91c13e54 2004-02-29 devnull if(l <= Rune2)
89 91c13e54 2004-02-29 devnull goto bad;
90 91c13e54 2004-02-29 devnull *rune = l;
91 91c13e54 2004-02-29 devnull return 3;
92 91c13e54 2004-02-29 devnull }
93 91c13e54 2004-02-29 devnull
94 91c13e54 2004-02-29 devnull /*
95 0cadb430 2009-09-11 russcox * four character sequence
96 0cadb430 2009-09-11 russcox * 10000-10FFFF => T4 Tx Tx Tx
97 0cadb430 2009-09-11 russcox */
98 0cadb430 2009-09-11 russcox if(UTFmax >= 4) {
99 0cadb430 2009-09-11 russcox c3 = *(uchar*)(str+3) ^ Tx;
100 0cadb430 2009-09-11 russcox if(c3 & Testx)
101 0cadb430 2009-09-11 russcox goto bad;
102 0cadb430 2009-09-11 russcox if(c < T5) {
103 0cadb430 2009-09-11 russcox l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
104 0cadb430 2009-09-11 russcox if(l <= Rune3)
105 0cadb430 2009-09-11 russcox goto bad;
106 0cadb430 2009-09-11 russcox if(l > Runemax)
107 0cadb430 2009-09-11 russcox goto bad;
108 0cadb430 2009-09-11 russcox *rune = l;
109 0cadb430 2009-09-11 russcox return 4;
110 0cadb430 2009-09-11 russcox }
111 0cadb430 2009-09-11 russcox }
112 0cadb430 2009-09-11 russcox
113 0cadb430 2009-09-11 russcox /*
114 91c13e54 2004-02-29 devnull * bad decoding
115 91c13e54 2004-02-29 devnull */
116 91c13e54 2004-02-29 devnull bad:
117 91c13e54 2004-02-29 devnull *rune = Bad;
118 91c13e54 2004-02-29 devnull return 1;
119 91c13e54 2004-02-29 devnull }
120 91c13e54 2004-02-29 devnull
121 91c13e54 2004-02-29 devnull int
122 91c13e54 2004-02-29 devnull runetochar(char *str, Rune *rune)
123 91c13e54 2004-02-29 devnull {
124 91c13e54 2004-02-29 devnull long c;
125 91c13e54 2004-02-29 devnull
126 91c13e54 2004-02-29 devnull /*
127 91c13e54 2004-02-29 devnull * one character sequence
128 91c13e54 2004-02-29 devnull * 00000-0007F => 00-7F
129 91c13e54 2004-02-29 devnull */
130 91c13e54 2004-02-29 devnull c = *rune;
131 91c13e54 2004-02-29 devnull if(c <= Rune1) {
132 91c13e54 2004-02-29 devnull str[0] = c;
133 91c13e54 2004-02-29 devnull return 1;
134 91c13e54 2004-02-29 devnull }
135 91c13e54 2004-02-29 devnull
136 91c13e54 2004-02-29 devnull /*
137 91c13e54 2004-02-29 devnull * two character sequence
138 0cadb430 2009-09-11 russcox * 00080-007FF => T2 Tx
139 91c13e54 2004-02-29 devnull */
140 91c13e54 2004-02-29 devnull if(c <= Rune2) {
141 91c13e54 2004-02-29 devnull str[0] = T2 | (c >> 1*Bitx);
142 91c13e54 2004-02-29 devnull str[1] = Tx | (c & Maskx);
143 91c13e54 2004-02-29 devnull return 2;
144 91c13e54 2004-02-29 devnull }
145 91c13e54 2004-02-29 devnull
146 91c13e54 2004-02-29 devnull /*
147 91c13e54 2004-02-29 devnull * three character sequence
148 0cadb430 2009-09-11 russcox * 00800-0FFFF => T3 Tx Tx
149 91c13e54 2004-02-29 devnull */
150 0cadb430 2009-09-11 russcox if(c > Runemax)
151 0cadb430 2009-09-11 russcox c = Runeerror;
152 0cadb430 2009-09-11 russcox if(c <= Rune3) {
153 0cadb430 2009-09-11 russcox str[0] = T3 | (c >> 2*Bitx);
154 0cadb430 2009-09-11 russcox str[1] = Tx | ((c >> 1*Bitx) & Maskx);
155 0cadb430 2009-09-11 russcox str[2] = Tx | (c & Maskx);
156 0cadb430 2009-09-11 russcox return 3;
157 0cadb430 2009-09-11 russcox }
158 0cadb430 2009-09-11 russcox
159 0cadb430 2009-09-11 russcox /*
160 0cadb430 2009-09-11 russcox * four character sequence
161 0cadb430 2009-09-11 russcox * 010000-1FFFFF => T4 Tx Tx Tx
162 0cadb430 2009-09-11 russcox */
163 0cadb430 2009-09-11 russcox str[0] = T4 | (c >> 3*Bitx);
164 0cadb430 2009-09-11 russcox str[1] = Tx | ((c >> 2*Bitx) & Maskx);
165 0cadb430 2009-09-11 russcox str[2] = Tx | ((c >> 1*Bitx) & Maskx);
166 0cadb430 2009-09-11 russcox str[3] = Tx | (c & Maskx);
167 0cadb430 2009-09-11 russcox return 4;
168 91c13e54 2004-02-29 devnull }
169 91c13e54 2004-02-29 devnull
170 91c13e54 2004-02-29 devnull int
171 91c13e54 2004-02-29 devnull runelen(long c)
172 91c13e54 2004-02-29 devnull {
173 91c13e54 2004-02-29 devnull Rune rune;
174 91c13e54 2004-02-29 devnull char str[10];
175 91c13e54 2004-02-29 devnull
176 91c13e54 2004-02-29 devnull rune = c;
177 91c13e54 2004-02-29 devnull return runetochar(str, &rune);
178 91c13e54 2004-02-29 devnull }
179 91c13e54 2004-02-29 devnull
180 91c13e54 2004-02-29 devnull int
181 91c13e54 2004-02-29 devnull runenlen(Rune *r, int nrune)
182 91c13e54 2004-02-29 devnull {
183 91c13e54 2004-02-29 devnull int nb, c;
184 91c13e54 2004-02-29 devnull
185 91c13e54 2004-02-29 devnull nb = 0;
186 91c13e54 2004-02-29 devnull while(nrune--) {
187 91c13e54 2004-02-29 devnull c = *r++;
188 91c13e54 2004-02-29 devnull if(c <= Rune1)
189 91c13e54 2004-02-29 devnull nb++;
190 91c13e54 2004-02-29 devnull else
191 91c13e54 2004-02-29 devnull if(c <= Rune2)
192 91c13e54 2004-02-29 devnull nb += 2;
193 91c13e54 2004-02-29 devnull else
194 0cadb430 2009-09-11 russcox if(c <= Rune3 || c > Runemax)
195 91c13e54 2004-02-29 devnull nb += 3;
196 0cadb430 2009-09-11 russcox else
197 0cadb430 2009-09-11 russcox nb += 4;
198 91c13e54 2004-02-29 devnull }
199 91c13e54 2004-02-29 devnull return nb;
200 91c13e54 2004-02-29 devnull }
201 91c13e54 2004-02-29 devnull
202 91c13e54 2004-02-29 devnull int
203 91c13e54 2004-02-29 devnull fullrune(char *str, int n)
204 91c13e54 2004-02-29 devnull {
205 91c13e54 2004-02-29 devnull int c;
206 91c13e54 2004-02-29 devnull
207 0cadb430 2009-09-11 russcox if(n <= 0)
208 0cadb430 2009-09-11 russcox return 0;
209 0cadb430 2009-09-11 russcox c = *(uchar*)str;
210 0cadb430 2009-09-11 russcox if(c < Tx)
211 0cadb430 2009-09-11 russcox return 1;
212 0cadb430 2009-09-11 russcox if(c < T3)
213 0cadb430 2009-09-11 russcox return n >= 2;
214 0cadb430 2009-09-11 russcox if(UTFmax == 3 || c < T4)
215 0cadb430 2009-09-11 russcox return n >= 3;
216 0cadb430 2009-09-11 russcox return n >= 4;
217 91c13e54 2004-02-29 devnull }