Blob


1 #include "rune.h"
3 enum
4 {
5 Bit1 = 7,
6 Bitx = 6,
7 Bit2 = 5,
8 Bit3 = 4,
9 Bit4 = 3,
11 T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
12 Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
13 T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
14 T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
15 T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
17 Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0111 1111 */
18 Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0111 1111 1111 */
19 Rune3 = (1<<(Bit3+2*Bitx))-1, /* 1111 1111 1111 1111 */
21 Maskx = (1<<Bitx)-1, /* 0011 1111 */
22 Testx = Maskx ^ 0xFF, /* 1100 0000 */
24 Bad = Runeerror
25 };
27 int
28 chartorune(Rune *rune, char *str)
29 {
30 int c, c1, c2;
31 long l;
33 /*
34 * one character sequence
35 * 00000-0007F => T1
36 */
37 c = *(unsigned char*)str;
38 if(c < Tx) {
39 *rune = c;
40 return 1;
41 }
43 /*
44 * two character sequence
45 * 0080-07FF => T2 Tx
46 */
47 c1 = *(unsigned char*)(str+1) ^ Tx;
48 if(c1 & Testx)
49 goto bad;
50 if(c < T3) {
51 if(c < T2)
52 goto bad;
53 l = ((c << Bitx) | c1) & Rune2;
54 if(l <= Rune1)
55 goto bad;
56 *rune = l;
57 return 2;
58 }
60 /*
61 * three character sequence
62 * 0800-FFFF => T3 Tx Tx
63 */
64 c2 = *(unsigned char*)(str+2) ^ Tx;
65 if(c2 & Testx)
66 goto bad;
67 if(c < T4) {
68 l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
69 if(l <= Rune2)
70 goto bad;
71 *rune = l;
72 return 3;
73 }
75 /*
76 * bad decoding
77 */
78 bad:
79 *rune = Bad;
80 return 1;
81 }
83 int
84 runetochar(char *str, Rune *rune)
85 {
86 long c;
88 /*
89 * one character sequence
90 * 00000-0007F => 00-7F
91 */
92 c = *rune;
93 if(c <= Rune1) {
94 str[0] = c;
95 return 1;
96 }
98 /*
99 * two character sequence
100 * 0080-07FF => T2 Tx
101 */
102 if(c <= Rune2) {
103 str[0] = T2 | (c >> 1*Bitx);
104 str[1] = Tx | (c & Maskx);
105 return 2;
108 /*
109 * three character sequence
110 * 0800-FFFF => T3 Tx Tx
111 */
112 str[0] = T3 | (c >> 2*Bitx);
113 str[1] = Tx | ((c >> 1*Bitx) & Maskx);
114 str[2] = Tx | (c & Maskx);
115 return 3;
118 int
119 runelen(long c)
121 Rune rune;
122 char str[10];
124 rune = c;
125 return runetochar(str, &rune);
128 int
129 fullrune(char *str, int n)
131 int c;
133 if(n > 0) {
134 c = *(unsigned char*)str;
135 if(c < Tx)
136 return 1;
137 if(n > 1)
138 if(c < T3 || n > 2)
139 return 1;
141 return 0;