Blob


1 #include <u.h>
2 #include <libc.h>
/*
 * Parse control block: tracks progress through one character-set
 * specification string as canon() expands it rune by rune.
 */
typedef struct PCB
{
	char	*base;		/* beginning of the specification string */
	char	*current;	/* next byte of the specification to parse */
	long	last;		/* most recent rune handed back; -1 if none yet */
	long	final;		/* upper end of the range being expanded; -1 if none */
} Pcb;
12 uchar bits[] = { 1, 2, 4, 8, 16, 32, 64, 128 };
14 #define SETBIT(a, c) ((a)[(c)/8] |= bits[(c)&07])
15 #define CLEARBIT(a,c) ((a)[(c)/8] &= ~bits[(c)&07])
16 #define BITSET(a,c) ((a)[(c)/8] & bits[(c)&07])
18 #define MAXRUNE Runemax
20 uchar f[(MAXRUNE+1)/8];
21 uchar t[(MAXRUNE+1)/8];
22 char wbuf[4096];
23 char *wptr;
25 Pcb pfrom, pto;
27 int cflag;
28 int dflag;
29 int sflag;
31 void complement(void);
32 void delete(void);
33 void squeeze(void);
34 void translit(void);
35 void error(char*);
36 long canon(Pcb*);
37 char *getrune(char*, Rune*);
38 void Pinit(Pcb*, char*);
39 void Prewind(Pcb *p);
40 int readrune(int, long*);
41 void wflush(int);
42 void writerune(int, Rune);
44 void
45 main(int argc, char **argv)
46 {
47 ARGBEGIN{
48 case 's': sflag++; break;
49 case 'd': dflag++; break;
50 case 'c': cflag++; break;
51 default: error("bad option");
52 }ARGEND
53 if(argc>0)
54 Pinit(&pfrom, argv[0]);
55 if(argc>1)
56 Pinit(&pto, argv[1]);
57 if(argc>2)
58 error("arg count");
59 if(dflag) {
60 if ((sflag && argc != 2) || (!sflag && argc != 1))
61 error("arg count");
62 delete();
63 } else {
64 if (argc != 2)
65 error("arg count");
66 if (cflag)
67 complement();
68 else translit();
69 }
70 exits(0);
71 }
73 void
74 delete(void)
75 {
76 long c, last;
78 if (cflag) {
79 memset((char *) f, 0xff, sizeof f);
80 while ((c = canon(&pfrom)) >= 0)
81 CLEARBIT(f, c);
82 } else {
83 while ((c = canon(&pfrom)) >= 0)
84 SETBIT(f, c);
85 }
86 if (sflag) {
87 while ((c = canon(&pto)) >= 0)
88 SETBIT(t, c);
89 }
91 last = Runemax+1;
92 while (readrune(0, &c) > 0) {
93 if(!BITSET(f, c) && (c != last || !BITSET(t,c))) {
94 last = c;
95 writerune(1, (Rune) c);
96 }
97 }
98 wflush(1);
99 }
101 void
102 complement(void)
104 Rune *p;
105 int i;
106 long from, to, lastc, high;
108 lastc = 0;
109 high = 0;
110 while ((from = canon(&pfrom)) >= 0) {
111 if (from > high) high = from;
112 SETBIT(f, from);
114 while ((to = canon(&pto)) > 0) {
115 if (to > high) high = to;
116 SETBIT(t,to);
118 Prewind(&pto);
119 if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
120 error("can't allocate memory");
121 for (i = 0; i <= high; i++){
122 if (!BITSET(f,i)) {
123 if ((to = canon(&pto)) < 0)
124 to = lastc;
125 else lastc = to;
126 p[i] = to;
128 else p[i] = i;
130 if (sflag){
131 lastc = Runemax+1;
132 while (readrune(0, &from) > 0) {
133 if (from > high)
134 from = to;
135 else
136 from = p[from];
137 if (from != lastc || !BITSET(t,from)) {
138 lastc = from;
139 writerune(1, (Rune) from);
143 } else {
144 while (readrune(0, &from) > 0){
145 if (from > high)
146 from = to;
147 else
148 from = p[from];
149 writerune(1, (Rune) from);
152 wflush(1);
155 void
156 translit(void)
158 Rune *p;
159 int i;
160 long from, to, lastc, high;
162 lastc = 0;
163 high = 0;
164 while ((from = canon(&pfrom)) >= 0)
165 if (from > high) high = from;
166 Prewind(&pfrom);
167 if ((p = (Rune *) malloc((high+1)*sizeof(Rune))) == 0)
168 error("can't allocate memory");
169 for (i = 0; i <= high; i++)
170 p[i] = i;
171 while ((from = canon(&pfrom)) >= 0) {
172 if ((to = canon(&pto)) < 0)
173 to = lastc;
174 else lastc = to;
175 if (BITSET(f,from) && p[from] != to)
176 error("ambiguous translation");
177 SETBIT(f,from);
178 p[from] = to;
179 SETBIT(t,to);
181 while ((to = canon(&pto)) >= 0) {
182 SETBIT(t,to);
184 if (sflag){
185 lastc = Runemax+1;
186 while (readrune(0, &from) > 0) {
187 if (from <= high)
188 from = p[from];
189 if (from != lastc || !BITSET(t,from)) {
190 lastc = from;
191 writerune(1, (Rune) from);
195 } else {
196 while (readrune(0, &from) > 0) {
197 if (from <= high)
198 from = p[from];
199 writerune(1, (Rune) from);
202 wflush(1);
205 int
206 readrune(int fd, long *rp)
208 Rune r;
209 int j;
210 static int i, n;
211 static char buf[4096];
213 j = i;
214 for (;;) {
215 if (i >= n) {
216 wflush(1);
217 if (j != i)
218 memcpy(buf, buf+j, n-j);
219 i = n-j;
220 n = read(fd, &buf[i], sizeof(buf)-i);
221 if (n < 0)
222 error("read error");
223 if (n == 0)
224 return 0;
225 j = 0;
226 n += i;
228 i++;
229 if (fullrune(&buf[j], i-j))
230 break;
232 chartorune(&r, &buf[j]);
233 *rp = r;
234 return 1;
237 void
238 writerune(int fd, Rune r)
240 char buf[UTFmax];
241 int n;
243 if (!wptr)
244 wptr = wbuf;
245 n = runetochar(buf, (Rune*)&r);
246 if (wptr+n >= wbuf+sizeof(wbuf))
247 wflush(fd);
248 memcpy(wptr, buf, n);
249 wptr += n;
252 void
253 wflush(int fd)
255 if (wptr && wptr > wbuf)
256 if (write(fd, wbuf, wptr-wbuf) != wptr-wbuf)
257 error("write error");
258 wptr = wbuf;
261 char *
262 getrune(char *s, Rune *rp)
264 Rune r;
265 char *save;
266 int i, n;
268 s += chartorune(rp, s);
269 if((r = *rp) == '\\' && *s){
270 n = 0;
271 if (*s == 'x') {
272 s++;
273 for (i = 0; i < 4; i++) {
274 save = s;
275 s += chartorune(&r, s);
276 if ('0' <= r && r <= '9')
277 n = 16*n + r - '0';
278 else if ('a' <= r && r <= 'f')
279 n = 16*n + r - 'a' + 10;
280 else if ('A' <= r && r <= 'F')
281 n = 16*n + r - 'A' + 10;
282 else {
283 if (i == 0)
284 *rp = 'x';
285 else *rp = n;
286 return save;
289 } else {
290 for(i = 0; i < 3; i++) {
291 save = s;
292 s += chartorune(&r, s);
293 if('0' <= r && r <= '7')
294 n = 8*n + r - '0';
295 else {
296 if (i == 0)
298 *rp = r;
299 return s;
301 *rp = n;
302 return save;
305 if(n > 0377)
306 error("char>0377");
308 *rp = n;
310 return s;
313 long
314 canon(Pcb *p)
316 Rune r;
318 if (p->final >= 0) {
319 if (p->last < p->final)
320 return ++p->last;
321 p->final = -1;
323 if (*p->current == '\0')
324 return -1;
325 if(*p->current == '-' && p->last >= 0 && p->current[1]){
326 p->current = getrune(p->current+1, &r);
327 if (r < p->last)
328 error ("Invalid range specification");
329 if (r > p->last) {
330 p->final = r;
331 return ++p->last;
334 p->current = getrune(p->current, &r);
335 p->last = r;
336 return p->last;
339 void
340 Pinit(Pcb *p, char *cp)
342 p->current = p->base = cp;
343 p->last = p->final = -1;
345 void
346 Prewind(Pcb *p)
348 p->current = p->base;
349 p->last = p->final = -1;
351 void
352 error(char *s)
354 fprint(2, "%s: %s\n", argv0, s);
355 exits(s);