Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 /* Macros for Rune support of ctype.h-like functions */
6 #undef isupper
7 #undef islower
8 #undef isalpha
9 #undef isdigit
10 #undef isalnum
11 #undef isspace
12 #undef tolower
13 #define isupper(r) ('A' <= (r) && (r) <= 'Z')
14 #define islower(r) ('a' <= (r) && (r) <= 'z')
15 #define isalpha(r) (isupper(r) || islower(r))
16 #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
18 #define isdigit(r) ('0' <= (r) && (r) <= '9')
20 #define isalnum(r) (isalpha(r) || isdigit(r))
22 #define isspace(r) ((r) == ' ' || (r) == '\t' \
23 || (0x0A <= (r) && (r) <= 0x0D))
25 #define tolower(r) ((r)-'A'+'a')
27 #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
29 #define WORDSIZ 4000
30 char *filename = "#9/lib/words";
31 Biobuf *dfile;
32 Biobuf bout;
33 Biobuf bin;
35 int fold;
36 int direc;
37 int exact;
38 int iflag;
39 int rev = 1; /*-1 for reverse-ordered file, not implemented*/
40 int (*compare)(Rune*, Rune*);
41 Rune tab = '\t';
42 Rune entry[WORDSIZ];
43 Rune word[WORDSIZ];
44 Rune key[50], orig[50];
45 Rune latin_fold_tab[] =
46 {
47 /* Table to fold latin 1 characters to ASCII equivalents
48 based at Rune value 0xc0
50 À Á Â Ã Ä Å Æ Ç
51 È É Ê Ë Ì Í Î Ï
52 Ð Ñ Ò Ó Ô Õ Ö ×
53 Ø Ù Ú Û Ü Ý Þ ß
54 à á â ã ä å æ ç
55 è é ê ë ì í î ï
56 ð ñ ò ó ô õ ö ÷
57 ø ù ú û ü ý þ ÿ
58 */
59 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
60 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
61 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
62 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
63 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
64 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
65 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
66 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
67 };
69 int locate(void);
70 int acomp(Rune*, Rune*);
71 int getword(Biobuf*, Rune *rp, int n);
72 void torune(char*, Rune*);
73 void rcanon(Rune*, Rune*);
74 int ncomp(Rune*, Rune*);
76 void
77 main(int argc, char *argv[])
78 {
79 int n;
81 filename = unsharp(filename);
83 Binit(&bin, 0, OREAD);
84 Binit(&bout, 1, OWRITE);
85 compare = acomp;
86 ARGBEGIN{
87 case 'd':
88 direc++;
89 break;
90 case 'f':
91 fold++;
92 break;
93 case 'i':
94 iflag++;
95 break;
96 case 'n':
97 compare = ncomp;
98 break;
99 case 't':
100 chartorune(&tab,ARGF());
101 break;
102 case 'x':
103 exact++;
104 break;
105 default:
106 fprint(2, "%s: bad option %c\n", argv0, ARGC());
107 fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
108 exits("usage");
109 } ARGEND
110 if(!iflag){
111 if(argc >= 1) {
112 torune(argv[0], orig);
113 argv++;
114 argc--;
115 } else
116 iflag++;
118 if(argc < 1) {
119 direc++;
120 fold++;
121 } else
122 filename = argv[0];
123 if (!iflag)
124 rcanon(orig, key);
125 dfile = Bopen(filename, OREAD);
126 if(dfile == 0) {
127 fprint(2, "look: can't open %s\n", filename);
128 exits("no dictionary");
130 if(!iflag)
131 if(!locate())
132 exits("not found");
133 do {
134 if(iflag) {
135 Bflush(&bout);
136 if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
137 exits(0);
138 rcanon(orig, key);
139 if(!locate())
140 continue;
142 if (!exact || !acomp(word, key))
143 Bprint(&bout, "%S\n", entry);
144 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
145 rcanon(entry, word);
146 n = compare(key, word);
147 switch(n) {
148 case -1:
149 if(exact)
150 break;
151 case 0:
152 if (!exact || !acomp(word, orig))
153 Bprint(&bout, "%S\n", entry);
154 continue;
156 break;
158 } while(iflag);
159 exits(0);
162 int
163 locate(void)
165 vlong top, bot, mid;
166 int c;
167 int n;
169 bot = 0;
170 top = Bseek(dfile, 0L, 2);
171 for(;;) {
172 mid = (top+bot) / 2;
173 Bseek(dfile, mid, 0);
174 do
175 c = Bgetrune(dfile);
176 while(c>=0 && c!='\n');
177 mid = Boffset(dfile);
178 if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
179 break;
180 rcanon(entry, word);
181 n = compare(key, word);
182 switch(n) {
183 case -2:
184 case -1:
185 case 0:
186 if(top <= mid)
187 break;
188 top = mid;
189 continue;
190 case 1:
191 case 2:
192 bot = mid;
193 continue;
195 break;
197 Bseek(dfile, bot, 0);
198 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
199 rcanon(entry, word);
200 n = compare(key, word);
201 switch(n) {
202 case -2:
203 return 0;
204 case -1:
205 if(exact)
206 return 0;
207 case 0:
208 return 1;
209 case 1:
210 case 2:
211 continue;
214 return 0;
217 /*
218 * acomp(s, t) returns:
219 * -2 if s strictly precedes t
220 * -1 if s is a prefix of t
221 * 0 if s is the same as t
222 * 1 if t is a prefix of s
223 * 2 if t strictly precedes s
224 */
226 int
227 acomp(Rune *s, Rune *t)
229 int cs, ct;
231 for(;;) {
232 cs = *s;
233 ct = *t;
234 if(cs != ct)
235 break;
236 if(cs == 0)
237 return 0;
238 s++;
239 t++;
241 if(cs == 0)
242 return -1;
243 if(ct == 0)
244 return 1;
245 if(cs < ct)
246 return -2;
247 return 2;
250 void
251 torune(char *old, Rune *new)
253 do old += chartorune(new, old);
254 while(*new++);
257 void
258 rcanon(Rune *old, Rune *new)
260 Rune r;
262 while((r = *old++) && r != tab) {
263 if (islatin1(r) && latin_fold_tab[r-0xc0])
264 r = latin_fold_tab[r-0xc0];
265 if(direc)
266 if(!(isalnum(r) || r == ' ' || r == '\t'))
267 continue;
268 if(fold)
269 if(isupper(r))
270 r = tolower(r);
271 *new++ = r;
273 *new = 0;
276 int
277 ncomp(Rune *s, Rune *t)
279 Rune *is, *it, *js, *jt;
280 int a, b;
281 int ssgn, tsgn;
283 while(isspace(*s))
284 s++;
285 while(isspace(*t))
286 t++;
287 ssgn = tsgn = -2*rev;
288 if(*s == '-') {
289 s++;
290 ssgn = -ssgn;
292 if(*t == '-') {
293 t++;
294 tsgn = -tsgn;
296 for(is = s; isdigit(*is); is++)
298 for(it = t; isdigit(*it); it++)
300 js = is;
301 jt = it;
302 a = 0;
303 if(ssgn == tsgn)
304 while(it>t && is>s)
305 if(b = *--it - *--is)
306 a = b;
307 while(is > s)
308 if(*--is != '0')
309 return -ssgn;
310 while(it > t)
311 if(*--it != '0')
312 return tsgn;
313 if(a)
314 return sgn(a)*ssgn;
315 if(*(s=js) == '.')
316 s++;
317 if(*(t=jt) == '.')
318 t++;
319 if(ssgn == tsgn)
320 while(isdigit(*s) && isdigit(*t))
321 if(a = *t++ - *s++)
322 return sgn(a)*ssgn;
323 while(isdigit(*s))
324 if(*s++ != '0')
325 return -ssgn;
326 while(isdigit(*t))
327 if(*t++ != '0')
328 return tsgn;
329 return 0;
332 int
333 getword(Biobuf *f, Rune *rp, int n)
335 long c;
337 while(n-- > 0) {
338 c = Bgetrune(f);
339 if(c < 0)
340 return 0;
341 if(c == '\n') {
342 *rp = '\0';
343 return 1;
345 *rp++ = c;
347 fprint(2, "Look: word too long. Bailing out.\n");
348 return 0;