Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 /* Macros for Rune support of ctype.h-like functions */
6 #define isupper(r) (L'A' <= (r) && (r) <= L'Z')
7 #define islower(r) (L'a' <= (r) && (r) <= L'z')
8 #define isalpha(r) (isupper(r) || islower(r))
9 #define islatin1(r) (0xC0 <= (r) && (r) <= 0xFF)
11 #define isdigit(r) (L'0' <= (r) && (r) <= L'9')
13 #define isalnum(r) (isalpha(r) || isdigit(r))
15 #define isspace(r) ((r) == L' ' || (r) == L'\t' \
16 || (0x0A <= (r) && (r) <= 0x0D))
18 #define tolower(r) ((r)-'A'+'a')
20 #define sgn(v) ((v) < 0 ? -1 : ((v) > 0 ? 1 : 0))
22 #define WORDSIZ 4000
23 char *filename = "#9/lib/words";
24 Biobuf *dfile;
25 Biobuf bout;
26 Biobuf bin;
28 int fold;
29 int direc;
30 int exact;
31 int iflag;
32 int rev = 1; /*-1 for reverse-ordered file, not implemented*/
33 int (*compare)(Rune*, Rune*);
34 Rune tab = '\t';
35 Rune entry[WORDSIZ];
36 Rune word[WORDSIZ];
37 Rune key[50], orig[50];
38 Rune latin_fold_tab[] =
39 {
40 /* Table to fold latin 1 characters to ASCII equivalents
41 based at Rune value 0xc0
43 À Á Â Ã Ä Å Æ Ç
44 È É Ê Ë Ì Í Î Ï
45 Ð Ñ Ò Ó Ô Õ Ö ×
46 Ø Ù Ú Û Ü Ý Þ ß
47 à á â ã ä å æ ç
48 è é ê ë ì í î ï
49 ð ñ ò ó ô õ ö ÷
50 ø ù ú û ü ý þ ÿ
51 */
52 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
53 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
54 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
55 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
56 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
57 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
58 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
59 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y',
60 };
62 int locate(void);
63 int acomp(Rune*, Rune*);
64 int getword(Biobuf*, Rune *rp, int n);
65 void torune(char*, Rune*);
66 void rcanon(Rune*, Rune*);
67 int ncomp(Rune*, Rune*);
69 void
70 main(int argc, char *argv[])
71 {
72 int n;
74 filename = unsharp(filename);
76 Binit(&bin, 0, OREAD);
77 Binit(&bout, 1, OWRITE);
78 compare = acomp;
79 ARGBEGIN{
80 case 'd':
81 direc++;
82 break;
83 case 'f':
84 fold++;
85 break;
86 case 'i':
87 iflag++;
88 break;
89 case 'n':
90 compare = ncomp;
91 break;
92 case 't':
93 chartorune(&tab,ARGF());
94 break;
95 case 'x':
96 exact++;
97 break;
98 default:
99 fprint(2, "%s: bad option %c\n", argv0, ARGC());
100 fprint(2, "usage: %s -[dfinx] [-t c] [string] [file]\n", argv0);
101 exits("usage");
102 } ARGEND
103 if(!iflag){
104 if(argc >= 1) {
105 torune(argv[0], orig);
106 argv++;
107 argc--;
108 } else
109 iflag++;
111 if(argc < 1) {
112 direc++;
113 fold++;
114 } else
115 filename = argv[0];
116 if (!iflag)
117 rcanon(orig, key);
118 dfile = Bopen(filename, OREAD);
119 if(dfile == 0) {
120 fprint(2, "look: can't open %s\n", filename);
121 exits("no dictionary");
123 if(!iflag)
124 if(!locate())
125 exits("not found");
126 do {
127 if(iflag) {
128 Bflush(&bout);
129 if(!getword(&bin, orig, sizeof(orig)/sizeof(orig[0])))
130 exits(0);
131 rcanon(orig, key);
132 if(!locate())
133 continue;
135 if (!exact || !acomp(word, key))
136 Bprint(&bout, "%S\n", entry);
137 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
138 rcanon(entry, word);
139 n = compare(key, word);
140 switch(n) {
141 case -1:
142 if(exact)
143 break;
144 case 0:
145 if (!exact || !acomp(word, orig))
146 Bprint(&bout, "%S\n", entry);
147 continue;
149 break;
151 } while(iflag);
152 exits(0);
155 int
156 locate(void)
158 long top, bot, mid;
159 long c;
160 int n;
162 bot = 0;
163 top = Bseek(dfile, 0L, 2);
164 for(;;) {
165 mid = (top+bot) / 2;
166 Bseek(dfile, mid, 0);
167 do
168 c = Bgetrune(dfile);
169 while(c>=0 && c!='\n');
170 mid = Boffset(dfile);
171 if(!getword(dfile, entry, sizeof(entry)/sizeof(entry[0])))
172 break;
173 rcanon(entry, word);
174 n = compare(key, word);
175 switch(n) {
176 case -2:
177 case -1:
178 case 0:
179 if(top <= mid)
180 break;
181 top = mid;
182 continue;
183 case 1:
184 case 2:
185 bot = mid;
186 continue;
188 break;
190 Bseek(dfile, bot, 0);
191 while(getword(dfile, entry, sizeof(entry)/sizeof(entry[0]))) {
192 rcanon(entry, word);
193 n = compare(key, word);
194 switch(n) {
195 case -2:
196 return 0;
197 case -1:
198 if(exact)
199 return 0;
200 case 0:
201 return 1;
202 case 1:
203 case 2:
204 continue;
207 return 0;
210 /*
211 * acomp(s, t) returns:
212 * -2 if s strictly precedes t
213 * -1 if s is a prefix of t
214 * 0 if s is the same as t
215 * 1 if t is a prefix of s
216 * 2 if t strictly precedes s
217 */
219 int
220 acomp(Rune *s, Rune *t)
222 int cs, ct;
224 for(;;) {
225 cs = *s;
226 ct = *t;
227 if(cs != ct)
228 break;
229 if(cs == 0)
230 return 0;
231 s++;
232 t++;
234 if(cs == 0)
235 return -1;
236 if(ct == 0)
237 return 1;
238 if(cs < ct)
239 return -2;
240 return 2;
243 void
244 torune(char *old, Rune *new)
246 do old += chartorune(new, old);
247 while(*new++);
250 void
251 rcanon(Rune *old, Rune *new)
253 Rune r;
255 while((r = *old++) && r != tab) {
256 if (islatin1(r) && latin_fold_tab[r-0xc0])
257 r = latin_fold_tab[r-0xc0];
258 if(direc)
259 if(!(isalnum(r) || r == L' ' || r == L'\t'))
260 continue;
261 if(fold)
262 if(isupper(r))
263 r = tolower(r);
264 *new++ = r;
266 *new = 0;
269 int
270 ncomp(Rune *s, Rune *t)
272 Rune *is, *it, *js, *jt;
273 int a, b;
274 int ssgn, tsgn;
276 while(isspace(*s))
277 s++;
278 while(isspace(*t))
279 t++;
280 ssgn = tsgn = -2*rev;
281 if(*s == '-') {
282 s++;
283 ssgn = -ssgn;
285 if(*t == '-') {
286 t++;
287 tsgn = -tsgn;
289 for(is = s; isdigit(*is); is++)
291 for(it = t; isdigit(*it); it++)
293 js = is;
294 jt = it;
295 a = 0;
296 if(ssgn == tsgn)
297 while(it>t && is>s)
298 if(b = *--it - *--is)
299 a = b;
300 while(is > s)
301 if(*--is != '0')
302 return -ssgn;
303 while(it > t)
304 if(*--it != '0')
305 return tsgn;
306 if(a)
307 return sgn(a)*ssgn;
308 if(*(s=js) == '.')
309 s++;
310 if(*(t=jt) == '.')
311 t++;
312 if(ssgn == tsgn)
313 while(isdigit(*s) && isdigit(*t))
314 if(a = *t++ - *s++)
315 return sgn(a)*ssgn;
316 while(isdigit(*s))
317 if(*s++ != '0')
318 return -ssgn;
319 while(isdigit(*t))
320 if(*t++ != '0')
321 return tsgn;
322 return 0;
325 int
326 getword(Biobuf *f, Rune *rp, int n)
328 long c;
330 while(n-- > 0) {
331 c = Bgetrune(f);
332 if(c < 0)
333 return 0;
334 if(c == '\n') {
335 *rp = L'\0';
336 return 1;
338 *rp++ = c;
340 fprint(2, "Look: word too long. Bailing out.\n");
341 return 0;