9 * Assumed index file structure: lines of form
11 * First field is key, second is byte offset into dictionary.
12 * Should be sorted with args -u -t' ' +0f -1 +0 -1 +1n -2
14 typedef struct Addr Addr;
17 int n; /* number of offsets */
18 int cur; /* current position within doff array */
19 int maxn; /* actual current size of doff array */
20 ulong doff[1]; /* doff[maxn], with 0..n-1 significant */
25 Biobuf *bin = &binbuf; /* user cmd input */
26 Biobuf *bout = &boutbuf; /* output */
27 Biobuf *bdict; /* dictionary */
28 Biobuf *bindex; /* index file */
29 long indextop; /* index offset at end of file */
30 int lastcmd; /* last executed command */
31 Addr *dot; /* "current" address */
32 Dict *dict; /* current dictionary */
39 int getpref(char*, Rune*);
43 int parseaddr(char*, char**);
45 int search(char*, int);
46 long seeknextline(Biobuf*, long);
47 void setdotnext(void);
48 void setdotprev(void);
53 Plen=300, /* max length of a search pattern */
54 Fieldlen=200, /* max length of an index field */
55 Aslots=10 /* initial number of slots in an address */
59 main(int argc, char **argv)
64 Binit(&binbuf, 0, OREAD);
65 Binit(&boutbuf, 1, OWRITE);
70 for(i=0; dicts[i].name; i++){
71 if(access(dictfile(dicts[i].path), 0)>=0 && access(dictfile(dicts[i].indexpath), 0)>=0){
81 for(i=0; dicts[i].name; i++)
82 if(strcmp(p, dicts[i].name)==0) {
105 err("no dictionaries present on this system");
119 line = malloc(strlen(p)+5);
120 sprint(line, "/%s/P\n", p);
122 dict->path = dictfile(dict->path);
123 dict->indexpath = dictfile(dict->indexpath);
124 bdict = Bopen(dict->path, OREAD);
126 err("can't open dictionary %s", dict->path);
129 bindex = Bopen(dict->indexpath, OREAD);
131 err("can't open index %s", dict->indexpath);
134 indextop = Bseek(bindex, 0L, 2);
136 dot = malloc(sizeof(Addr)+(Aslots-1)*sizeof(ulong));
143 cmd = parsecmd(line);
150 line = Brdline(bin, '\n');
154 cmd = parsecmd(line);
170 Bprint(bout, "Usage: %s [-d dict] [-k] [-c cmd] [word]\n", argv0);
171 Bprint(bout, "dictionaries (brackets mark dictionaries not present on this system):\n");
172 for(i = 0; dicts[i].name; i++){
174 if(access(dictfile(dicts[i].path), 0)<0 || access(dictfile(dicts[i].indexpath), 0)<0){
178 Bprint(bout, " %s%s\t%s%s\n", a, dicts[i].name, dicts[i].desc, b);
189 if(parseaddr(line, &e) >= 0)
197 if(!(cmd == 'a' || cmd == 'h' || cmd == 'p' || cmd == 'r' ||
199 err("unknown command %c", cmd);
204 case 0: ans = 'H'; break;
205 case 'H': ans = 'p'; break;
206 default : ans = lastcmd; break;
208 else if(line[1] != '\n' && line[1] != 0)
209 err("extra stuff after command %c ignored", cmd);
227 if(debug && doall && cmd == 'a')
228 Bprint(bout, "%d entries, cur=%d\n", dot->n, cur+1);
233 Bprint(bout, "%d\t", cur+1);
234 linelen += 4 + (cur >= 10);
238 Bprint(bout, "#%lud\n", dot->doff[cur]);
244 (*dict->printentry)(e, cmd);
249 if(cmd == 'p' || cmd == 'r') {
262 * Address syntax: ('.' | '/' re '/' | '!' re '!' | number | '#' number) ('+' | '-')*
263 * Answer goes in dot.
264 * Return -1 if address starts, but get error.
265 * Return 0 if no address.
268 parseaddr(char *line, char **eptr)
275 if(*line == '/' || *line == '!') {
276 /* anchored regular expression match; '!' means no folding */
279 e = strpbrk(line+1, "/\n");
282 e = strpbrk(line+1, "!\n");
286 err("pattern too big");
290 memcpy(pat+1, line+1, plen);
297 if(!search(pat, delim == '/')) {
298 err("pattern not found");
301 } else if(*line == '#') {
302 /* absolute byte offset into dictionary */
304 if(!isdigit((uchar)*line))
306 v = strtoul(line, &e, 10);
311 } else if(isdigit((uchar)*line)) {
312 v = strtoul(line, &e, 10);
314 if(v < 1 || v > dot->n)
315 err(".%d not in range [1,%d], ignored",
319 } else if(*line == '.') {
325 while(*line == '+' || *line == '-') {
337 * Index file is sorted by folded field1.
338 * Method: find pre, a folded prefix of r.e. pat,
339 * and then low = offset to beginning of
340 * line in index file where first match of prefix occurs.
341 * Then go through index until prefix no longer matches,
342 * adding each line that matches real pattern to dot.
343 * Finally, sort dot offsets (uniquing).
344 * We know pat len < Plen, and that it is surrounded by ^..$
347 search(char *pat, int dofold)
349 int needre, prelen, match, n;
354 Rune entry[Fieldlen];
357 prelen = getpref(pat+1, pre);
358 if(pat[prelen+1] == 0 || pat[prelen+1] == '$') {
377 Bseek(bindex, ioff, 0);
384 match = rregexec(re, entry, 0, 0);
386 match = (acomp(lit, entry) == 0);
391 if(dot->n >= dot->maxn) {
394 sizeof(Addr)+(n-1)*sizeof(long));
396 err("out of memory");
401 dot->doff[dot->n++] = v;
406 n = acomp(pre, entry);
407 if(n < -1 || (!needre && n < 0))
410 /* get to next index entry */
421 * Return offset in index file of first line whose folded
422 * first field has pre as a prefix. -1 if none found.
428 Rune entry[Fieldlen];
435 fprint(2, "locate looking for prefix %S\n", pre);
438 * Loop invariant: foldkey(bot) < pre <= foldkey(top)
439 * and bot < top, and bot,top point at beginning of lines
442 mid = seeknextline(bindex, mid);
444 fprint(2, "bot=%ld, mid=%ld->%ld, top=%ld\n",
445 bot, (top+bot) / 2, mid, top);
446 if(mid == top || !getfield(entry))
449 fprint(2, "key=%S\n", entry);
451 * here mid is strictly between bot and top
454 if(acomp(pre, entry) <= 0)
460 * bot < top, but they don't necessarily point at successive lines
461 * Use linear search from bot to find first line that pre is a
464 while((bot = seeknextline(bindex, bot)) <= top) {
468 fprint(2, "key=%S\n", entry);
470 switch(acomp(pre, entry)) {
486 * Get prefix of non re-metacharacters, runified, into pre,
490 getpref(char *pat, Rune *pre)
497 n = chartorune(pre, p);
500 case 0x2e: case 0x2a: case 0x2b: case 0x3f:
501 case 0x5b: case 0x5d: case 0x28: case ')':
502 case 0x7c: case 0x5e: case 0x24:
507 p += chartorune(++pre, p);
519 seeknextline(Biobuf *b, long off)
526 } while(c>=0 && c!='\n');
531 * Get next field out of index file (either tab- or nl- terminated)
532 * Answer in *rp, assumed to be Fieldlen long.
533 * Return 0 if read error first.
541 for(n=Fieldlen; n-- > 0; ) {
542 if ((c = Bgetrune(bindex)) < 0)
544 if(c == '\t' || c == '\n') {
550 err("word too long");
555 * A compare longs function suitable for qsort
558 longcmp(const void *av, const void *bv)
584 qsort(a->doff, a->n, sizeof(long), longcmp);
586 /* remove duplicates */
587 for(i=0, j=0; j < a->n; j++) {
589 if(i > 0 && v == a->doff[i-1])
601 static int anslen = 0;
604 e = (*dict->nextoff)(b+1);
607 err("couldn't seek to entry");
613 ans.start = realloc(ans.start, n+1);
615 err("out of memory");
621 n = Bread(bdict, ans.start, n);
622 ans.end = ans.start + n;
633 b = (*dict->nextoff)(dot->doff[dot->cur]+1);
635 err("couldn't find a next entry");
649 if(dot->cur < 0 || dot->cur >= dot->n)
652 here = dot->doff[dot->cur];
659 p = (*dict->nextoff)(p+1);
661 return; /* shouldn't happen */
667 if(here - tryback < 0) {
668 err("can't find a previous entry");
680 * find the specified file and return a path.
681 * default location is #9/dict, but can be
682 * in $dictdir instead.
691 dict = getenv("dictpath");
696 return smprint("%s/%s", dict, f);
697 return unsharp(smprint("#9/dict/%s", f));