Blame


1 08708877 2003-11-25 devnull #include <u.h>
2 08708877 2003-11-25 devnull #include <libc.h>
3 08708877 2003-11-25 devnull #include <bio.h>
4 08708877 2003-11-25 devnull #include <regexp.h>
5 08708877 2003-11-25 devnull #include <ctype.h>
6 08708877 2003-11-25 devnull #include "dict.h"
7 08708877 2003-11-25 devnull
8 08708877 2003-11-25 devnull /*
9 08708877 2003-11-25 devnull * Assumed index file structure: lines of form
10 08708877 2003-11-25 devnull * [^\t]+\t[0-9]+
11 08708877 2003-11-25 devnull * First field is key, second is byte offset into dictionary.
12 08708877 2003-11-25 devnull * Should be sorted with args -u -t' ' +0f -1 +0 -1 +1n -2
13 08708877 2003-11-25 devnull */
14 08708877 2003-11-25 devnull typedef struct Addr Addr;
15 08708877 2003-11-25 devnull
16 08708877 2003-11-25 devnull struct Addr {
17 08708877 2003-11-25 devnull int n; /* number of offsets */
18 08708877 2003-11-25 devnull int cur; /* current position within doff array */
19 08708877 2003-11-25 devnull int maxn; /* actual current size of doff array */
20 08708877 2003-11-25 devnull ulong doff[1]; /* doff[maxn], with 0..n-1 significant */
21 08708877 2003-11-25 devnull };
22 08708877 2003-11-25 devnull
23 08708877 2003-11-25 devnull Biobuf binbuf;
24 08708877 2003-11-25 devnull Biobuf boutbuf;
25 08708877 2003-11-25 devnull Biobuf *bin = &binbuf; /* user cmd input */
26 08708877 2003-11-25 devnull Biobuf *bout = &boutbuf; /* output */
27 08708877 2003-11-25 devnull Biobuf *bdict; /* dictionary */
28 08708877 2003-11-25 devnull Biobuf *bindex; /* index file */
29 08708877 2003-11-25 devnull long indextop; /* index offset at end of file */
30 08708877 2003-11-25 devnull int lastcmd; /* last executed command */
31 08708877 2003-11-25 devnull Addr *dot; /* "current" address */
32 08708877 2003-11-25 devnull Dict *dict; /* current dictionary */
33 08708877 2003-11-25 devnull int linelen;
34 08708877 2003-11-25 devnull int breaklen = 60;
35 08708877 2003-11-25 devnull int outinhibit;
36 08708877 2003-11-25 devnull int debug;
37 08708877 2003-11-25 devnull
38 08708877 2003-11-25 devnull void execcmd(int);
39 08708877 2003-11-25 devnull int getpref(char*, Rune*);
40 08708877 2003-11-25 devnull Entry getentry(int);
41 08708877 2003-11-25 devnull int getfield(Rune*);
42 08708877 2003-11-25 devnull long locate(Rune*);
43 08708877 2003-11-25 devnull int parseaddr(char*, char**);
44 08708877 2003-11-25 devnull int parsecmd(char*);
45 08708877 2003-11-25 devnull int search(char*, int);
46 08708877 2003-11-25 devnull long seeknextline(Biobuf*, long);
47 08708877 2003-11-25 devnull void setdotnext(void);
48 08708877 2003-11-25 devnull void setdotprev(void);
49 08708877 2003-11-25 devnull void sortaddr(Addr*);
50 08708877 2003-11-25 devnull void usage(void);
51 08708877 2003-11-25 devnull
/* fixed sizes used throughout this file */
enum {
	Plen = 300,		/* max length of a search pattern */
	Fieldlen = 200,		/* max length of an index field */
	Aslots = 10		/* initial number of slots in an address */
};
57 08708877 2003-11-25 devnull
58 08708877 2003-11-25 devnull void
59 08708877 2003-11-25 devnull main(int argc, char **argv)
60 08708877 2003-11-25 devnull {
61 08708877 2003-11-25 devnull int i, cmd, kflag;
62 32f69c36 2003-12-11 devnull char *line, *p;
63 08708877 2003-11-25 devnull
64 08708877 2003-11-25 devnull Binit(&binbuf, 0, OREAD);
65 08708877 2003-11-25 devnull Binit(&boutbuf, 1, OWRITE);
66 08708877 2003-11-25 devnull kflag = 0;
67 08708877 2003-11-25 devnull line = 0;
68 08708877 2003-11-25 devnull dict = 0;
69 08708877 2003-11-25 devnull
70 08708877 2003-11-25 devnull for(i=0; dicts[i].name; i++){
71 ebda53e1 2007-08-22 rsc if(access(dictfile(dicts[i].path), 0)>=0 && access(dictfile(dicts[i].indexpath), 0)>=0){
72 08708877 2003-11-25 devnull dict = &dicts[i];
73 08708877 2003-11-25 devnull break;
74 08708877 2003-11-25 devnull }
75 08708877 2003-11-25 devnull }
76 08708877 2003-11-25 devnull ARGBEGIN {
77 08708877 2003-11-25 devnull case 'd':
78 08708877 2003-11-25 devnull p = ARGF();
79 08708877 2003-11-25 devnull dict = 0;
80 08708877 2003-11-25 devnull if(p) {
81 08708877 2003-11-25 devnull for(i=0; dicts[i].name; i++)
82 08708877 2003-11-25 devnull if(strcmp(p, dicts[i].name)==0) {
83 08708877 2003-11-25 devnull dict = &dicts[i];
84 08708877 2003-11-25 devnull break;
85 08708877 2003-11-25 devnull }
86 08708877 2003-11-25 devnull }
87 08708877 2003-11-25 devnull if(!dict)
88 08708877 2003-11-25 devnull usage();
89 08708877 2003-11-25 devnull break;
90 08708877 2003-11-25 devnull case 'c':
91 08708877 2003-11-25 devnull line = ARGF();
92 08708877 2003-11-25 devnull if(!line)
93 08708877 2003-11-25 devnull usage();
94 08708877 2003-11-25 devnull break;
95 08708877 2003-11-25 devnull case 'k':
96 08708877 2003-11-25 devnull kflag++;
97 08708877 2003-11-25 devnull break;
98 08708877 2003-11-25 devnull case 'D':
99 08708877 2003-11-25 devnull debug++;
100 08708877 2003-11-25 devnull break;
101 08708877 2003-11-25 devnull default:
102 08708877 2003-11-25 devnull usage();
103 08708877 2003-11-25 devnull ARGEND }
104 08708877 2003-11-25 devnull if(dict == 0){
105 08708877 2003-11-25 devnull err("no dictionaries present on this system");
106 08708877 2003-11-25 devnull exits("nodict");
107 08708877 2003-11-25 devnull }
108 08708877 2003-11-25 devnull
109 08708877 2003-11-25 devnull if(kflag) {
110 08708877 2003-11-25 devnull (*dict->printkey)();
111 08708877 2003-11-25 devnull exits(0);
112 08708877 2003-11-25 devnull }
113 08708877 2003-11-25 devnull if(argc > 1)
114 08708877 2003-11-25 devnull usage();
115 08708877 2003-11-25 devnull else if(argc == 1) {
116 08708877 2003-11-25 devnull if(line)
117 08708877 2003-11-25 devnull usage();
118 08708877 2003-11-25 devnull p = argv[0];
119 08708877 2003-11-25 devnull line = malloc(strlen(p)+5);
120 08708877 2003-11-25 devnull sprint(line, "/%s/P\n", p);
121 08708877 2003-11-25 devnull }
122 ebda53e1 2007-08-22 rsc dict->path = dictfile(dict->path);
123 ebda53e1 2007-08-22 rsc dict->indexpath = dictfile(dict->indexpath);
124 08708877 2003-11-25 devnull bdict = Bopen(dict->path, OREAD);
125 08708877 2003-11-25 devnull if(!bdict) {
126 32f69c36 2003-12-11 devnull err("can't open dictionary %s", dict->path);
127 08708877 2003-11-25 devnull exits("nodict");
128 08708877 2003-11-25 devnull }
129 08708877 2003-11-25 devnull bindex = Bopen(dict->indexpath, OREAD);
130 08708877 2003-11-25 devnull if(!bindex) {
131 32f69c36 2003-12-11 devnull err("can't open index %s", dict->indexpath);
132 08708877 2003-11-25 devnull exits("noindex");
133 08708877 2003-11-25 devnull }
134 08708877 2003-11-25 devnull indextop = Bseek(bindex, 0L, 2);
135 08708877 2003-11-25 devnull
136 08708877 2003-11-25 devnull dot = malloc(sizeof(Addr)+(Aslots-1)*sizeof(ulong));
137 08708877 2003-11-25 devnull dot->n = 0;
138 08708877 2003-11-25 devnull dot->cur = 0;
139 08708877 2003-11-25 devnull dot->maxn = Aslots;
140 08708877 2003-11-25 devnull lastcmd = 0;
141 08708877 2003-11-25 devnull
142 08708877 2003-11-25 devnull if(line) {
143 08708877 2003-11-25 devnull cmd = parsecmd(line);
144 08708877 2003-11-25 devnull if(cmd)
145 08708877 2003-11-25 devnull execcmd(cmd);
146 08708877 2003-11-25 devnull } else {
147 08708877 2003-11-25 devnull for(;;) {
148 08708877 2003-11-25 devnull Bprint(bout, "*");
149 08708877 2003-11-25 devnull Bflush(bout);
150 08708877 2003-11-25 devnull line = Brdline(bin, '\n');
151 08708877 2003-11-25 devnull linelen = 0;
152 08708877 2003-11-25 devnull if(!line)
153 08708877 2003-11-25 devnull break;
154 08708877 2003-11-25 devnull cmd = parsecmd(line);
155 08708877 2003-11-25 devnull if(cmd) {
156 08708877 2003-11-25 devnull execcmd(cmd);
157 08708877 2003-11-25 devnull lastcmd = cmd;
158 08708877 2003-11-25 devnull }
159 08708877 2003-11-25 devnull }
160 08708877 2003-11-25 devnull }
161 08708877 2003-11-25 devnull exits(0);
162 08708877 2003-11-25 devnull }
163 08708877 2003-11-25 devnull
164 08708877 2003-11-25 devnull void
165 08708877 2003-11-25 devnull usage(void)
166 08708877 2003-11-25 devnull {
167 08708877 2003-11-25 devnull int i;
168 08708877 2003-11-25 devnull char *a, *b;
169 08708877 2003-11-25 devnull
170 08708877 2003-11-25 devnull Bprint(bout, "Usage: %s [-d dict] [-k] [-c cmd] [word]\n", argv0);
171 08708877 2003-11-25 devnull Bprint(bout, "dictionaries (brackets mark dictionaries not present on this system):\n");
172 08708877 2003-11-25 devnull for(i = 0; dicts[i].name; i++){
173 08708877 2003-11-25 devnull a = b = "";
174 ebda53e1 2007-08-22 rsc if(access(dictfile(dicts[i].path), 0)<0 || access(dictfile(dicts[i].indexpath), 0)<0){
175 08708877 2003-11-25 devnull a = "[";
176 08708877 2003-11-25 devnull b = "]";
177 08708877 2003-11-25 devnull }
178 08708877 2003-11-25 devnull Bprint(bout, " %s%s\t%s%s\n", a, dicts[i].name, dicts[i].desc, b);
179 08708877 2003-11-25 devnull }
180 08708877 2003-11-25 devnull exits("usage");
181 08708877 2003-11-25 devnull }
182 08708877 2003-11-25 devnull
183 08708877 2003-11-25 devnull int
184 08708877 2003-11-25 devnull parsecmd(char *line)
185 08708877 2003-11-25 devnull {
186 08708877 2003-11-25 devnull char *e;
187 08708877 2003-11-25 devnull int cmd, ans;
188 08708877 2003-11-25 devnull
189 08708877 2003-11-25 devnull if(parseaddr(line, &e) >= 0)
190 08708877 2003-11-25 devnull line = e;
191 08708877 2003-11-25 devnull else
192 08708877 2003-11-25 devnull return 0;
193 08708877 2003-11-25 devnull cmd = *line;
194 08708877 2003-11-25 devnull ans = cmd;
195 08708877 2003-11-25 devnull if(isupper(cmd))
196 08708877 2003-11-25 devnull cmd = tolower(cmd);
197 08708877 2003-11-25 devnull if(!(cmd == 'a' || cmd == 'h' || cmd == 'p' || cmd == 'r' ||
198 08708877 2003-11-25 devnull cmd == '\n')) {
199 08708877 2003-11-25 devnull err("unknown command %c", cmd);
200 08708877 2003-11-25 devnull return 0;
201 08708877 2003-11-25 devnull }
202 08708877 2003-11-25 devnull if(cmd == '\n')
203 08708877 2003-11-25 devnull switch(lastcmd) {
204 08708877 2003-11-25 devnull case 0: ans = 'H'; break;
205 08708877 2003-11-25 devnull case 'H': ans = 'p'; break;
206 08708877 2003-11-25 devnull default : ans = lastcmd; break;
207 08708877 2003-11-25 devnull }
208 08708877 2003-11-25 devnull else if(line[1] != '\n' && line[1] != 0)
209 08708877 2003-11-25 devnull err("extra stuff after command %c ignored", cmd);
210 08708877 2003-11-25 devnull return ans;
211 08708877 2003-11-25 devnull }
212 08708877 2003-11-25 devnull
213 08708877 2003-11-25 devnull void
214 08708877 2003-11-25 devnull execcmd(int cmd)
215 08708877 2003-11-25 devnull {
216 08708877 2003-11-25 devnull Entry e;
217 08708877 2003-11-25 devnull int cur, doall;
218 08708877 2003-11-25 devnull
219 08708877 2003-11-25 devnull if(isupper(cmd)) {
220 08708877 2003-11-25 devnull doall = 1;
221 08708877 2003-11-25 devnull cmd = tolower(cmd);
222 08708877 2003-11-25 devnull cur = 0;
223 08708877 2003-11-25 devnull } else {
224 08708877 2003-11-25 devnull doall = 0;
225 08708877 2003-11-25 devnull cur = dot->cur;
226 08708877 2003-11-25 devnull }
227 08708877 2003-11-25 devnull if(debug && doall && cmd == 'a')
228 08708877 2003-11-25 devnull Bprint(bout, "%d entries, cur=%d\n", dot->n, cur+1);
229 08708877 2003-11-25 devnull for(;;){
230 08708877 2003-11-25 devnull if(cur >= dot->n)
231 08708877 2003-11-25 devnull break;
232 08708877 2003-11-25 devnull if(doall) {
233 08708877 2003-11-25 devnull Bprint(bout, "%d\t", cur+1);
234 08708877 2003-11-25 devnull linelen += 4 + (cur >= 10);
235 08708877 2003-11-25 devnull }
236 08708877 2003-11-25 devnull switch(cmd) {
237 08708877 2003-11-25 devnull case 'a':
238 08708877 2003-11-25 devnull Bprint(bout, "#%lud\n", dot->doff[cur]);
239 08708877 2003-11-25 devnull break;
240 08708877 2003-11-25 devnull case 'h':
241 08708877 2003-11-25 devnull case 'p':
242 08708877 2003-11-25 devnull case 'r':
243 08708877 2003-11-25 devnull e = getentry(cur);
244 08708877 2003-11-25 devnull (*dict->printentry)(e, cmd);
245 08708877 2003-11-25 devnull break;
246 08708877 2003-11-25 devnull }
247 08708877 2003-11-25 devnull cur++;
248 08708877 2003-11-25 devnull if(doall) {
249 08708877 2003-11-25 devnull if(cmd == 'p' || cmd == 'r') {
250 08708877 2003-11-25 devnull Bputc(bout, '\n');
251 08708877 2003-11-25 devnull linelen = 0;
252 08708877 2003-11-25 devnull }
253 08708877 2003-11-25 devnull } else
254 08708877 2003-11-25 devnull break;
255 08708877 2003-11-25 devnull }
256 08708877 2003-11-25 devnull if(cur >= dot->n)
257 08708877 2003-11-25 devnull cur = 0;
258 08708877 2003-11-25 devnull dot->cur = cur;
259 08708877 2003-11-25 devnull }
260 08708877 2003-11-25 devnull
261 08708877 2003-11-25 devnull /*
262 08708877 2003-11-25 devnull * Address syntax: ('.' | '/' re '/' | '!' re '!' | number | '#' number) ('+' | '-')*
263 08708877 2003-11-25 devnull * Answer goes in dot.
264 08708877 2003-11-25 devnull * Return -1 if address starts, but get error.
265 08708877 2003-11-25 devnull * Return 0 if no address.
266 08708877 2003-11-25 devnull */
267 08708877 2003-11-25 devnull int
268 08708877 2003-11-25 devnull parseaddr(char *line, char **eptr)
269 08708877 2003-11-25 devnull {
270 08708877 2003-11-25 devnull int delim, plen;
271 08708877 2003-11-25 devnull ulong v;
272 08708877 2003-11-25 devnull char *e;
273 08708877 2003-11-25 devnull char pat[Plen];
274 08708877 2003-11-25 devnull
275 08708877 2003-11-25 devnull if(*line == '/' || *line == '!') {
276 08708877 2003-11-25 devnull /* anchored regular expression match; '!' means no folding */
277 08708877 2003-11-25 devnull if(*line == '/') {
278 08708877 2003-11-25 devnull delim = '/';
279 08708877 2003-11-25 devnull e = strpbrk(line+1, "/\n");
280 08708877 2003-11-25 devnull } else {
281 08708877 2003-11-25 devnull delim = '!';
282 08708877 2003-11-25 devnull e = strpbrk(line+1, "!\n");
283 08708877 2003-11-25 devnull }
284 08708877 2003-11-25 devnull plen = e-line-1;
285 08708877 2003-11-25 devnull if(plen >= Plen-3) {
286 08708877 2003-11-25 devnull err("pattern too big");
287 08708877 2003-11-25 devnull return -1;
288 08708877 2003-11-25 devnull }
289 08708877 2003-11-25 devnull pat[0] = '^';
290 08708877 2003-11-25 devnull memcpy(pat+1, line+1, plen);
291 08708877 2003-11-25 devnull pat[plen+1] = '$';
292 08708877 2003-11-25 devnull pat[plen+2] = 0;
293 08708877 2003-11-25 devnull if(*e == '\n')
294 08708877 2003-11-25 devnull line = e;
295 08708877 2003-11-25 devnull else
296 08708877 2003-11-25 devnull line = e+1;
297 08708877 2003-11-25 devnull if(!search(pat, delim == '/')) {
298 08708877 2003-11-25 devnull err("pattern not found");
299 08708877 2003-11-25 devnull return -1;
300 08708877 2003-11-25 devnull }
301 08708877 2003-11-25 devnull } else if(*line == '#') {
302 08708877 2003-11-25 devnull /* absolute byte offset into dictionary */
303 08708877 2003-11-25 devnull line++;
304 3bd56b04 2005-09-09 devnull if(!isdigit((uchar)*line))
305 08708877 2003-11-25 devnull return -1;
306 08708877 2003-11-25 devnull v = strtoul(line, &e, 10);
307 08708877 2003-11-25 devnull line = e;
308 08708877 2003-11-25 devnull dot->doff[0] = v;
309 08708877 2003-11-25 devnull dot->n = 1;
310 08708877 2003-11-25 devnull dot->cur = 0;
311 3bd56b04 2005-09-09 devnull } else if(isdigit((uchar)*line)) {
312 08708877 2003-11-25 devnull v = strtoul(line, &e, 10);
313 08708877 2003-11-25 devnull line = e;
314 08708877 2003-11-25 devnull if(v < 1 || v > dot->n)
315 08708877 2003-11-25 devnull err(".%d not in range [1,%d], ignored",
316 08708877 2003-11-25 devnull v, dot->n);
317 08708877 2003-11-25 devnull else
318 08708877 2003-11-25 devnull dot->cur = v-1;
319 08708877 2003-11-25 devnull } else if(*line == '.') {
320 08708877 2003-11-25 devnull line++;
321 08708877 2003-11-25 devnull } else {
322 08708877 2003-11-25 devnull *eptr = line;
323 08708877 2003-11-25 devnull return 0;
324 08708877 2003-11-25 devnull }
325 08708877 2003-11-25 devnull while(*line == '+' || *line == '-') {
326 08708877 2003-11-25 devnull if(*line == '+')
327 08708877 2003-11-25 devnull setdotnext();
328 08708877 2003-11-25 devnull else
329 08708877 2003-11-25 devnull setdotprev();
330 08708877 2003-11-25 devnull line++;
331 08708877 2003-11-25 devnull }
332 08708877 2003-11-25 devnull *eptr = line;
333 08708877 2003-11-25 devnull return 1;
334 08708877 2003-11-25 devnull }
335 08708877 2003-11-25 devnull
336 08708877 2003-11-25 devnull /*
337 08708877 2003-11-25 devnull * Index file is sorted by folded field1.
338 08708877 2003-11-25 devnull * Method: find pre, a folded prefix of r.e. pat,
339 08708877 2003-11-25 devnull * and then low = offset to beginning of
340 08708877 2003-11-25 devnull * line in index file where first match of prefix occurs.
341 08708877 2003-11-25 devnull * Then go through index until prefix no longer matches,
342 08708877 2003-11-25 devnull * adding each line that matches real pattern to dot.
343 08708877 2003-11-25 devnull * Finally, sort dot offsets (uniquing).
344 08708877 2003-11-25 devnull * We know pat len < Plen, and that it is surrounded by ^..$
345 08708877 2003-11-25 devnull */
346 08708877 2003-11-25 devnull int
347 08708877 2003-11-25 devnull search(char *pat, int dofold)
348 08708877 2003-11-25 devnull {
349 08708877 2003-11-25 devnull int needre, prelen, match, n;
350 08708877 2003-11-25 devnull Reprog *re;
351 08708877 2003-11-25 devnull long ioff, v;
352 08708877 2003-11-25 devnull Rune pre[Plen];
353 08708877 2003-11-25 devnull Rune lit[Plen];
354 08708877 2003-11-25 devnull Rune entry[Fieldlen];
355 08708877 2003-11-25 devnull char fpat[Plen];
356 08708877 2003-11-25 devnull
357 08708877 2003-11-25 devnull prelen = getpref(pat+1, pre);
358 08708877 2003-11-25 devnull if(pat[prelen+1] == 0 || pat[prelen+1] == '$') {
359 08708877 2003-11-25 devnull runescpy(lit, pre);
360 08708877 2003-11-25 devnull if(dofold)
361 08708877 2003-11-25 devnull fold(lit);
362 08708877 2003-11-25 devnull needre = 0;
363 08708877 2003-11-25 devnull SET(re);
364 08708877 2003-11-25 devnull } else {
365 08708877 2003-11-25 devnull needre = 1;
366 08708877 2003-11-25 devnull if(dofold) {
367 08708877 2003-11-25 devnull foldre(fpat, pat);
368 08708877 2003-11-25 devnull re = regcomp(fpat);
369 08708877 2003-11-25 devnull } else
370 08708877 2003-11-25 devnull re = regcomp(pat);
371 08708877 2003-11-25 devnull }
372 08708877 2003-11-25 devnull fold(pre);
373 08708877 2003-11-25 devnull ioff = locate(pre);
374 08708877 2003-11-25 devnull if(ioff < 0)
375 08708877 2003-11-25 devnull return 0;
376 08708877 2003-11-25 devnull dot->n = 0;
377 08708877 2003-11-25 devnull Bseek(bindex, ioff, 0);
378 08708877 2003-11-25 devnull for(;;) {
379 08708877 2003-11-25 devnull if(!getfield(entry))
380 08708877 2003-11-25 devnull break;
381 08708877 2003-11-25 devnull if(dofold)
382 08708877 2003-11-25 devnull fold(entry);
383 08708877 2003-11-25 devnull if(needre)
384 08708877 2003-11-25 devnull match = rregexec(re, entry, 0, 0);
385 08708877 2003-11-25 devnull else
386 08708877 2003-11-25 devnull match = (acomp(lit, entry) == 0);
387 08708877 2003-11-25 devnull if(match) {
388 08708877 2003-11-25 devnull if(!getfield(entry))
389 08708877 2003-11-25 devnull break;
390 08708877 2003-11-25 devnull v = runetol(entry);
391 08708877 2003-11-25 devnull if(dot->n >= dot->maxn) {
392 08708877 2003-11-25 devnull n = 2*dot->maxn;
393 08708877 2003-11-25 devnull dot = realloc(dot,
394 08708877 2003-11-25 devnull sizeof(Addr)+(n-1)*sizeof(long));
395 08708877 2003-11-25 devnull if(!dot) {
396 08708877 2003-11-25 devnull err("out of memory");
397 08708877 2003-11-25 devnull exits("nomem");
398 08708877 2003-11-25 devnull }
399 08708877 2003-11-25 devnull dot->maxn = n;
400 08708877 2003-11-25 devnull }
401 08708877 2003-11-25 devnull dot->doff[dot->n++] = v;
402 08708877 2003-11-25 devnull } else {
403 08708877 2003-11-25 devnull if(!dofold)
404 08708877 2003-11-25 devnull fold(entry);
405 08708877 2003-11-25 devnull if(*pre) {
406 08708877 2003-11-25 devnull n = acomp(pre, entry);
407 08708877 2003-11-25 devnull if(n < -1 || (!needre && n < 0))
408 08708877 2003-11-25 devnull break;
409 08708877 2003-11-25 devnull }
410 08708877 2003-11-25 devnull /* get to next index entry */
411 08708877 2003-11-25 devnull if(!getfield(entry))
412 08708877 2003-11-25 devnull break;
413 08708877 2003-11-25 devnull }
414 08708877 2003-11-25 devnull }
415 08708877 2003-11-25 devnull sortaddr(dot);
416 08708877 2003-11-25 devnull dot->cur = 0;
417 08708877 2003-11-25 devnull return dot->n;
418 08708877 2003-11-25 devnull }
419 08708877 2003-11-25 devnull
420 08708877 2003-11-25 devnull /*
421 08708877 2003-11-25 devnull * Return offset in index file of first line whose folded
422 08708877 2003-11-25 devnull * first field has pre as a prefix. -1 if none found.
423 08708877 2003-11-25 devnull */
424 08708877 2003-11-25 devnull long
425 08708877 2003-11-25 devnull locate(Rune *pre)
426 08708877 2003-11-25 devnull {
427 08708877 2003-11-25 devnull long top, bot, mid;
428 08708877 2003-11-25 devnull Rune entry[Fieldlen];
429 08708877 2003-11-25 devnull
430 08708877 2003-11-25 devnull if(*pre == 0)
431 08708877 2003-11-25 devnull return 0;
432 08708877 2003-11-25 devnull bot = 0;
433 08708877 2003-11-25 devnull top = indextop;
434 08708877 2003-11-25 devnull if(debug>1)
435 08708877 2003-11-25 devnull fprint(2, "locate looking for prefix %S\n", pre);
436 08708877 2003-11-25 devnull for(;;) {
437 08708877 2003-11-25 devnull /*
438 08708877 2003-11-25 devnull * Loop invariant: foldkey(bot) < pre <= foldkey(top)
439 08708877 2003-11-25 devnull * and bot < top, and bot,top point at beginning of lines
440 08708877 2003-11-25 devnull */
441 08708877 2003-11-25 devnull mid = (top+bot) / 2;
442 08708877 2003-11-25 devnull mid = seeknextline(bindex, mid);
443 08708877 2003-11-25 devnull if(debug > 1)
444 08708877 2003-11-25 devnull fprint(2, "bot=%ld, mid=%ld->%ld, top=%ld\n",
445 08708877 2003-11-25 devnull bot, (top+bot) / 2, mid, top);
446 08708877 2003-11-25 devnull if(mid == top || !getfield(entry))
447 08708877 2003-11-25 devnull break;
448 08708877 2003-11-25 devnull if(debug > 1)
449 08708877 2003-11-25 devnull fprint(2, "key=%S\n", entry);
450 08708877 2003-11-25 devnull /*
451 08708877 2003-11-25 devnull * here mid is strictly between bot and top
452 08708877 2003-11-25 devnull */
453 08708877 2003-11-25 devnull fold(entry);
454 08708877 2003-11-25 devnull if(acomp(pre, entry) <= 0)
455 08708877 2003-11-25 devnull top = mid;
456 08708877 2003-11-25 devnull else
457 08708877 2003-11-25 devnull bot = mid;
458 08708877 2003-11-25 devnull }
459 08708877 2003-11-25 devnull /*
460 08708877 2003-11-25 devnull * bot < top, but they don't necessarily point at successive lines
461 08708877 2003-11-25 devnull * Use linear search from bot to find first line that pre is a
462 08708877 2003-11-25 devnull * prefix of
463 08708877 2003-11-25 devnull */
464 08708877 2003-11-25 devnull while((bot = seeknextline(bindex, bot)) <= top) {
465 08708877 2003-11-25 devnull if(!getfield(entry))
466 08708877 2003-11-25 devnull return -1;
467 08708877 2003-11-25 devnull if(debug > 1)
468 08708877 2003-11-25 devnull fprint(2, "key=%S\n", entry);
469 08708877 2003-11-25 devnull fold(entry);
470 08708877 2003-11-25 devnull switch(acomp(pre, entry)) {
471 08708877 2003-11-25 devnull case -2:
472 08708877 2003-11-25 devnull return -1;
473 08708877 2003-11-25 devnull case -1:
474 08708877 2003-11-25 devnull case 0:
475 08708877 2003-11-25 devnull return bot;
476 08708877 2003-11-25 devnull case 1:
477 08708877 2003-11-25 devnull case 2:
478 08708877 2003-11-25 devnull continue;
479 08708877 2003-11-25 devnull }
480 08708877 2003-11-25 devnull }
481 08708877 2003-11-25 devnull return -1;
482 08708877 2003-11-25 devnull
483 08708877 2003-11-25 devnull }
484 08708877 2003-11-25 devnull
485 08708877 2003-11-25 devnull /*
486 08708877 2003-11-25 devnull * Get prefix of non re-metacharacters, runified, into pre,
487 08708877 2003-11-25 devnull * and return length
488 08708877 2003-11-25 devnull */
489 08708877 2003-11-25 devnull int
490 08708877 2003-11-25 devnull getpref(char *pat, Rune *pre)
491 08708877 2003-11-25 devnull {
492 08708877 2003-11-25 devnull int n, r;
493 08708877 2003-11-25 devnull char *p;
494 08708877 2003-11-25 devnull
495 08708877 2003-11-25 devnull p = pat;
496 08708877 2003-11-25 devnull while(*p) {
497 08708877 2003-11-25 devnull n = chartorune(pre, p);
498 08708877 2003-11-25 devnull r = *pre;
499 08708877 2003-11-25 devnull switch(r) {
500 08708877 2003-11-25 devnull case 0x2e: case 0x2a: case 0x2b: case 0x3f:
501 08708877 2003-11-25 devnull case 0x5b: case 0x5d: case 0x28: case ')':
502 08708877 2003-11-25 devnull case 0x7c: case 0x5e: case 0x24:
503 08708877 2003-11-25 devnull *pre = 0;
504 08708877 2003-11-25 devnull return p-pat;
505 bb0266fe 2005-05-07 devnull case '\\':
506 08708877 2003-11-25 devnull p += n;
507 08708877 2003-11-25 devnull p += chartorune(++pre, p);
508 08708877 2003-11-25 devnull pre++;
509 08708877 2003-11-25 devnull break;
510 08708877 2003-11-25 devnull default:
511 08708877 2003-11-25 devnull p += n;
512 08708877 2003-11-25 devnull pre++;
513 08708877 2003-11-25 devnull }
514 08708877 2003-11-25 devnull }
515 08708877 2003-11-25 devnull return p-pat;
516 08708877 2003-11-25 devnull }
517 08708877 2003-11-25 devnull
518 08708877 2003-11-25 devnull long
519 08708877 2003-11-25 devnull seeknextline(Biobuf *b, long off)
520 08708877 2003-11-25 devnull {
521 08708877 2003-11-25 devnull long c;
522 08708877 2003-11-25 devnull
523 08708877 2003-11-25 devnull Bseek(b, off, 0);
524 08708877 2003-11-25 devnull do {
525 08708877 2003-11-25 devnull c = Bgetrune(b);
526 08708877 2003-11-25 devnull } while(c>=0 && c!='\n');
527 08708877 2003-11-25 devnull return Boffset(b);
528 08708877 2003-11-25 devnull }
529 08708877 2003-11-25 devnull
530 08708877 2003-11-25 devnull /*
531 08708877 2003-11-25 devnull * Get next field out of index file (either tab- or nl- terminated)
532 08708877 2003-11-25 devnull * Answer in *rp, assumed to be Fieldlen long.
533 08708877 2003-11-25 devnull * Return 0 if read error first.
534 08708877 2003-11-25 devnull */
535 08708877 2003-11-25 devnull int
536 08708877 2003-11-25 devnull getfield(Rune *rp)
537 08708877 2003-11-25 devnull {
538 08708877 2003-11-25 devnull long c;
539 08708877 2003-11-25 devnull int n;
540 08708877 2003-11-25 devnull
541 08708877 2003-11-25 devnull for(n=Fieldlen; n-- > 0; ) {
542 08708877 2003-11-25 devnull if ((c = Bgetrune(bindex)) < 0)
543 08708877 2003-11-25 devnull return 0;
544 08708877 2003-11-25 devnull if(c == '\t' || c == '\n') {
545 bb0266fe 2005-05-07 devnull *rp = '\0';
546 08708877 2003-11-25 devnull return 1;
547 08708877 2003-11-25 devnull }
548 08708877 2003-11-25 devnull *rp++ = c;
549 08708877 2003-11-25 devnull }
550 08708877 2003-11-25 devnull err("word too long");
551 08708877 2003-11-25 devnull return 0;
552 08708877 2003-11-25 devnull }
553 08708877 2003-11-25 devnull
/*
 * A compare longs function suitable for qsort.
 * Returns -1, 0 or 1 as *av is less than, equal to or greater
 * than *bv.  The values are compared directly rather than
 * subtracted, so widely separated values cannot overflow.
 */
static int
longcmp(const void *av, const void *bv)
{
	long a, b;

	a = *(const long*)av;
	b = *(const long*)bv;

	if(a < b)
		return -1;
	if(a > b)
		return 1;
	return 0;
}
574 08708877 2003-11-25 devnull
575 08708877 2003-11-25 devnull void
576 08708877 2003-11-25 devnull sortaddr(Addr *a)
577 08708877 2003-11-25 devnull {
578 08708877 2003-11-25 devnull int i, j;
579 08708877 2003-11-25 devnull long v;
580 08708877 2003-11-25 devnull
581 08708877 2003-11-25 devnull if(a->n <= 1)
582 08708877 2003-11-25 devnull return;
583 08708877 2003-11-25 devnull
584 08708877 2003-11-25 devnull qsort(a->doff, a->n, sizeof(long), longcmp);
585 08708877 2003-11-25 devnull
586 08708877 2003-11-25 devnull /* remove duplicates */
587 08708877 2003-11-25 devnull for(i=0, j=0; j < a->n; j++) {
588 08708877 2003-11-25 devnull v = a->doff[j];
589 08708877 2003-11-25 devnull if(i > 0 && v == a->doff[i-1])
590 08708877 2003-11-25 devnull continue;
591 08708877 2003-11-25 devnull a->doff[i++] = v;
592 08708877 2003-11-25 devnull }
593 08708877 2003-11-25 devnull a->n = i;
594 08708877 2003-11-25 devnull }
595 08708877 2003-11-25 devnull
596 08708877 2003-11-25 devnull Entry
597 08708877 2003-11-25 devnull getentry(int i)
598 08708877 2003-11-25 devnull {
599 08708877 2003-11-25 devnull long b, e, n;
600 08708877 2003-11-25 devnull static Entry ans;
601 08708877 2003-11-25 devnull static int anslen = 0;
602 08708877 2003-11-25 devnull
603 08708877 2003-11-25 devnull b = dot->doff[i];
604 08708877 2003-11-25 devnull e = (*dict->nextoff)(b+1);
605 08708877 2003-11-25 devnull ans.doff = b;
606 08708877 2003-11-25 devnull if(e < 0) {
607 08708877 2003-11-25 devnull err("couldn't seek to entry");
608 08708877 2003-11-25 devnull ans.start = 0;
609 08708877 2003-11-25 devnull ans.end = 0;
610 08708877 2003-11-25 devnull } else {
611 08708877 2003-11-25 devnull n = e-b;
612 08708877 2003-11-25 devnull if(n+1 > anslen) {
613 08708877 2003-11-25 devnull ans.start = realloc(ans.start, n+1);
614 08708877 2003-11-25 devnull if(!ans.start) {
615 08708877 2003-11-25 devnull err("out of memory");
616 08708877 2003-11-25 devnull exits("nomem");
617 08708877 2003-11-25 devnull }
618 08708877 2003-11-25 devnull anslen = n+1;
619 08708877 2003-11-25 devnull }
620 08708877 2003-11-25 devnull Bseek(bdict, b, 0);
621 08708877 2003-11-25 devnull n = Bread(bdict, ans.start, n);
622 08708877 2003-11-25 devnull ans.end = ans.start + n;
623 08708877 2003-11-25 devnull *ans.end = 0;
624 08708877 2003-11-25 devnull }
625 08708877 2003-11-25 devnull return ans;
626 08708877 2003-11-25 devnull }
627 08708877 2003-11-25 devnull
628 08708877 2003-11-25 devnull void
629 08708877 2003-11-25 devnull setdotnext(void)
630 08708877 2003-11-25 devnull {
631 08708877 2003-11-25 devnull long b;
632 08708877 2003-11-25 devnull
633 08708877 2003-11-25 devnull b = (*dict->nextoff)(dot->doff[dot->cur]+1);
634 08708877 2003-11-25 devnull if(b < 0) {
635 08708877 2003-11-25 devnull err("couldn't find a next entry");
636 08708877 2003-11-25 devnull return;
637 08708877 2003-11-25 devnull }
638 08708877 2003-11-25 devnull dot->doff[0] = b;
639 08708877 2003-11-25 devnull dot->n = 1;
640 08708877 2003-11-25 devnull dot->cur = 0;
641 08708877 2003-11-25 devnull }
642 08708877 2003-11-25 devnull
643 08708877 2003-11-25 devnull void
644 08708877 2003-11-25 devnull setdotprev(void)
645 08708877 2003-11-25 devnull {
646 08708877 2003-11-25 devnull int tryback;
647 08708877 2003-11-25 devnull long here, last, p;
648 08708877 2003-11-25 devnull
649 08708877 2003-11-25 devnull if(dot->cur < 0 || dot->cur >= dot->n)
650 08708877 2003-11-25 devnull return;
651 08708877 2003-11-25 devnull tryback = 2000;
652 08708877 2003-11-25 devnull here = dot->doff[dot->cur];
653 08708877 2003-11-25 devnull last = 0;
654 08708877 2003-11-25 devnull while(last == 0) {
655 08708877 2003-11-25 devnull p = here - tryback;
656 08708877 2003-11-25 devnull if(p < 0)
657 08708877 2003-11-25 devnull p = 0;
658 08708877 2003-11-25 devnull for(;;) {
659 08708877 2003-11-25 devnull p = (*dict->nextoff)(p+1);
660 08708877 2003-11-25 devnull if(p < 0)
661 08708877 2003-11-25 devnull return; /* shouldn't happen */
662 08708877 2003-11-25 devnull if(p >= here)
663 08708877 2003-11-25 devnull break;
664 08708877 2003-11-25 devnull last = p;
665 08708877 2003-11-25 devnull }
666 08708877 2003-11-25 devnull if(!last) {
667 08708877 2003-11-25 devnull if(here - tryback < 0) {
668 08708877 2003-11-25 devnull err("can't find a previous entry");
669 08708877 2003-11-25 devnull return;
670 08708877 2003-11-25 devnull }
671 08708877 2003-11-25 devnull tryback = 2*tryback;
672 08708877 2003-11-25 devnull }
673 08708877 2003-11-25 devnull }
674 08708877 2003-11-25 devnull dot->doff[0] = last;
675 08708877 2003-11-25 devnull dot->n = 1;
676 08708877 2003-11-25 devnull dot->cur = 0;
677 08708877 2003-11-25 devnull }
678 ebda53e1 2007-08-22 rsc
/*
 * find the specified file and return a path.
 * default location is #9/dict, but the directory named by
 * the environment variable dictpath is used instead when
 * that variable is set.  the variable is looked up once,
 * on the first call.  the result is built by smprint.
 */
char*
dictfile(char *f)
{
	static char *dir;
	static int looked;

	if(!looked){
		looked = 1;
		dir = getenv("dictpath");
	}

	if(dir == 0)
		return unsharp(smprint("#9/dict/%s", f));
	return smprint("%s/%s", dir, f);
}