Blame


1 08708877 2003-11-25 devnull #include <u.h>
2 08708877 2003-11-25 devnull #include <libc.h>
3 08708877 2003-11-25 devnull #include <bio.h>
4 08708877 2003-11-25 devnull #include <regexp.h>
5 08708877 2003-11-25 devnull #include <ctype.h>
6 08708877 2003-11-25 devnull #include "dict.h"
7 08708877 2003-11-25 devnull
8 08708877 2003-11-25 devnull /*
9 08708877 2003-11-25 devnull * Assumed index file structure: lines of form
10 08708877 2003-11-25 devnull * [^\t]+\t[0-9]+
11 08708877 2003-11-25 devnull * First field is key, second is byte offset into dictionary.
12 08708877 2003-11-25 devnull * Should be sorted with args -u -t' ' +0f -1 +0 -1 +1n -2
13 08708877 2003-11-25 devnull */
14 08708877 2003-11-25 devnull typedef struct Addr Addr;
15 08708877 2003-11-25 devnull
16 08708877 2003-11-25 devnull struct Addr {
17 08708877 2003-11-25 devnull int n; /* number of offsets */
18 08708877 2003-11-25 devnull int cur; /* current position within doff array */
19 08708877 2003-11-25 devnull int maxn; /* actual current size of doff array */
20 08708877 2003-11-25 devnull ulong doff[1]; /* doff[maxn], with 0..n-1 significant */
21 08708877 2003-11-25 devnull };
22 08708877 2003-11-25 devnull
23 08708877 2003-11-25 devnull Biobuf binbuf;
24 08708877 2003-11-25 devnull Biobuf boutbuf;
25 08708877 2003-11-25 devnull Biobuf *bin = &binbuf; /* user cmd input */
26 08708877 2003-11-25 devnull Biobuf *bout = &boutbuf; /* output */
27 08708877 2003-11-25 devnull Biobuf *bdict; /* dictionary */
28 08708877 2003-11-25 devnull Biobuf *bindex; /* index file */
29 08708877 2003-11-25 devnull long indextop; /* index offset at end of file */
30 08708877 2003-11-25 devnull int lastcmd; /* last executed command */
31 08708877 2003-11-25 devnull Addr *dot; /* "current" address */
32 08708877 2003-11-25 devnull Dict *dict; /* current dictionary */
33 08708877 2003-11-25 devnull int linelen;
34 08708877 2003-11-25 devnull int breaklen = 60;
35 08708877 2003-11-25 devnull int outinhibit;
36 08708877 2003-11-25 devnull int debug;
37 08708877 2003-11-25 devnull
38 08708877 2003-11-25 devnull void execcmd(int);
39 08708877 2003-11-25 devnull int getpref(char*, Rune*);
40 08708877 2003-11-25 devnull Entry getentry(int);
41 08708877 2003-11-25 devnull int getfield(Rune*);
42 08708877 2003-11-25 devnull long locate(Rune*);
43 08708877 2003-11-25 devnull int parseaddr(char*, char**);
44 08708877 2003-11-25 devnull int parsecmd(char*);
45 08708877 2003-11-25 devnull int search(char*, int);
46 08708877 2003-11-25 devnull long seeknextline(Biobuf*, long);
47 08708877 2003-11-25 devnull void setdotnext(void);
48 08708877 2003-11-25 devnull void setdotprev(void);
49 08708877 2003-11-25 devnull void sortaddr(Addr*);
50 08708877 2003-11-25 devnull void usage(void);
51 08708877 2003-11-25 devnull
52 08708877 2003-11-25 devnull enum {
53 08708877 2003-11-25 devnull Plen=300, /* max length of a search pattern */
54 08708877 2003-11-25 devnull Fieldlen=200, /* max length of an index field */
55 08708877 2003-11-25 devnull Aslots=10, /* initial number of slots in an address */
56 08708877 2003-11-25 devnull };
57 08708877 2003-11-25 devnull
58 08708877 2003-11-25 devnull void
59 08708877 2003-11-25 devnull main(int argc, char **argv)
60 08708877 2003-11-25 devnull {
61 08708877 2003-11-25 devnull int i, cmd, kflag;
62 32f69c36 2003-12-11 devnull char *line, *p;
63 08708877 2003-11-25 devnull
64 08708877 2003-11-25 devnull Binit(&binbuf, 0, OREAD);
65 08708877 2003-11-25 devnull Binit(&boutbuf, 1, OWRITE);
66 08708877 2003-11-25 devnull kflag = 0;
67 08708877 2003-11-25 devnull line = 0;
68 08708877 2003-11-25 devnull dict = 0;
69 08708877 2003-11-25 devnull
70 08708877 2003-11-25 devnull for(i=0; dicts[i].name; i++){
71 08708877 2003-11-25 devnull if(access(dicts[i].path, 0)>=0 && access(dicts[i].indexpath, 0)>=0){
72 08708877 2003-11-25 devnull dict = &dicts[i];
73 08708877 2003-11-25 devnull break;
74 08708877 2003-11-25 devnull }
75 08708877 2003-11-25 devnull }
76 08708877 2003-11-25 devnull ARGBEGIN {
77 08708877 2003-11-25 devnull case 'd':
78 08708877 2003-11-25 devnull p = ARGF();
79 08708877 2003-11-25 devnull dict = 0;
80 08708877 2003-11-25 devnull if(p) {
81 08708877 2003-11-25 devnull for(i=0; dicts[i].name; i++)
82 08708877 2003-11-25 devnull if(strcmp(p, dicts[i].name)==0) {
83 08708877 2003-11-25 devnull dict = &dicts[i];
84 08708877 2003-11-25 devnull break;
85 08708877 2003-11-25 devnull }
86 08708877 2003-11-25 devnull }
87 08708877 2003-11-25 devnull if(!dict)
88 08708877 2003-11-25 devnull usage();
89 08708877 2003-11-25 devnull break;
90 08708877 2003-11-25 devnull case 'c':
91 08708877 2003-11-25 devnull line = ARGF();
92 08708877 2003-11-25 devnull if(!line)
93 08708877 2003-11-25 devnull usage();
94 08708877 2003-11-25 devnull break;
95 08708877 2003-11-25 devnull case 'k':
96 08708877 2003-11-25 devnull kflag++;
97 08708877 2003-11-25 devnull break;
98 08708877 2003-11-25 devnull case 'D':
99 08708877 2003-11-25 devnull debug++;
100 08708877 2003-11-25 devnull break;
101 08708877 2003-11-25 devnull default:
102 08708877 2003-11-25 devnull usage();
103 08708877 2003-11-25 devnull ARGEND }
104 08708877 2003-11-25 devnull if(dict == 0){
105 08708877 2003-11-25 devnull err("no dictionaries present on this system");
106 08708877 2003-11-25 devnull exits("nodict");
107 08708877 2003-11-25 devnull }
108 08708877 2003-11-25 devnull
109 08708877 2003-11-25 devnull if(kflag) {
110 08708877 2003-11-25 devnull (*dict->printkey)();
111 08708877 2003-11-25 devnull exits(0);
112 08708877 2003-11-25 devnull }
113 08708877 2003-11-25 devnull if(argc > 1)
114 08708877 2003-11-25 devnull usage();
115 08708877 2003-11-25 devnull else if(argc == 1) {
116 08708877 2003-11-25 devnull if(line)
117 08708877 2003-11-25 devnull usage();
118 08708877 2003-11-25 devnull p = argv[0];
119 08708877 2003-11-25 devnull line = malloc(strlen(p)+5);
120 08708877 2003-11-25 devnull sprint(line, "/%s/P\n", p);
121 08708877 2003-11-25 devnull }
122 08708877 2003-11-25 devnull bdict = Bopen(dict->path, OREAD);
123 08708877 2003-11-25 devnull if(!bdict) {
124 32f69c36 2003-12-11 devnull err("can't open dictionary %s", dict->path);
125 08708877 2003-11-25 devnull exits("nodict");
126 08708877 2003-11-25 devnull }
127 08708877 2003-11-25 devnull bindex = Bopen(dict->indexpath, OREAD);
128 08708877 2003-11-25 devnull if(!bindex) {
129 32f69c36 2003-12-11 devnull err("can't open index %s", dict->indexpath);
130 08708877 2003-11-25 devnull exits("noindex");
131 08708877 2003-11-25 devnull }
132 08708877 2003-11-25 devnull indextop = Bseek(bindex, 0L, 2);
133 08708877 2003-11-25 devnull
134 08708877 2003-11-25 devnull dot = malloc(sizeof(Addr)+(Aslots-1)*sizeof(ulong));
135 08708877 2003-11-25 devnull dot->n = 0;
136 08708877 2003-11-25 devnull dot->cur = 0;
137 08708877 2003-11-25 devnull dot->maxn = Aslots;
138 08708877 2003-11-25 devnull lastcmd = 0;
139 08708877 2003-11-25 devnull
140 08708877 2003-11-25 devnull if(line) {
141 08708877 2003-11-25 devnull cmd = parsecmd(line);
142 08708877 2003-11-25 devnull if(cmd)
143 08708877 2003-11-25 devnull execcmd(cmd);
144 08708877 2003-11-25 devnull } else {
145 08708877 2003-11-25 devnull for(;;) {
146 08708877 2003-11-25 devnull Bprint(bout, "*");
147 08708877 2003-11-25 devnull Bflush(bout);
148 08708877 2003-11-25 devnull line = Brdline(bin, '\n');
149 08708877 2003-11-25 devnull linelen = 0;
150 08708877 2003-11-25 devnull if(!line)
151 08708877 2003-11-25 devnull break;
152 08708877 2003-11-25 devnull cmd = parsecmd(line);
153 08708877 2003-11-25 devnull if(cmd) {
154 08708877 2003-11-25 devnull execcmd(cmd);
155 08708877 2003-11-25 devnull lastcmd = cmd;
156 08708877 2003-11-25 devnull }
157 08708877 2003-11-25 devnull }
158 08708877 2003-11-25 devnull }
159 08708877 2003-11-25 devnull exits(0);
160 08708877 2003-11-25 devnull }
161 08708877 2003-11-25 devnull
162 08708877 2003-11-25 devnull void
163 08708877 2003-11-25 devnull usage(void)
164 08708877 2003-11-25 devnull {
165 08708877 2003-11-25 devnull int i;
166 08708877 2003-11-25 devnull char *a, *b;
167 08708877 2003-11-25 devnull
168 08708877 2003-11-25 devnull Bprint(bout, "Usage: %s [-d dict] [-k] [-c cmd] [word]\n", argv0);
169 08708877 2003-11-25 devnull Bprint(bout, "dictionaries (brackets mark dictionaries not present on this system):\n");
170 08708877 2003-11-25 devnull for(i = 0; dicts[i].name; i++){
171 08708877 2003-11-25 devnull a = b = "";
172 08708877 2003-11-25 devnull if(access(dicts[i].path, 0)<0 || access(dicts[i].indexpath, 0)<0){
173 08708877 2003-11-25 devnull a = "[";
174 08708877 2003-11-25 devnull b = "]";
175 08708877 2003-11-25 devnull }
176 08708877 2003-11-25 devnull Bprint(bout, " %s%s\t%s%s\n", a, dicts[i].name, dicts[i].desc, b);
177 08708877 2003-11-25 devnull }
178 08708877 2003-11-25 devnull exits("usage");
179 08708877 2003-11-25 devnull }
180 08708877 2003-11-25 devnull
181 08708877 2003-11-25 devnull int
182 08708877 2003-11-25 devnull parsecmd(char *line)
183 08708877 2003-11-25 devnull {
184 08708877 2003-11-25 devnull char *e;
185 08708877 2003-11-25 devnull int cmd, ans;
186 08708877 2003-11-25 devnull
187 08708877 2003-11-25 devnull if(parseaddr(line, &e) >= 0)
188 08708877 2003-11-25 devnull line = e;
189 08708877 2003-11-25 devnull else
190 08708877 2003-11-25 devnull return 0;
191 08708877 2003-11-25 devnull cmd = *line;
192 08708877 2003-11-25 devnull ans = cmd;
193 08708877 2003-11-25 devnull if(isupper(cmd))
194 08708877 2003-11-25 devnull cmd = tolower(cmd);
195 08708877 2003-11-25 devnull if(!(cmd == 'a' || cmd == 'h' || cmd == 'p' || cmd == 'r' ||
196 08708877 2003-11-25 devnull cmd == '\n')) {
197 08708877 2003-11-25 devnull err("unknown command %c", cmd);
198 08708877 2003-11-25 devnull return 0;
199 08708877 2003-11-25 devnull }
200 08708877 2003-11-25 devnull if(cmd == '\n')
201 08708877 2003-11-25 devnull switch(lastcmd) {
202 08708877 2003-11-25 devnull case 0: ans = 'H'; break;
203 08708877 2003-11-25 devnull case 'H': ans = 'p'; break;
204 08708877 2003-11-25 devnull default : ans = lastcmd; break;
205 08708877 2003-11-25 devnull }
206 08708877 2003-11-25 devnull else if(line[1] != '\n' && line[1] != 0)
207 08708877 2003-11-25 devnull err("extra stuff after command %c ignored", cmd);
208 08708877 2003-11-25 devnull return ans;
209 08708877 2003-11-25 devnull }
210 08708877 2003-11-25 devnull
211 08708877 2003-11-25 devnull void
212 08708877 2003-11-25 devnull execcmd(int cmd)
213 08708877 2003-11-25 devnull {
214 08708877 2003-11-25 devnull Entry e;
215 08708877 2003-11-25 devnull int cur, doall;
216 08708877 2003-11-25 devnull
217 08708877 2003-11-25 devnull if(isupper(cmd)) {
218 08708877 2003-11-25 devnull doall = 1;
219 08708877 2003-11-25 devnull cmd = tolower(cmd);
220 08708877 2003-11-25 devnull cur = 0;
221 08708877 2003-11-25 devnull } else {
222 08708877 2003-11-25 devnull doall = 0;
223 08708877 2003-11-25 devnull cur = dot->cur;
224 08708877 2003-11-25 devnull }
225 08708877 2003-11-25 devnull if(debug && doall && cmd == 'a')
226 08708877 2003-11-25 devnull Bprint(bout, "%d entries, cur=%d\n", dot->n, cur+1);
227 08708877 2003-11-25 devnull for(;;){
228 08708877 2003-11-25 devnull if(cur >= dot->n)
229 08708877 2003-11-25 devnull break;
230 08708877 2003-11-25 devnull if(doall) {
231 08708877 2003-11-25 devnull Bprint(bout, "%d\t", cur+1);
232 08708877 2003-11-25 devnull linelen += 4 + (cur >= 10);
233 08708877 2003-11-25 devnull }
234 08708877 2003-11-25 devnull switch(cmd) {
235 08708877 2003-11-25 devnull case 'a':
236 08708877 2003-11-25 devnull Bprint(bout, "#%lud\n", dot->doff[cur]);
237 08708877 2003-11-25 devnull break;
238 08708877 2003-11-25 devnull case 'h':
239 08708877 2003-11-25 devnull case 'p':
240 08708877 2003-11-25 devnull case 'r':
241 08708877 2003-11-25 devnull e = getentry(cur);
242 08708877 2003-11-25 devnull (*dict->printentry)(e, cmd);
243 08708877 2003-11-25 devnull break;
244 08708877 2003-11-25 devnull }
245 08708877 2003-11-25 devnull cur++;
246 08708877 2003-11-25 devnull if(doall) {
247 08708877 2003-11-25 devnull if(cmd == 'p' || cmd == 'r') {
248 08708877 2003-11-25 devnull Bputc(bout, '\n');
249 08708877 2003-11-25 devnull linelen = 0;
250 08708877 2003-11-25 devnull }
251 08708877 2003-11-25 devnull } else
252 08708877 2003-11-25 devnull break;
253 08708877 2003-11-25 devnull }
254 08708877 2003-11-25 devnull if(cur >= dot->n)
255 08708877 2003-11-25 devnull cur = 0;
256 08708877 2003-11-25 devnull dot->cur = cur;
257 08708877 2003-11-25 devnull }
258 08708877 2003-11-25 devnull
259 08708877 2003-11-25 devnull /*
260 08708877 2003-11-25 devnull * Address syntax: ('.' | '/' re '/' | '!' re '!' | number | '#' number) ('+' | '-')*
261 08708877 2003-11-25 devnull * Answer goes in dot.
262 08708877 2003-11-25 devnull * Return -1 if address starts, but get error.
263 08708877 2003-11-25 devnull * Return 0 if no address.
264 08708877 2003-11-25 devnull */
265 08708877 2003-11-25 devnull int
266 08708877 2003-11-25 devnull parseaddr(char *line, char **eptr)
267 08708877 2003-11-25 devnull {
268 08708877 2003-11-25 devnull int delim, plen;
269 08708877 2003-11-25 devnull ulong v;
270 08708877 2003-11-25 devnull char *e;
271 08708877 2003-11-25 devnull char pat[Plen];
272 08708877 2003-11-25 devnull
273 08708877 2003-11-25 devnull if(*line == '/' || *line == '!') {
274 08708877 2003-11-25 devnull /* anchored regular expression match; '!' means no folding */
275 08708877 2003-11-25 devnull if(*line == '/') {
276 08708877 2003-11-25 devnull delim = '/';
277 08708877 2003-11-25 devnull e = strpbrk(line+1, "/\n");
278 08708877 2003-11-25 devnull } else {
279 08708877 2003-11-25 devnull delim = '!';
280 08708877 2003-11-25 devnull e = strpbrk(line+1, "!\n");
281 08708877 2003-11-25 devnull }
282 08708877 2003-11-25 devnull plen = e-line-1;
283 08708877 2003-11-25 devnull if(plen >= Plen-3) {
284 08708877 2003-11-25 devnull err("pattern too big");
285 08708877 2003-11-25 devnull return -1;
286 08708877 2003-11-25 devnull }
287 08708877 2003-11-25 devnull pat[0] = '^';
288 08708877 2003-11-25 devnull memcpy(pat+1, line+1, plen);
289 08708877 2003-11-25 devnull pat[plen+1] = '$';
290 08708877 2003-11-25 devnull pat[plen+2] = 0;
291 08708877 2003-11-25 devnull if(*e == '\n')
292 08708877 2003-11-25 devnull line = e;
293 08708877 2003-11-25 devnull else
294 08708877 2003-11-25 devnull line = e+1;
295 08708877 2003-11-25 devnull if(!search(pat, delim == '/')) {
296 08708877 2003-11-25 devnull err("pattern not found");
297 08708877 2003-11-25 devnull return -1;
298 08708877 2003-11-25 devnull }
299 08708877 2003-11-25 devnull } else if(*line == '#') {
300 08708877 2003-11-25 devnull /* absolute byte offset into dictionary */
301 08708877 2003-11-25 devnull line++;
302 08708877 2003-11-25 devnull if(!isdigit(*line))
303 08708877 2003-11-25 devnull return -1;
304 08708877 2003-11-25 devnull v = strtoul(line, &e, 10);
305 08708877 2003-11-25 devnull line = e;
306 08708877 2003-11-25 devnull dot->doff[0] = v;
307 08708877 2003-11-25 devnull dot->n = 1;
308 08708877 2003-11-25 devnull dot->cur = 0;
309 08708877 2003-11-25 devnull } else if(isdigit(*line)) {
310 08708877 2003-11-25 devnull v = strtoul(line, &e, 10);
311 08708877 2003-11-25 devnull line = e;
312 08708877 2003-11-25 devnull if(v < 1 || v > dot->n)
313 08708877 2003-11-25 devnull err(".%d not in range [1,%d], ignored",
314 08708877 2003-11-25 devnull v, dot->n);
315 08708877 2003-11-25 devnull else
316 08708877 2003-11-25 devnull dot->cur = v-1;
317 08708877 2003-11-25 devnull } else if(*line == '.') {
318 08708877 2003-11-25 devnull line++;
319 08708877 2003-11-25 devnull } else {
320 08708877 2003-11-25 devnull *eptr = line;
321 08708877 2003-11-25 devnull return 0;
322 08708877 2003-11-25 devnull }
323 08708877 2003-11-25 devnull while(*line == '+' || *line == '-') {
324 08708877 2003-11-25 devnull if(*line == '+')
325 08708877 2003-11-25 devnull setdotnext();
326 08708877 2003-11-25 devnull else
327 08708877 2003-11-25 devnull setdotprev();
328 08708877 2003-11-25 devnull line++;
329 08708877 2003-11-25 devnull }
330 08708877 2003-11-25 devnull *eptr = line;
331 08708877 2003-11-25 devnull return 1;
332 08708877 2003-11-25 devnull }
333 08708877 2003-11-25 devnull
334 08708877 2003-11-25 devnull /*
335 08708877 2003-11-25 devnull * Index file is sorted by folded field1.
336 08708877 2003-11-25 devnull * Method: find pre, a folded prefix of r.e. pat,
337 08708877 2003-11-25 devnull * and then low = offset to beginning of
338 08708877 2003-11-25 devnull * line in index file where first match of prefix occurs.
339 08708877 2003-11-25 devnull * Then go through index until prefix no longer matches,
340 08708877 2003-11-25 devnull * adding each line that matches real pattern to dot.
341 08708877 2003-11-25 devnull * Finally, sort dot offsets (uniquing).
342 08708877 2003-11-25 devnull * We know pat len < Plen, and that it is surrounded by ^..$
343 08708877 2003-11-25 devnull */
344 08708877 2003-11-25 devnull int
345 08708877 2003-11-25 devnull search(char *pat, int dofold)
346 08708877 2003-11-25 devnull {
347 08708877 2003-11-25 devnull int needre, prelen, match, n;
348 08708877 2003-11-25 devnull Reprog *re;
349 08708877 2003-11-25 devnull long ioff, v;
350 08708877 2003-11-25 devnull Rune pre[Plen];
351 08708877 2003-11-25 devnull Rune lit[Plen];
352 08708877 2003-11-25 devnull Rune entry[Fieldlen];
353 08708877 2003-11-25 devnull char fpat[Plen];
354 08708877 2003-11-25 devnull
355 08708877 2003-11-25 devnull prelen = getpref(pat+1, pre);
356 08708877 2003-11-25 devnull if(pat[prelen+1] == 0 || pat[prelen+1] == '$') {
357 08708877 2003-11-25 devnull runescpy(lit, pre);
358 08708877 2003-11-25 devnull if(dofold)
359 08708877 2003-11-25 devnull fold(lit);
360 08708877 2003-11-25 devnull needre = 0;
361 08708877 2003-11-25 devnull SET(re);
362 08708877 2003-11-25 devnull } else {
363 08708877 2003-11-25 devnull needre = 1;
364 08708877 2003-11-25 devnull if(dofold) {
365 08708877 2003-11-25 devnull foldre(fpat, pat);
366 08708877 2003-11-25 devnull re = regcomp(fpat);
367 08708877 2003-11-25 devnull } else
368 08708877 2003-11-25 devnull re = regcomp(pat);
369 08708877 2003-11-25 devnull }
370 08708877 2003-11-25 devnull fold(pre);
371 08708877 2003-11-25 devnull ioff = locate(pre);
372 08708877 2003-11-25 devnull if(ioff < 0)
373 08708877 2003-11-25 devnull return 0;
374 08708877 2003-11-25 devnull dot->n = 0;
375 08708877 2003-11-25 devnull Bseek(bindex, ioff, 0);
376 08708877 2003-11-25 devnull for(;;) {
377 08708877 2003-11-25 devnull if(!getfield(entry))
378 08708877 2003-11-25 devnull break;
379 08708877 2003-11-25 devnull if(dofold)
380 08708877 2003-11-25 devnull fold(entry);
381 08708877 2003-11-25 devnull if(needre)
382 08708877 2003-11-25 devnull match = rregexec(re, entry, 0, 0);
383 08708877 2003-11-25 devnull else
384 08708877 2003-11-25 devnull match = (acomp(lit, entry) == 0);
385 08708877 2003-11-25 devnull if(match) {
386 08708877 2003-11-25 devnull if(!getfield(entry))
387 08708877 2003-11-25 devnull break;
388 08708877 2003-11-25 devnull v = runetol(entry);
389 08708877 2003-11-25 devnull if(dot->n >= dot->maxn) {
390 08708877 2003-11-25 devnull n = 2*dot->maxn;
391 08708877 2003-11-25 devnull dot = realloc(dot,
392 08708877 2003-11-25 devnull sizeof(Addr)+(n-1)*sizeof(long));
393 08708877 2003-11-25 devnull if(!dot) {
394 08708877 2003-11-25 devnull err("out of memory");
395 08708877 2003-11-25 devnull exits("nomem");
396 08708877 2003-11-25 devnull }
397 08708877 2003-11-25 devnull dot->maxn = n;
398 08708877 2003-11-25 devnull }
399 08708877 2003-11-25 devnull dot->doff[dot->n++] = v;
400 08708877 2003-11-25 devnull } else {
401 08708877 2003-11-25 devnull if(!dofold)
402 08708877 2003-11-25 devnull fold(entry);
403 08708877 2003-11-25 devnull if(*pre) {
404 08708877 2003-11-25 devnull n = acomp(pre, entry);
405 08708877 2003-11-25 devnull if(n < -1 || (!needre && n < 0))
406 08708877 2003-11-25 devnull break;
407 08708877 2003-11-25 devnull }
408 08708877 2003-11-25 devnull /* get to next index entry */
409 08708877 2003-11-25 devnull if(!getfield(entry))
410 08708877 2003-11-25 devnull break;
411 08708877 2003-11-25 devnull }
412 08708877 2003-11-25 devnull }
413 08708877 2003-11-25 devnull sortaddr(dot);
414 08708877 2003-11-25 devnull dot->cur = 0;
415 08708877 2003-11-25 devnull return dot->n;
416 08708877 2003-11-25 devnull }
417 08708877 2003-11-25 devnull
418 08708877 2003-11-25 devnull /*
419 08708877 2003-11-25 devnull * Return offset in index file of first line whose folded
420 08708877 2003-11-25 devnull * first field has pre as a prefix. -1 if none found.
421 08708877 2003-11-25 devnull */
422 08708877 2003-11-25 devnull long
423 08708877 2003-11-25 devnull locate(Rune *pre)
424 08708877 2003-11-25 devnull {
425 08708877 2003-11-25 devnull long top, bot, mid;
426 08708877 2003-11-25 devnull Rune entry[Fieldlen];
427 08708877 2003-11-25 devnull
428 08708877 2003-11-25 devnull if(*pre == 0)
429 08708877 2003-11-25 devnull return 0;
430 08708877 2003-11-25 devnull bot = 0;
431 08708877 2003-11-25 devnull top = indextop;
432 08708877 2003-11-25 devnull if(debug>1)
433 08708877 2003-11-25 devnull fprint(2, "locate looking for prefix %S\n", pre);
434 08708877 2003-11-25 devnull for(;;) {
435 08708877 2003-11-25 devnull /*
436 08708877 2003-11-25 devnull * Loop invariant: foldkey(bot) < pre <= foldkey(top)
437 08708877 2003-11-25 devnull * and bot < top, and bot,top point at beginning of lines
438 08708877 2003-11-25 devnull */
439 08708877 2003-11-25 devnull mid = (top+bot) / 2;
440 08708877 2003-11-25 devnull mid = seeknextline(bindex, mid);
441 08708877 2003-11-25 devnull if(debug > 1)
442 08708877 2003-11-25 devnull fprint(2, "bot=%ld, mid=%ld->%ld, top=%ld\n",
443 08708877 2003-11-25 devnull bot, (top+bot) / 2, mid, top);
444 08708877 2003-11-25 devnull if(mid == top || !getfield(entry))
445 08708877 2003-11-25 devnull break;
446 08708877 2003-11-25 devnull if(debug > 1)
447 08708877 2003-11-25 devnull fprint(2, "key=%S\n", entry);
448 08708877 2003-11-25 devnull /*
449 08708877 2003-11-25 devnull * here mid is strictly between bot and top
450 08708877 2003-11-25 devnull */
451 08708877 2003-11-25 devnull fold(entry);
452 08708877 2003-11-25 devnull if(acomp(pre, entry) <= 0)
453 08708877 2003-11-25 devnull top = mid;
454 08708877 2003-11-25 devnull else
455 08708877 2003-11-25 devnull bot = mid;
456 08708877 2003-11-25 devnull }
457 08708877 2003-11-25 devnull /*
458 08708877 2003-11-25 devnull * bot < top, but they don't necessarily point at successive lines
459 08708877 2003-11-25 devnull * Use linear search from bot to find first line that pre is a
460 08708877 2003-11-25 devnull * prefix of
461 08708877 2003-11-25 devnull */
462 08708877 2003-11-25 devnull while((bot = seeknextline(bindex, bot)) <= top) {
463 08708877 2003-11-25 devnull if(!getfield(entry))
464 08708877 2003-11-25 devnull return -1;
465 08708877 2003-11-25 devnull if(debug > 1)
466 08708877 2003-11-25 devnull fprint(2, "key=%S\n", entry);
467 08708877 2003-11-25 devnull fold(entry);
468 08708877 2003-11-25 devnull switch(acomp(pre, entry)) {
469 08708877 2003-11-25 devnull case -2:
470 08708877 2003-11-25 devnull return -1;
471 08708877 2003-11-25 devnull case -1:
472 08708877 2003-11-25 devnull case 0:
473 08708877 2003-11-25 devnull return bot;
474 08708877 2003-11-25 devnull case 1:
475 08708877 2003-11-25 devnull case 2:
476 08708877 2003-11-25 devnull continue;
477 08708877 2003-11-25 devnull }
478 08708877 2003-11-25 devnull }
479 08708877 2003-11-25 devnull return -1;
480 08708877 2003-11-25 devnull
481 08708877 2003-11-25 devnull }
482 08708877 2003-11-25 devnull
483 08708877 2003-11-25 devnull /*
484 08708877 2003-11-25 devnull * Get prefix of non re-metacharacters, runified, into pre,
485 08708877 2003-11-25 devnull * and return length
486 08708877 2003-11-25 devnull */
487 08708877 2003-11-25 devnull int
488 08708877 2003-11-25 devnull getpref(char *pat, Rune *pre)
489 08708877 2003-11-25 devnull {
490 08708877 2003-11-25 devnull int n, r;
491 08708877 2003-11-25 devnull char *p;
492 08708877 2003-11-25 devnull
493 08708877 2003-11-25 devnull p = pat;
494 08708877 2003-11-25 devnull while(*p) {
495 08708877 2003-11-25 devnull n = chartorune(pre, p);
496 08708877 2003-11-25 devnull r = *pre;
497 08708877 2003-11-25 devnull switch(r) {
498 08708877 2003-11-25 devnull case 0x2e: case 0x2a: case 0x2b: case 0x3f:
499 08708877 2003-11-25 devnull case 0x5b: case 0x5d: case 0x28: case ')':
500 08708877 2003-11-25 devnull case 0x7c: case 0x5e: case 0x24:
501 08708877 2003-11-25 devnull *pre = 0;
502 08708877 2003-11-25 devnull return p-pat;
503 08708877 2003-11-25 devnull case L'\\':
504 08708877 2003-11-25 devnull p += n;
505 08708877 2003-11-25 devnull p += chartorune(++pre, p);
506 08708877 2003-11-25 devnull pre++;
507 08708877 2003-11-25 devnull break;
508 08708877 2003-11-25 devnull default:
509 08708877 2003-11-25 devnull p += n;
510 08708877 2003-11-25 devnull pre++;
511 08708877 2003-11-25 devnull }
512 08708877 2003-11-25 devnull }
513 08708877 2003-11-25 devnull return p-pat;
514 08708877 2003-11-25 devnull }
515 08708877 2003-11-25 devnull
516 08708877 2003-11-25 devnull long
517 08708877 2003-11-25 devnull seeknextline(Biobuf *b, long off)
518 08708877 2003-11-25 devnull {
519 08708877 2003-11-25 devnull long c;
520 08708877 2003-11-25 devnull
521 08708877 2003-11-25 devnull Bseek(b, off, 0);
522 08708877 2003-11-25 devnull do {
523 08708877 2003-11-25 devnull c = Bgetrune(b);
524 08708877 2003-11-25 devnull } while(c>=0 && c!='\n');
525 08708877 2003-11-25 devnull return Boffset(b);
526 08708877 2003-11-25 devnull }
527 08708877 2003-11-25 devnull
528 08708877 2003-11-25 devnull /*
529 08708877 2003-11-25 devnull * Get next field out of index file (either tab- or nl- terminated)
530 08708877 2003-11-25 devnull * Answer in *rp, assumed to be Fieldlen long.
531 08708877 2003-11-25 devnull * Return 0 if read error first.
532 08708877 2003-11-25 devnull */
533 08708877 2003-11-25 devnull int
534 08708877 2003-11-25 devnull getfield(Rune *rp)
535 08708877 2003-11-25 devnull {
536 08708877 2003-11-25 devnull long c;
537 08708877 2003-11-25 devnull int n;
538 08708877 2003-11-25 devnull
539 08708877 2003-11-25 devnull for(n=Fieldlen; n-- > 0; ) {
540 08708877 2003-11-25 devnull if ((c = Bgetrune(bindex)) < 0)
541 08708877 2003-11-25 devnull return 0;
542 08708877 2003-11-25 devnull if(c == '\t' || c == '\n') {
543 08708877 2003-11-25 devnull *rp = L'\0';
544 08708877 2003-11-25 devnull return 1;
545 08708877 2003-11-25 devnull }
546 08708877 2003-11-25 devnull *rp++ = c;
547 08708877 2003-11-25 devnull }
548 08708877 2003-11-25 devnull err("word too long");
549 08708877 2003-11-25 devnull return 0;
550 08708877 2003-11-25 devnull }
551 08708877 2003-11-25 devnull
552 08708877 2003-11-25 devnull /*
553 08708877 2003-11-25 devnull * A compare longs function suitable for qsort
554 08708877 2003-11-25 devnull */
555 08708877 2003-11-25 devnull static int
556 08708877 2003-11-25 devnull longcmp(const void *av, const void *bv)
557 08708877 2003-11-25 devnull {
558 08708877 2003-11-25 devnull long v;
559 08708877 2003-11-25 devnull long *a, *b;
560 08708877 2003-11-25 devnull
561 08708877 2003-11-25 devnull a = (long*)av;
562 08708877 2003-11-25 devnull b = (long*)bv;
563 08708877 2003-11-25 devnull
564 08708877 2003-11-25 devnull v = *a - *b;
565 08708877 2003-11-25 devnull if(v < 0)
566 08708877 2003-11-25 devnull return -1;
567 08708877 2003-11-25 devnull else if(v == 0)
568 08708877 2003-11-25 devnull return 0;
569 08708877 2003-11-25 devnull else
570 08708877 2003-11-25 devnull return 1;
571 08708877 2003-11-25 devnull }
572 08708877 2003-11-25 devnull
573 08708877 2003-11-25 devnull void
574 08708877 2003-11-25 devnull sortaddr(Addr *a)
575 08708877 2003-11-25 devnull {
576 08708877 2003-11-25 devnull int i, j;
577 08708877 2003-11-25 devnull long v;
578 08708877 2003-11-25 devnull
579 08708877 2003-11-25 devnull if(a->n <= 1)
580 08708877 2003-11-25 devnull return;
581 08708877 2003-11-25 devnull
582 08708877 2003-11-25 devnull qsort(a->doff, a->n, sizeof(long), longcmp);
583 08708877 2003-11-25 devnull
584 08708877 2003-11-25 devnull /* remove duplicates */
585 08708877 2003-11-25 devnull for(i=0, j=0; j < a->n; j++) {
586 08708877 2003-11-25 devnull v = a->doff[j];
587 08708877 2003-11-25 devnull if(i > 0 && v == a->doff[i-1])
588 08708877 2003-11-25 devnull continue;
589 08708877 2003-11-25 devnull a->doff[i++] = v;
590 08708877 2003-11-25 devnull }
591 08708877 2003-11-25 devnull a->n = i;
592 08708877 2003-11-25 devnull }
593 08708877 2003-11-25 devnull
594 08708877 2003-11-25 devnull Entry
595 08708877 2003-11-25 devnull getentry(int i)
596 08708877 2003-11-25 devnull {
597 08708877 2003-11-25 devnull long b, e, n;
598 08708877 2003-11-25 devnull static Entry ans;
599 08708877 2003-11-25 devnull static int anslen = 0;
600 08708877 2003-11-25 devnull
601 08708877 2003-11-25 devnull b = dot->doff[i];
602 08708877 2003-11-25 devnull e = (*dict->nextoff)(b+1);
603 08708877 2003-11-25 devnull ans.doff = b;
604 08708877 2003-11-25 devnull if(e < 0) {
605 08708877 2003-11-25 devnull err("couldn't seek to entry");
606 08708877 2003-11-25 devnull ans.start = 0;
607 08708877 2003-11-25 devnull ans.end = 0;
608 08708877 2003-11-25 devnull } else {
609 08708877 2003-11-25 devnull n = e-b;
610 08708877 2003-11-25 devnull if(n+1 > anslen) {
611 08708877 2003-11-25 devnull ans.start = realloc(ans.start, n+1);
612 08708877 2003-11-25 devnull if(!ans.start) {
613 08708877 2003-11-25 devnull err("out of memory");
614 08708877 2003-11-25 devnull exits("nomem");
615 08708877 2003-11-25 devnull }
616 08708877 2003-11-25 devnull anslen = n+1;
617 08708877 2003-11-25 devnull }
618 08708877 2003-11-25 devnull Bseek(bdict, b, 0);
619 08708877 2003-11-25 devnull n = Bread(bdict, ans.start, n);
620 08708877 2003-11-25 devnull ans.end = ans.start + n;
621 08708877 2003-11-25 devnull *ans.end = 0;
622 08708877 2003-11-25 devnull }
623 08708877 2003-11-25 devnull return ans;
624 08708877 2003-11-25 devnull }
625 08708877 2003-11-25 devnull
626 08708877 2003-11-25 devnull void
627 08708877 2003-11-25 devnull setdotnext(void)
628 08708877 2003-11-25 devnull {
629 08708877 2003-11-25 devnull long b;
630 08708877 2003-11-25 devnull
631 08708877 2003-11-25 devnull b = (*dict->nextoff)(dot->doff[dot->cur]+1);
632 08708877 2003-11-25 devnull if(b < 0) {
633 08708877 2003-11-25 devnull err("couldn't find a next entry");
634 08708877 2003-11-25 devnull return;
635 08708877 2003-11-25 devnull }
636 08708877 2003-11-25 devnull dot->doff[0] = b;
637 08708877 2003-11-25 devnull dot->n = 1;
638 08708877 2003-11-25 devnull dot->cur = 0;
639 08708877 2003-11-25 devnull }
640 08708877 2003-11-25 devnull
641 08708877 2003-11-25 devnull void
642 08708877 2003-11-25 devnull setdotprev(void)
643 08708877 2003-11-25 devnull {
644 08708877 2003-11-25 devnull int tryback;
645 08708877 2003-11-25 devnull long here, last, p;
646 08708877 2003-11-25 devnull
647 08708877 2003-11-25 devnull if(dot->cur < 0 || dot->cur >= dot->n)
648 08708877 2003-11-25 devnull return;
649 08708877 2003-11-25 devnull tryback = 2000;
650 08708877 2003-11-25 devnull here = dot->doff[dot->cur];
651 08708877 2003-11-25 devnull last = 0;
652 08708877 2003-11-25 devnull while(last == 0) {
653 08708877 2003-11-25 devnull p = here - tryback;
654 08708877 2003-11-25 devnull if(p < 0)
655 08708877 2003-11-25 devnull p = 0;
656 08708877 2003-11-25 devnull for(;;) {
657 08708877 2003-11-25 devnull p = (*dict->nextoff)(p+1);
658 08708877 2003-11-25 devnull if(p < 0)
659 08708877 2003-11-25 devnull return; /* shouldn't happen */
660 08708877 2003-11-25 devnull if(p >= here)
661 08708877 2003-11-25 devnull break;
662 08708877 2003-11-25 devnull last = p;
663 08708877 2003-11-25 devnull }
664 08708877 2003-11-25 devnull if(!last) {
665 08708877 2003-11-25 devnull if(here - tryback < 0) {
666 08708877 2003-11-25 devnull err("can't find a previous entry");
667 08708877 2003-11-25 devnull return;
668 08708877 2003-11-25 devnull }
669 08708877 2003-11-25 devnull tryback = 2*tryback;
670 08708877 2003-11-25 devnull }
671 08708877 2003-11-25 devnull }
672 08708877 2003-11-25 devnull dot->doff[0] = last;
673 08708877 2003-11-25 devnull dot->n = 1;
674 08708877 2003-11-25 devnull dot->cur = 0;
675 08708877 2003-11-25 devnull }