/* Dictionary table, output helpers and rune utilities for the dict program. */


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "dict.h"
6 Dict dicts[] = {
7 {"oed", "Oxford English Dictionary, 2nd Ed.",
8 "#9/dict/oed2", "#9/dict/oed2index",
9 oednextoff, oedprintentry, oedprintkey},
10 {"ahd", "American Heritage Dictionary, 2nd College Ed.",
11 "#9/dict/ahd/DICT.DB", "#9/dict/ahd/index",
12 ahdnextoff, ahdprintentry, ahdprintkey},
13 {"pgw", "Project Gutenberg Webster Dictionary",
14 "#9/dict/pgw", "#9/dict/pgwindex",
15 pgwnextoff, pgwprintentry, pgwprintkey},
16 {"thesaurus", "Collins Thesaurus",
17 "#9/dict/thesaurus", "#9/dict/thesindex",
18 thesnextoff, thesprintentry, thesprintkey},
19 {"roget", "Project Gutenberg Roget's Thesaurus",
20 "#9/dict/roget", "#9/dict/rogetindex",
21 rogetnextoff, rogetprintentry, rogetprintkey},
23 {"ce", "Gendai Chinese->English",
24 "#9/dict/world/sansdata/sandic24.dat",
25 "#9/dict/world/sansdata/ceindex",
26 worldnextoff, worldprintentry, worldprintkey},
27 {"ceh", "Gendai Chinese->English (Hanzi index)",
28 "#9/dict/world/sansdata/sandic24.dat",
29 "#9/dict/world/sansdata/cehindex",
30 worldnextoff, worldprintentry, worldprintkey},
31 {"ec", "Gendai English->Chinese",
32 "#9/dict/world/sansdata/sandic24.dat",
33 "#9/dict/world/sansdata/ecindex",
34 worldnextoff, worldprintentry, worldprintkey},
36 {"dae", "Gyldendal Danish->English",
37 "#9/dict/world/gylddata/sandic30.dat",
38 "#9/dict/world/gylddata/daeindex",
39 worldnextoff, worldprintentry, worldprintkey},
40 {"eda", "Gyldendal English->Danish",
41 "#9/dict/world/gylddata/sandic29.dat",
42 "#9/dict/world/gylddata/edaindex",
43 worldnextoff, worldprintentry, worldprintkey},
45 {"due", "Wolters-Noordhoff Dutch->English",
46 "#9/dict/world/woltdata/sandic07.dat",
47 "#9/dict/world/woltdata/deindex",
48 worldnextoff, worldprintentry, worldprintkey},
49 {"edu", "Wolters-Noordhoff English->Dutch",
50 "#9/dict/world/woltdata/sandic06.dat",
51 "#9/dict/world/woltdata/edindex",
52 worldnextoff, worldprintentry, worldprintkey},
54 {"fie", "WSOY Finnish->English",
55 "#9/dict/world/werndata/sandic32.dat",
56 "#9/dict/world/werndata/fieindex",
57 worldnextoff, worldprintentry, worldprintkey},
58 {"efi", "WSOY English->Finnish",
59 "#9/dict/world/werndata/sandic31.dat",
60 "#9/dict/world/werndata/efiindex",
61 worldnextoff, worldprintentry, worldprintkey},
63 {"fe", "Collins French->English",
64 "#9/dict/fe", "#9/dict/feindex",
65 pcollnextoff, pcollprintentry, pcollprintkey},
66 {"ef", "Collins English->French",
67 "#9/dict/ef", "#9/dict/efindex",
68 pcollnextoff, pcollprintentry, pcollprintkey},
70 {"ge", "Collins German->English",
71 "#9/dict/ge", "#9/dict/geindex",
72 pcollgnextoff, pcollgprintentry, pcollgprintkey},
73 {"eg", "Collins English->German",
74 "#9/dict/eg", "#9/dict/egindex",
75 pcollgnextoff, pcollgprintentry, pcollgprintkey},
77 {"ie", "Collins Italian->English",
78 "#9/dict/ie", "#9/dict/ieindex",
79 pcollnextoff, pcollprintentry, pcollprintkey},
80 {"ei", "Collins English->Italian",
81 "#9/dict/ei", "#9/dict/eiindex",
82 pcollnextoff, pcollprintentry, pcollprintkey},
84 {"je", "Sanshusha Japanese->English",
85 "#9/dict/world/sansdata/sandic18.dat",
86 "#9/dict/world/sansdata/jeindex",
87 worldnextoff, worldprintentry, worldprintkey},
88 {"jek", "Sanshusha Japanese->English (Kanji index)",
89 "#9/dict/world/sansdata/sandic18.dat",
90 "#9/dict/world/sansdata/jekindex",
91 worldnextoff, worldprintentry, worldprintkey},
92 {"ej", "Sanshusha English->Japanese",
93 "#9/dict/world/sansdata/sandic18.dat",
94 "#9/dict/world/sansdata/ejindex",
95 worldnextoff, worldprintentry, worldprintkey},
97 {"tjeg", "Sanshusha technical Japanese->English,German",
98 "#9/dict/world/sansdata/sandic16.dat",
99 "#9/dict/world/sansdata/tjegindex",
100 worldnextoff, worldprintentry, worldprintkey},
101 {"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)",
102 "#9/dict/world/sansdata/sandic16.dat",
103 "#9/dict/world/sansdata/tjegkindex",
104 worldnextoff, worldprintentry, worldprintkey},
105 {"tegj", "Sanshusha technical English->German,Japanese",
106 "#9/dict/world/sansdata/sandic16.dat",
107 "#9/dict/world/sansdata/tegjindex",
108 worldnextoff, worldprintentry, worldprintkey},
109 {"tgje", "Sanshusha technical German->Japanese,English",
110 "#9/dict/world/sansdata/sandic16.dat",
111 "#9/dict/world/sansdata/tgjeindex",
112 worldnextoff, worldprintentry, worldprintkey},
114 {"ne", "Kunnskapforlaget Norwegian->English",
115 "#9/dict/world/kunndata/sandic28.dat",
116 "#9/dict/world/kunndata/neindex",
117 worldnextoff, worldprintentry, worldprintkey},
118 {"en", "Kunnskapforlaget English->Norwegian",
119 "#9/dict/world/kunndata/sandic27.dat",
120 "#9/dict/world/kunndata/enindex",
121 worldnextoff, worldprintentry, worldprintkey},
123 {"re", "Leon Ungier Russian->English",
124 "#9/dict/re", "#9/dict/reindex",
125 simplenextoff, simpleprintentry, simpleprintkey},
126 {"er", "Leon Ungier English->Russian",
127 "#9/dict/re", "#9/dict/erindex",
128 simplenextoff, simpleprintentry, simpleprintkey},
130 {"se", "Collins Spanish->English",
131 "#9/dict/se", "#9/dict/seindex",
132 pcollnextoff, pcollprintentry, pcollprintkey},
133 {"es", "Collins English->Spanish",
134 "#9/dict/es", "#9/dict/esindex",
135 pcollnextoff, pcollprintentry, pcollprintkey},
137 {"swe", "Esselte Studium Swedish->English",
138 "#9/dict/world/essedata/sandic34.dat",
139 "#9/dict/world/essedata/sweindex",
140 worldnextoff, worldprintentry, worldprintkey},
141 {"esw", "Esselte Studium English->Swedish",
142 "#9/dict/world/essedata/sandic33.dat",
143 "#9/dict/world/essedata/eswindex",
144 worldnextoff, worldprintentry, worldprintkey},
146 {"movie", "Movies -- by title",
147 "movie/data", "#9/dict/movtindex",
148 movienextoff, movieprintentry, movieprintkey},
149 {"moviea", "Movies -- by actor",
150 "movie/data", "#9/dict/movaindex",
151 movienextoff, movieprintentry, movieprintkey},
152 {"movied", "Movies -- by director",
153 "movie/data", "#9/dict/movdindex",
154 movienextoff, movieprintentry, movieprintkey},
156 {"slang", "English Slang",
157 "#9/dict/slang", "#9/dict/slangindex",
158 slangnextoff, slangprintentry, slangprintkey},
160 {"robert", "Robert Électronique",
161 "#9/dict/robert/_pointers", "#9/dict/robert/_index",
162 robertnextoff, robertindexentry, robertprintkey},
163 {"robertv", "Robert Électronique - formes des verbes",
164 "#9/dict/robert/flex.rob", "#9/dict/robert/_flexindex",
165 robertnextflex, robertflexentry, robertprintkey},
167 {0, 0, 0, 0, 0}
168 };
170 typedef struct Lig Lig;
171 struct Lig {
172 Rune start; /* accent rune */
173 Rune pairs[100]; /* <char,accented version> pairs */
174 };
176 /* keep in sync with dict.h */
177 static Lig ligtab[Nligs] = {
178 {0xb4, {0x41, 0xc1, 0x61, 0xe1, 0x43, 0x106, 0x63, 0x107, 0x45, 0xc9, 0x65, 0xe9, 0x67, 0x123, 0x49, 0xcd, 0x69, 0xed, 0x131, 0xed, 0x4c, 0x139, 0x6c, 0x13a, 0x4e, 0x143, 0x6e, 0x144, 0x4f, 0xd3, 0x6f, 0xf3, 0x52, 0x154, 0x72, 0x155, 0x53, 0x15a, 0x73, 0x15b, 0x55, 0xda, 0x75, 0xfa, 0x59, 0xdd, 0x79, 0xfd, 0x5a, 0x179, 0x7a, 0x17a, 0}},
179 {0x2cb, {0x41, 0xc0, 0x61, 0xe0, 0x45, 0xc8, 0x65, 0xe8, 0x49, 0xcc, 0x69, 0xec, 0x131, 0xec, 0x4f, 0xd2, 0x6f, 0xf2, 0x55, 0xd9, 0x75, 0xf9, 0}},
180 {0xa8, {0x41, 0xc4, 0x61, 0xe4, 0x45, 0xcb, 0x65, 0xeb, 0x49, 0xcf, 0x69, 0xef, 0x4f, 0xd6, 0x6f, 0xf6, 0x55, 0xdc, 0x75, 0xfc, 0x59, 0x178, 0x79, 0xff, 0}},
181 {0xb8, {0x43, 0xc7, 0x63, 0xe7, 0x47, 0x122, 0x4b, 0x136, 0x6b, 0x137, 0x4c, 0x13b, 0x6c, 0x13c, 0x4e, 0x145, 0x6e, 0x146, 0x52, 0x156, 0x72, 0x157, 0x53, 0x15e, 0x73, 0x15f, 0x54, 0x162, 0x74, 0x163, 0}},
182 {0x2dc, {0x41, 0xc3, 0x61, 0xe3, 0x49, 0x128, 0x69, 0x129, 0x131, 0x129, 0x4e, 0xd1, 0x6e, 0xf1, 0x4f, 0xd5, 0x6f, 0xf5, 0x55, 0x168, 0x75, 0x169, 0}},
183 {0x2d8, {0x41, 0x102, 0x61, 0x103, 0x45, 0x114, 0x65, 0x115, 0x47, 0x11e, 0x67, 0x11f, 0x49, 0x12c, 0x69, 0x12d, 0x131, 0x12d, 0x4f, 0x14e, 0x6f, 0x14f, 0x55, 0x16c, 0x75, 0x16d, 0}},
184 {0x2da, {0x41, 0xc5, 0x61, 0xe5, 0x55, 0x16e, 0x75, 0x16f, 0}},
185 {0x2d9, {0x43, 0x10a, 0x63, 0x10b, 0x45, 0x116, 0x65, 0x117, 0x47, 0x120, 0x67, 0x121, 0x49, 0x130, 0x4c, 0x13f, 0x6c, 0x140, 0x5a, 0x17b, 0x7a, 0x17c, 0}},
186 {0x2e, {0}},
187 {0x2322, {0x41, 0xc2, 0x61, 0xe2, 0x43, 0x108, 0x63, 0x109, 0x45, 0xca, 0x65, 0xea, 0x47, 0x11c, 0x67, 0x11d, 0x48, 0x124, 0x68, 0x125, 0x49, 0xce, 0x69, 0xee, 0x131, 0xee, 0x4a, 0x134, 0x6a, 0x135, 0x4f, 0xd4, 0x6f, 0xf4, 0x53, 0x15c, 0x73, 0x15d, 0x55, 0xdb, 0x75, 0xfb, 0x57, 0x174, 0x77, 0x175, 0x59, 0x176, 0x79, 0x177, 0}},
188 {0x32f, {0}},
189 {0x2db, {0x41, 0x104, 0x61, 0x105, 0x45, 0x118, 0x65, 0x119, 0x49, 0x12e, 0x69, 0x12f, 0x131, 0x12f, 0x55, 0x172, 0x75, 0x173, 0}},
190 {0xaf, {0x41, 0x100, 0x61, 0x101, 0x45, 0x112, 0x65, 0x113, 0x49, 0x12a, 0x69, 0x12b, 0x131, 0x12b, 0x4f, 0x14c, 0x6f, 0x14d, 0x55, 0x16a, 0x75, 0x16b, 0}},
191 {0x2c7, {0x43, 0x10c, 0x63, 0x10d, 0x44, 0x10e, 0x64, 0x10f, 0x45, 0x11a, 0x65, 0x11b, 0x4c, 0x13d, 0x6c, 0x13e, 0x4e, 0x147, 0x6e, 0x148, 0x52, 0x158, 0x72, 0x159, 0x53, 0x160, 0x73, 0x161, 0x54, 0x164, 0x74, 0x165, 0x5a, 0x17d, 0x7a, 0x17e, 0}},
192 {0x2bd, {0}},
193 {0x2bc, {0}},
194 {0x32e, {0}}
195 };
197 Rune multitab[Nmulti][5] = {
198 {0x2bd, 0x3b1, 0},
199 {0x2bc, 0x3b1, 0},
200 {0x61, 0x6e, 0x64, 0},
201 {0x61, 0x2f, 0x71, 0},
202 {0x3c, 0x7c, 0},
203 {0x2e, 0x2e, 0},
204 {0x2e, 0x2e, 0x2e, 0},
205 {0x2bd, 0x3b5, 0},
206 {0x2bc, 0x3b5, 0},
207 {0x2014, 0x2014, 0},
208 {0x2bd, 0x3b7, 0},
209 {0x2bc, 0x3b7, 0},
210 {0x2bd, 0x3b9, 0},
211 {0x2bc, 0x3b9, 0},
212 {0x63, 0x74, 0},
213 {0x66, 0x66, 0},
214 {0x66, 0x66, 0x69, 0},
215 {0x66, 0x66, 0x6c, 0},
216 {0x66, 0x6c, 0},
217 {0x66, 0x69, 0},
218 {0x26b, 0x26b, 0},
219 {0x73, 0x74, 0},
220 {0x2bd, 0x3bf, 0},
221 {0x2bc, 0x3bf, 0},
222 {0x6f, 0x72, 0},
223 {0x2bd, 0x3c1, 0},
224 {0x2bc, 0x3c1, 0},
225 {0x7e, 0x7e, 0},
226 {0x2bd, 0x3c5, 0},
227 {0x2bc, 0x3c5, 0},
228 {0x2bd, 0x3c9, 0},
229 {0x2bc, 0x3c9, 0},
230 {0x6f, 0x65, 0},
231 {0x20, 0x20, 0}
232 };
/*
 * Rune classification helpers used by fold and foldre.
 * risupper and rislatin1 evaluate their argument twice, so do not
 * pass expressions with side effects.
 */
#define	risupper(r)	(0x41 <= (r) && (r) <= 0x5a)	/* ASCII 'A'..'Z' */
#define	rislatin1(r)	(0xC0 <= (r) && (r) <= 0xFF)	/* range covered by latin_fold_tab */
#define	rtolower(r)	((r)-'A'+'a')			/* only meaningful when risupper(r) */
238 static Rune latin_fold_tab[] =
240 /* Table to fold latin 1 characters to ASCII equivalents
241 based at Rune value 0xc0
243 À Á Â Ã Ä Å Æ Ç
244 È É Ê Ë Ì Í Î Ï
245 Ð Ñ Ò Ó Ô Õ Ö ×
246 Ø Ù Ú Û Ü Ý Þ ß
247 à á â ã ä å æ ç
248 è é ê ë ì í î ï
249 ð ñ ò ó ô õ ö ÷
250 ø ù ú û ü ý þ ÿ
251 */
252 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
253 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
254 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
255 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
256 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
257 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
258 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
259 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y'
260 };
262 static Rune *ttabstack[20];
263 static int ntt;
265 /*
266 * tab is an array of n Assoc's, sorted by key.
267 * Look for key in tab, and return corresponding val
268 * or -1 if not there
269 */
270 long
271 lookassoc(Assoc *tab, int n, char *key)
273 Assoc *q;
274 long i, low, high;
275 int r;
277 for(low = -1, high = n; high > low+1; ){
278 i = (high+low)/2;
279 q = &tab[i];
280 if((r=strcmp(key, q->key))<0)
281 high = i;
282 else if(r == 0)
283 return q->val;
284 else
285 low=i;
287 return -1;
290 long
291 looknassoc(Nassoc *tab, int n, long key)
293 Nassoc *q;
294 long i, low, high;
296 for(low = -1, high = n; high > low+1; ){
297 i = (high+low)/2;
298 q = &tab[i];
299 if(key < q->key)
300 high = i;
301 else if(key == q->key)
302 return q->val;
303 else
304 low=i;
306 return -1;
309 void
310 err(char *fmt, ...)
312 char buf[1000];
313 va_list v;
315 va_start(v, fmt);
316 vsnprint(buf, sizeof(buf), fmt, v);
317 va_end(v);
318 fprint(2, "%s: %s\n", argv0, buf);
321 /*
322 * Write the rune r to bout, keeping track of line length
323 * and breaking the lines (at blanks) when they get too long
324 */
325 void
326 outrune(long r)
328 if(outinhibit)
329 return;
330 if(++linelen > breaklen && r == 0x20) {
331 Bputc(bout, '\n');
332 linelen = 0;
333 } else
334 Bputrune(bout, r);
337 void
338 outrunes(Rune *rp)
340 Rune r;
342 while((r = *rp++) != 0)
343 outrune(r);
346 /* like outrune, but when arg is know to be a char */
347 void
348 outchar(int c)
350 if(outinhibit)
351 return;
352 if(++linelen > breaklen && c == ' ') {
353 c ='\n';
354 linelen = 0;
356 Bputc(bout, c);
/*
 * Output each byte of the null-terminated string s via outchar.
 */
void
outchars(char *s)
{
	char c;

	while((c = *s++) != 0)
		outchar(c);
}
/*
 * Format print-style into a local buffer and output the result
 * via outchars.  The formatted text is limited to the size of
 * the local buffer.
 */
void
outprint(char *fmt, ...)
{
	char buf[1000];
	va_list v;

	va_start(v, fmt);
	vsnprint(buf, sizeof(buf), fmt, v);
	va_end(v);
	outchars(buf);
}
/*
 * Output the bytes in [b, e) via outchar, turning each newline
 * into a blank and collapsing runs of blanks into a single one.
 */
void
outpiece(char *b, char *e)
{
	int c, lastc;

	lastc = 0;
	while(b < e) {
		c = *b++;
		if(c == '\n')
			c = ' ';
		/* skip a blank that directly follows another blank */
		if(!(c == ' ' && lastc == ' '))
			outchar(c);
		lastc = c;
	}
}
396 /*
397 * Go to new line if not already there; indent if ind != 0.
398 * If ind > 1, leave a blank line too.
399 * Slight hack: assume if current line is only one or two
400 * characters long, then they were spaces.
401 */
402 void
403 outnl(int ind)
405 if(outinhibit)
406 return;
407 if(ind) {
408 if(ind > 1) {
409 if(linelen > 2)
410 Bputc(bout, '\n');
411 Bprint(bout, "\n ");
412 } else if(linelen == 0)
413 Bprint(bout, " ");
414 else if(linelen == 1)
415 Bputc(bout, ' ');
416 else if(linelen != 2)
417 Bprint(bout, "\n ");
418 linelen = 2;
419 } else {
420 if(linelen) {
421 Bputc(bout, '\n');
422 linelen = 0;
427 /*
428 * Fold the runes in null-terminated rp.
429 * Use the sort(1) definition of folding (uppercase to lowercase,
430 * latin1-accented characters to corresponding unaccented chars)
431 */
432 void
433 fold(Rune *rp)
435 Rune r;
437 while((r = *rp) != 0) {
438 if (rislatin1(r) && latin_fold_tab[r-0xc0])
439 r = latin_fold_tab[r-0xc0];
440 if(risupper(r))
441 r = rtolower(r);
442 *rp++ = r;
446 /*
447 * Like fold, but put folded result into new
448 * (assumed to have enough space).
449 * old is a regular expression, but we know that
450 * metacharacters aren't affected
451 */
452 void
453 foldre(char *new, char *old)
455 Rune r;
457 while(*old) {
458 old += chartorune(&r, old);
459 if (rislatin1(r) && latin_fold_tab[r-0xc0])
460 r = latin_fold_tab[r-0xc0];
461 if(risupper(r))
462 r = rtolower(r);
463 new += runetochar(new, &r);
465 *new = 0;
468 /*
469 * acomp(s, t) returns:
470 * -2 if s strictly precedes t
471 * -1 if s is a prefix of t
472 * 0 if s is the same as t
473 * 1 if t is a prefix of s
474 * 2 if t strictly precedes s
475 */
477 int
478 acomp(Rune *s, Rune *t)
480 int cs, ct;
482 for(;;) {
483 cs = *s;
484 ct = *t;
485 if(cs != ct)
486 break;
487 if(cs == 0)
488 return 0;
489 s++;
490 t++;
492 if(cs == 0)
493 return -1;
494 if(ct == 0)
495 return 1;
496 if(cs < ct)
497 return -2;
498 return 2;
501 /*
502 * Copy null terminated Runes from 'from' to 'to'.
503 */
504 void
505 runescpy(Rune *to, Rune *from)
507 while((*to++ = *from++) != 0)
508 continue;
511 /*
512 * Conversion of unsigned number to long, no overflow detection
513 */
514 long
515 runetol(Rune *r)
517 int c;
518 long n;
520 n = 0;
521 for(;; r++){
522 c = *r;
523 if(0x30<=c && c<=0x39)
524 c -= '0';
525 else
526 break;
527 n = n*10 + c;
529 return n;
532 /*
533 * See if there is a rune corresponding to the accented
534 * version of r with accent acc (acc in [LIGS..LIGE-1]),
535 * and return it if so, else return NONE.
536 */
537 Rune
538 liglookup(Rune acc, Rune r)
540 Rune *p;
542 if(acc < LIGS || acc >= LIGE)
543 return NONE;
544 for(p = ligtab[acc-LIGS].pairs; *p; p += 2)
545 if(*p == r)
546 return *(p+1);
547 return NONE;
550 /*
551 * Maintain a translation table stack (a translation table
552 * is an array of Runes indexed by bytes or 7-bit bytes).
553 * If starting is true, push the curtab onto the stack
554 * and return newtab; else pop the top of the stack and
555 * return it.
556 * If curtab is 0, initialize the stack and return.
557 */
558 Rune *
559 changett(Rune *curtab, Rune *newtab, int starting)
561 if(curtab == 0) {
562 ntt = 0;
563 return 0;
565 if(starting) {
566 if(ntt >= asize(ttabstack)) {
567 if(debug)
568 err("translation stack overflow");
569 return curtab;
571 ttabstack[ntt++] = curtab;
572 return newtab;
573 } else {
574 if(ntt == 0) {
575 if(debug)
576 err("translation stack underflow");
577 return curtab;
579 return ttabstack[--ntt];