7 {"oed", "Oxford English Dictionary, 2nd Ed.",
9 oednextoff, oedprintentry, oedprintkey},
10 {"ahd", "American Heritage Dictionary, 2nd College Ed.",
11 "ahd/DICT.DB", "ahd/index",
12 ahdnextoff, ahdprintentry, ahdprintkey},
13 {"pgw", "Project Gutenberg Webster Dictionary",
15 pgwnextoff, pgwprintentry, pgwprintkey},
16 {"thesaurus", "Collins Thesaurus",
17 "thesaurus", "thesindex",
18 thesnextoff, thesprintentry, thesprintkey},
19 {"roget", "Project Gutenberg Roget's Thesaurus",
20 "roget", "rogetindex",
21 rogetnextoff, rogetprintentry, rogetprintkey},
23 {"ce", "Gendai Chinese->English",
24 "world/sansdata/sandic24.dat",
25 "world/sansdata/ceindex",
26 worldnextoff, worldprintentry, worldprintkey},
27 {"ceh", "Gendai Chinese->English (Hanzi index)",
28 "world/sansdata/sandic24.dat",
29 "world/sansdata/cehindex",
30 worldnextoff, worldprintentry, worldprintkey},
31 {"ec", "Gendai English->Chinese",
32 "world/sansdata/sandic24.dat",
33 "world/sansdata/ecindex",
34 worldnextoff, worldprintentry, worldprintkey},
36 {"dae", "Gyldendal Danish->English",
37 "world/gylddata/sandic30.dat",
38 "world/gylddata/daeindex",
39 worldnextoff, worldprintentry, worldprintkey},
40 {"eda", "Gyldendal English->Danish",
41 "world/gylddata/sandic29.dat",
42 "world/gylddata/edaindex",
43 worldnextoff, worldprintentry, worldprintkey},
45 {"due", "Wolters-Noordhoff Dutch->English",
46 "world/woltdata/sandic07.dat",
47 "world/woltdata/deindex",
48 worldnextoff, worldprintentry, worldprintkey},
49 {"edu", "Wolters-Noordhoff English->Dutch",
50 "world/woltdata/sandic06.dat",
51 "world/woltdata/edindex",
52 worldnextoff, worldprintentry, worldprintkey},
54 {"fie", "WSOY Finnish->English",
55 "world/werndata/sandic32.dat",
56 "world/werndata/fieindex",
57 worldnextoff, worldprintentry, worldprintkey},
58 {"efi", "WSOY English->Finnish",
59 "world/werndata/sandic31.dat",
60 "world/werndata/efiindex",
61 worldnextoff, worldprintentry, worldprintkey},
63 {"fe", "Collins French->English",
65 pcollnextoff, pcollprintentry, pcollprintkey},
66 {"ef", "Collins English->French",
68 pcollnextoff, pcollprintentry, pcollprintkey},
70 {"ge", "Collins German->English",
72 pcollgnextoff, pcollgprintentry, pcollgprintkey},
73 {"eg", "Collins English->German",
75 pcollgnextoff, pcollgprintentry, pcollgprintkey},
77 {"ie", "Collins Italian->English",
79 pcollnextoff, pcollprintentry, pcollprintkey},
80 {"ei", "Collins English->Italian",
82 pcollnextoff, pcollprintentry, pcollprintkey},
84 {"je", "Sanshusha Japanese->English",
85 "world/sansdata/sandic18.dat",
86 "world/sansdata/jeindex",
87 worldnextoff, worldprintentry, worldprintkey},
88 {"jek", "Sanshusha Japanese->English (Kanji index)",
89 "world/sansdata/sandic18.dat",
90 "world/sansdata/jekindex",
91 worldnextoff, worldprintentry, worldprintkey},
92 {"ej", "Sanshusha English->Japanese",
93 "world/sansdata/sandic18.dat",
94 "world/sansdata/ejindex",
95 worldnextoff, worldprintentry, worldprintkey},
97 {"tjeg", "Sanshusha technical Japanese->English,German",
98 "world/sansdata/sandic16.dat",
99 "world/sansdata/tjegindex",
100 worldnextoff, worldprintentry, worldprintkey},
101 {"tjegk", "Sanshusha technical Japanese->English,German (Kanji index)",
102 "world/sansdata/sandic16.dat",
103 "world/sansdata/tjegkindex",
104 worldnextoff, worldprintentry, worldprintkey},
105 {"tegj", "Sanshusha technical English->German,Japanese",
106 "world/sansdata/sandic16.dat",
107 "world/sansdata/tegjindex",
108 worldnextoff, worldprintentry, worldprintkey},
109 {"tgje", "Sanshusha technical German->Japanese,English",
110 "world/sansdata/sandic16.dat",
111 "world/sansdata/tgjeindex",
112 worldnextoff, worldprintentry, worldprintkey},
114 {"ne", "Kunnskapforlaget Norwegian->English",
115 "world/kunndata/sandic28.dat",
116 "world/kunndata/neindex",
117 worldnextoff, worldprintentry, worldprintkey},
118 {"en", "Kunnskapforlaget English->Norwegian",
119 "world/kunndata/sandic27.dat",
120 "world/kunndata/enindex",
121 worldnextoff, worldprintentry, worldprintkey},
123 {"re", "Leon Ungier Russian->English",
125 simplenextoff, simpleprintentry, simpleprintkey},
126 {"er", "Leon Ungier English->Russian",
128 simplenextoff, simpleprintentry, simpleprintkey},
130 {"se", "Collins Spanish->English",
132 pcollnextoff, pcollprintentry, pcollprintkey},
133 {"es", "Collins English->Spanish",
135 pcollnextoff, pcollprintentry, pcollprintkey},
137 {"swe", "Esselte Studium Swedish->English",
138 "world/essedata/sandic34.dat",
139 "world/essedata/sweindex",
140 worldnextoff, worldprintentry, worldprintkey},
141 {"esw", "Esselte Studium English->Swedish",
142 "world/essedata/sandic33.dat",
143 "world/essedata/eswindex",
144 worldnextoff, worldprintentry, worldprintkey},
146 {"movie", "Movies -- by title",
147 "movie/data", "movtindex",
148 movienextoff, movieprintentry, movieprintkey},
149 {"moviea", "Movies -- by actor",
150 "movie/data", "movaindex",
151 movienextoff, movieprintentry, movieprintkey},
152 {"movied", "Movies -- by director",
153 "movie/data", "movdindex",
154 movienextoff, movieprintentry, movieprintkey},
156 {"slang", "English Slang",
157 "slang", "slangindex",
158 slangnextoff, slangprintentry, slangprintkey},
160 {"robert", "Robert Électronique",
161 "robert/_pointers", "robert/_index",
162 robertnextoff, robertindexentry, robertprintkey},
163 {"robertv", "Robert Électronique - formes des verbes",
164 "robert/flex.rob", "robert/_flexindex",
165 robertnextflex, robertflexentry, robertprintkey},
170 typedef struct Lig Lig;
172 Rune start; /* accent rune */
173 Rune pairs[100]; /* <char,accented version> pairs */
176 /* keep in sync with dict.h */
177 static Lig ligtab[Nligs] = {
178 {0xb4, {0x41, 0xc1, 0x61, 0xe1, 0x43, 0x106, 0x63, 0x107, 0x45, 0xc9, 0x65, 0xe9, 0x67, 0x123, 0x49, 0xcd, 0x69, 0xed, 0x131, 0xed, 0x4c, 0x139, 0x6c, 0x13a, 0x4e, 0x143, 0x6e, 0x144, 0x4f, 0xd3, 0x6f, 0xf3, 0x52, 0x154, 0x72, 0x155, 0x53, 0x15a, 0x73, 0x15b, 0x55, 0xda, 0x75, 0xfa, 0x59, 0xdd, 0x79, 0xfd, 0x5a, 0x179, 0x7a, 0x17a, 0}},
179 {0x2cb, {0x41, 0xc0, 0x61, 0xe0, 0x45, 0xc8, 0x65, 0xe8, 0x49, 0xcc, 0x69, 0xec, 0x131, 0xec, 0x4f, 0xd2, 0x6f, 0xf2, 0x55, 0xd9, 0x75, 0xf9, 0}},
180 {0xa8, {0x41, 0xc4, 0x61, 0xe4, 0x45, 0xcb, 0x65, 0xeb, 0x49, 0xcf, 0x69, 0xef, 0x4f, 0xd6, 0x6f, 0xf6, 0x55, 0xdc, 0x75, 0xfc, 0x59, 0x178, 0x79, 0xff, 0}},
181 {0xb8, {0x43, 0xc7, 0x63, 0xe7, 0x47, 0x122, 0x4b, 0x136, 0x6b, 0x137, 0x4c, 0x13b, 0x6c, 0x13c, 0x4e, 0x145, 0x6e, 0x146, 0x52, 0x156, 0x72, 0x157, 0x53, 0x15e, 0x73, 0x15f, 0x54, 0x162, 0x74, 0x163, 0}},
182 {0x2dc, {0x41, 0xc3, 0x61, 0xe3, 0x49, 0x128, 0x69, 0x129, 0x131, 0x129, 0x4e, 0xd1, 0x6e, 0xf1, 0x4f, 0xd5, 0x6f, 0xf5, 0x55, 0x168, 0x75, 0x169, 0}},
183 {0x2d8, {0x41, 0x102, 0x61, 0x103, 0x45, 0x114, 0x65, 0x115, 0x47, 0x11e, 0x67, 0x11f, 0x49, 0x12c, 0x69, 0x12d, 0x131, 0x12d, 0x4f, 0x14e, 0x6f, 0x14f, 0x55, 0x16c, 0x75, 0x16d, 0}},
184 {0x2da, {0x41, 0xc5, 0x61, 0xe5, 0x55, 0x16e, 0x75, 0x16f, 0}},
185 {0x2d9, {0x43, 0x10a, 0x63, 0x10b, 0x45, 0x116, 0x65, 0x117, 0x47, 0x120, 0x67, 0x121, 0x49, 0x130, 0x4c, 0x13f, 0x6c, 0x140, 0x5a, 0x17b, 0x7a, 0x17c, 0}},
187 {0x2322, {0x41, 0xc2, 0x61, 0xe2, 0x43, 0x108, 0x63, 0x109, 0x45, 0xca, 0x65, 0xea, 0x47, 0x11c, 0x67, 0x11d, 0x48, 0x124, 0x68, 0x125, 0x49, 0xce, 0x69, 0xee, 0x131, 0xee, 0x4a, 0x134, 0x6a, 0x135, 0x4f, 0xd4, 0x6f, 0xf4, 0x53, 0x15c, 0x73, 0x15d, 0x55, 0xdb, 0x75, 0xfb, 0x57, 0x174, 0x77, 0x175, 0x59, 0x176, 0x79, 0x177, 0}},
189 {0x2db, {0x41, 0x104, 0x61, 0x105, 0x45, 0x118, 0x65, 0x119, 0x49, 0x12e, 0x69, 0x12f, 0x131, 0x12f, 0x55, 0x172, 0x75, 0x173, 0}},
190 {0xaf, {0x41, 0x100, 0x61, 0x101, 0x45, 0x112, 0x65, 0x113, 0x49, 0x12a, 0x69, 0x12b, 0x131, 0x12b, 0x4f, 0x14c, 0x6f, 0x14d, 0x55, 0x16a, 0x75, 0x16b, 0}},
191 {0x2c7, {0x43, 0x10c, 0x63, 0x10d, 0x44, 0x10e, 0x64, 0x10f, 0x45, 0x11a, 0x65, 0x11b, 0x4c, 0x13d, 0x6c, 0x13e, 0x4e, 0x147, 0x6e, 0x148, 0x52, 0x158, 0x72, 0x159, 0x53, 0x160, 0x73, 0x161, 0x54, 0x164, 0x74, 0x165, 0x5a, 0x17d, 0x7a, 0x17e, 0}},
197 Rune multitab[Nmulti][5] = {
200 {0x61, 0x6e, 0x64, 0},
201 {0x61, 0x2f, 0x71, 0},
204 {0x2e, 0x2e, 0x2e, 0},
214 {0x66, 0x66, 0x69, 0},
215 {0x66, 0x66, 0x6c, 0},
234 #define risupper(r) (0x41 <= (r) && (r) <= 0x5a) /* true when r is in 0x41..0x5a (ASCII 'A'..'Z'); evaluates r twice */
235 #define rislatin1(r) (0xC0 <= (r) && (r) <= 0xFF) /* true when r is in 0xC0..0xFF, the range used to index latin_fold_tab as r-0xc0; evaluates r twice */
236 #define rtolower(r) ((r)-'A'+'a') /* shifts r by the 'A'-to-'a' offset; performs no range check itself */
238 static Rune latin_fold_tab[] =
240 /* Table to fold latin 1 characters to ASCII equivalents
241 based at Rune value 0xc0
252 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
253 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
254 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
255 'o', 'u', 'u', 'u', 'u', 'y', 0 , 0 ,
256 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'c',
257 'e', 'e', 'e', 'e', 'i', 'i', 'i', 'i',
258 'd', 'n', 'o', 'o', 'o', 'o', 'o', 0 ,
259 'o', 'u', 'u', 'u', 'u', 'y', 0 , 'y'
262 static Rune *ttabstack[20]; /* stack of translation tables; changett stores curtab here and returns popped entries, and checks its index against asize(ttabstack) */
266 * tab is an array of n Assoc's, sorted by key.
267 * Look for key in tab, and return corresponding val
271 lookassoc(Assoc *tab, int n, char *key)
277 for(low = -1, high = n; high > low+1; ){
280 if((r=strcmp(key, q->key))<0)
291 looknassoc(Nassoc *tab, int n, long key)
296 for(low = -1, high = n; high > low+1; ){
301 else if(key == q->key)
316 vsnprint(buf, sizeof(buf), fmt, v);
318 fprint(2, "%s: %s\n", argv0, buf);
322 * Write the rune r to bout, keeping track of line length
323 * and breaking the lines (at blanks) when they get too long
330 if(++linelen > breaklen && r == 0x20) {
342 while((r = *rp++) != 0)
346 /* like outrune, but when arg is known to be a char */
352 if(++linelen > breaklen && c == ' ') {
364 while((c = *s++) != 0)
369 outprint(char *fmt, ...)
375 vsnprint(buf, sizeof(buf), fmt, v);
381 outpiece(char *b, char *e)
390 if(!(c == ' ' && lastc == ' '))
397 * Go to new line if not already there; indent if ind != 0.
398 * If ind > 1, leave a blank line too.
399 * Slight hack: assume if current line is only one or two
400 * characters long, then they were spaces.
412 } else if(linelen == 0)
414 else if(linelen == 1)
416 else if(linelen != 2)
428 * Fold the runes in null-terminated rp.
429 * Use the sort(1) definition of folding (uppercase to lowercase,
430 * latin1-accented characters to corresponding unaccented chars)
437 while((r = *rp) != 0) {
438 if (rislatin1(r) && latin_fold_tab[r-0xc0])
439 r = latin_fold_tab[r-0xc0];
447 * Like fold, but put folded result into new
448 * (assumed to have enough space).
449 * old is a regular expression, but we know that
450 * metacharacters aren't affected
453 foldre(char *new, char *old)
458 old += chartorune(&r, old);
459 if (rislatin1(r) && latin_fold_tab[r-0xc0])
460 r = latin_fold_tab[r-0xc0];
463 new += runetochar(new, &r);
469 * acomp(s, t) returns:
470 * -2 if s strictly precedes t
471 * -1 if s is a prefix of t
472 * 0 if s is the same as t
473 * 1 if t is a prefix of s
474 * 2 if t strictly precedes s
478 acomp(Rune *s, Rune *t)
502 * Copy null terminated Runes from 'from' to 'to'.
505 runescpy(Rune *to, Rune *from)
507 while((*to++ = *from++) != 0)
512 * Conversion of unsigned number to long, no overflow detection
523 if(0x30<=c && c<=0x39)
533 * See if there is a rune corresponding to the accented
534 * version of r with accent acc (acc in [LIGS..LIGE-1]),
535 * and return it if so, else return NONE.
538 liglookup(Rune acc, Rune r)
542 if(acc < LIGS || acc >= LIGE)
544 for(p = ligtab[acc-LIGS].pairs; *p; p += 2)
551 * Maintain a translation table stack (a translation table
552 * is an array of Runes indexed by bytes or 7-bit bytes).
553 * If starting is true, push the curtab onto the stack
554 * and return newtab; else pop the top of the stack and
556 * If curtab is 0, initialize the stack and return.
559 changett(Rune *curtab, Rune *newtab, int starting)
566 if(ntt >= asize(ttabstack)) {
568 err("translation stack overflow");
571 ttabstack[ntt++] = curtab;
576 err("translation stack underflow");
579 return ttabstack[--ntt];