Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "dict.h"
/* Sizes of the static scratch buffers used while parsing tags and specials */
enum {
	Buflen = 1000,	/* bytes in each tag, aux name/value and special-name buffer */
	Maxaux = 5	/* number of aux name/value buffer pairs */
};
/*
 * Possible tags.
 * These are the values stored in tagtab and dispatched on in
 * oedprintentry; Ntag is one more than the last tag code.
 */
enum {
	A,		/* author in quote (small caps) */
	B,		/* bold */
	Ba,		/* author inside bib */
	Bch,		/* builtup chem component */
	Bib,		/* surrounds word 'in' for bibliographic ref */
	Bl,		/* bold */
	Bo,		/* bond over */
	Bu,		/* bond under */
	Cb,		/* ? block of stuff (indent) */
	Cf,		/* cross ref to another entry (italics) */
	Chem,		/* chemistry formula */
	Co,		/* over (preceding sum, integral, etc.) */
	Col,		/* column of table (aux just may be r) */
	Cu,		/* under (preceding sum, integral, etc.) */
	Dat,		/* date */
	Db,		/* def block? indent */
	Dn,		/* denominator of fraction */
	E,		/* main entry */
	Ed,		/* editor's comments (in [...]) */
	Etym,		/* etymology (in [...]) */
	Fq,		/* frequency count (superscript) */
	Form,		/* formula */
	Fr,		/* fraction (contains <nu>, then <dn>) */
	Gk,		/* greek (transliteration) */
	Gr,		/* grammar? (e.g., around 'pa.' in 'pa. pple.') */
	Hg,		/* headword group */
	Hm,		/* homonym (superscript) */
	Hw,		/* headword (bold) */
	I,		/* italics */
	Il,		/* italic list? */
	In,		/* inferior (subscript) */
	L,		/* row of col of table */
	La,		/* status or usage label (italic) */
	Lc,		/* chapter/verse sort of thing for works */
	N,		/* note (smaller type) */
	Nu,		/* numerator of fraction */
	Ov,		/* needs overline */
	P,		/* paragraph (indent) */
	Ph,		/* pronunciation (transliteration) */
	Pi,		/* pile (frac without line) */
	Pqp,		/* subblock of quote */
	Pr,		/* pronunciation (in (...)) */
	Ps,		/* position (e.g., adv.) (italic) */
	Pt,		/* part (in lc) */
	Q,		/* quote in quote block */
	Qd,		/* quote date (bold) */
	Qig,		/* quote number (greek) */
	Qla,		/* status or usage label in quote (italic) */
	Qp,		/* quote block (small type, indent) */
	Qsn,		/* quote number */
	Qt,		/* quote words */
	R,		/* roman type style */
	Rx,		/* relative cross reference (e.g., next) */
	S,		/* another form? (italic) */
	S0,		/* sense (sometimes surrounds several sx's) */
	S1,		/* sense (aux num: indented bold letter) */
	S2,		/* sense (aux num: indented bold capital rom num) */
	S3,		/* sense (aux num: indented number of asterisks) */
	S4,		/* sense (aux num: indented bold number) */
	S5,		/* sense (aux num: indented number of asterisks) */
	S6,		/* subsense (aux num: bold letter) */
	S7a,		/* subsense (aux num: letter) */
	S7n,		/* subsense (aux num: roman numeral) */
	Sc,		/* small caps */
	Sgk,		/* subsense (aux num: transliterated greek) */
	Sn,		/* sense of subdefinition (aux num: roman letter) */
	Ss,		/* sans serif */
	Ssb,		/* sans serif bold */
	Ssi,		/* sans serif italic */
	Su,		/* superior (superscript) */
	Sub,		/* subdefinition */
	Table,		/* table (aux cols=number of columns) */
	Tt,		/* title? (italics) */
	Vd,		/* numeric label for variant form */
	Ve,		/* variant entry */
	Vf,		/* variant form (light bold) */
	Vfl,		/* list of vf's (starts with Also or Forms) */
	W,		/* work (e.g., Beowulf) (italics) */
	X,		/* cross reference to main word (small caps) */
	Xd,		/* cross reference to quotation by date */
	Xi,		/* internal cross reference ? (italic) */
	Xid,		/* cross reference identifier, in quote ? */
	Xs,		/* cross reference sense (lower number) */
	Xr,		/* list of x's */
	Ntag		/* end of tags */
};
100 /* Assoc tables must be sorted on first field */
102 static Assoc tagtab[] = {
103 {"a", A},
104 {"b", B},
105 {"ba", Ba},
106 {"bch", Bch},
107 {"bib", Bib},
108 {"bl", Bl},
109 {"bo", Bo},
110 {"bu", Bu},
111 {"cb", Cb},
112 {"cf", Cf},
113 {"chem", Chem},
114 {"co", Co},
115 {"col", Col},
116 {"cu", Cu},
117 {"dat", Dat},
118 {"db", Db},
119 {"dn", Dn},
120 {"e", E},
121 {"ed", Ed},
122 {"et", Etym},
123 {"etym", Etym},
124 {"form", Form},
125 {"fq", Fq},
126 {"fr", Fr},
127 {"frac", Fr},
128 {"gk", Gk},
129 {"gr", Gr},
130 {"hg", Hg},
131 {"hm", Hm},
132 {"hw", Hw},
133 {"i", I},
134 {"il", Il},
135 {"in", In},
136 {"l", L},
137 {"la", La},
138 {"lc", Lc},
139 {"n", N},
140 {"nu", Nu},
141 {"ov", Ov},
142 {"p", P},
143 {"ph", Ph},
144 {"pi", Pi},
145 {"pqp", Pqp},
146 {"pr", Pr},
147 {"ps", Ps},
148 {"pt", Pt},
149 {"q", Q},
150 {"qd", Qd},
151 {"qig", Qig},
152 {"qla", Qla},
153 {"qp", Qp},
154 {"qsn", Qsn},
155 {"qt", Qt},
156 {"r", R},
157 {"rx", Rx},
158 {"s", S},
159 {"s0", S0},
160 {"s1", S1},
161 {"s2", S2},
162 {"s3", S3},
163 {"s4", S4},
164 {"s5", S5},
165 {"s6", S6},
166 {"s7a", S7a},
167 {"s7n", S7n},
168 {"sc", Sc},
169 {"sgk", Sgk},
170 {"sn", Sn},
171 {"ss", Ss,},
172 {"ssb", Ssb},
173 {"ssi", Ssi},
174 {"su", Su},
175 {"sub", Sub},
176 {"table", Table},
177 {"tt", Tt},
178 {"vd", Vd},
179 {"ve", Ve},
180 {"vf", Vf},
181 {"vfl", Vfl},
182 {"w", W},
183 {"x", X},
184 {"xd", Xd},
185 {"xi", Xi},
186 {"xid", Xid},
187 {"xr", Xr},
188 {"xs", Xs}
189 };
/*
 * Possible tag auxiliary info.
 * These values index auxstate[]; Naux is the number of kinds.
 */
enum {
	Cols,		/* number of columns in a table */
	Num,		/* letter or number, for a sense */
	St,		/* status (e.g., obs) */
	Naux
};
199 static Assoc auxtab[] = {
200 {"cols", Cols},
201 {"num", Num},
202 {"st", St}
203 };
205 static Assoc spectab[] = {
206 {"3on4", 0xbe},
207 {"Aacu", 0xc1},
208 {"Aang", 0xc5},
209 {"Abarab", 0x100},
210 {"Acirc", 0xc2},
211 {"Ae", 0xc6},
212 {"Agrave", 0xc0},
213 {"Alpha", 0x391},
214 {"Amac", 0x100},
215 {"Asg", 0x1b7}, /* Unicyle. Cf "Sake" */
216 {"Auml", 0xc4},
217 {"Beta", 0x392},
218 {"Cced", 0xc7},
219 {"Chacek", 0x10c},
220 {"Chi", 0x3a7},
221 {"Chirho", 0x2627}, /* Chi Rho U+2627 */
222 {"Csigma", 0x3da},
223 {"Delta", 0x394},
224 {"Eacu", 0xc9},
225 {"Ecirc", 0xca},
226 {"Edh", 0xd0},
227 {"Epsilon", 0x395},
228 {"Eta", 0x397},
229 {"Gamma", 0x393},
230 {"Iacu", 0xcd},
231 {"Icirc", 0xce},
232 {"Imac", 0x12a},
233 {"Integ", 0x222b},
234 {"Iota", 0x399},
235 {"Kappa", 0x39a},
236 {"Koppa", 0x3de},
237 {"Lambda", 0x39b},
238 {"Lbar", 0x141},
239 {"Mu", 0x39c},
240 {"Naira", 0x4e}, /* should have bar through */
241 {"Nplus", 0x4e}, /* should have plus above */
242 {"Ntilde", 0xd1},
243 {"Nu", 0x39d},
244 {"Oacu", 0xd3},
245 {"Obar", 0xd8},
246 {"Ocirc", 0xd4},
247 {"Oe", 0x152},
248 {"Omega", 0x3a9},
249 {"Omicron", 0x39f},
250 {"Ouml", 0xd6},
251 {"Phi", 0x3a6},
252 {"Pi", 0x3a0},
253 {"Psi", 0x3a8},
254 {"Rho", 0x3a1},
255 {"Sacu", 0x15a},
256 {"Sigma", 0x3a3},
257 {"Summ", 0x2211},
258 {"Tau", 0x3a4},
259 {"Th", 0xde},
260 {"Theta", 0x398},
261 {"Tse", 0x426},
262 {"Uacu", 0xda},
263 {"Ucirc", 0xdb},
264 {"Upsilon", 0x3a5},
265 {"Uuml", 0xdc},
266 {"Wyn", 0x1bf}, /* wynn U+01BF */
267 {"Xi", 0x39e},
268 {"Ygh", 0x1b7}, /* Yogh U+01B7 */
269 {"Zeta", 0x396},
270 {"Zh", 0x1b7}, /* looks like Yogh. Cf "Sake" */
271 {"a", 0x61}, /* ante */
272 {"aacu", 0xe1},
273 {"aang", 0xe5},
274 {"aasper", MAAS},
275 {"abreve", 0x103},
276 {"acirc", 0xe2},
277 {"acu", LACU},
278 {"ae", 0xe6},
279 {"agrave", 0xe0},
280 {"ahook", 0x105},
281 {"alenis", MALN},
282 {"alpha", 0x3b1},
283 {"amac", 0x101},
284 {"amp", 0x26},
285 {"and", MAND},
286 {"ang", LRNG},
287 {"angle", 0x2220},
288 {"ankh", 0x2625}, /* ankh U+2625 */
289 {"ante", 0x61}, /* before (year) */
290 {"aonq", MAOQ},
291 {"appreq", 0x2243},
292 {"aquar", 0x2652},
293 {"arDadfull", 0x636}, /* Dad U+0636 */
294 {"arHa", 0x62d}, /* haa U+062D */
295 {"arTa", 0x62a}, /* taa U+062A */
296 {"arain", 0x639}, /* ain U+0639 */
297 {"arainfull", 0x639}, /* ain U+0639 */
298 {"aralif", 0x627}, /* alef U+0627 */
299 {"arba", 0x628}, /* baa U+0628 */
300 {"arha", 0x647}, /* ha U+0647 */
301 {"aries", 0x2648},
302 {"arnun", 0x646}, /* noon U+0646 */
303 {"arnunfull", 0x646}, /* noon U+0646 */
304 {"arpa", 0x647}, /* ha U+0647 */
305 {"arqoph", 0x642}, /* qaf U+0642 */
306 {"arshinfull", 0x634}, /* sheen U+0634 */
307 {"arta", 0x62a}, /* taa U+062A */
308 {"artafull", 0x62a}, /* taa U+062A */
309 {"artha", 0x62b}, /* thaa U+062B */
310 {"arwaw", 0x648}, /* waw U+0648 */
311 {"arya", 0x64a}, /* ya U+064A */
312 {"aryafull", 0x64a}, /* ya U+064A */
313 {"arzero", 0x660}, /* indic zero U+0660 */
314 {"asg", 0x292}, /* unicycle character. Cf "hallow" */
315 {"asper", LASP},
316 {"assert", 0x22a2},
317 {"astm", 0x2042}, /* asterism: should be upside down */
318 {"at", 0x40},
319 {"atilde", 0xe3},
320 {"auml", 0xe4},
321 {"ayin", 0x639}, /* arabic ain U+0639 */
322 {"b1", 0x2d}, /* single bond */
323 {"b2", 0x3d}, /* double bond */
324 {"b3", 0x2261}, /* triple bond */
325 {"bbar", 0x180}, /* b with bar U+0180 */
326 {"beta", 0x3b2},
327 {"bigobl", 0x2f},
328 {"blC", 0x43}, /* should be black letter */
329 {"blJ", 0x4a}, /* should be black letter */
330 {"blU", 0x55}, /* should be black letter */
331 {"blb", 0x62}, /* should be black letter */
332 {"blozenge", 0x25ca}, /* U+25CA; should be black */
333 {"bly", 0x79}, /* should be black letter */
334 {"bra", MBRA},
335 {"brbl", LBRB},
336 {"breve", LBRV},
337 {"bslash", '\\'},
338 {"bsquare", 0x25a0}, /* black square U+25A0 */
339 {"btril", 0x25c0}, /* U+25C0 */
340 {"btrir", 0x25b6}, /* U+25B6 */
341 {"c", 0x63}, /* circa */
342 {"cab", 0x232a},
343 {"cacu", 0x107},
344 {"canc", 0x264b},
345 {"capr", 0x2651},
346 {"caret", 0x5e},
347 {"cb", 0x7d},
348 {"cbigb", 0x7d},
349 {"cbigpren", 0x29},
350 {"cbigsb", 0x5d},
351 {"cced", 0xe7},
352 {"cdil", LCED},
353 {"cdsb", 0x301b}, /* ]] U+301b */
354 {"cent", 0xa2},
355 {"chacek", 0x10d},
356 {"chi", 0x3c7},
357 {"circ", LRNG},
358 {"circa", 0x63}, /* about (year) */
359 {"circbl", 0x325}, /* ring below accent U+0325 */
360 {"circle", 0x25cb}, /* U+25CB */
361 {"circledot", 0x2299},
362 {"click", 0x296},
363 {"club", 0x2663},
364 {"comtime", 0x43},
365 {"conj", 0x260c},
366 {"cprt", 0xa9},
367 {"cq", '\''},
368 {"cqq", 0x201d},
369 {"cross", 0x2720}, /* maltese cross U+2720 */
370 {"crotchet", 0x2669},
371 {"csb", 0x5d},
372 {"ctilde", 0x63}, /* +tilde */
373 {"ctlig", MLCT},
374 {"cyra", 0x430},
375 {"cyre", 0x435},
376 {"cyrhard", 0x44a},
377 {"cyrjat", 0x463},
378 {"cyrm", 0x43c},
379 {"cyrn", 0x43d},
380 {"cyrr", 0x440},
381 {"cyrsoft", 0x44c},
382 {"cyrt", 0x442},
383 {"cyry", 0x44b},
384 {"dag", 0x2020},
385 {"dbar", 0x111},
386 {"dblar", 0x21cb},
387 {"dblgt", 0x226b},
388 {"dbllt", 0x226a},
389 {"dced", 0x64}, /* +cedilla */
390 {"dd", MDD},
391 {"ddag", 0x2021},
392 {"ddd", MDDD},
393 {"decr", 0x2193},
394 {"deg", 0xb0},
395 {"dele", 0x64}, /* should be dele */
396 {"delta", 0x3b4},
397 {"descnode", 0x260b}, /* descending node U+260B */
398 {"diamond", 0x2662},
399 {"digamma", 0x3dd},
400 {"div", 0xf7},
401 {"dlessi", 0x131},
402 {"dlessj1", 0x6a}, /* should be dotless */
403 {"dlessj2", 0x6a}, /* should be dotless */
404 {"dlessj3", 0x6a}, /* should be dotless */
405 {"dollar", 0x24},
406 {"dotab", LDOT},
407 {"dotbl", LDTB},
408 {"drachm", 0x292},
409 {"dubh", 0x2d},
410 {"eacu", 0xe9},
411 {"earth", 0x2641},
412 {"easper", MEAS},
413 {"ebreve", 0x115},
414 {"ecirc", 0xea},
415 {"edh", 0xf0},
416 {"egrave", 0xe8},
417 {"ehacek", 0x11b},
418 {"ehook", 0x119},
419 {"elem", 0x220a},
420 {"elenis", MELN},
421 {"em", 0x2014},
422 {"emac", 0x113},
423 {"emem", MEMM},
424 {"en", 0x2013},
425 {"epsilon", 0x3b5},
426 {"equil", 0x21cb},
427 {"ergo", 0x2234},
428 {"es", MES},
429 {"eszett", 0xdf},
430 {"eta", 0x3b7},
431 {"eth", 0xf0},
432 {"euml", 0xeb},
433 {"expon", 0x2191},
434 {"fact", 0x21},
435 {"fata", 0x251},
436 {"fatpara", 0xb6}, /* should have fatter, filled in bowl */
437 {"female", 0x2640},
438 {"ffilig", MLFFI},
439 {"fflig", MLFF},
440 {"ffllig", MLFFL},
441 {"filig", MLFI},
442 {"flat", 0x266d},
443 {"fllig", MLFL},
444 {"frE", 0x45}, /* should be curly */
445 {"frL", 'L'}, /* should be curly */
446 {"frR", 0x52}, /* should be curly */
447 {"frakB", 0x42}, /* should have fraktur style */
448 {"frakG", 0x47},
449 {"frakH", 0x48},
450 {"frakI", 0x49},
451 {"frakM", 0x4d},
452 {"frakU", 0x55},
453 {"frakX", 0x58},
454 {"frakY", 0x59},
455 {"frakh", 0x68},
456 {"frbl", LFRB},
457 {"frown", LFRN},
458 {"fs", 0x20},
459 {"fsigma", 0x3c2},
460 {"gAacu", 0xc1}, /* should be Α+acute */
461 {"gaacu", 0x3b1}, /* +acute */
462 {"gabreve", 0x3b1}, /* +breve */
463 {"gafrown", 0x3b1}, /* +frown */
464 {"gagrave", 0x3b1}, /* +grave */
465 {"gamac", 0x3b1}, /* +macron */
466 {"gamma", 0x3b3},
467 {"gauml", 0x3b1}, /* +umlaut */
468 {"ge", 0x2267},
469 {"geacu", 0x3b5}, /* +acute */
470 {"gegrave", 0x3b5}, /* +grave */
471 {"ghacu", 0x3b7}, /* +acute */
472 {"ghfrown", 0x3b7}, /* +frown */
473 {"ghgrave", 0x3b7}, /* +grave */
474 {"ghmac", 0x3b7}, /* +macron */
475 {"giacu", 0x3b9}, /* +acute */
476 {"gibreve", 0x3b9}, /* +breve */
477 {"gifrown", 0x3b9}, /* +frown */
478 {"gigrave", 0x3b9}, /* +grave */
479 {"gimac", 0x3b9}, /* +macron */
480 {"giuml", 0x3b9}, /* +umlaut */
481 {"glagjat", 0x467},
482 {"glots", 0x2c0},
483 {"goacu", 0x3bf}, /* +acute */
484 {"gobreve", 0x3bf}, /* +breve */
485 {"grave", LGRV},
486 {"gt", 0x3e},
487 {"guacu", 0x3c5}, /* +acute */
488 {"gufrown", 0x3c5}, /* +frown */
489 {"gugrave", 0x3c5}, /* +grave */
490 {"gumac", 0x3c5}, /* +macron */
491 {"guuml", 0x3c5}, /* +umlaut */
492 {"gwacu", 0x3c9}, /* +acute */
493 {"gwfrown", 0x3c9}, /* +frown */
494 {"gwgrave", 0x3c9}, /* +grave */
495 {"hacek", LHCK},
496 {"halft", 0x2308},
497 {"hash", 0x23},
498 {"hasper", MHAS},
499 {"hatpath", 0x5b2}, /* hataf patah U+05B2 */
500 {"hatqam", 0x5b3}, /* hataf qamats U+05B3 */
501 {"hatseg", 0x5b1}, /* hataf segol U+05B1 */
502 {"hbar", 0x127},
503 {"heart", 0x2661},
504 {"hebaleph", 0x5d0}, /* aleph U+05D0 */
505 {"hebayin", 0x5e2}, /* ayin U+05E2 */
506 {"hebbet", 0x5d1}, /* bet U+05D1 */
507 {"hebbeth", 0x5d1}, /* bet U+05D1 */
508 {"hebcheth", 0x5d7}, /* bet U+05D7 */
509 {"hebdaleth", 0x5d3}, /* dalet U+05D3 */
510 {"hebgimel", 0x5d2}, /* gimel U+05D2 */
511 {"hebhe", 0x5d4}, /* he U+05D4 */
512 {"hebkaph", 0x5db}, /* kaf U+05DB */
513 {"heblamed", 0x5dc}, /* lamed U+05DC */
514 {"hebmem", 0x5de}, /* mem U+05DE */
515 {"hebnun", 0x5e0}, /* nun U+05E0 */
516 {"hebnunfin", 0x5df}, /* final nun U+05DF */
517 {"hebpe", 0x5e4}, /* pe U+05E4 */
518 {"hebpedag", 0x5e3}, /* final pe? U+05E3 */
519 {"hebqoph", 0x5e7}, /* qof U+05E7 */
520 {"hebresh", 0x5e8}, /* resh U+05E8 */
521 {"hebshin", 0x5e9}, /* shin U+05E9 */
522 {"hebtav", 0x5ea}, /* tav U+05EA */
523 {"hebtsade", 0x5e6}, /* tsadi U+05E6 */
524 {"hebwaw", 0x5d5}, /* vav? U+05D5 */
525 {"hebyod", 0x5d9}, /* yod U+05D9 */
526 {"hebzayin", 0x5d6}, /* zayin U+05D6 */
527 {"hgz", 0x292}, /* ??? Cf "alet" */
528 {"hireq", 0x5b4}, /* U+05B4 */
529 {"hlenis", MHLN},
530 {"hook", LOGO},
531 {"horizE", 0x45}, /* should be on side */
532 {"horizP", 0x50}, /* should be on side */
533 {"horizS", 0x223d},
534 {"horizT", 0x22a3},
535 {"horizb", 0x7b}, /* should be underbrace */
536 {"ia", 0x3b1},
537 {"iacu", 0xed},
538 {"iasper", MIAS},
539 {"ib", 0x3b2},
540 {"ibar", 0x268},
541 {"ibreve", 0x12d},
542 {"icirc", 0xee},
543 {"id", 0x3b4},
544 {"ident", 0x2261},
545 {"ie", 0x3b5},
546 {"ifilig", MLFI},
547 {"ifflig", MLFF},
548 {"ig", 0x3b3},
549 {"igrave", 0xec},
550 {"ih", 0x3b7},
551 {"ii", 0x3b9},
552 {"ik", 0x3ba},
553 {"ilenis", MILN},
554 {"imac", 0x12b},
555 {"implies", 0x21d2},
556 {"index", 0x261e},
557 {"infin", 0x221e},
558 {"integ", 0x222b},
559 {"intsec", 0x2229},
560 {"invpri", 0x2cf},
561 {"iota", 0x3b9},
562 {"iq", 0x3c8},
563 {"istlig", MLST},
564 {"isub", 0x3f5}, /* iota below accent */
565 {"iuml", 0xef},
566 {"iz", 0x3b6},
567 {"jup", 0x2643},
568 {"kappa", 0x3ba},
569 {"koppa", 0x3df},
570 {"lambda", 0x3bb},
571 {"lar", 0x2190},
572 {"lbar", 0x142},
573 {"le", 0x2266},
574 {"lenis", LLEN},
575 {"leo", 0x264c},
576 {"lhalfbr", 0x2308},
577 {"lhshoe", 0x2283},
578 {"libra", 0x264e},
579 {"llswing", MLLS},
580 {"lm", 0x2d0},
581 {"logicand", 0x2227},
582 {"logicor", 0x2228},
583 {"longs", 0x283},
584 {"lrar", 0x2194},
585 {"lt", 0x3c},
586 {"ltappr", 0x227e},
587 {"ltflat", 0x2220},
588 {"lumlbl", 0x6c}, /* +umlaut below */
589 {"mac", LMAC},
590 {"male", 0x2642},
591 {"mc", 0x63}, /* should be raised */
592 {"merc", 0x263f}, /* mercury U+263F */
593 {"min", 0x2212},
594 {"moonfq", 0x263d}, /* first quarter moon U+263D */
595 {"moonlq", 0x263e}, /* last quarter moon U+263E */
596 {"msylab", 0x6d}, /* +sylab (ˌ) */
597 {"mu", 0x3bc},
598 {"nacu", 0x144},
599 {"natural", 0x266e},
600 {"neq", 0x2260},
601 {"nfacu", 0x2032},
602 {"nfasper", 0x2bd},
603 {"nfbreve", 0x2d8},
604 {"nfced", 0xb8},
605 {"nfcirc", 0x2c6},
606 {"nffrown", 0x2322},
607 {"nfgra", 0x2cb},
608 {"nfhacek", 0x2c7},
609 {"nfmac", 0xaf},
610 {"nftilde", 0x2dc},
611 {"nfuml", 0xa8},
612 {"ng", 0x14b},
613 {"not", 0xac},
614 {"notelem", 0x2209},
615 {"ntilde", 0xf1},
616 {"nu", 0x3bd},
617 {"oab", 0x2329},
618 {"oacu", 0xf3},
619 {"oasper", MOAS},
620 {"ob", 0x7b},
621 {"obar", 0xf8},
622 {"obigb", 0x7b}, /* should be big */
623 {"obigpren", 0x28},
624 {"obigsb", 0x5b}, /* should be big */
625 {"obreve", 0x14f},
626 {"ocirc", 0xf4},
627 {"odsb", 0x301a}, /* [[ U+301A */
628 {"oe", 0x153},
629 {"oeamp", 0x26},
630 {"ograve", 0xf2},
631 {"ohook", 0x6f}, /* +hook */
632 {"olenis", MOLN},
633 {"omac", 0x14d},
634 {"omega", 0x3c9},
635 {"omicron", 0x3bf},
636 {"ope", 0x25b},
637 {"opp", 0x260d},
638 {"oq", 0x60},
639 {"oqq", 0x201c},
640 {"or", MOR},
641 {"osb", 0x5b},
642 {"otilde", 0xf5},
643 {"ouml", 0xf6},
644 {"ounce", 0x2125}, /* ounce U+2125 */
645 {"ovparen", 0x2322}, /* should be sideways ( */
646 {"p", 0x2032},
647 {"pa", 0x2202},
648 {"page", 0x50},
649 {"pall", 0x28e},
650 {"paln", 0x272},
651 {"par", PAR},
652 {"para", 0xb6},
653 {"pbar", 0x70}, /* +bar */
654 {"per", 0x2118}, /* per U+2118 */
655 {"phi", 0x3c6},
656 {"phi2", 0x3d5},
657 {"pi", 0x3c0},
658 {"pisces", 0x2653},
659 {"planck", 0x127},
660 {"plantinJ", 0x4a}, /* should be script */
661 {"pm", 0xb1},
662 {"pmil", 0x2030},
663 {"pp", 0x2033},
664 {"ppp", 0x2034},
665 {"prop", 0x221d},
666 {"psi", 0x3c8},
667 {"pstlg", 0xa3},
668 {"q", 0x3f}, /* should be raised */
669 {"qamets", 0x5b3}, /* U+05B3 */
670 {"quaver", 0x266a},
671 {"rar", 0x2192},
672 {"rasper", MRAS},
673 {"rdot", 0xb7},
674 {"recipe", 0x211e}, /* U+211E */
675 {"reg", 0xae},
676 {"revC", 0x186}, /* open O U+0186 */
677 {"reva", 0x252},
678 {"revc", 0x254},
679 {"revope", 0x25c},
680 {"revr", 0x279},
681 {"revsc", 0x2d2}, /* upside-down semicolon */
682 {"revv", 0x28c},
683 {"rfa", 0x6f}, /* +hook (Cf "goal") */
684 {"rhacek", 0x159},
685 {"rhalfbr", 0x2309},
686 {"rho", 0x3c1},
687 {"rhshoe", 0x2282},
688 {"rlenis", MRLN},
689 {"rsylab", 0x72}, /* +sylab */
690 {"runash", 0x46}, /* should be runic 'ash' */
691 {"rvow", 0x2d4},
692 {"sacu", 0x15b},
693 {"sagit", 0x2650},
694 {"sampi", 0x3e1},
695 {"saturn", 0x2644},
696 {"sced", 0x15f},
697 {"schwa", 0x259},
698 {"scorpio", 0x264f},
699 {"scrA", 0x41}, /* should be script */
700 {"scrC", 0x43},
701 {"scrE", 0x45},
702 {"scrF", 0x46},
703 {"scrI", 0x49},
704 {"scrJ", 0x4a},
705 {"scrL", 'L'},
706 {"scrO", 0x4f},
707 {"scrP", 0x50},
708 {"scrQ", 0x51},
709 {"scrS", 0x53},
710 {"scrT", 0x54},
711 {"scrb", 0x62},
712 {"scrd", 0x64},
713 {"scrh", 0x68},
714 {"scrl", 0x6c},
715 {"scruple", 0x2108}, /* U+2108 */
716 {"sdd", 0x2d0},
717 {"sect", 0xa7},
718 {"semE", 0x2203},
719 {"sh", 0x283},
720 {"shacek", 0x161},
721 {"sharp", 0x266f},
722 {"sheva", 0x5b0}, /* U+05B0 */
723 {"shti", 0x26a},
724 {"shtsyll", 0x222a},
725 {"shtu", 0x28a},
726 {"sidetri", 0x22b2},
727 {"sigma", 0x3c3},
728 {"since", 0x2235},
729 {"slge", 0x2265}, /* should have slanted line under */
730 {"slle", 0x2264}, /* should have slanted line under */
731 {"sm", 0x2c8},
732 {"smm", 0x2cc},
733 {"spade", 0x2660},
734 {"sqrt", 0x221a},
735 {"square", 0x25a1}, /* U+25A1 */
736 {"ssChi", 0x3a7}, /* should be sans serif */
737 {"ssIota", 0x399},
738 {"ssOmicron", 0x39f},
739 {"ssPi", 0x3a0},
740 {"ssRho", 0x3a1},
741 {"ssSigma", 0x3a3},
742 {"ssTau", 0x3a4},
743 {"star", 0x2a},
744 {"stlig", MLST},
745 {"sup2", 0x2072},
746 {"supgt", 0x2c3},
747 {"suplt", 0x2c2},
748 {"sur", 0x2b3},
749 {"swing", 0x223c},
750 {"tau", 0x3c4},
751 {"taur", 0x2649},
752 {"th", 0xfe},
753 {"thbar", 0xfe}, /* +bar */
754 {"theta", 0x3b8},
755 {"thinqm", 0x3f}, /* should be thinner */
756 {"tilde", LTIL},
757 {"times", 0xd7},
758 {"tri", 0x2206},
759 {"trli", 0x2016},
760 {"ts", 0x2009},
761 {"uacu", 0xfa},
762 {"uasper", MUAS},
763 {"ubar", 0x75}, /* +bar */
764 {"ubreve", 0x16d},
765 {"ucirc", 0xfb},
766 {"udA", 0x2200},
767 {"udT", 0x22a5},
768 {"uda", 0x250},
769 {"udh", 0x265},
770 {"udqm", 0xbf},
771 {"udpsi", 0x22d4},
772 {"udtr", 0x2207},
773 {"ugrave", 0xf9},
774 {"ulenis", MULN},
775 {"umac", 0x16b},
776 {"uml", LUML},
777 {"undl", 0x2cd}, /* underline accent */
778 {"union", 0x222a},
779 {"upsilon", 0x3c5},
780 {"uuml", 0xfc},
781 {"vavpath", 0x5d5}, /* vav U+05D5 (+patah) */
782 {"vavsheva", 0x5d5}, /* vav U+05D5 (+sheva) */
783 {"vb", 0x7c},
784 {"vddd", 0x22ee},
785 {"versicle2", 0x2123}, /* U+2123 */
786 {"vinc", 0xaf},
787 {"virgo", 0x264d},
788 {"vpal", 0x25f},
789 {"vvf", 0x263},
790 {"wasper", MWAS},
791 {"wavyeq", 0x2248},
792 {"wlenis", MWLN},
793 {"wyn", 0x1bf}, /* wynn U+01BF */
794 {"xi", 0x3be},
795 {"yacu", 0xfd},
796 {"ycirc", 0x177},
797 {"ygh", 0x292},
798 {"ymac", 0x79}, /* +macron */
799 {"yuml", 0xff},
800 {"zced", 0x7a}, /* +cedilla */
801 {"zeta", 0x3b6},
802 {"zh", 0x292},
803 {"zhacek", 0x17e}
804 };
805 /*
806 The following special characters don't have close enough
807 equivalents in Unicode, so aren't in the above table.
808 22n 2^(2^n) Cf Fermat
809 2on4 2/4
810 3on8 3/8
811 Bantuo Bantu O. Cf Otshi-herero
812 Car C with circular arrow on top
813 albrtime cut-time: C with vertical line
814 ardal Cf dental
815 bantuo Bantu o. Cf Otshi-herero
816 bbc1 single chem bond below
817 bbc2 double chem bond below
818 bbl1 chem bond like /
819 bbl2 chem bond like //
820 bbr1 chem bond like \
821 bbr2 chem bond \\
822 bcop1 copper symbol. Cf copper
823 bcop2 copper symbol. Cf copper
824 benchm Cf benchmark
825 btc1 single chem bond above
826 btc2 double chem bond above
827 btl1 chem bond like \
828 btl2 chem bond like \\
829 btr1 chem bond like /
830 btr2 chem bond line //
831 burman Cf Burman
832 devph sanskrit letter. Cf ph
833 devrfls sanskrit letter. Cf cerebral
834 duplong[12] musical note
835 egchi early form of chi
836 eggamma[12] early form of gamma
837 egiota early form of iota
838 egkappa early form of kappa
839 eglambda early form of lambda
840 egmu[12] early form of mu
841 egnu[12] early form of nu
842 egpi[123] early form of pi
843 egrho[12] early form of rho
844 egsampi early form of sampi
845 egsan early form of san
846 egsigma[12] early form of sigma
847 egxi[123] early form of xi
848 elatS early form of S
849 elatc[12] early form of C
850 elatg[12] early form of G
851 glagjeri Slavonic Glagolitic jeri
852 glagjeru Slavonic Glagolitic jeru
853 hypolem hypolemisk (line with underdot)
854 lhrbr lower half }
855 longmord long mordent
856 mbwvow backwards scretched C. Cf retract.
857 mord music symbol. Cf mordent
858 mostra Cf direct
859 ohgcirc old form of circumflex
860 oldbeta old form of β. Cf perturbate
861 oldsemibr[12] old forms of semibreve. Cf prolation
862 ormg old form of g. Cf G
863 para[12345] form of ¶
864 pauseo musical pause sign
865 pauseu musical pause sign
866 pharyng Cf pharyngal
867 ragr Black letter ragged r
868 repetn musical repeat. Cf retort
869 segno musical segno sign
870 semain[12] semitic ain
871 semhe semitic he
872 semheth semitic heth
873 semkaph semitic kaph
874 semlamed[12] semitic lamed
875 semmem semitic mem
876 semnum semitic nun
877 sempe semitic pe
878 semqoph[123] semitic qoph
879 semresh semitic resh
880 semtav[1234] semitic tav
881 semyod semitic yod
882 semzayin[123] semitic zayin
883 shtlong[12] U with underbar. Cf glyconic
884 sigmatau σ,τ combination
885 squaver sixteenth note
886 sqbreve square musical breve note
887 swast swastika
888 uhrbr upper half of big }
889 versicle1 Cf versicle
890 */
893 static Rune normtab[128] = {
894 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
895 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
896 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
897 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
898 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
899 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
900 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
901 /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
902 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
903 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
904 0x48, 0x49, 0x4a, 0x4b, 'L', 0x4d, 0x4e, 0x4f,
905 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
906 0x58, 0x59, 0x5a, 0x5b, '\\', 0x5d, 0x5e, 0x5f,
907 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
908 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
909 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
910 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
911 };
912 static Rune phtab[128] = {
913 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
914 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
915 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
916 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
917 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
918 /*20*/ 0x20, 0x21, 0x2c8, 0x23, 0x24, 0x2cc, 0xe6, '\'',
919 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
920 /*30*/ 0x30, 0x31, 0x32, 0x25c, 0x34, 0x35, 0x36, 0x37,
921 0x38, 0xf8, 0x2d0, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
922 /*40*/ 0x259, 0x251, 0x42, 0x43, 0xf0, 0x25b, 0x46, 0x47,
923 0x48, 0x26a, 0x4a, 0x4b, 'L', 0x4d, 0x14b, 0x254,
924 /*50*/ 0x50, 0x252, 0x52, 0x283, 0x3b8, 0x28a, 0x28c, 0x57,
925 0x58, 0x59, 0x292, 0x5b, '\\', 0x5d, 0x5e, 0x5f,
926 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
927 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
928 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
929 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
930 };
931 static Rune grtab[128] = {
932 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
933 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
934 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
935 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
936 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
937 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
938 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
939 /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
940 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
941 /*40*/ 0x40, 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393,
942 0x397, 0x399, 0x3da, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f,
943 /*50*/ 0x3a0, 0x398, 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x56, 0x3a9,
944 0x3a7, 0x3a8, 0x396, 0x5b, '\\', 0x5d, 0x5e, 0x5f,
945 /*60*/ 0x60, 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3c6, 0x3b3,
946 0x3b7, 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf,
947 /*70*/ 0x3c0, 0x3b8, 0x3c1, 0x3c3, 0x3c4, 0x3c5, 0x76, 0x3c9,
948 0x3c7, 0x3c8, 0x3b6, 0x7b, 0x7c, 0x7d, 0x7e, NONE
949 };
950 static Rune subtab[128] = {
951 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
952 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
953 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
954 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
955 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
956 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
957 0x208d, 0x208e, 0x2a, 0x208a, 0x2c, 0x208b, 0x2e, 0x2f,
958 /*30*/ 0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087,
959 0x2088, 0x2089, 0x3a, 0x3b, TAGS, 0x208c, TAGE, 0x3f,
960 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
961 0x48, 0x49, 0x4a, 0x4b, 'L', 0x4d, 0x4e, 0x4f,
962 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
963 0x58, 0x59, 0x5a, 0x5b, '\\', 0x5d, 0x5e, 0x5f,
964 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
965 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
966 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
967 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
968 };
969 static Rune suptab[128] = {
970 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
971 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
972 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
973 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
974 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
975 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
976 0x207d, 0x207e, 0x2a, 0x207a, 0x2c, 0x207b, 0x2e, 0x2f,
977 /*30*/ 0x2070, 0x2071, 0x2072, 0x2073, 0x2074, 0x2075, 0x2076, 0x2077,
978 0x2078, 0x2079, 0x3a, 0x3b, TAGS, 0x207c, TAGE, 0x3f,
979 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
980 0x48, 0x49, 0x4a, 0x4b, 'L', 0x4d, 0x4e, 0x4f,
981 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
982 0x58, 0x59, 0x5a, 0x5b, '\\', 0x5d, 0x5e, 0x5f,
983 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
984 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
985 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
986 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
987 };
989 static int tagstarts;
990 static char tag[Buflen];
991 static int naux;
992 static char auxname[Maxaux][Buflen];
993 static char auxval[Maxaux][Buflen];
994 static char spec[Buflen];
995 static char *auxstate[Naux]; /* vals for most recent tag */
996 static Entry curentry;
997 #define cursize (curentry.end-curentry.start)
999 static char *getspec(char *, char *);
1000 static char *gettag(char *, char *);
1001 static void dostatus(void);
1004 * cmd is one of:
1005 * 'p': normal print
1006 * 'h': just print headwords
1007 * 'P': print raw
1009 void
1010 oedprintentry(Entry e, int cmd)
1012 char *p, *pe;
1013 int t, a, i;
1014 long r, rprev, rlig;
1015 Rune *transtab;
1017 p = e.start;
1018 pe = e.end;
1019 transtab = normtab;
1020 rprev = NONE;
1021 changett(0, 0, 0);
1022 curentry = e;
1023 if(cmd == 'h')
1024 outinhibit = 1;
1025 while(p < pe) {
1026 if(cmd == 'r') {
1027 outchar(*p++);
1028 continue;
1030 r = transtab[(*p++)&0x7F];
1031 if(r < NONE) {
1032 /* Emit the rune, but buffer in case of ligature */
1033 if(rprev != NONE)
1034 outrune(rprev);
1035 rprev = r;
1036 } else if(r == SPCS) {
1037 /* Start of special character name */
1038 p = getspec(p, pe);
1039 r = lookassoc(spectab, asize(spectab), spec);
1040 if(r == -1) {
1041 if(debug)
1042 err("spec %ld %d %s",
1043 e.doff, cursize, spec);
1044 r = 0xfffd;
1046 if(r >= LIGS && r < LIGE) {
1047 /* handle possible ligature */
1048 rlig = liglookup(r, rprev);
1049 if(rlig != NONE)
1050 rprev = rlig; /* overwrite rprev */
1051 else {
1052 /* could print accent, but let's not */
1053 if(rprev != NONE) outrune(rprev);
1054 rprev = NONE;
1056 } else if(r >= MULTI && r < MULTIE) {
1057 if(rprev != NONE) {
1058 outrune(rprev);
1059 rprev = NONE;
1061 outrunes(multitab[r-MULTI]);
1062 } else if(r == PAR) {
1063 if(rprev != NONE) {
1064 outrune(rprev);
1065 rprev = NONE;
1067 outnl(1);
1068 } else {
1069 if(rprev != NONE) outrune(rprev);
1070 rprev = r;
1072 } else if(r == TAGS) {
1073 /* Start of tag name */
1074 if(rprev != NONE) {
1075 outrune(rprev);
1076 rprev = NONE;
1078 p = gettag(p, pe);
1079 t = lookassoc(tagtab, asize(tagtab), tag);
1080 if(t == -1) {
1081 if(debug)
1082 err("tag %ld %d %s",
1083 e.doff, cursize, tag);
1084 continue;
1086 for(i = 0; i < Naux; i++)
1087 auxstate[i] = 0;
1088 for(i = 0; i < naux; i++) {
1089 a = lookassoc(auxtab, asize(auxtab), auxname[i]);
1090 if(a == -1) {
1091 if(debug)
1092 err("aux %ld %d %s",
1093 e.doff, cursize, auxname[i]);
1094 } else
1095 auxstate[a] = auxval[i];
1097 switch(t){
1098 case E:
1099 case Ve:
1100 outnl(0);
1101 if(tagstarts)
1102 dostatus();
1103 break;
1104 case Ed:
1105 case Etym:
1106 outchar(tagstarts? '[' : ']');
1107 break;
1108 case Pr:
1109 outchar(tagstarts? '(' : ')');
1110 break;
1111 case In:
1112 transtab = changett(transtab, subtab, tagstarts);
1113 break;
1114 case Hm:
1115 case Su:
1116 case Fq:
1117 transtab = changett(transtab, suptab, tagstarts);
1118 break;
1119 case Gk:
1120 transtab = changett(transtab, grtab, tagstarts);
1121 break;
1122 case Ph:
1123 transtab = changett(transtab, phtab, tagstarts);
1124 break;
1125 case Hw:
1126 if(cmd == 'h') {
1127 if(!tagstarts)
1128 outchar(' ');
1129 outinhibit = !tagstarts;
1131 break;
1132 case S0:
1133 case S1:
1134 case S2:
1135 case S3:
1136 case S4:
1137 case S5:
1138 case S6:
1139 case S7a:
1140 case S7n:
1141 case Sn:
1142 case Sgk:
1143 if(tagstarts) {
1144 outnl(2);
1145 dostatus();
1146 if(auxstate[Num]) {
1147 if(t == S3 || t == S5) {
1148 i = atoi(auxstate[Num]);
1149 while(i--)
1150 outchar('*');
1151 outchars(" ");
1152 } else if(t == S7a || t == S7n || t == Sn) {
1153 outchar('(');
1154 outchars(auxstate[Num]);
1155 outchars(") ");
1156 } else if(t == Sgk) {
1157 i = grtab[(uchar)auxstate[Num][0]];
1158 if(i != NONE)
1159 outrune(i);
1160 outchars(". ");
1161 } else {
1162 outchars(auxstate[Num]);
1163 outchars(". ");
1167 break;
1168 case Cb:
1169 case Db:
1170 case Qp:
1171 case P:
1172 if(tagstarts)
1173 outnl(1);
1174 break;
1175 case Table:
1177 * Todo: gather columns, justify them, etc.
1178 * For now, just let colums come out as rows
1180 if(!tagstarts)
1181 outnl(0);
1182 break;
1183 case Col:
1184 if(tagstarts)
1185 outnl(0);
1186 break;
1187 case Dn:
1188 if(tagstarts)
1189 outchar('/');
1190 break;
1194 if(cmd == 'h') {
1195 outinhibit = 0;
1196 outnl(0);
1201 * Return offset into bdict where next oed entry after fromoff starts.
1202 * Oed entries start with <e>, <ve>, <e st=...>, or <ve st=...>
1204 long
1205 oednextoff(long fromoff)
1207 long a, n;
1208 int c;
1210 a = Bseek(bdict, fromoff, 0);
1211 if(a < 0)
1212 return -1;
1213 n = 0;
1214 for(;;) {
1215 c = Bgetc(bdict);
1216 if(c < 0)
1217 break;
1218 if(c == '<') {
1219 c = Bgetc(bdict);
1220 if(c == 'e') {
1221 c = Bgetc(bdict);
1222 if(c == '>' || c == ' ')
1223 n = 3;
1224 } else if(c == 'v' && Bgetc(bdict) == 'e') {
1225 c = Bgetc(bdict);
1226 if(c == '>' || c == ' ')
1227 n = 4;
1229 if(n)
1230 break;
1233 return (Boffset(bdict)-n);
/*
 * Pronunciation key, piece 1 of 6 (oedprintkey prints prkey1..prkey6
 * back to back): title and the English consonants.
 * The English examples use the vowel symbols the key itself defines
 * further on: ɪ for the vowel of "pit" (plain i is French "si") and
 * ɛ for the vowel of "pet" (ɜ only occurs long, as ɜː in "burn").
 */
static char *prkey1 =
	"KEY TO THE PRONUNCIATION\n"
	"\n"
	"I. CONSONANTS\n"
	"b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
	"\n"
	"g as in go (gəʊ)\n"
	"h ... ho! (həʊ)\n"
	"r ... run (rʌn), terrier (ˈtɛrɪə(r))\n"
	"(r)... her (hɜː(r))\n"
	"s ... see (siː), success (səkˈsɛs)\n"
	"w ... wear (wɛə(r))\n"
	"hw ... when (hwɛn)\n"
	"j ... yes (jɛs)\n"
	"θ ... thin (θɪn), bath (bɑːθ)\n"
	"ð ... then (ðɛn), bathe (beɪð)\n"
	"ʃ ... shop (ʃɒp), dish (dɪʃ)\n"
	"tʃ ... chop (tʃɒp), ditch (dɪtʃ)\n"
	"ʒ ... vision (ˈvɪʒən), déjeuner (deʒøne)\n"
	;
/*
 * Pronunciation key, piece 2 of 6 (oedprintkey prints prkey1..prkey6
 * back to back): remaining English consonants and the foreign ones.
 * "think" and "finger" use ɪ, the symbol the key defines for the
 * vowel of "pit" and already uses for "singing" on the same line.
 */
static char *prkey2 =
	"dʒ ... judge (dʒʌdʒ)\n"
	"ŋ ... singing (ˈsɪŋɪŋ), think (θɪŋk)\n"
	"ŋg ... finger (ˈfɪŋgə(r))\n"
	"\n"
	"Foreign\n"
	"ʎ as in It. seraglio (serˈraʎo)\n"
	"ɲ ... Fr. cognac (kɔɲak)\n"
	"x ... Ger. ach (ax), Sc. loch (lɒx)\n"
	"ç ... Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
	"ɣ ... North Ger. sagen (ˈzaːɣən)\n"
	"c ... Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
	"ɥ ... Fr. cuisine (kɥizin)\n"
	"\n"
	;
/*
 * Pronunciation key, piece 3 of 6 (oedprintkey prints prkey1..prkey6
 * back to back): heading for the vowel section and the short vowels.
 * The heading is spelled "DIPHTHONGS".
 */
static char *prkey3 =
	"II. VOWELS AND DIPHTHONGS\n"
	"\n"
	"Short\n"
	"ɪ as in pit (pɪt), -ness (-nɪs)\n"
	"ɛ ... pet (pɛt), Fr. sept (sɛt)\n"
	"æ ... pat (pæt)\n"
	"ʌ ... putt (pʌt)\n"
	"ɒ ... pot (pɒt)\n"
	"ʊ ... put (pʊt)\n"
	"ə ... another (əˈnʌðə(r))\n"
	"(ə)... beaten (ˈbiːt(ə)n)\n"
	"i ... Fr. si (si)\n"
	"e ... Fr. bébé (bebe)\n"
	"a ... Fr. mari (mari)\n"
	"ɑ ... Fr. bâtiment (bɑtimã)\n"
	"ɔ ... Fr. homme (ɔm)\n"
	"o ... Fr. eau (o)\n"
	"ø ... Fr. peu (pø)\n"
	;
/*
 * Pronunciation key, piece 4 of 6 (oedprintkey prints prkey1..prkey6
 * back to back): the last of the short vowels, then the long vowels.
 */
static char *prkey4 =
	"œ ... Fr. boeuf (bœf), coeur (kœr)\n"
	"u ... Fr. douce (dus)\n"
	"ʏ ... Ger. Müller (ˈmʏlər)\n"
	"y ... Fr. du (dy)\n"
	"\n"
	"Long\n"
	"iː as in bean (biːn)\n"
	"ɑː ... barn (bɑːn)\n"
	"ɔː ... born (bɔːn)\n"
	"uː ... boon (buːn)\n"
	"ɜː ... burn (bɜːn)\n"
	"eː ... Ger. Schnee (ʃneː)\n"
	"ɛː ... Ger. Fähre (ˈfɛːrə)\n"
	"aː ... Ger. Tag (taːk)\n"
	"oː ... Ger. Sohn (zoːn)\n"
	"øː ... Ger. Goethe (gøːtə)\n"
	"yː ... Ger. grün (gryːn)\n"
	"\n"
	;
/*
 * Pronunciation key, piece 5 of 6 (oedprintkey prints prkey1..prkey6
 * back to back): nasal vowels and diphthongs.
 * The sub-heading is spelled "Diphthongs".
 */
static char *prkey5 =
	"Nasal\n"
	"ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
	"ã ... Fr. franc (frã)\n"
	"ɔ˜ ... Fr. bon (bɔ˜n)\n"
	"œ˜ ... Fr. un (œ˜)\n"
	"\n"
	"Diphthongs, etc.\n"
	"eɪ as in bay (beɪ)\n"
	"aɪ ... buy (baɪ)\n"
	"ɔɪ ... boy (bɔɪ)\n"
	"əʊ ... no (nəʊ)\n"
	"aʊ ... now (naʊ)\n"
	"ɪə ... peer (pɪə(r))\n"
	"ɛə ... pair (pɛə(r))\n"
	"ʊə ... tour (tʊə(r))\n"
	"ɔə ... boar (bɔə(r))\n"
	"\n"
	;
/*
 * Pronunciation key, piece 6 of 6 (oedprintkey prints prkey1..prkey6
 * back to back): the stress marks and a worked example.
 */
static char *prkey6 =
	"III. STRESS\n"
	"\n"
	"Main stress: ˈ preceding stressed syllable\n"
	"Secondary stress: ˌ preceding stressed syllable\n"
	"\n"
	"E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n"
	;
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
1339 void
1340 oedprintkey(void)
1342 Bprint(bout, "%s%s%s%s%s%s",
1343 prkey1, prkey2, prkey3, prkey4, prkey5, prkey6);
1347 * f points just after a '&', fe points at end of entry.
1348 * Accumulate the special name, starting after the &
1349 * and continuing until the next '.', in spec[].
1350 * Return pointer to char after '.'.
1352 static char *
1353 getspec(char *f, char *fe)
1355 char *t;
1356 int c, i;
1358 t = spec;
1359 i = sizeof spec;
1360 while(--i > 0) {
1361 c = *f++;
1362 if(c == '.' || f == fe)
1363 break;
1364 *t++ = c;
1366 *t = 0;
1367 return f;
1371 * f points just after '<'; fe points at end of entry.
1372 * Expect next characters from bin to match:
1373 * [/][^ >]+( [^>=]+=[^ >]+)*>
1374 * tag auxname auxval
1375 * Accumulate the tag and its auxilliary information in
1376 * tag[], auxname[][] and auxval[][].
1377 * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
1378 * Set naux to the number of aux pairs found.
1379 * Return pointer to after final '>'.
1381 static char *
1382 gettag(char *f, char *fe)
1384 char *t;
1385 int c, i;
1387 t = tag;
1388 c = *f++;
1389 if(c == '/')
1390 tagstarts = 0;
1391 else {
1392 tagstarts = 1;
1393 *t++ = c;
1395 i = Buflen;
1396 naux = 0;
1397 while(--i > 0) {
1398 c = *f++;
1399 if(c == '>' || f == fe)
1400 break;
1401 if(c == ' ') {
1402 *t = 0;
1403 t = auxname[naux];
1404 i = Buflen;
1405 if(naux < Maxaux-1)
1406 naux++;
1407 } else if(naux && c == '=') {
1408 *t = 0;
1409 t = auxval[naux-1];
1410 i = Buflen;
1411 } else
1412 *t++ = c;
1414 *t = 0;
1415 return f;
1418 static void
1419 dostatus(void)
1421 char *s;
1423 s = auxstate[St];
1424 if(s) {
1425 if(strcmp(s, "obs") == 0)
1426 outrune(0x2020);
1427 else if(strcmp(s, "ali") == 0)
1428 outrune(0x2016);
1429 else if(strcmp(s, "err") == 0 || strcmp(s, "spu") == 0)
1430 outrune(0xb6);
1431 else if(strcmp(s, "xref") == 0)
1432 {/* nothing */}
1433 else if(debug)
1434 err("status %ld %d %s", curentry.doff, cursize, s);