Blob


1 /* thanks to Caerwyn Jones <caerwyn@comcast.net> for this module */
2 #include <u.h>
3 #include <libc.h>
4 #include <bio.h>
5 #include "dict.h"
7 enum {
8 Buflen=1000,
9 Maxaux=5
10 };
12 /* Possible tags */
13 enum {
14 B, /* Bold */
15 Blockquote, /* Block quote */
16 Br, /* Break line */
17 Cd, /* ? coloquial data */
18 Col, /* ? Coloquial */
19 Def, /* Definition */
20 Hw, /* Head Word */
21 I, /* Italics */
22 P, /* Paragraph */
23 Pos, /* Part of Speach */
24 Sn, /* Sense */
25 U, /* ? cross reference*/
26 Wf, /* ? word form */
27 Ntag /* end of tags */
28 };
30 /* Assoc tables must be sorted on first field */
32 static Assoc tagtab[] = {
33 {"b", B},
34 {"blockquote", Blockquote},
35 {"BR", Br},
36 {"cd", Cd},
37 {"col", Col},
38 {"def", Def},
39 {"hw", Hw},
40 {"i", I},
41 {"p", P},
42 {"pos", Pos},
43 {"sn", Sn},
44 {"u", U},
45 {"wf", Wf}
46 };
48 /* Possible tag auxilliary info */
49 enum {
50 Cols, /* number of columns in a table */
51 Num, /* letter or number, for a sense */
52 St, /* status (e.g., obs) */
53 Naux
54 };
56 #if 0
57 static Assoc auxtab[] = {
58 {"cols", Cols},
59 {"num", Num},
60 {"st", St}
61 };
62 #endif
64 static Assoc spectab[] = {
65 {"3on4", 0xbe},
66 {"AElig", 0xc6},
67 {"Aacute", 0xc1},
68 {"Aang", 0xc5},
69 {"Abarab", 0x100},
70 {"Acirc", 0xc2},
71 {"Agrave", 0xc0},
72 {"Alpha", 0x391},
73 {"Amacr", 0x100},
74 {"Asg", 0x1b7}, /* Unicyle. Cf "Sake" */
75 {"Auml", 0xc4},
76 {"Beta", 0x392},
77 {"Cced", 0xc7},
78 {"Chacek", 0x10c},
79 {"Chi", 0x3a7},
80 {"Chirho", 0x2627}, /* Chi Rho U+2627 */
81 {"Csigma", 0x3da},
82 {"Delta", 0x394},
83 {"Eacute", 0xc9},
84 {"Ecirc", 0xca},
85 {"Edh", 0xd0},
86 {"Epsilon", 0x395},
87 {"Eta", 0x397},
88 {"Gamma", 0x393},
89 {"Iacute", 0xcd},
90 {"Icirc", 0xce},
91 {"Imacr", 0x12a},
92 {"Integ", 0x222b},
93 {"Iota", 0x399},
94 {"Kappa", 0x39a},
95 {"Koppa", 0x3de},
96 {"Lambda", 0x39b},
97 {"Lbar", 0x141},
98 {"Mu", 0x39c},
99 {"Naira", 0x4e}, /* should have bar through */
100 {"Nplus", 0x4e}, /* should have plus above */
101 {"Ntilde", 0xd1},
102 {"Nu", 0x39d},
103 {"Oacute", 0xd3},
104 {"Obar", 0xd8},
105 {"Ocirc", 0xd4},
106 {"Oe", 0x152},
107 {"Omega", 0x3a9},
108 {"Omicron", 0x39f},
109 {"Ouml", 0xd6},
110 {"Phi", 0x3a6},
111 {"Pi", 0x3a0},
112 {"Psi", 0x3a8},
113 {"Rho", 0x3a1},
114 {"Sacute", 0x15a},
115 {"Sigma", 0x3a3},
116 {"Summ", 0x2211},
117 {"Tau", 0x3a4},
118 {"Th", 0xde},
119 {"Theta", 0x398},
120 {"Tse", 0x426},
121 {"Uacute", 0xda},
122 {"Ucirc", 0xdb},
123 {"Upsilon", 0x3a5},
124 {"Uuml", 0xdc},
125 {"Wyn", 0x1bf}, /* wynn U+01BF */
126 {"Xi", 0x39e},
127 {"Ygh", 0x1b7}, /* Yogh U+01B7 */
128 {"Zeta", 0x396},
129 {"Zh", 0x1b7}, /* looks like Yogh. Cf "Sake" */
130 {"a", 0x61}, /* ante */
131 {"aacute", 0xe1},
132 {"aang", 0xe5},
133 {"aasper", MAAS},
134 {"abreve", 0x103},
135 {"acirc", 0xe2},
136 {"acute", LACU},
137 {"aelig", 0xe6},
138 {"agrave", 0xe0},
139 {"ahook", 0x105},
140 {"alenis", MALN},
141 {"alpha", 0x3b1},
142 {"amacr", 0x101},
143 {"amp", 0x26},
144 {"and", MAND},
145 {"ang", LRNG},
146 {"angle", 0x2220},
147 {"ankh", 0x2625}, /* ankh U+2625 */
148 {"ante", 0x61}, /* before (year) */
149 {"aonq", MAOQ},
150 {"appreq", 0x2243},
151 {"aquar", 0x2652},
152 {"arDadfull", 0x636}, /* Dad U+0636 */
153 {"arHa", 0x62d}, /* haa U+062D */
154 {"arTa", 0x62a}, /* taa U+062A */
155 {"arain", 0x639}, /* ain U+0639 */
156 {"arainfull", 0x639}, /* ain U+0639 */
157 {"aralif", 0x627}, /* alef U+0627 */
158 {"arba", 0x628}, /* baa U+0628 */
159 {"arha", 0x647}, /* ha U+0647 */
160 {"aries", 0x2648},
161 {"arnun", 0x646}, /* noon U+0646 */
162 {"arnunfull", 0x646}, /* noon U+0646 */
163 {"arpa", 0x647}, /* ha U+0647 */
164 {"arqoph", 0x642}, /* qaf U+0642 */
165 {"arshinfull", 0x634}, /* sheen U+0634 */
166 {"arta", 0x62a}, /* taa U+062A */
167 {"artafull", 0x62a}, /* taa U+062A */
168 {"artha", 0x62b}, /* thaa U+062B */
169 {"arwaw", 0x648}, /* waw U+0648 */
170 {"arya", 0x64a}, /* ya U+064A */
171 {"aryafull", 0x64a}, /* ya U+064A */
172 {"arzero", 0x660}, /* indic zero U+0660 */
173 {"asg", 0x292}, /* unicycle character. Cf "hallow" */
174 {"asper", LASP},
175 {"assert", 0x22a2},
176 {"astm", 0x2042}, /* asterism: should be upside down */
177 {"at", 0x40},
178 {"atilde", 0xe3},
179 {"auml", 0xe4},
180 {"ayin", 0x639}, /* arabic ain U+0639 */
181 {"b1", 0x2d}, /* single bond */
182 {"b2", 0x3d}, /* double bond */
183 {"b3", 0x2261}, /* triple bond */
184 {"bbar", 0x180}, /* b with bar U+0180 */
185 {"beta", 0x3b2},
186 {"bigobl", 0x2f},
187 {"blC", 0x43}, /* should be black letter */
188 {"blJ", 0x4a}, /* should be black letter */
189 {"blU", 0x55}, /* should be black letter */
190 {"blb", 0x62}, /* should be black letter */
191 {"blozenge", 0x25ca}, /* U+25CA; should be black */
192 {"bly", 0x79}, /* should be black letter */
193 {"bra", MBRA},
194 {"brbl", LBRB},
195 {"breve", LBRV},
196 {"bslash",'\\'},
197 {"bsquare", 0x25a0}, /* black square U+25A0 */
198 {"btril", 0x25c0}, /* U+25C0 */
199 {"btrir", 0x25b6}, /* U+25B6 */
200 {"c", 0x63}, /* circa */
201 {"cab", 0x232a},
202 {"cacute", 0x107},
203 {"canc", 0x264b},
204 {"capr", 0x2651},
205 {"caret", 0x5e},
206 {"cb", 0x7d},
207 {"cbigb", 0x7d},
208 {"cbigpren", 0x29},
209 {"cbigsb", 0x5d},
210 {"cced", 0xe7},
211 {"cdil", LCED},
212 {"cdsb", 0x301b}, /* ]] U+301b */
213 {"cent", 0xa2},
214 {"chacek", 0x10d},
215 {"chi", 0x3c7},
216 {"circ", LRNG},
217 {"circa", 0x63}, /* about (year) */
218 {"circbl", 0x325}, /* ring below accent U+0325 */
219 {"circle", 0x25cb}, /* U+25CB */
220 {"circledot", 0x2299},
221 {"click", 0x296},
222 {"club", 0x2663},
223 {"comtime", 0x43},
224 {"conj", 0x260c},
225 {"cprt", 0xa9},
226 {"cq", '\''},
227 {"cqq", 0x201d},
228 {"cross", 0x2720}, /* maltese cross U+2720 */
229 {"crotchet", 0x2669},
230 {"csb", 0x5d},
231 {"ctilde", 0x63}, /* +tilde */
232 {"ctlig", MLCT},
233 {"cyra", 0x430},
234 {"cyre", 0x435},
235 {"cyrhard", 0x44a},
236 {"cyrjat", 0x463},
237 {"cyrm", 0x43c},
238 {"cyrn", 0x43d},
239 {"cyrr", 0x440},
240 {"cyrsoft", 0x44c},
241 {"cyrt", 0x442},
242 {"cyry", 0x44b},
243 {"dag", 0x2020},
244 {"dbar", 0x111},
245 {"dblar", 0x21cb},
246 {"dblgt", 0x226b},
247 {"dbllt", 0x226a},
248 {"dced", 0x64}, /* +cedilla */
249 {"dd", MDD},
250 {"ddag", 0x2021},
251 {"ddd", MDDD},
252 {"decr", 0x2193},
253 {"deg", 0xb0},
254 {"dele", 0x64}, /* should be dele */
255 {"delta", 0x3b4},
256 {"descnode", 0x260b}, /* descending node U+260B */
257 {"diamond", 0x2662},
258 {"digamma", 0x3dd},
259 {"div", 0xf7},
260 {"dlessi", 0x131},
261 {"dlessj1", 0x6a}, /* should be dotless */
262 {"dlessj2", 0x6a}, /* should be dotless */
263 {"dlessj3", 0x6a}, /* should be dotless */
264 {"dollar", 0x24},
265 {"dotab", LDOT},
266 {"dotbl", LDTB},
267 {"drachm", 0x292},
268 {"dubh", 0x2d},
269 {"eacute", 0xe9},
270 {"earth", 0x2641},
271 {"easper", MEAS},
272 {"ebreve", 0x115},
273 {"ecirc", 0xea},
274 {"edh", 0xf0},
275 {"egrave", 0xe8},
276 {"ehacek", 0x11b},
277 {"ehook", 0x119},
278 {"elem", 0x220a},
279 {"elenis", MELN},
280 {"em", 0x2014},
281 {"emacr", 0x113},
282 {"emem", MEMM},
283 {"en", 0x2013},
284 {"epsilon", 0x3b5},
285 {"equil", 0x21cb},
286 {"ergo", 0x2234},
287 {"es", MES},
288 {"eszett", 0xdf},
289 {"eta", 0x3b7},
290 {"eth", 0xf0},
291 {"euml", 0xeb},
292 {"expon", 0x2191},
293 {"fact", 0x21},
294 {"fata", 0x251},
295 {"fatpara", 0xb6}, /* should have fatter, filled in bowl */
296 {"female", 0x2640},
297 {"ffilig", MLFFI},
298 {"fflig", MLFF},
299 {"ffllig", MLFFL},
300 {"filig", MLFI},
301 {"flat", 0x266d},
302 {"fllig", MLFL},
303 {"frE", 0x45}, /* should be curly */
304 {"frL", 'L'}, /* should be curly */
305 {"frR", 0x52}, /* should be curly */
306 {"frakB", 0x42}, /* should have fraktur style */
307 {"frakG", 0x47},
308 {"frakH", 0x48},
309 {"frakI", 0x49},
310 {"frakM", 0x4d},
311 {"frakU", 0x55},
312 {"frakX", 0x58},
313 {"frakY", 0x59},
314 {"frakh", 0x68},
315 {"frbl", LFRB},
316 {"frown", LFRN},
317 {"fs", 0x20},
318 {"fsigma", 0x3c2},
319 {"gAacute", 0xc1}, /* should be Α+acute */
320 {"gaacute", 0x3b1}, /* +acute */
321 {"gabreve", 0x3b1}, /* +breve */
322 {"gafrown", 0x3b1}, /* +frown */
323 {"gagrave", 0x3b1}, /* +grave */
324 {"gamacr", 0x3b1}, /* +macron */
325 {"gamma", 0x3b3},
326 {"gauml", 0x3b1}, /* +umlaut */
327 {"ge", 0x2267},
328 {"geacute", 0x3b5}, /* +acute */
329 {"gegrave", 0x3b5}, /* +grave */
330 {"ghacute", 0x3b7}, /* +acute */
331 {"ghfrown", 0x3b7}, /* +frown */
332 {"ghgrave", 0x3b7}, /* +grave */
333 {"ghmacr", 0x3b7}, /* +macron */
334 {"giacute", 0x3b9}, /* +acute */
335 {"gibreve", 0x3b9}, /* +breve */
336 {"gifrown", 0x3b9}, /* +frown */
337 {"gigrave", 0x3b9}, /* +grave */
338 {"gimacr", 0x3b9}, /* +macron */
339 {"giuml", 0x3b9}, /* +umlaut */
340 {"glagjat", 0x467},
341 {"glots", 0x2c0},
342 {"goacute", 0x3bf}, /* +acute */
343 {"gobreve", 0x3bf}, /* +breve */
344 {"grave", LGRV},
345 {"gt", 0x3e},
346 {"guacute", 0x3c5}, /* +acute */
347 {"gufrown", 0x3c5}, /* +frown */
348 {"gugrave", 0x3c5}, /* +grave */
349 {"gumacr", 0x3c5}, /* +macron */
350 {"guuml", 0x3c5}, /* +umlaut */
351 {"gwacute", 0x3c9}, /* +acute */
352 {"gwfrown", 0x3c9}, /* +frown */
353 {"gwgrave", 0x3c9}, /* +grave */
354 {"hacek", LHCK},
355 {"halft", 0x2308},
356 {"hash", 0x23},
357 {"hasper", MHAS},
358 {"hatpath", 0x5b2}, /* hataf patah U+05B2 */
359 {"hatqam", 0x5b3}, /* hataf qamats U+05B3 */
360 {"hatseg", 0x5b1}, /* hataf segol U+05B1 */
361 {"hbar", 0x127},
362 {"heart", 0x2661},
363 {"hebaleph", 0x5d0}, /* aleph U+05D0 */
364 {"hebayin", 0x5e2}, /* ayin U+05E2 */
365 {"hebbet", 0x5d1}, /* bet U+05D1 */
366 {"hebbeth", 0x5d1}, /* bet U+05D1 */
367 {"hebcheth", 0x5d7}, /* bet U+05D7 */
368 {"hebdaleth", 0x5d3}, /* dalet U+05D3 */
369 {"hebgimel", 0x5d2}, /* gimel U+05D2 */
370 {"hebhe", 0x5d4}, /* he U+05D4 */
371 {"hebkaph", 0x5db}, /* kaf U+05DB */
372 {"heblamed", 0x5dc}, /* lamed U+05DC */
373 {"hebmem", 0x5de}, /* mem U+05DE */
374 {"hebnun", 0x5e0}, /* nun U+05E0 */
375 {"hebnunfin", 0x5df}, /* final nun U+05DF */
376 {"hebpe", 0x5e4}, /* pe U+05E4 */
377 {"hebpedag", 0x5e3}, /* final pe? U+05E3 */
378 {"hebqoph", 0x5e7}, /* qof U+05E7 */
379 {"hebresh", 0x5e8}, /* resh U+05E8 */
380 {"hebshin", 0x5e9}, /* shin U+05E9 */
381 {"hebtav", 0x5ea}, /* tav U+05EA */
382 {"hebtsade", 0x5e6}, /* tsadi U+05E6 */
383 {"hebwaw", 0x5d5}, /* vav? U+05D5 */
384 {"hebyod", 0x5d9}, /* yod U+05D9 */
385 {"hebzayin", 0x5d6}, /* zayin U+05D6 */
386 {"hgz", 0x292}, /* ??? Cf "alet" */
387 {"hireq", 0x5b4}, /* U+05B4 */
388 {"hlenis", MHLN},
389 {"hook", LOGO},
390 {"horizE", 0x45}, /* should be on side */
391 {"horizP", 0x50}, /* should be on side */
392 {"horizS", 0x223d},
393 {"horizT", 0x22a3},
394 {"horizb", 0x7b}, /* should be underbrace */
395 {"ia", 0x3b1},
396 {"iacute", 0xed},
397 {"iasper", MIAS},
398 {"ib", 0x3b2},
399 {"ibar", 0x268},
400 {"ibreve", 0x12d},
401 {"icirc", 0xee},
402 {"id", 0x3b4},
403 {"ident", 0x2261},
404 {"ie", 0x3b5},
405 {"ifilig", MLFI},
406 {"ifflig", MLFF},
407 {"ig", 0x3b3},
408 {"igrave", 0xec},
409 {"ih", 0x3b7},
410 {"ii", 0x3b9},
411 {"ik", 0x3ba},
412 {"ilenis", MILN},
413 {"imacr", 0x12b},
414 {"implies", 0x21d2},
415 {"index", 0x261e},
416 {"infin", 0x221e},
417 {"integ", 0x222b},
418 {"intsec", 0x2229},
419 {"invpri", 0x2cf},
420 {"iota", 0x3b9},
421 {"iq", 0x3c8},
422 {"istlig", MLST},
423 {"isub", 0x3f5}, /* iota below accent */
424 {"iuml", 0xef},
425 {"iz", 0x3b6},
426 {"jup", 0x2643},
427 {"kappa", 0x3ba},
428 {"koppa", 0x3df},
429 {"lambda", 0x3bb},
430 {"lar", 0x2190},
431 {"lbar", 0x142},
432 {"le", 0x2266},
433 {"lenis", LLEN},
434 {"leo", 0x264c},
435 {"lhalfbr", 0x2308},
436 {"lhshoe", 0x2283},
437 {"libra", 0x264e},
438 {"llswing", MLLS},
439 {"lm", 0x2d0},
440 {"logicand", 0x2227},
441 {"logicor", 0x2228},
442 {"longs", 0x283},
443 {"lrar", 0x2194},
444 {"lt", 0x3c},
445 {"ltappr", 0x227e},
446 {"ltflat", 0x2220},
447 {"lumlbl", 0x6c}, /* +umlaut below */
448 {"mac", LMAC},
449 {"male", 0x2642},
450 {"mc", 0x63}, /* should be raised */
451 {"merc", 0x263f}, /* mercury U+263F */
452 {"min", 0x2212},
453 {"moonfq", 0x263d}, /* first quarter moon U+263D */
454 {"moonlq", 0x263e}, /* last quarter moon U+263E */
455 {"msylab", 0x6d}, /* +sylab (ˌ) */
456 {"mu", 0x3bc},
457 {"nacute", 0x144},
458 {"natural", 0x266e},
459 {"neq", 0x2260},
460 {"nfacute", 0x2032},
461 {"nfasper", 0x2bd},
462 {"nfbreve", 0x2d8},
463 {"nfced", 0xb8},
464 {"nfcirc", 0x2c6},
465 {"nffrown", 0x2322},
466 {"nfgra", 0x2cb},
467 {"nfhacek", 0x2c7},
468 {"nfmac", 0xaf},
469 {"nftilde", 0x2dc},
470 {"nfuml", 0xa8},
471 {"ng", 0x14b},
472 {"not", 0xac},
473 {"notelem", 0x2209},
474 {"ntilde", 0xf1},
475 {"nu", 0x3bd},
476 {"oab", 0x2329},
477 {"oacute", 0xf3},
478 {"oasper", MOAS},
479 {"ob", 0x7b},
480 {"obar", 0xf8},
481 {"obigb", 0x7b}, /* should be big */
482 {"obigpren", 0x28},
483 {"obigsb", 0x5b}, /* should be big */
484 {"obreve", 0x14f},
485 {"ocirc", 0xf4},
486 {"odsb", 0x301a}, /* [[ U+301A */
487 {"oelig", 0x153},
488 {"oeamp", 0x26},
489 {"ograve", 0xf2},
490 {"ohook", 0x6f}, /* +hook */
491 {"olenis", MOLN},
492 {"omacr", 0x14d},
493 {"omega", 0x3c9},
494 {"omicron", 0x3bf},
495 {"ope", 0x25b},
496 {"opp", 0x260d},
497 {"oq", 0x60},
498 {"oqq", 0x201c},
499 {"or", MOR},
500 {"osb", 0x5b},
501 {"otilde", 0xf5},
502 {"ouml", 0xf6},
503 {"ounce", 0x2125}, /* ounce U+2125 */
504 {"ovparen", 0x2322}, /* should be sideways ( */
505 {"p", 0x2032},
506 {"pa", 0x2202},
507 {"page", 0x50},
508 {"pall", 0x28e},
509 {"paln", 0x272},
510 {"par", PAR},
511 {"para", 0xb6},
512 {"pbar", 0x70}, /* +bar */
513 {"per", 0x2118}, /* per U+2118 */
514 {"phi", 0x3c6},
515 {"phi2", 0x3d5},
516 {"pi", 0x3c0},
517 {"pisces", 0x2653},
518 {"planck", 0x127},
519 {"plantinJ", 0x4a}, /* should be script */
520 {"pm", 0xb1},
521 {"pmil", 0x2030},
522 {"pp", 0x2033},
523 {"ppp", 0x2034},
524 {"prop", 0x221d},
525 {"psi", 0x3c8},
526 {"pstlg", 0xa3},
527 {"q", 0x3f}, /* should be raised */
528 {"qamets", 0x5b3}, /* U+05B3 */
529 {"quaver", 0x266a},
530 {"rar", 0x2192},
531 {"rasper", MRAS},
532 {"rdot", 0xb7},
533 {"recipe", 0x211e}, /* U+211E */
534 {"reg", 0xae},
535 {"revC", 0x186}, /* open O U+0186 */
536 {"reva", 0x252},
537 {"revc", 0x254},
538 {"revope", 0x25c},
539 {"revr", 0x279},
540 {"revsc", 0x2d2}, /* upside-down semicolon */
541 {"revv", 0x28c},
542 {"rfa", 0x6f}, /* +hook (Cf "goal") */
543 {"rhacek", 0x159},
544 {"rhalfbr", 0x2309},
545 {"rho", 0x3c1},
546 {"rhshoe", 0x2282},
547 {"rlenis", MRLN},
548 {"rsylab", 0x72}, /* +sylab */
549 {"runash", 0x46}, /* should be runic 'ash' */
550 {"rvow", 0x2d4},
551 {"sacute", 0x15b},
552 {"sagit", 0x2650},
553 {"sampi", 0x3e1},
554 {"saturn", 0x2644},
555 {"sced", 0x15f},
556 {"schwa", 0x259},
557 {"scorpio", 0x264f},
558 {"scrA", 0x41}, /* should be script */
559 {"scrC", 0x43},
560 {"scrE", 0x45},
561 {"scrF", 0x46},
562 {"scrI", 0x49},
563 {"scrJ", 0x4a},
564 {"scrL",'L'},
565 {"scrO", 0x4f},
566 {"scrP", 0x50},
567 {"scrQ", 0x51},
568 {"scrS", 0x53},
569 {"scrT", 0x54},
570 {"scrb", 0x62},
571 {"scrd", 0x64},
572 {"scrh", 0x68},
573 {"scrl", 0x6c},
574 {"scruple", 0x2108}, /* U+2108 */
575 {"sdd", 0x2d0},
576 {"sect", 0xa7},
577 {"semE", 0x2203},
578 {"sh", 0x283},
579 {"shacek", 0x161},
580 {"sharp", 0x266f},
581 {"sheva", 0x5b0}, /* U+05B0 */
582 {"shti", 0x26a},
583 {"shtsyll", 0x222a},
584 {"shtu", 0x28a},
585 {"sidetri", 0x22b2},
586 {"sigma", 0x3c3},
587 {"since", 0x2235},
588 {"slge", 0x2265}, /* should have slanted line under */
589 {"slle", 0x2264}, /* should have slanted line under */
590 {"sm", 0x2c8},
591 {"smm", 0x2cc},
592 {"spade", 0x2660},
593 {"sqrt", 0x221a},
594 {"square", 0x25a1}, /* U+25A1 */
595 {"ssChi", 0x3a7}, /* should be sans serif */
596 {"ssIota", 0x399},
597 {"ssOmicron", 0x39f},
598 {"ssPi", 0x3a0},
599 {"ssRho", 0x3a1},
600 {"ssSigma", 0x3a3},
601 {"ssTau", 0x3a4},
602 {"star", 0x2a},
603 {"stlig", MLST},
604 {"sup2", 0x2072},
605 {"supgt", 0x2c3},
606 {"suplt", 0x2c2},
607 {"sur", 0x2b3},
608 {"swing", 0x223c},
609 {"tau", 0x3c4},
610 {"taur", 0x2649},
611 {"th", 0xfe},
612 {"thbar", 0xfe}, /* +bar */
613 {"theta", 0x3b8},
614 {"thinqm", 0x3f}, /* should be thinner */
615 {"tilde", LTIL},
616 {"times", 0xd7},
617 {"tri", 0x2206},
618 {"trli", 0x2016},
619 {"ts", 0x2009},
620 {"uacute", 0xfa},
621 {"uasper", MUAS},
622 {"ubar", 0x75}, /* +bar */
623 {"ubreve", 0x16d},
624 {"ucirc", 0xfb},
625 {"udA", 0x2200},
626 {"udT", 0x22a5},
627 {"uda", 0x250},
628 {"udh", 0x265},
629 {"udqm", 0xbf},
630 {"udpsi", 0x22d4},
631 {"udtr", 0x2207},
632 {"ugrave", 0xf9},
633 {"ulenis", MULN},
634 {"umacr", 0x16b},
635 {"uml", LUML},
636 {"undl", 0x2cd}, /* underline accent */
637 {"union", 0x222a},
638 {"upsilon", 0x3c5},
639 {"uuml", 0xfc},
640 {"vavpath", 0x5d5}, /* vav U+05D5 (+patah) */
641 {"vavsheva", 0x5d5}, /* vav U+05D5 (+sheva) */
642 {"vb", 0x7c},
643 {"vddd", 0x22ee},
644 {"versicle2", 0x2123}, /* U+2123 */
645 {"vinc", 0xaf},
646 {"virgo", 0x264d},
647 {"vpal", 0x25f},
648 {"vvf", 0x263},
649 {"wasper", MWAS},
650 {"wavyeq", 0x2248},
651 {"wlenis", MWLN},
652 {"wyn", 0x1bf}, /* wynn U+01BF */
653 {"xi", 0x3be},
654 {"yacute", 0xfd},
655 {"ycirc", 0x177},
656 {"ygh", 0x292},
657 {"ymacr", 0x79}, /* +macron */
658 {"yuml", 0xff},
659 {"zced", 0x7a}, /* +cedilla */
660 {"zeta", 0x3b6},
661 {"zh", 0x292},
662 {"zhacek", 0x17e}
663 };
664 /*
665 The following special characters don't have close enough
666 equivalents in Unicode, so aren't in the above table.
667 22n 2^(2^n) Cf Fermat
668 2on4 2/4
669 3on8 3/8
670 Bantuo Bantu O. Cf Otshi-herero
671 Car C with circular arrow on top
672 albrtime cut-time: C with vertical line
673 ardal Cf dental
674 bantuo Bantu o. Cf Otshi-herero
675 bbc1 single chem bond below
676 bbc2 double chem bond below
677 bbl1 chem bond like /
678 bbl2 chem bond like //
679 bbr1 chem bond like \
680 bbr2 chem bond \\
681 bcop1 copper symbol. Cf copper
682 bcop2 copper symbol. Cf copper
683 benchm Cf benchmark
684 btc1 single chem bond above
685 btc2 double chem bond above
686 btl1 chem bond like \
687 btl2 chem bond like \\
688 btr1 chem bond like /
689 btr2 chem bond line //
690 burman Cf Burman
691 devph sanskrit letter. Cf ph
692 devrfls sanskrit letter. Cf cerebral
693 duplong[12] musical note
694 egchi early form of chi
695 eggamma[12] early form of gamma
696 egiota early form of iota
697 egkappa early form of kappa
698 eglambda early form of lambda
699 egmu[12] early form of mu
700 egnu[12] early form of nu
701 egpi[123] early form of pi
702 egrho[12] early form of rho
703 egsampi early form of sampi
704 egsan early form of san
705 egsigma[12] early form of sigma
706 egxi[123] early form of xi
707 elatS early form of S
708 elatc[12] early form of C
709 elatg[12] early form of G
710 glagjeri Slavonic Glagolitic jeri
711 glagjeru Slavonic Glagolitic jeru
712 hypolem hypolemisk (line with underdot)
713 lhrbr lower half }
714 longmord long mordent
715 mbwvow backwards scretched C. Cf retract.
716 mord music symbol. Cf mordent
717 mostra Cf direct
718 ohgcirc old form of circumflex
719 oldbeta old form of β. Cf perturbate
720 oldsemibr[12] old forms of semibreve. Cf prolation
721 ormg old form of g. Cf G
722 para[12345] form of ¶
723 pauseo musical pause sign
724 pauseu musical pause sign
725 pharyng Cf pharyngal
726 ragr Black letter ragged r
727 repetn musical repeat. Cf retort
728 segno musical segno sign
729 semain[12] semitic ain
730 semhe semitic he
731 semheth semitic heth
732 semkaph semitic kaph
733 semlamed[12] semitic lamed
734 semmem semitic mem
735 semnum semitic nun
736 sempe semitic pe
737 semqoph[123] semitic qoph
738 semresh semitic resh
739 semtav[1234] semitic tav
740 semyod semitic yod
741 semzayin[123] semitic zayin
742 shtlong[12] U with underbar. Cf glyconic
743 sigmatau σ,τ combination
744 squaver sixteenth note
745 sqbreve square musical breve note
746 swast swastika
747 uhrbr upper half of big }
748 versicle1 Cf versicle
749 */
752 static Rune normtab[128] = {
753 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
754 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
755 NONE, NONE, ' ', NONE, NONE, NONE, NONE, NONE,
756 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
757 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
758 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
759 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
760 /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
761 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
762 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
763 0x48, 0x49, 0x4a, 0x4b,'L', 0x4d, 0x4e, 0x4f,
764 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
765 0x58, 0x59, 0x5a, 0x5b,'\\', 0x5d, 0x5e, 0x5f,
766 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
767 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
768 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
769 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
770 };
771 #if 0
772 static Rune phtab[128] = {
773 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
774 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
775 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
776 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
777 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
778 /*20*/ 0x20, 0x21, 0x2c8, 0x23, 0x24, 0x2cc, 0xe6, '\'',
779 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
780 /*30*/ 0x30, 0x31, 0x32, 0x25c, 0x34, 0x35, 0x36, 0x37,
781 0x38, 0xf8, 0x2d0, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
782 /*40*/ 0x259, 0x251, 0x42, 0x43, 0xf0, 0x25b, 0x46, 0x47,
783 0x48, 0x26a, 0x4a, 0x4b,'L', 0x4d, 0x14b, 0x254,
784 /*50*/ 0x50, 0x252, 0x52, 0x283, 0x3b8, 0x28a, 0x28c, 0x57,
785 0x58, 0x59, 0x292, 0x5b,'\\', 0x5d, 0x5e, 0x5f,
786 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
787 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
788 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
789 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
790 };
791 static Rune grtab[128] = {
792 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
793 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
794 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
795 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
796 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
797 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
798 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
799 /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
800 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
801 /*40*/ 0x40, 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393,
802 0x397, 0x399, 0x3da, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f,
803 /*50*/ 0x3a0, 0x398, 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x56, 0x3a9,
804 0x3a7, 0x3a8, 0x396, 0x5b,'\\', 0x5d, 0x5e, 0x5f,
805 /*60*/ 0x60, 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3c6, 0x3b3,
806 0x3b7, 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf,
807 /*70*/ 0x3c0, 0x3b8, 0x3c1, 0x3c3, 0x3c4, 0x3c5, 0x76, 0x3c9,
808 0x3c7, 0x3c8, 0x3b6, 0x7b, 0x7c, 0x7d, 0x7e, NONE
809 };
810 static Rune subtab[128] = {
811 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
812 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
813 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
814 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
815 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
816 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
817 0x208d, 0x208e, 0x2a, 0x208a, 0x2c, 0x208b, 0x2e, 0x2f,
818 /*30*/ 0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087,
819 0x2088, 0x2089, 0x3a, 0x3b, TAGS, 0x208c, TAGE, 0x3f,
820 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
821 0x48, 0x49, 0x4a, 0x4b,'L', 0x4d, 0x4e, 0x4f,
822 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
823 0x58, 0x59, 0x5a, 0x5b,'\\', 0x5d, 0x5e, 0x5f,
824 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
825 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
826 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
827 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
828 };
829 static Rune suptab[128] = {
830 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
831 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
832 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
833 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
834 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
835 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
836 0x207d, 0x207e, 0x2a, 0x207a, 0x2c, 0x207b, 0x2e, 0x2f,
837 /*30*/ 0x2070, 0x2071, 0x2072, 0x2073, 0x2074, 0x2075, 0x2076, 0x2077,
838 0x2078, 0x2079, 0x3a, 0x3b, TAGS, 0x207c, TAGE, 0x3f,
839 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
840 0x48, 0x49, 0x4a, 0x4b,'L', 0x4d, 0x4e, 0x4f,
841 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
842 0x58, 0x59, 0x5a, 0x5b,'\\', 0x5d, 0x5e, 0x5f,
843 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
844 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
845 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
846 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
847 };
848 #endif
850 static int tagstarts;
851 static char tag[Buflen];
852 static char spec[Buflen];
853 static Entry curentry;
854 #define cursize (curentry.end-curentry.start)
856 static char *getspec(char *, char *);
857 static char *gettag(char *, char *);
859 /*
860 * cmd is one of:
861 * 'p': normal print
862 * 'h': just print headwords
863 * 'P': print raw
864 */
865 void
866 pgwprintentry(Entry e, int cmd)
868 char *p, *pe;
869 int t;
870 long r, rprev, rlig;
871 Rune *transtab;
873 p = e.start;
874 pe = e.end;
875 transtab = normtab;
876 rprev = NONE;
877 changett(0, 0, 0);
878 curentry = e;
879 if(cmd == 'h')
880 outinhibit = 1;
881 while(p < pe) {
882 if(cmd == 'r') {
883 outchar(*p++);
884 continue;
886 r = transtab[(*p++)&0x7F];
887 if(r < NONE) {
888 /* Emit the rune, but buffer in case of ligature */
889 if(rprev != NONE)
890 outrune(rprev);
891 rprev = r;
892 } else if(r == SPCS) {
893 /* Start of special character name */
894 p = getspec(p, pe);
895 r = lookassoc(spectab, asize(spectab), spec);
896 if(r == -1) {
897 if(debug)
898 err("spec %ld %d %s",
899 e.doff, cursize, spec);
900 r = 0xfffd;
902 if(r >= LIGS && r < LIGE) {
903 /* handle possible ligature */
904 rlig = liglookup(r, rprev);
905 if(rlig != NONE)
906 rprev = rlig; /* overwrite rprev */
907 else {
908 /* could print accent, but let's not */
909 if(rprev != NONE) outrune(rprev);
910 rprev = NONE;
912 } else if(r >= MULTI && r < MULTIE) {
913 if(rprev != NONE) {
914 outrune(rprev);
915 rprev = NONE;
917 outrunes(multitab[r-MULTI]);
918 } else if(r == PAR) {
919 if(rprev != NONE) {
920 outrune(rprev);
921 rprev = NONE;
923 outnl(1);
924 } else {
925 if(rprev != NONE) outrune(rprev);
926 rprev = r;
928 } else if(r == TAGS) {
929 /* Start of tag name */
930 if(rprev != NONE) {
931 outrune(rprev);
932 rprev = NONE;
934 p = gettag(p, pe);
935 t = lookassoc(tagtab, asize(tagtab), tag);
936 if(t == -1) {
937 if(debug)
938 err("tag %ld %d %s",
939 e.doff, cursize, tag);
940 continue;
942 switch(t){
943 case Hw:
944 if(cmd == 'h') {
945 if(!tagstarts)
946 outchar(' ');
947 outinhibit = !tagstarts;
949 break;
950 case Sn:
951 if(tagstarts) {
952 outnl(2);
954 break;
955 case P:
956 outnl(tagstarts);
957 break;
958 case Col:
959 case Br:
960 case Blockquote:
961 if(tagstarts)
962 outnl(1);
963 break;
964 case U:
965 outchar('/');
969 if(cmd == 'h') {
970 outinhibit = 0;
971 outnl(0);
975 /*
976 * Return offset into bdict where next webster entry after fromoff starts.
977 * Webster entries start with <p><hw>
978 */
979 long
980 pgwnextoff(long fromoff)
982 long a, n;
983 int c;
985 a = Bseek(bdict, fromoff, 0);
986 if(a != fromoff)
987 return -1;
988 n = 0;
989 for(;;) {
990 c = Bgetc(bdict);
991 if(c < 0)
992 break;
993 if(c == '<' && Bgetc(bdict) == 'p' && Bgetc(bdict) == '>') {
994 c = Bgetc(bdict);
995 if(c == '<') {
996 if (Bgetc(bdict) == 'h' && Bgetc(bdict) == 'w'
997 && Bgetc(bdict) == '>')
998 n = 7;
999 }else if (c == '{')
1000 n = 4;
1001 if(n)
1002 break;
1005 return (Boffset(bdict)-n);
1008 static char *prkey1 =
1009 "KEY TO THE PRONUNCIATION\n"
1010 "\n"
1011 "I. CONSONANTS\n"
1012 "b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
1013 "\n"
1014 "g as in go (gəʊ)\n"
1015 "h ... ho! (həʊ)\n"
1016 "r ... run (rʌn), terrier (ˈtɛriə(r))\n"
1017 "(r)... her (hɜː(r))\n"
1018 "s ... see (siː), success (səkˈsɜs)\n"
1019 "w ... wear (wɛə(r))\n"
1020 "hw ... when (hwɛn)\n"
1021 "j ... yes (jɛs)\n"
1022 "θ ... thin (θin), bath (bɑːθ)\n"
1023 "ð ... then (ðɛn), bathe (beɪð)\n"
1024 "ʃ ... shop (ʃɒp), dish (dɪʃ)\n"
1025 "tʃ ... chop (tʃɒp), ditch (dɪtʃ)\n"
1026 "ʒ ... vision (ˈvɪʒən), déjeuner (deʒøne)\n"
1028 static char *prkey2 =
1029 "dʒ ... judge (dʒʌdʒ)\n"
1030 "ŋ ... singing (ˈsɪŋɪŋ), think (θiŋk)\n"
1031 "ŋg ... finger (ˈfiŋgə(r))\n"
1032 "\n"
1033 "Foreign\n"
1034 "ʎ as in It. seraglio (serˈraʎo)\n"
1035 "ɲ ... Fr. cognac (kɔɲak)\n"
1036 "x ... Ger. ach (ax), Sc. loch (lɒx)\n"
1037 "ç ... Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
1038 "ɣ ... North Ger. sagen (ˈzaːɣən)\n"
1039 "c ... Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
1040 "ɥ ... Fr. cuisine (kɥizin)\n"
1041 "\n"
1043 static char *prkey3 =
1044 "II. VOWELS AND DIPTHONGS\n"
1045 "\n"
1046 "Short\n"
1047 "ɪ as in pit (pɪt), -ness (-nɪs)\n"
1048 "ɛ ... pet (pɛt), Fr. sept (sɛt)\n"
1049 "æ ... pat (pæt)\n"
1050 "ʌ ... putt (pʌt)\n"
1051 "ɒ ... pot (pɒt)\n"
1052 "ʊ ... put (pʊt)\n"
1053 "ə ... another (əˈnʌðə(r))\n"
1054 "(ə)... beaten (ˈbiːt(ə)n)\n"
1055 "i ... Fr. si (si)\n"
1056 "e ... Fr. bébé (bebe)\n"
1057 "a ... Fr. mari (mari)\n"
1058 "ɑ ... Fr. bâtiment (bɑtimã)\n"
1059 "ɔ ... Fr. homme (ɔm)\n"
1060 "o ... Fr. eau (o)\n"
1061 "ø ... Fr. peu (pø)\n"
1063 static char *prkey4 =
1064 "œ ... Fr. boeuf (bœf), coeur (kœr)\n"
1065 "u ... Fr. douce (dus)\n"
1066 "ʏ ... Ger. Müller (ˈmʏlər)\n"
1067 "y ... Fr. du (dy)\n"
1068 "\n"
1069 "Long\n"
1070 "iː as in bean (biːn)\n"
1071 "ɑː ... barn (bɑːn)\n"
1072 "ɔː ... born (bɔːn)\n"
1073 "uː ... boon (buːn)\n"
1074 "ɜː ... burn (bɜːn)\n"
1075 "eː ... Ger. Schnee (ʃneː)\n"
1076 "ɛː ... Ger. Fähre (ˈfɛːrə)\n"
1077 "aː ... Ger. Tag (taːk)\n"
1078 "oː ... Ger. Sohn (zoːn)\n"
1079 "øː ... Ger. Goethe (gøːtə)\n"
1080 "yː ... Ger. grün (gryːn)\n"
1081 "\n"
1083 static char *prkey5 =
1084 "Nasal\n"
1085 "ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
1086 "ã ... Fr. franc (frã)\n"
1087 "ɔ˜ ... Fr. bon (bɔ˜n)\n"
1088 "œ˜ ... Fr. un (œ˜)\n"
1089 "\n"
1090 "Dipthongs, etc.\n"
1091 "eɪ as in bay (beɪ)\n"
1092 "aɪ ... buy (baɪ)\n"
1093 "ɔɪ ... boy (bɔɪ)\n"
1094 "əʊ ... no (nəʊ)\n"
1095 "aʊ ... now (naʊ)\n"
1096 "ɪə ... peer (pɪə(r))\n"
1097 "ɛə ... pair (pɛə(r))\n"
1098 "ʊə ... tour (tʊə(r))\n"
1099 "ɔə ... boar (bɔə(r))\n"
1100 "\n"
1102 static char *prkey6 =
1103 "III. STRESS\n"
1104 "\n"
1105 "Main stress: ˈ preceding stressed syllable\n"
1106 "Secondary stress: ˌ preceding stressed syllable\n"
1107 "\n"
1108 "E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
/* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
1111 void
1112 pgwprintkey(void)
1114 Bprint(bout, "%s%s%s%s%s%s",
1115 prkey1, prkey2, prkey3, prkey4, prkey5, prkey6);
1119 * f points just after a '&', fe points at end of entry.
1120 * Accumulate the special name, starting after the &
1121 * and continuing until the next ';', in spec[].
1122 * Return pointer to char after ';'.
1124 static char *
1125 getspec(char *f, char *fe)
1127 char *t;
1128 int c, i;
1130 t = spec;
1131 i = sizeof spec;
1132 while(--i > 0) {
1133 c = *f++;
1134 if(c == ';' || f == fe)
1135 break;
1136 *t++ = c;
1138 *t = 0;
1139 return f;
1143 * f points just after '<'; fe points at end of entry.
1144 * Expect next characters from bin to match:
1145 * [/][^ >]+( [^>=]+=[^ >]+)*>
1146 * tag auxname auxval
1147 * Accumulate the tag and its auxilliary information in
1148 * tag[], auxname[][] and auxval[][].
1149 * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
1150 * Set naux to the number of aux pairs found.
1151 * Return pointer to after final '>'.
1153 static char *
1154 gettag(char *f, char *fe)
1156 char *t;
1157 int c, i;
1159 t = tag;
1160 c = *f++;
1161 if(c == '/')
1162 tagstarts = 0;
1163 else {
1164 tagstarts = 1;
1165 *t++ = c;
1167 i = Buflen;
1168 while(--i > 0) {
1169 c = *f++;
1170 if(c == '>' || f == fe)
1171 break;
1172 *t++ = c;
1174 *t = 0;
1175 return f;