1 /* thanks to Caerwyn Jones <caerwyn@comcast.net> for this module */
15 Blockquote, /* Block quote */
17 Cd, /* ? colloquial data */
18 Col, /* ? Colloquial */
23 Pos, /* Part of Speech */
25 U, /* ? cross reference*/
27 Ntag /* end of tags */
30 /* Assoc tables must be sorted on first field */
32 static Assoc tagtab[] = {
34 {"blockquote", Blockquote},
48 /* Possible tag auxiliary info */
50 Cols, /* number of columns in a table */
51 Num, /* letter or number, for a sense */
52 St, /* status (e.g., obs) */
57 static Assoc auxtab[] = {
64 static Assoc spectab[] = {
74 {"Asg", 0x1b7}, /* Unicycle. Cf "Sake" */
80 {"Chirho", 0x2627}, /* Chi Rho U+2627 */
99 {"Naira", 0x4e}, /* should have bar through */
100 {"Nplus", 0x4e}, /* should have plus above */
125 {"Wyn", 0x1bf}, /* wynn U+01BF */
127 {"Ygh", 0x1b7}, /* Yogh U+01B7 */
129 {"Zh", 0x1b7}, /* looks like Yogh. Cf "Sake" */
130 {"a", 0x61}, /* ante */
147 {"ankh", 0x2625}, /* ankh U+2625 */
148 {"ante", 0x61}, /* before (year) */
152 {"arDadfull", 0x636}, /* Dad U+0636 */
153 {"arHa", 0x62d}, /* haa U+062D */
154 {"arTa", 0x62a}, /* taa U+062A */
155 {"arain", 0x639}, /* ain U+0639 */
156 {"arainfull", 0x639}, /* ain U+0639 */
157 {"aralif", 0x627}, /* alef U+0627 */
158 {"arba", 0x628}, /* baa U+0628 */
159 {"arha", 0x647}, /* ha U+0647 */
161 {"arnun", 0x646}, /* noon U+0646 */
162 {"arnunfull", 0x646}, /* noon U+0646 */
163 {"arpa", 0x647}, /* ha U+0647 */
164 {"arqoph", 0x642}, /* qaf U+0642 */
165 {"arshinfull", 0x634}, /* sheen U+0634 */
166 {"arta", 0x62a}, /* taa U+062A */
167 {"artafull", 0x62a}, /* taa U+062A */
168 {"artha", 0x62b}, /* thaa U+062B */
169 {"arwaw", 0x648}, /* waw U+0648 */
170 {"arya", 0x64a}, /* ya U+064A */
171 {"aryafull", 0x64a}, /* ya U+064A */
172 {"arzero", 0x660}, /* indic zero U+0660 */
173 {"asg", 0x292}, /* unicycle character. Cf "hallow" */
176 {"astm", 0x2042}, /* asterism: should be upside down */
180 {"ayin", 0x639}, /* arabic ain U+0639 */
181 {"b1", 0x2d}, /* single bond */
182 {"b2", 0x3d}, /* double bond */
183 {"b3", 0x2261}, /* triple bond */
184 {"bbar", 0x180}, /* b with bar U+0180 */
187 {"blC", 0x43}, /* should be black letter */
188 {"blJ", 0x4a}, /* should be black letter */
189 {"blU", 0x55}, /* should be black letter */
190 {"blb", 0x62}, /* should be black letter */
191 {"blozenge", 0x25ca}, /* U+25CA; should be black */
192 {"bly", 0x79}, /* should be black letter */
197 {"bsquare", 0x25a0}, /* black square U+25A0 */
198 {"btril", 0x25c0}, /* U+25C0 */
199 {"btrir", 0x25b6}, /* U+25B6 */
200 {"c", 0x63}, /* circa */
212 {"cdsb", 0x301b}, /* ]] U+301b */
217 {"circa", 0x63}, /* about (year) */
218 {"circbl", 0x325}, /* ring below accent U+0325 */
219 {"circle", 0x25cb}, /* U+25CB */
220 {"circledot", 0x2299},
228 {"cross", 0x2720}, /* maltese cross U+2720 */
229 {"crotchet", 0x2669},
231 {"ctilde", 0x63}, /* +tilde */
248 {"dced", 0x64}, /* +cedilla */
254 {"dele", 0x64}, /* should be dele */
256 {"descnode", 0x260b}, /* descending node U+260B */
261 {"dlessj1", 0x6a}, /* should be dotless */
262 {"dlessj2", 0x6a}, /* should be dotless */
263 {"dlessj3", 0x6a}, /* should be dotless */
295 {"fatpara", 0xb6}, /* should have fatter, filled in bowl */
303 {"frE", 0x45}, /* should be curly */
304 {"frL", 'L'}, /* should be curly */
305 {"frR", 0x52}, /* should be curly */
306 {"frakB", 0x42}, /* should have fraktur style */
319 {"gAacute", 0xc1}, /* should be Α+acute */
320 {"gaacute", 0x3b1}, /* +acute */
321 {"gabreve", 0x3b1}, /* +breve */
322 {"gafrown", 0x3b1}, /* +frown */
323 {"gagrave", 0x3b1}, /* +grave */
324 {"gamacr", 0x3b1}, /* +macron */
326 {"gauml", 0x3b1}, /* +umlaut */
328 {"geacute", 0x3b5}, /* +acute */
329 {"gegrave", 0x3b5}, /* +grave */
330 {"ghacute", 0x3b7}, /* +acute */
331 {"ghfrown", 0x3b7}, /* +frown */
332 {"ghgrave", 0x3b7}, /* +grave */
333 {"ghmacr", 0x3b7}, /* +macron */
334 {"giacute", 0x3b9}, /* +acute */
335 {"gibreve", 0x3b9}, /* +breve */
336 {"gifrown", 0x3b9}, /* +frown */
337 {"gigrave", 0x3b9}, /* +grave */
338 {"gimacr", 0x3b9}, /* +macron */
339 {"giuml", 0x3b9}, /* +umlaut */
342 {"goacute", 0x3bf}, /* +acute */
343 {"gobreve", 0x3bf}, /* +breve */
346 {"guacute", 0x3c5}, /* +acute */
347 {"gufrown", 0x3c5}, /* +frown */
348 {"gugrave", 0x3c5}, /* +grave */
349 {"gumacr", 0x3c5}, /* +macron */
350 {"guuml", 0x3c5}, /* +umlaut */
351 {"gwacute", 0x3c9}, /* +acute */
352 {"gwfrown", 0x3c9}, /* +frown */
353 {"gwgrave", 0x3c9}, /* +grave */
358 {"hatpath", 0x5b2}, /* hataf patah U+05B2 */
359 {"hatqam", 0x5b3}, /* hataf qamats U+05B3 */
360 {"hatseg", 0x5b1}, /* hataf segol U+05B1 */
363 {"hebaleph", 0x5d0}, /* aleph U+05D0 */
364 {"hebayin", 0x5e2}, /* ayin U+05E2 */
365 {"hebbet", 0x5d1}, /* bet U+05D1 */
366 {"hebbeth", 0x5d1}, /* bet U+05D1 */
367 {"hebcheth", 0x5d7}, /* het U+05D7 */
368 {"hebdaleth", 0x5d3}, /* dalet U+05D3 */
369 {"hebgimel", 0x5d2}, /* gimel U+05D2 */
370 {"hebhe", 0x5d4}, /* he U+05D4 */
371 {"hebkaph", 0x5db}, /* kaf U+05DB */
372 {"heblamed", 0x5dc}, /* lamed U+05DC */
373 {"hebmem", 0x5de}, /* mem U+05DE */
374 {"hebnun", 0x5e0}, /* nun U+05E0 */
375 {"hebnunfin", 0x5df}, /* final nun U+05DF */
376 {"hebpe", 0x5e4}, /* pe U+05E4 */
377 {"hebpedag", 0x5e3}, /* final pe? U+05E3 */
378 {"hebqoph", 0x5e7}, /* qof U+05E7 */
379 {"hebresh", 0x5e8}, /* resh U+05E8 */
380 {"hebshin", 0x5e9}, /* shin U+05E9 */
381 {"hebtav", 0x5ea}, /* tav U+05EA */
382 {"hebtsade", 0x5e6}, /* tsadi U+05E6 */
383 {"hebwaw", 0x5d5}, /* vav? U+05D5 */
384 {"hebyod", 0x5d9}, /* yod U+05D9 */
385 {"hebzayin", 0x5d6}, /* zayin U+05D6 */
386 {"hgz", 0x292}, /* ??? Cf "alet" */
387 {"hireq", 0x5b4}, /* U+05B4 */
390 {"horizE", 0x45}, /* should be on side */
391 {"horizP", 0x50}, /* should be on side */
394 {"horizb", 0x7b}, /* should be underbrace */
423 {"isub", 0x3f5}, /* iota below accent */
440 {"logicand", 0x2227},
447 {"lumlbl", 0x6c}, /* +umlaut below */
450 {"mc", 0x63}, /* should be raised */
451 {"merc", 0x263f}, /* mercury U+263F */
453 {"moonfq", 0x263d}, /* first quarter moon U+263D */
454 {"moonlq", 0x263e}, /* last quarter moon U+263E */
455 {"msylab", 0x6d}, /* +sylab (ˌ) */
481 {"obigb", 0x7b}, /* should be big */
483 {"obigsb", 0x5b}, /* should be big */
486 {"odsb", 0x301a}, /* [[ U+301A */
490 {"ohook", 0x6f}, /* +hook */
503 {"ounce", 0x2125}, /* ounce U+2125 */
504 {"ovparen", 0x2322}, /* should be sideways ( */
512 {"pbar", 0x70}, /* +bar */
513 {"per", 0x2118}, /* per U+2118 */
519 {"plantinJ", 0x4a}, /* should be script */
527 {"q", 0x3f}, /* should be raised */
528 {"qamets", 0x5b3}, /* U+05B3 */
533 {"recipe", 0x211e}, /* U+211E */
535 {"revC", 0x186}, /* open O U+0186 */
540 {"revsc", 0x2d2}, /* upside-down semicolon */
542 {"rfa", 0x6f}, /* +hook (Cf "goal") */
548 {"rsylab", 0x72}, /* +sylab */
549 {"runash", 0x46}, /* should be runic 'ash' */
558 {"scrA", 0x41}, /* should be script */
574 {"scruple", 0x2108}, /* U+2108 */
581 {"sheva", 0x5b0}, /* U+05B0 */
588 {"slge", 0x2265}, /* should have slanted line under */
589 {"slle", 0x2264}, /* should have slanted line under */
594 {"square", 0x25a1}, /* U+25A1 */
595 {"ssChi", 0x3a7}, /* should be sans serif */
597 {"ssOmicron", 0x39f},
612 {"thbar", 0xfe}, /* +bar */
614 {"thinqm", 0x3f}, /* should be thinner */
622 {"ubar", 0x75}, /* +bar */
636 {"undl", 0x2cd}, /* underline accent */
640 {"vavpath", 0x5d5}, /* vav U+05D5 (+patah) */
641 {"vavsheva", 0x5d5}, /* vav U+05D5 (+sheva) */
644 {"versicle2", 0x2123}, /* U+2123 */
652 {"wyn", 0x1bf}, /* wynn U+01BF */
657 {"ymacr", 0x79}, /* +macron */
659 {"zced", 0x7a}, /* +cedilla */
665 The following special characters don't have close enough
666 equivalents in Unicode, so aren't in the above table.
667 22n 2^(2^n) Cf Fermat
670 Bantuo Bantu O. Cf Otshi-herero
671 Car C with circular arrow on top
672 albrtime cut-time: C with vertical line
674 bantuo Bantu o. Cf Otshi-herero
675 bbc1 single chem bond below
676 bbc2 double chem bond below
677 bbl1 chem bond like /
678 bbl2 chem bond like //
679 bbr1 chem bond like \
681 bcop1 copper symbol. Cf copper
682 bcop2 copper symbol. Cf copper
684 btc1 single chem bond above
685 btc2 double chem bond above
686 btl1 chem bond like \
687 btl2 chem bond like \\
688 btr1 chem bond like /
689 btr2 chem bond like //
691 devph sanskrit letter. Cf ph
692 devrfls sanskrit letter. Cf cerebral
693 duplong[12] musical note
694 egchi early form of chi
695 eggamma[12] early form of gamma
696 egiota early form of iota
697 egkappa early form of kappa
698 eglambda early form of lambda
699 egmu[12] early form of mu
700 egnu[12] early form of nu
701 egpi[123] early form of pi
702 egrho[12] early form of rho
703 egsampi early form of sampi
704 egsan early form of san
705 egsigma[12] early form of sigma
706 egxi[123] early form of xi
707 elatS early form of S
708 elatc[12] early form of C
709 elatg[12] early form of G
710 glagjeri Slavonic Glagolitic jeri
711 glagjeru Slavonic Glagolitic jeru
712 hypolem hypolemisk (line with underdot)
714 longmord long mordent
715 mbwvow backwards scretched C. Cf retract.
716 mord music symbol. Cf mordent
718 ohgcirc old form of circumflex
719 oldbeta old form of β. Cf perturbate
720 oldsemibr[12] old forms of semibreve. Cf prolation
721 ormg old form of g. Cf G
722 para[12345] form of ¶
723 pauseo musical pause sign
724 pauseu musical pause sign
726 ragr Black letter ragged r
727 repetn musical repeat. Cf retort
728 segno musical segno sign
729 semain[12] semitic ain
733 semlamed[12] semitic lamed
737 semqoph[123] semitic qoph
739 semtav[1234] semitic tav
741 semzayin[123] semitic zayin
742 shtlong[12] U with underbar. Cf glyconic
743 sigmatau σ,τ combination
744 squaver sixteenth note
745 sqbreve square musical breve note
747 uhrbr upper half of big }
748 versicle1 Cf versicle
/*
 * normtab: 128-entry translation table, one Rune per 7-bit input code.
 * Control codes and DEL (0x7f) map to NONE, except 0x0a, which maps to a
 * space.  '&' (0x26) yields SPCS and '<' / '>' (0x3c / 0x3e) yield TAGS /
 * TAGE; every other printable character maps to itself.
 * NOTE(review): presumably one of the tables selected through transtab
 * in pgwprintentry -- confirm against the full file.
 */
752 static Rune normtab[128] = {
753 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
754 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
755 NONE, NONE, ' ', NONE, NONE, NONE, NONE, NONE,
756 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
757 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
758 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
759 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
760 /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
761 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
762 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
763 0x48, 0x49, 0x4a, 0x4b,'L', 0x4d, 0x4e, 0x4f,
764 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
765 0x58, 0x59, 0x5a, 0x5b,'\\', 0x5d, 0x5e, 0x5f,
766 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
767 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
768 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
769 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
/*
 * phtab: 128-entry translation table, one Rune per 7-bit input code.
 * All control codes and DEL map to NONE.  A number of ASCII characters are
 * replaced by IPA / phonetic code points, e.g. '"' -> U+02C8 (primary
 * stress), '%' -> U+02CC (secondary stress), '&' -> U+00E6 (ae), ':' ->
 * U+02D0 (length mark), '@' -> U+0259 (schwa), 'S' -> U+0283 (esh),
 * 'T' -> U+03B8 (theta), 'Z' -> U+0292 (ezh).  Unlike normtab, '&' does
 * not yield SPCS here; '<' and '>' still yield TAGS and TAGE.
 * NOTE(review): presumably the table used for pronunciation text -- confirm.
 */
772 static Rune phtab[128] = {
773 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
774 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
775 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
776 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
777 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
778 /*20*/ 0x20, 0x21, 0x2c8, 0x23, 0x24, 0x2cc, 0xe6, '\'',
779 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
780 /*30*/ 0x30, 0x31, 0x32, 0x25c, 0x34, 0x35, 0x36, 0x37,
781 0x38, 0xf8, 0x2d0, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
782 /*40*/ 0x259, 0x251, 0x42, 0x43, 0xf0, 0x25b, 0x46, 0x47,
783 0x48, 0x26a, 0x4a, 0x4b,'L', 0x4d, 0x14b, 0x254,
784 /*50*/ 0x50, 0x252, 0x52, 0x283, 0x3b8, 0x28a, 0x28c, 0x57,
785 0x58, 0x59, 0x292, 0x5b,'\\', 0x5d, 0x5e, 0x5f,
786 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
787 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
788 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
789 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
/*
 * grtab: 128-entry translation table, one Rune per 7-bit input code.
 * All control codes and DEL map to NONE.  Latin letters are replaced by
 * Greek letters (capitals in U+0391..U+03A9, smalls in U+03B1..U+03C9),
 * e.g. 'A' -> U+0391, 'C' -> U+039E, 'Q' -> U+0398, 'W' -> U+03A9.
 * 'J' maps to U+03DA and 'j' to U+03C2 (final sigma); 'V' and 'v' are
 * left unchanged.  '&' yields SPCS, '<' / '>' yield TAGS / TAGE, and the
 * remaining punctuation and digits map to themselves.
 */
791 static Rune grtab[128] = {
792 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
793 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
794 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
795 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
796 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
797 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
798 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
799 /*30*/ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
800 0x38, 0x39, 0x3a, 0x3b, TAGS, 0x3d, TAGE, 0x3f,
801 /*40*/ 0x40, 0x391, 0x392, 0x39e, 0x394, 0x395, 0x3a6, 0x393,
802 0x397, 0x399, 0x3da, 0x39a, 0x39b, 0x39c, 0x39d, 0x39f,
803 /*50*/ 0x3a0, 0x398, 0x3a1, 0x3a3, 0x3a4, 0x3a5, 0x56, 0x3a9,
804 0x3a7, 0x3a8, 0x396, 0x5b,'\\', 0x5d, 0x5e, 0x5f,
805 /*60*/ 0x60, 0x3b1, 0x3b2, 0x3be, 0x3b4, 0x3b5, 0x3c6, 0x3b3,
806 0x3b7, 0x3b9, 0x3c2, 0x3ba, 0x3bb, 0x3bc, 0x3bd, 0x3bf,
807 /*70*/ 0x3c0, 0x3b8, 0x3c1, 0x3c3, 0x3c4, 0x3c5, 0x76, 0x3c9,
808 0x3c7, 0x3c8, 0x3b6, 0x7b, 0x7c, 0x7d, 0x7e, NONE
/*
 * subtab: 128-entry translation table, one Rune per 7-bit input code.
 * All control codes and DEL map to NONE.  Digits '0'..'9' become the
 * subscript digits U+2080..U+2089; '(' and ')' become U+208D / U+208E;
 * '+', '-' and '=' become U+208A, U+208B and U+208C.  '&' yields SPCS,
 * '<' / '>' yield TAGS / TAGE, and every other printable character maps
 * to itself.
 */
810 static Rune subtab[128] = {
811 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
812 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
813 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
814 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
815 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
816 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
817 0x208d, 0x208e, 0x2a, 0x208a, 0x2c, 0x208b, 0x2e, 0x2f,
818 /*30*/ 0x2080, 0x2081, 0x2082, 0x2083, 0x2084, 0x2085, 0x2086, 0x2087,
819 0x2088, 0x2089, 0x3a, 0x3b, TAGS, 0x208c, TAGE, 0x3f,
820 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
821 0x48, 0x49, 0x4a, 0x4b,'L', 0x4d, 0x4e, 0x4f,
822 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
823 0x58, 0x59, 0x5a, 0x5b,'\\', 0x5d, 0x5e, 0x5f,
824 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
825 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
826 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
827 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
/*
 * suptab: 128-entry translation table, one Rune per 7-bit input code.
 * All control codes and DEL map to NONE.  Digits become superscript
 * digits: '0' -> U+2070 and '4'..'9' -> U+2074..U+2079, while '1', '2'
 * and '3' use the Latin-1 code points U+00B9, U+00B2 and U+00B3 (U+2071
 * is superscript small letter i and U+2072/U+2073 are unassigned, so
 * they must not be used for digits).  '(' and ')' become U+207D / U+207E;
 * '+', '-' and '=' become U+207A, U+207B and U+207C.  '&' yields SPCS,
 * '<' / '>' yield TAGS / TAGE, and every other printable character maps
 * to itself.
 */
829 static Rune suptab[128] = {
830 /*0*/ /*1*/ /*2*/ /*3*/ /*4*/ /*5*/ /*6*/ /*7*/
831 /*00*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
832 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
833 /*10*/ NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
834 NONE, NONE, NONE, NONE, NONE, NONE, NONE, NONE,
835 /*20*/ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, SPCS, '\'',
836 0x207d, 0x207e, 0x2a, 0x207a, 0x2c, 0x207b, 0x2e, 0x2f,
837 /*30*/ 0x2070, 0xb9, 0xb2, 0xb3, 0x2074, 0x2075, 0x2076, 0x2077,
838 0x2078, 0x2079, 0x3a, 0x3b, TAGS, 0x207c, TAGE, 0x3f,
839 /*40*/ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
840 0x48, 0x49, 0x4a, 0x4b,'L', 0x4d, 0x4e, 0x4f,
841 /*50*/ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
842 0x58, 0x59, 0x5a, 0x5b,'\\', 0x5d, 0x5e, 0x5f,
843 /*60*/ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
844 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
845 /*70*/ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
846 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, NONE
850 static int tagstarts;
851 static char tag[Buflen];
852 static char spec[Buflen];
853 static Entry curentry;
854 #define cursize (curentry.end-curentry.start)
856 static char *getspec(char *, char *);
857 static char *gettag(char *, char *);
862 * 'h': just print headwords
866 pgwprintentry(Entry e, int cmd)
886 r = transtab[(*p++)&0x7F];
888 /* Emit the rune, but buffer in case of ligature */
892 } else if(r == SPCS) {
893 /* Start of special character name */
895 r = lookassoc(spectab, asize(spectab), spec);
898 err("spec %ld %d %s",
899 e.doff, cursize, spec);
902 if(r >= LIGS && r < LIGE) {
903 /* handle possible ligature */
904 rlig = liglookup(r, rprev);
906 rprev = rlig; /* overwrite rprev */
908 /* could print accent, but let's not */
909 if(rprev != NONE) outrune(rprev);
912 } else if(r >= MULTI && r < MULTIE) {
917 outrunes(multitab[r-MULTI]);
918 } else if(r == PAR) {
925 if(rprev != NONE) outrune(rprev);
928 } else if(r == TAGS) {
929 /* Start of tag name */
935 t = lookassoc(tagtab, asize(tagtab), tag);
939 e.doff, cursize, tag);
947 outinhibit = !tagstarts;
976 * Return offset into bdict where next webster entry after fromoff starts.
977 * Webster entries start with <p><hw>
980 pgwnextoff(long fromoff)
985 a = Bseek(bdict, fromoff, 0);
993 if(c == '<' && Bgetc(bdict) == 'p' && Bgetc(bdict) == '>') {
996 if (Bgetc(bdict) == 'h' && Bgetc(bdict) == 'w'
997 && Bgetc(bdict) == '>')
1005 return (Boffset(bdict)-n);
1008 static char *prkey1 =
1009 "KEY TO THE PRONUNCIATION\n"
1012 "b, d, f, k, l, m, n, p, t, v, z: usual English values\n"
1014 "g as in go (gəʊ)\n"
1016 "r ... run (rʌn), terrier (ˈtɛriə(r))\n"
1017 "(r)... her (hɜː(r))\n"
1018 "s ... see (siː), success (səkˈsɜs)\n"
1019 "w ... wear (wɛə(r))\n"
1020 "hw ... when (hwɛn)\n"
1022 "θ ... thin (θin), bath (bɑːθ)\n"
1023 "ð ... then (ðɛn), bathe (beɪð)\n"
1024 "ʃ ... shop (ʃɒp), dish (dɪʃ)\n"
1025 "tʃ ... chop (tʃɒp), ditch (dɪtʃ)\n"
1026 "ʒ ... vision (ˈvɪʒən), déjeuner (deʒøne)\n"
1028 static char *prkey2 =
1029 "dʒ ... judge (dʒʌdʒ)\n"
1030 "ŋ ... singing (ˈsɪŋɪŋ), think (θiŋk)\n"
1031 "ŋg ... finger (ˈfiŋgə(r))\n"
1034 "ʎ as in It. seraglio (serˈraʎo)\n"
1035 "ɲ ... Fr. cognac (kɔɲak)\n"
1036 "x ... Ger. ach (ax), Sc. loch (lɒx)\n"
1037 "ç ... Ger. ich (ɪç), Sc. nicht (nɪçt)\n"
1038 "ɣ ... North Ger. sagen (ˈzaːɣən)\n"
1039 "c ... Afrikaans baardmannetjie (ˈbaːrtmanəci)\n"
1040 "ɥ ... Fr. cuisine (kɥizin)\n"
1043 static char *prkey3 =
1044 "II. VOWELS AND DIPTHONGS\n"
1047 "ɪ as in pit (pɪt), -ness (-nɪs)\n"
1048 "ɛ ... pet (pɛt), Fr. sept (sɛt)\n"
1050 "ʌ ... putt (pʌt)\n"
1053 "ə ... another (əˈnʌðə(r))\n"
1054 "(ə)... beaten (ˈbiːt(ə)n)\n"
1055 "i ... Fr. si (si)\n"
1056 "e ... Fr. bébé (bebe)\n"
1057 "a ... Fr. mari (mari)\n"
1058 "ɑ ... Fr. bâtiment (bɑtimã)\n"
1059 "ɔ ... Fr. homme (ɔm)\n"
1060 "o ... Fr. eau (o)\n"
1061 "ø ... Fr. peu (pø)\n"
1063 static char *prkey4 =
1064 "œ ... Fr. boeuf (bœf), coeur (kœr)\n"
1065 "u ... Fr. douce (dus)\n"
1066 "ʏ ... Ger. Müller (ˈmʏlər)\n"
1067 "y ... Fr. du (dy)\n"
1070 "iː as in bean (biːn)\n"
1071 "ɑː ... barn (bɑːn)\n"
1072 "ɔː ... born (bɔːn)\n"
1073 "uː ... boon (buːn)\n"
1074 "ɜː ... burn (bɜːn)\n"
1075 "eː ... Ger. Schnee (ʃneː)\n"
1076 "ɛː ... Ger. Fähre (ˈfɛːrə)\n"
1077 "aː ... Ger. Tag (taːk)\n"
1078 "oː ... Ger. Sohn (zoːn)\n"
1079 "øː ... Ger. Goethe (gøːtə)\n"
1080 "yː ... Ger. grün (gryːn)\n"
1083 static char *prkey5 =
1085 "ɛ˜, æ˜ as in Fr. fin (fɛ˜, fæ˜)\n"
1086 "ã ... Fr. franc (frã)\n"
1087 "ɔ˜ ... Fr. bon (bɔ˜n)\n"
1088 "œ˜ ... Fr. un (œ˜)\n"
1091 "eɪ as in bay (beɪ)\n"
1092 "aɪ ... buy (baɪ)\n"
1093 "ɔɪ ... boy (bɔɪ)\n"
1095 "aʊ ... now (naʊ)\n"
1096 "ɪə ... peer (pɪə(r))\n"
1097 "ɛə ... pair (pɛə(r))\n"
1098 "ʊə ... tour (tʊə(r))\n"
1099 "ɔə ... boar (bɔə(r))\n"
1102 static char *prkey6 =
1105 "Main stress: ˈ preceding stressed syllable\n"
1106 "Secondary stress: ˌ preceding stressed syllable\n"
1108 "E.g.: pronunciation (prəˌnʌnsɪˈeɪʃ(ə)n)\n";
1109 /* TODO: find transcriptions of foreign consonants, œ, ʏ, nasals */
1114 Bprint(bout, "%s%s%s%s%s%s",
1115 prkey1, prkey2, prkey3, prkey4, prkey5, prkey6);
1119 * f points just after a '&', fe points at end of entry.
1120 * Accumulate the special name, starting after the &
1121 * and continuing until the next ';', in spec[].
1122 * Return pointer to char after ';'.
1125 getspec(char *f, char *fe)
1134 if(c == ';' || f == fe)
1143 * f points just after '<'; fe points at end of entry.
1144 * Expect next characters from bin to match:
1145 * [/][^ >]+( [^>=]+=[^ >]+)*>
1146 * tag auxname auxval
1147 * Accumulate the tag and its auxiliary information in
1148 * tag[], auxname[][] and auxval[][].
1149 * Set tagstarts=1 if the tag is 'starting' (has no '/'), else 0.
1150 * Set naux to the number of aux pairs found.
1151 * Return pointer to after final '>'.
1154 gettag(char *f, char *fe)
1170 if(c == '>' || f == fe)