Blame


1 5cedca1b 2004-05-15 devnull #include "tdef.h"
2 5cedca1b 2004-05-15 devnull #include "fns.h"
3 5cedca1b 2004-05-15 devnull #include "ext.h"
4 5cedca1b 2004-05-15 devnull
5 5cedca1b 2004-05-15 devnull #define HY_BIT 0200 /* stuff in here only works for 7-bit ascii */
6 5cedca1b 2004-05-15 devnull /* this value is used (as a literal) in suftab.c */
7 5cedca1b 2004-05-15 devnull /* to encode possible hyphenation points in suffixes. */
8 5cedca1b 2004-05-15 devnull /* it could be changed, by widening the tables */
9 5cedca1b 2004-05-15 devnull /* to be shorts instead of chars. */
10 5cedca1b 2004-05-15 devnull
11 5cedca1b 2004-05-15 devnull /*
12 5cedca1b 2004-05-15 devnull * troff8.c
13 5cedca1b 2004-05-15 devnull *
14 5cedca1b 2004-05-15 devnull * hyphenation
15 5cedca1b 2004-05-15 devnull */
16 5cedca1b 2004-05-15 devnull
17 5cedca1b 2004-05-15 devnull int hexsize = 0; /* hyphenation exception list size */
18 5cedca1b 2004-05-15 devnull char *hbufp = NULL; /* base of list */
19 5cedca1b 2004-05-15 devnull char *nexth = NULL; /* first free slot in list */
20 5cedca1b 2004-05-15 devnull Tchar *hyend;
21 5cedca1b 2004-05-15 devnull
22 5cedca1b 2004-05-15 devnull #define THRESH 160 /* digram goodness threshold */
23 5cedca1b 2004-05-15 devnull int thresh = THRESH;
24 5cedca1b 2004-05-15 devnull
25 5cedca1b 2004-05-15 devnull int texhyphen(void);
26 5cedca1b 2004-05-15 devnull static int alpha(Tchar);
27 5cedca1b 2004-05-15 devnull
28 5cedca1b 2004-05-15 devnull void hyphen(Tchar *wp)
29 5cedca1b 2004-05-15 devnull {
30 5cedca1b 2004-05-15 devnull int j;
31 5cedca1b 2004-05-15 devnull Tchar *i;
32 5cedca1b 2004-05-15 devnull
33 5cedca1b 2004-05-15 devnull i = wp;
34 5cedca1b 2004-05-15 devnull while (punct((*i++)))
35 5cedca1b 2004-05-15 devnull ;
36 5cedca1b 2004-05-15 devnull if (!alpha(*--i))
37 5cedca1b 2004-05-15 devnull return;
38 5cedca1b 2004-05-15 devnull wdstart = i++;
39 5cedca1b 2004-05-15 devnull while (alpha(*i++))
40 5cedca1b 2004-05-15 devnull ;
41 5cedca1b 2004-05-15 devnull hyend = wdend = --i - 1;
42 5cedca1b 2004-05-15 devnull while (punct((*i++)))
43 5cedca1b 2004-05-15 devnull ;
44 5cedca1b 2004-05-15 devnull if (*--i)
45 5cedca1b 2004-05-15 devnull return;
46 5cedca1b 2004-05-15 devnull if (wdend - wdstart < 4) /* 4 chars is too short to hyphenate */
47 5cedca1b 2004-05-15 devnull return;
48 5cedca1b 2004-05-15 devnull hyp = hyptr;
49 5cedca1b 2004-05-15 devnull *hyp = 0;
50 5cedca1b 2004-05-15 devnull hyoff = 2;
51 5cedca1b 2004-05-15 devnull
52 5cedca1b 2004-05-15 devnull /* for now, try exceptions first, then tex (if hyphalg is non-zero),
53 5cedca1b 2004-05-15 devnull then suffix and digram if tex didn't hyphenate it at all.
54 5cedca1b 2004-05-15 devnull */
55 5cedca1b 2004-05-15 devnull
56 5cedca1b 2004-05-15 devnull if (!exword() && !texhyphen() && !suffix())
57 5cedca1b 2004-05-15 devnull digram();
58 5cedca1b 2004-05-15 devnull
59 5cedca1b 2004-05-15 devnull /* this appears to sort hyphenation points into increasing order */
60 5cedca1b 2004-05-15 devnull *hyp++ = 0;
61 5cedca1b 2004-05-15 devnull if (*hyptr)
62 5cedca1b 2004-05-15 devnull for (j = 1; j; ) {
63 5cedca1b 2004-05-15 devnull j = 0;
64 5cedca1b 2004-05-15 devnull for (hyp = hyptr + 1; *hyp != 0; hyp++) {
65 5cedca1b 2004-05-15 devnull if (*(hyp - 1) > *hyp) {
66 5cedca1b 2004-05-15 devnull j++;
67 5cedca1b 2004-05-15 devnull i = *hyp;
68 5cedca1b 2004-05-15 devnull *hyp = *(hyp - 1);
69 5cedca1b 2004-05-15 devnull *(hyp - 1) = i;
70 5cedca1b 2004-05-15 devnull }
71 5cedca1b 2004-05-15 devnull }
72 5cedca1b 2004-05-15 devnull }
73 5cedca1b 2004-05-15 devnull }
74 5cedca1b 2004-05-15 devnull
75 c5561c23 2004-05-16 devnull static int alpha(Tchar i) /* non-zero if really alphabetic */
76 5cedca1b 2004-05-15 devnull {
77 5cedca1b 2004-05-15 devnull if (ismot(i))
78 5cedca1b 2004-05-15 devnull return 0;
79 5cedca1b 2004-05-15 devnull else if (cbits(i) >= ALPHABET) /* this isn't very elegant, but there's */
80 5cedca1b 2004-05-15 devnull return 0; /* no good way to make sure i is in range for */
81 5cedca1b 2004-05-15 devnull else /* the call of isalpha */
82 5cedca1b 2004-05-15 devnull return isalpha(cbits(i));
83 5cedca1b 2004-05-15 devnull }
84 5cedca1b 2004-05-15 devnull
85 c5561c23 2004-05-16 devnull int
86 5cedca1b 2004-05-15 devnull punct(Tchar i)
87 5cedca1b 2004-05-15 devnull {
88 5cedca1b 2004-05-15 devnull if (!i || alpha(i))
89 5cedca1b 2004-05-15 devnull return(0);
90 5cedca1b 2004-05-15 devnull else
91 5cedca1b 2004-05-15 devnull return(1);
92 5cedca1b 2004-05-15 devnull }
93 5cedca1b 2004-05-15 devnull
94 5cedca1b 2004-05-15 devnull
95 5cedca1b 2004-05-15 devnull void caseha(void) /* set hyphenation algorithm */
96 5cedca1b 2004-05-15 devnull {
97 5cedca1b 2004-05-15 devnull hyphalg = HYPHALG;
98 5cedca1b 2004-05-15 devnull if (skip())
99 5cedca1b 2004-05-15 devnull return;
100 5cedca1b 2004-05-15 devnull noscale++;
101 5cedca1b 2004-05-15 devnull hyphalg = atoi0();
102 5cedca1b 2004-05-15 devnull noscale = 0;
103 5cedca1b 2004-05-15 devnull }
104 5cedca1b 2004-05-15 devnull
105 5cedca1b 2004-05-15 devnull
106 5cedca1b 2004-05-15 devnull void caseht(void) /* set hyphenation threshold; not in manual! */
107 5cedca1b 2004-05-15 devnull {
108 5cedca1b 2004-05-15 devnull thresh = THRESH;
109 5cedca1b 2004-05-15 devnull if (skip())
110 5cedca1b 2004-05-15 devnull return;
111 5cedca1b 2004-05-15 devnull noscale++;
112 5cedca1b 2004-05-15 devnull thresh = atoi0();
113 5cedca1b 2004-05-15 devnull noscale = 0;
114 5cedca1b 2004-05-15 devnull }
115 5cedca1b 2004-05-15 devnull
116 5cedca1b 2004-05-15 devnull
117 5cedca1b 2004-05-15 devnull char *growh(char *where)
118 5cedca1b 2004-05-15 devnull {
119 5cedca1b 2004-05-15 devnull char *new;
120 5cedca1b 2004-05-15 devnull
121 5cedca1b 2004-05-15 devnull hexsize += NHEX;
122 5cedca1b 2004-05-15 devnull if ((new = grow(hbufp, hexsize, sizeof(char))) == NULL)
123 5cedca1b 2004-05-15 devnull return NULL;
124 5cedca1b 2004-05-15 devnull if (new == hbufp) {
125 5cedca1b 2004-05-15 devnull return where;
126 5cedca1b 2004-05-15 devnull } else {
127 5cedca1b 2004-05-15 devnull int diff;
128 5cedca1b 2004-05-15 devnull diff = where - hbufp;
129 5cedca1b 2004-05-15 devnull hbufp = new;
130 5cedca1b 2004-05-15 devnull return new + diff;
131 5cedca1b 2004-05-15 devnull }
132 5cedca1b 2004-05-15 devnull }
133 5cedca1b 2004-05-15 devnull
134 5cedca1b 2004-05-15 devnull
135 5cedca1b 2004-05-15 devnull void casehw(void)
136 5cedca1b 2004-05-15 devnull {
137 5cedca1b 2004-05-15 devnull int i, k;
138 5cedca1b 2004-05-15 devnull char *j;
139 5cedca1b 2004-05-15 devnull Tchar t;
140 5cedca1b 2004-05-15 devnull
141 5cedca1b 2004-05-15 devnull if (nexth == NULL) {
142 5cedca1b 2004-05-15 devnull if ((nexth = hbufp = grow(hbufp, NHEX, sizeof(char))) == NULL) {
143 5cedca1b 2004-05-15 devnull ERROR "No space for exception word list." WARN;
144 5cedca1b 2004-05-15 devnull return;
145 5cedca1b 2004-05-15 devnull }
146 5cedca1b 2004-05-15 devnull hexsize = NHEX;
147 5cedca1b 2004-05-15 devnull }
148 5cedca1b 2004-05-15 devnull k = 0;
149 5cedca1b 2004-05-15 devnull while (!skip()) {
150 5cedca1b 2004-05-15 devnull if ((j = nexth) >= hbufp + hexsize - 2)
151 5cedca1b 2004-05-15 devnull if ((j = nexth = growh(j)) == NULL)
152 5cedca1b 2004-05-15 devnull goto full;
153 5cedca1b 2004-05-15 devnull for (;;) {
154 5cedca1b 2004-05-15 devnull if (ismot(t = getch()))
155 5cedca1b 2004-05-15 devnull continue;
156 5cedca1b 2004-05-15 devnull i = cbits(t);
157 5cedca1b 2004-05-15 devnull if (i == ' ' || i == '\n') {
158 5cedca1b 2004-05-15 devnull *j++ = 0;
159 5cedca1b 2004-05-15 devnull nexth = j;
160 5cedca1b 2004-05-15 devnull *j = 0;
161 5cedca1b 2004-05-15 devnull if (i == ' ')
162 5cedca1b 2004-05-15 devnull break;
163 5cedca1b 2004-05-15 devnull else
164 5cedca1b 2004-05-15 devnull return;
165 5cedca1b 2004-05-15 devnull }
166 5cedca1b 2004-05-15 devnull if (i == '-') {
167 5cedca1b 2004-05-15 devnull k = HY_BIT;
168 5cedca1b 2004-05-15 devnull continue;
169 5cedca1b 2004-05-15 devnull }
170 5cedca1b 2004-05-15 devnull *j++ = maplow(i) | k;
171 5cedca1b 2004-05-15 devnull k = 0;
172 5cedca1b 2004-05-15 devnull if (j >= hbufp + hexsize - 2)
173 5cedca1b 2004-05-15 devnull if ((j = growh(j)) == NULL)
174 5cedca1b 2004-05-15 devnull goto full;
175 5cedca1b 2004-05-15 devnull }
176 5cedca1b 2004-05-15 devnull }
177 5cedca1b 2004-05-15 devnull return;
178 5cedca1b 2004-05-15 devnull full:
179 5cedca1b 2004-05-15 devnull ERROR "Cannot grow exception word list." WARN;
180 5cedca1b 2004-05-15 devnull *nexth = 0;
181 5cedca1b 2004-05-15 devnull }
182 5cedca1b 2004-05-15 devnull
183 5cedca1b 2004-05-15 devnull
184 5cedca1b 2004-05-15 devnull int exword(void)
185 5cedca1b 2004-05-15 devnull {
186 5cedca1b 2004-05-15 devnull Tchar *w;
187 5cedca1b 2004-05-15 devnull char *e, *save;
188 5cedca1b 2004-05-15 devnull
189 5cedca1b 2004-05-15 devnull e = hbufp;
190 5cedca1b 2004-05-15 devnull while (1) {
191 5cedca1b 2004-05-15 devnull save = e;
192 5cedca1b 2004-05-15 devnull if (e == NULL || *e == 0)
193 5cedca1b 2004-05-15 devnull return(0);
194 5cedca1b 2004-05-15 devnull w = wdstart;
195 5cedca1b 2004-05-15 devnull while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
196 5cedca1b 2004-05-15 devnull e++;
197 5cedca1b 2004-05-15 devnull w++;
198 5cedca1b 2004-05-15 devnull }
199 5cedca1b 2004-05-15 devnull if (!*e) {
200 5cedca1b 2004-05-15 devnull if (w-1 == hyend || (w == wdend && maplow(cbits(*w)) == 's')) {
201 5cedca1b 2004-05-15 devnull w = wdstart;
202 5cedca1b 2004-05-15 devnull for (e = save; *e; e++) {
203 5cedca1b 2004-05-15 devnull if (*e & HY_BIT)
204 5cedca1b 2004-05-15 devnull *hyp++ = w;
205 5cedca1b 2004-05-15 devnull if (hyp > hyptr + NHYP - 1)
206 5cedca1b 2004-05-15 devnull hyp = hyptr + NHYP - 1;
207 5cedca1b 2004-05-15 devnull w++;
208 5cedca1b 2004-05-15 devnull }
209 5cedca1b 2004-05-15 devnull return(1);
210 5cedca1b 2004-05-15 devnull } else {
211 5cedca1b 2004-05-15 devnull e++;
212 5cedca1b 2004-05-15 devnull continue;
213 5cedca1b 2004-05-15 devnull }
214 5cedca1b 2004-05-15 devnull } else
215 5cedca1b 2004-05-15 devnull while (*e++)
216 5cedca1b 2004-05-15 devnull ;
217 5cedca1b 2004-05-15 devnull }
218 5cedca1b 2004-05-15 devnull }
219 5cedca1b 2004-05-15 devnull
220 c5561c23 2004-05-16 devnull int
221 5cedca1b 2004-05-15 devnull suffix(void)
222 5cedca1b 2004-05-15 devnull {
223 5cedca1b 2004-05-15 devnull Tchar *w;
224 5cedca1b 2004-05-15 devnull char *s, *s0;
225 5cedca1b 2004-05-15 devnull Tchar i;
226 5cedca1b 2004-05-15 devnull extern char *suftab[];
227 5cedca1b 2004-05-15 devnull
228 5cedca1b 2004-05-15 devnull again:
229 5cedca1b 2004-05-15 devnull i = cbits(*hyend);
230 5cedca1b 2004-05-15 devnull if (!alpha(i))
231 5cedca1b 2004-05-15 devnull return(0);
232 5cedca1b 2004-05-15 devnull if (i < 'a')
233 5cedca1b 2004-05-15 devnull i -= 'A' - 'a';
234 5cedca1b 2004-05-15 devnull if ((s0 = suftab[i-'a']) == 0)
235 5cedca1b 2004-05-15 devnull return(0);
236 5cedca1b 2004-05-15 devnull for (;;) {
237 5cedca1b 2004-05-15 devnull if ((i = *s0 & 017) == 0)
238 5cedca1b 2004-05-15 devnull return(0);
239 5cedca1b 2004-05-15 devnull s = s0 + i - 1;
240 5cedca1b 2004-05-15 devnull w = hyend - 1;
241 5cedca1b 2004-05-15 devnull while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
242 5cedca1b 2004-05-15 devnull s--;
243 5cedca1b 2004-05-15 devnull w--;
244 5cedca1b 2004-05-15 devnull }
245 5cedca1b 2004-05-15 devnull if (s == s0)
246 5cedca1b 2004-05-15 devnull break;
247 5cedca1b 2004-05-15 devnull s0 += i;
248 5cedca1b 2004-05-15 devnull }
249 5cedca1b 2004-05-15 devnull s = s0 + i - 1;
250 5cedca1b 2004-05-15 devnull w = hyend;
251 5cedca1b 2004-05-15 devnull if (*s0 & HY_BIT)
252 5cedca1b 2004-05-15 devnull goto mark;
253 5cedca1b 2004-05-15 devnull while (s > s0) {
254 5cedca1b 2004-05-15 devnull w--;
255 5cedca1b 2004-05-15 devnull if (*s-- & HY_BIT) {
256 5cedca1b 2004-05-15 devnull mark:
257 5cedca1b 2004-05-15 devnull hyend = w - 1;
258 5cedca1b 2004-05-15 devnull if (*s0 & 0100) /* 0100 used in suftab to encode something too */
259 5cedca1b 2004-05-15 devnull continue;
260 5cedca1b 2004-05-15 devnull if (!chkvow(w))
261 5cedca1b 2004-05-15 devnull return(0);
262 5cedca1b 2004-05-15 devnull *hyp++ = w;
263 5cedca1b 2004-05-15 devnull }
264 5cedca1b 2004-05-15 devnull }
265 5cedca1b 2004-05-15 devnull if (*s0 & 040)
266 5cedca1b 2004-05-15 devnull return(0);
267 5cedca1b 2004-05-15 devnull if (exword())
268 5cedca1b 2004-05-15 devnull return(1);
269 5cedca1b 2004-05-15 devnull goto again;
270 5cedca1b 2004-05-15 devnull }
271 5cedca1b 2004-05-15 devnull
/*
 * maplow: return i folded to lower case if it is an upper-case letter,
 * otherwise i unchanged.
 *
 * Values outside 0..0377 are returned untouched: isupper()/tolower()
 * are only defined for unsigned-char values and EOF, and callers pass
 * arbitrary character codes here.
 */
int
maplow(int i)
{
	if (i >= 0 && i <= 0377 && isupper(i))
		i = tolower(i);
	return(i);
}
279 5cedca1b 2004-05-15 devnull
/*
 * vowel: 1 if i is one of a, e, i, o, u, y in either case, else 0.
 */
int
vowel(int i)
{
	if (i == 'a' || i == 'e' || i == 'i' || i == 'o' || i == 'u' || i == 'y')
		return 1;
	if (i == 'A' || i == 'E' || i == 'I' || i == 'O' || i == 'U' || i == 'Y')
		return 1;
	return 0;
}
295 5cedca1b 2004-05-15 devnull
296 5cedca1b 2004-05-15 devnull
297 5cedca1b 2004-05-15 devnull Tchar *chkvow(Tchar *w)
298 5cedca1b 2004-05-15 devnull {
299 5cedca1b 2004-05-15 devnull while (--w >= wdstart)
300 5cedca1b 2004-05-15 devnull if (vowel(cbits(*w)))
301 5cedca1b 2004-05-15 devnull return(w);
302 5cedca1b 2004-05-15 devnull return(0);
303 5cedca1b 2004-05-15 devnull }
304 5cedca1b 2004-05-15 devnull
305 5cedca1b 2004-05-15 devnull
306 5cedca1b 2004-05-15 devnull void digram(void)
307 5cedca1b 2004-05-15 devnull {
308 5cedca1b 2004-05-15 devnull Tchar *w;
309 5cedca1b 2004-05-15 devnull int val;
310 5cedca1b 2004-05-15 devnull Tchar *nhyend, *maxw;
311 5cedca1b 2004-05-15 devnull int maxval;
312 5cedca1b 2004-05-15 devnull extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
313 c5561c23 2004-05-16 devnull maxw = 0;
314 5cedca1b 2004-05-15 devnull again:
315 5cedca1b 2004-05-15 devnull if (!(w = chkvow(hyend + 1)))
316 5cedca1b 2004-05-15 devnull return;
317 5cedca1b 2004-05-15 devnull hyend = w;
318 5cedca1b 2004-05-15 devnull if (!(w = chkvow(hyend)))
319 5cedca1b 2004-05-15 devnull return;
320 5cedca1b 2004-05-15 devnull nhyend = w;
321 5cedca1b 2004-05-15 devnull maxval = 0;
322 5cedca1b 2004-05-15 devnull w--;
323 5cedca1b 2004-05-15 devnull while (++w < hyend && w < wdend - 1) {
324 5cedca1b 2004-05-15 devnull val = 1;
325 5cedca1b 2004-05-15 devnull if (w == wdstart)
326 5cedca1b 2004-05-15 devnull val *= dilook('a', cbits(*w), bxh);
327 5cedca1b 2004-05-15 devnull else if (w == wdstart + 1)
328 5cedca1b 2004-05-15 devnull val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
329 5cedca1b 2004-05-15 devnull else
330 5cedca1b 2004-05-15 devnull val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
331 5cedca1b 2004-05-15 devnull val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
332 5cedca1b 2004-05-15 devnull val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
333 5cedca1b 2004-05-15 devnull if (val > maxval) {
334 5cedca1b 2004-05-15 devnull maxval = val;
335 5cedca1b 2004-05-15 devnull maxw = w + 1;
336 5cedca1b 2004-05-15 devnull }
337 5cedca1b 2004-05-15 devnull }
338 5cedca1b 2004-05-15 devnull hyend = nhyend;
339 5cedca1b 2004-05-15 devnull if (maxval > thresh)
340 5cedca1b 2004-05-15 devnull *hyp++ = maxw;
341 5cedca1b 2004-05-15 devnull goto again;
342 5cedca1b 2004-05-15 devnull }
343 5cedca1b 2004-05-15 devnull
/*
 * dilook: look up the 4-bit weight for the letter pair (a, b) in t.
 *
 * Rows are indexed by the lower-cased first letter; each byte of a row
 * packs two weights, the even-numbered second letter in the high
 * nibble and the odd-numbered one in the low nibble.
 * Returns 0 if either character does not fold to 'a'..'z', instead of
 * indexing outside the table.
 */
int
dilook(int a, int b, char t[26][13])
{
	int row, col, packed;

	row = maplow(a) - 'a';
	col = maplow(b) - 'a';
	if (row < 0 || row >= 26 || col < 0 || col >= 26)
		return(0);
	packed = t[row][col / 2];
	if (!(col & 01))
		packed >>= 4;
	return(packed & 017);
}
354 5cedca1b 2004-05-15 devnull
355 5cedca1b 2004-05-15 devnull
356 5cedca1b 2004-05-15 devnull /* here beginneth the tex hyphenation code, as interpreted freely */
357 5cedca1b 2004-05-15 devnull /* the main difference is that there is no attempt to squeeze space */
358 5cedca1b 2004-05-15 devnull /* as tightly as tex does. */
359 5cedca1b 2004-05-15 devnull
360 5cedca1b 2004-05-15 devnull static int texit(Tchar *, Tchar *);
361 5cedca1b 2004-05-15 devnull static int readpats(void);
362 5cedca1b 2004-05-15 devnull static void install(char *);
363 5cedca1b 2004-05-15 devnull static void fixup(void);
364 5cedca1b 2004-05-15 devnull static int trieindex(int, int);
365 5cedca1b 2004-05-15 devnull
366 5cedca1b 2004-05-15 devnull static char pats[50000]; /* size ought to be computed dynamically */
367 5cedca1b 2004-05-15 devnull static char *nextpat = pats;
368 5cedca1b 2004-05-15 devnull static char *trie[27*27]; /* english-specific sizes */
369 5cedca1b 2004-05-15 devnull
370 5cedca1b 2004-05-15 devnull int texhyphen(void)
371 5cedca1b 2004-05-15 devnull {
372 5cedca1b 2004-05-15 devnull static int loaded = 0; /* -1: couldn't find tex file */
373 5cedca1b 2004-05-15 devnull
374 5cedca1b 2004-05-15 devnull if (hyphalg == 0 || loaded == -1) /* non-zero => tex for now */
375 5cedca1b 2004-05-15 devnull return 0;
376 5cedca1b 2004-05-15 devnull if (loaded == 0) {
377 5cedca1b 2004-05-15 devnull if (readpats())
378 5cedca1b 2004-05-15 devnull loaded = 1;
379 5cedca1b 2004-05-15 devnull else
380 5cedca1b 2004-05-15 devnull loaded = -1;
381 5cedca1b 2004-05-15 devnull }
382 5cedca1b 2004-05-15 devnull return texit(wdstart, wdend);
383 5cedca1b 2004-05-15 devnull }
384 5cedca1b 2004-05-15 devnull
385 5cedca1b 2004-05-15 devnull static int texit(Tchar *start, Tchar *end) /* hyphenate as in tex, return # found */
386 5cedca1b 2004-05-15 devnull {
387 5cedca1b 2004-05-15 devnull int nw, i, k, equal, cnt[500];
388 5cedca1b 2004-05-15 devnull char w[500+1], *np, *pp, *wp, *xpp, *xwp;
389 5cedca1b 2004-05-15 devnull
390 5cedca1b 2004-05-15 devnull w[0] = '.';
391 5cedca1b 2004-05-15 devnull for (nw = 1; start <= end && nw < 500-1; nw++, start++)
392 5cedca1b 2004-05-15 devnull w[nw] = maplow(tolower(cbits(*start)));
393 5cedca1b 2004-05-15 devnull start -= (nw - 1);
394 5cedca1b 2004-05-15 devnull w[nw++] = '.';
395 5cedca1b 2004-05-15 devnull w[nw] = 0;
396 5cedca1b 2004-05-15 devnull /*
397 5cedca1b 2004-05-15 devnull * printf("try %s\n", w);
398 5cedca1b 2004-05-15 devnull */
399 5cedca1b 2004-05-15 devnull for (i = 0; i <= nw; i++)
400 5cedca1b 2004-05-15 devnull cnt[i] = '0';
401 5cedca1b 2004-05-15 devnull
402 5cedca1b 2004-05-15 devnull for (wp = w; wp < w + nw; wp++) {
403 5cedca1b 2004-05-15 devnull for (pp = trie[trieindex(*wp, *(wp+1))]; pp < nextpat; ) {
404 5cedca1b 2004-05-15 devnull if (pp == 0 /* no trie entry */
405 5cedca1b 2004-05-15 devnull || *pp != *wp /* no match on 1st letter */
406 5cedca1b 2004-05-15 devnull || *(pp+1) != *(wp+1)) /* no match on 2nd letter */
407 5cedca1b 2004-05-15 devnull break; /* so move to next letter of word */
408 5cedca1b 2004-05-15 devnull equal = 1;
409 5cedca1b 2004-05-15 devnull for (xpp = pp+2, xwp = wp+2; *xpp; )
410 5cedca1b 2004-05-15 devnull if (*xpp++ != *xwp++) {
411 5cedca1b 2004-05-15 devnull equal = 0;
412 5cedca1b 2004-05-15 devnull break;
413 5cedca1b 2004-05-15 devnull }
414 5cedca1b 2004-05-15 devnull if (equal) {
415 5cedca1b 2004-05-15 devnull np = xpp+1; /* numpat */
416 5cedca1b 2004-05-15 devnull for (k = wp-w; *np; k++, np++)
417 5cedca1b 2004-05-15 devnull if (*np > cnt[k])
418 5cedca1b 2004-05-15 devnull cnt[k] = *np;
419 5cedca1b 2004-05-15 devnull /*
420 5cedca1b 2004-05-15 devnull * printf("match: %s %s\n", pp, xpp+1);
421 5cedca1b 2004-05-15 devnull */
422 5cedca1b 2004-05-15 devnull }
423 5cedca1b 2004-05-15 devnull pp += *(pp-1); /* skip over pattern and numbers to next */
424 5cedca1b 2004-05-15 devnull }
425 5cedca1b 2004-05-15 devnull }
426 5cedca1b 2004-05-15 devnull /*
427 5cedca1b 2004-05-15 devnull * for (i = 0; i < nw; i++) printf("%c", w[i]);
428 5cedca1b 2004-05-15 devnull * printf(" ");
429 5cedca1b 2004-05-15 devnull * for (i = 0; i <= nw; i++) printf("%c", cnt[i]);
430 5cedca1b 2004-05-15 devnull * printf("\n");
431 5cedca1b 2004-05-15 devnull */
432 5cedca1b 2004-05-15 devnull /*
433 5cedca1b 2004-05-15 devnull * for (i = 1; i < nw - 1; i++) {
434 5cedca1b 2004-05-15 devnull * if (i > 2 && i < nw - 3 && cnt[i] % 2)
435 5cedca1b 2004-05-15 devnull * printf("-");
436 5cedca1b 2004-05-15 devnull * if (cbits(start[i-1]) != '.')
437 5cedca1b 2004-05-15 devnull * printf("%c", cbits(start[i-1]));
438 5cedca1b 2004-05-15 devnull * }
439 5cedca1b 2004-05-15 devnull * printf("\n");
440 5cedca1b 2004-05-15 devnull */
441 5cedca1b 2004-05-15 devnull for (i = 1; i < nw -1; i++)
442 5cedca1b 2004-05-15 devnull if (i > 2 && i < nw - 3 && cnt[i] % 2)
443 5cedca1b 2004-05-15 devnull *hyp++ = start + i - 1;
444 5cedca1b 2004-05-15 devnull return hyp - hyptr; /* non-zero if a hyphen was found */
445 5cedca1b 2004-05-15 devnull }
446 5cedca1b 2004-05-15 devnull
447 5cedca1b 2004-05-15 devnull /*
448 5cedca1b 2004-05-15 devnull This code assumes that hyphen.tex looks like
449 5cedca1b 2004-05-15 devnull % some comments
450 5cedca1b 2004-05-15 devnull \patterns{ % more comments
451 5cedca1b 2004-05-15 devnull pat5ter4ns, 1 per line, SORTED, nothing else
452 5cedca1b 2004-05-15 devnull }
453 5cedca1b 2004-05-15 devnull more goo
454 5cedca1b 2004-05-15 devnull \hyphenation{ % more comments
455 5cedca1b 2004-05-15 devnull ex-cep-tions, one per line; i ignore this part for now
456 5cedca1b 2004-05-15 devnull }
457 5cedca1b 2004-05-15 devnull
458 5cedca1b 2004-05-15 devnull this code is NOT robust against variations. unfortunately,
459 5cedca1b 2004-05-15 devnull it looks like every local language version of this file has
460 5cedca1b 2004-05-15 devnull a different format. i have also made no provision for weird
461 5cedca1b 2004-05-15 devnull characters. sigh.
462 5cedca1b 2004-05-15 devnull */
463 c5561c23 2004-05-16 devnull
464 5cedca1b 2004-05-15 devnull static int readpats(void)
465 5cedca1b 2004-05-15 devnull {
466 5cedca1b 2004-05-15 devnull FILE *fp;
467 5cedca1b 2004-05-15 devnull char buf[200], buf1[200];
468 5cedca1b 2004-05-15 devnull
469 1f72bc47 2004-05-17 devnull if ((fp = fopen(unsharp(TEXHYPHENS), "r")) == NULL
470 1f72bc47 2004-05-17 devnull && (fp = fopen(unsharp(DWBalthyphens), "r")) == NULL) {
471 5cedca1b 2004-05-15 devnull ERROR "warning: can't find hyphen.tex" WARN;
472 5cedca1b 2004-05-15 devnull return 0;
473 5cedca1b 2004-05-15 devnull }
474 5cedca1b 2004-05-15 devnull
475 5cedca1b 2004-05-15 devnull while (fgets(buf, sizeof buf, fp) != NULL) {
476 5cedca1b 2004-05-15 devnull sscanf(buf, "%s", buf1);
477 5cedca1b 2004-05-15 devnull if (strcmp(buf1, "\\patterns{") == 0)
478 5cedca1b 2004-05-15 devnull break;
479 5cedca1b 2004-05-15 devnull }
480 5cedca1b 2004-05-15 devnull while (fgets(buf, sizeof buf, fp) != NULL) {
481 5cedca1b 2004-05-15 devnull if (buf[0] == '}')
482 5cedca1b 2004-05-15 devnull break;
483 5cedca1b 2004-05-15 devnull install(buf);
484 5cedca1b 2004-05-15 devnull }
485 5cedca1b 2004-05-15 devnull fclose(fp);
486 5cedca1b 2004-05-15 devnull fixup();
487 5cedca1b 2004-05-15 devnull return 1;
488 5cedca1b 2004-05-15 devnull }
489 5cedca1b 2004-05-15 devnull
490 5cedca1b 2004-05-15 devnull static void install(char *s) /* map ab4c5de to: 12 abcde \0 00405 \0 */
491 5cedca1b 2004-05-15 devnull {
492 5cedca1b 2004-05-15 devnull int npat, lastpat;
493 5cedca1b 2004-05-15 devnull char num[500], *onextpat = nextpat;
494 5cedca1b 2004-05-15 devnull
495 5cedca1b 2004-05-15 devnull num[0] = '0';
496 5cedca1b 2004-05-15 devnull *nextpat++ = ' '; /* fill in with count later */
497 5cedca1b 2004-05-15 devnull for (npat = lastpat = 0; *s != '\n' && *s != '\0'; s++) {
498 5cedca1b 2004-05-15 devnull if (isdigit(*s)) {
499 5cedca1b 2004-05-15 devnull num[npat] = *s;
500 5cedca1b 2004-05-15 devnull lastpat = npat;
501 5cedca1b 2004-05-15 devnull } else {
502 5cedca1b 2004-05-15 devnull *nextpat++ = *s;
503 5cedca1b 2004-05-15 devnull npat++;
504 5cedca1b 2004-05-15 devnull num[npat] = '0';
505 5cedca1b 2004-05-15 devnull }
506 5cedca1b 2004-05-15 devnull }
507 5cedca1b 2004-05-15 devnull *nextpat++ = 0;
508 5cedca1b 2004-05-15 devnull if (nextpat > pats + sizeof(pats)-20) {
509 5cedca1b 2004-05-15 devnull ERROR "tex hyphenation table overflow, tail end ignored" WARN;
510 5cedca1b 2004-05-15 devnull nextpat = onextpat;
511 5cedca1b 2004-05-15 devnull }
512 5cedca1b 2004-05-15 devnull num[lastpat+1] = 0;
513 5cedca1b 2004-05-15 devnull strcat(nextpat, num);
514 5cedca1b 2004-05-15 devnull nextpat += strlen(nextpat) + 1;
515 5cedca1b 2004-05-15 devnull }
516 5cedca1b 2004-05-15 devnull
517 5cedca1b 2004-05-15 devnull static void fixup(void) /* build indexes of where . a b c ... start */
518 5cedca1b 2004-05-15 devnull {
519 5cedca1b 2004-05-15 devnull char *p, *lastc;
520 5cedca1b 2004-05-15 devnull int n;
521 5cedca1b 2004-05-15 devnull
522 5cedca1b 2004-05-15 devnull for (lastc = pats, p = pats+1; p < nextpat; p++)
523 5cedca1b 2004-05-15 devnull if (*p == ' ') {
524 5cedca1b 2004-05-15 devnull *lastc = p - lastc;
525 5cedca1b 2004-05-15 devnull lastc = p;
526 5cedca1b 2004-05-15 devnull }
527 5cedca1b 2004-05-15 devnull *lastc = p - lastc;
528 5cedca1b 2004-05-15 devnull for (p = pats+1; p < nextpat; ) {
529 5cedca1b 2004-05-15 devnull n = trieindex(p[0], p[1]);
530 5cedca1b 2004-05-15 devnull if (trie[n] == 0)
531 5cedca1b 2004-05-15 devnull trie[n] = p;
532 5cedca1b 2004-05-15 devnull p += p[-1];
533 5cedca1b 2004-05-15 devnull }
534 5cedca1b 2004-05-15 devnull /* printf("pats = %d\n", nextpat - pats); */
535 5cedca1b 2004-05-15 devnull }
536 5cedca1b 2004-05-15 devnull
/*
 * trieindex: map a character pair to its slot in trie[27*27].
 *
 * Each character contributes 1..26 for 'a'..'z' and 0 for '.'.
 * Any other character also contributes 0, so the result is always a
 * valid index (0 .. 27*27-1) whatever the input.
 */
static int trieindex(int d1, int d2)
{
	int hi = (d1 >= 'a' && d1 <= 'z') ? d1 - 'a' + 1 : 0;
	int lo = (d2 >= 'a' && d2 <= 'z') ? d2 - 'a' + 1 : 0;

	return 27 * hi + lo;
}
540 5cedca1b 2004-05-15 devnull }