Blame


1 74374cc8 2005-10-31 devnull #include <u.h>
2 5cedca1b 2004-05-15 devnull #include "tdef.h"
3 5cedca1b 2004-05-15 devnull #include "fns.h"
4 5cedca1b 2004-05-15 devnull #include "ext.h"
5 5cedca1b 2004-05-15 devnull
6 5cedca1b 2004-05-15 devnull #define HY_BIT 0200 /* stuff in here only works for 7-bit ascii */
7 5cedca1b 2004-05-15 devnull /* this value is used (as a literal) in suftab.c */
8 5cedca1b 2004-05-15 devnull /* to encode possible hyphenation points in suffixes. */
9 5cedca1b 2004-05-15 devnull /* it could be changed, by widening the tables */
10 5cedca1b 2004-05-15 devnull /* to be shorts instead of chars. */
11 5cedca1b 2004-05-15 devnull
12 5cedca1b 2004-05-15 devnull /*
13 5cedca1b 2004-05-15 devnull * troff8.c
14 fa325e9b 2020-01-10 cross *
15 5cedca1b 2004-05-15 devnull * hyphenation
16 5cedca1b 2004-05-15 devnull */
17 5cedca1b 2004-05-15 devnull
18 5cedca1b 2004-05-15 devnull int hexsize = 0; /* hyphenation exception list size */
19 5cedca1b 2004-05-15 devnull char *hbufp = NULL; /* base of list */
20 5cedca1b 2004-05-15 devnull char *nexth = NULL; /* first free slot in list */
21 5cedca1b 2004-05-15 devnull Tchar *hyend;
22 5cedca1b 2004-05-15 devnull
23 5cedca1b 2004-05-15 devnull #define THRESH 160 /* digram goodness threshold */
24 5cedca1b 2004-05-15 devnull int thresh = THRESH;
25 5cedca1b 2004-05-15 devnull
26 5cedca1b 2004-05-15 devnull int texhyphen(void);
27 5cedca1b 2004-05-15 devnull static int alpha(Tchar);
28 5cedca1b 2004-05-15 devnull
29 5cedca1b 2004-05-15 devnull void hyphen(Tchar *wp)
30 5cedca1b 2004-05-15 devnull {
31 5cedca1b 2004-05-15 devnull int j;
32 5cedca1b 2004-05-15 devnull Tchar *i;
33 5cedca1b 2004-05-15 devnull
34 5cedca1b 2004-05-15 devnull i = wp;
35 5cedca1b 2004-05-15 devnull while (punct((*i++)))
36 5cedca1b 2004-05-15 devnull ;
37 5cedca1b 2004-05-15 devnull if (!alpha(*--i))
38 5cedca1b 2004-05-15 devnull return;
39 5cedca1b 2004-05-15 devnull wdstart = i++;
40 5cedca1b 2004-05-15 devnull while (alpha(*i++))
41 5cedca1b 2004-05-15 devnull ;
42 5cedca1b 2004-05-15 devnull hyend = wdend = --i - 1;
43 5cedca1b 2004-05-15 devnull while (punct((*i++)))
44 5cedca1b 2004-05-15 devnull ;
45 5cedca1b 2004-05-15 devnull if (*--i)
46 5cedca1b 2004-05-15 devnull return;
47 5cedca1b 2004-05-15 devnull if (wdend - wdstart < 4) /* 4 chars is too short to hyphenate */
48 5cedca1b 2004-05-15 devnull return;
49 5cedca1b 2004-05-15 devnull hyp = hyptr;
50 5cedca1b 2004-05-15 devnull *hyp = 0;
51 5cedca1b 2004-05-15 devnull hyoff = 2;
52 5cedca1b 2004-05-15 devnull
53 5cedca1b 2004-05-15 devnull /* for now, try exceptions first, then tex (if hyphalg is non-zero),
54 5cedca1b 2004-05-15 devnull then suffix and digram if tex didn't hyphenate it at all.
55 5cedca1b 2004-05-15 devnull */
56 5cedca1b 2004-05-15 devnull
57 5cedca1b 2004-05-15 devnull if (!exword() && !texhyphen() && !suffix())
58 5cedca1b 2004-05-15 devnull digram();
59 5cedca1b 2004-05-15 devnull
60 5cedca1b 2004-05-15 devnull /* this appears to sort hyphenation points into increasing order */
61 5cedca1b 2004-05-15 devnull *hyp++ = 0;
62 fa325e9b 2020-01-10 cross if (*hyptr)
63 5cedca1b 2004-05-15 devnull for (j = 1; j; ) {
64 5cedca1b 2004-05-15 devnull j = 0;
65 5cedca1b 2004-05-15 devnull for (hyp = hyptr + 1; *hyp != 0; hyp++) {
66 5cedca1b 2004-05-15 devnull if (*(hyp - 1) > *hyp) {
67 5cedca1b 2004-05-15 devnull j++;
68 5cedca1b 2004-05-15 devnull i = *hyp;
69 5cedca1b 2004-05-15 devnull *hyp = *(hyp - 1);
70 5cedca1b 2004-05-15 devnull *(hyp - 1) = i;
71 5cedca1b 2004-05-15 devnull }
72 5cedca1b 2004-05-15 devnull }
73 5cedca1b 2004-05-15 devnull }
74 5cedca1b 2004-05-15 devnull }
75 5cedca1b 2004-05-15 devnull
76 c5561c23 2004-05-16 devnull static int alpha(Tchar i) /* non-zero if really alphabetic */
77 5cedca1b 2004-05-15 devnull {
78 5cedca1b 2004-05-15 devnull if (ismot(i))
79 5cedca1b 2004-05-15 devnull return 0;
80 5cedca1b 2004-05-15 devnull else if (cbits(i) >= ALPHABET) /* this isn't very elegant, but there's */
81 5cedca1b 2004-05-15 devnull return 0; /* no good way to make sure i is in range for */
82 5cedca1b 2004-05-15 devnull else /* the call of isalpha */
83 5cedca1b 2004-05-15 devnull return isalpha(cbits(i));
84 5cedca1b 2004-05-15 devnull }
85 5cedca1b 2004-05-15 devnull
86 c5561c23 2004-05-16 devnull int
87 5cedca1b 2004-05-15 devnull punct(Tchar i)
88 5cedca1b 2004-05-15 devnull {
89 5cedca1b 2004-05-15 devnull if (!i || alpha(i))
90 5cedca1b 2004-05-15 devnull return(0);
91 5cedca1b 2004-05-15 devnull else
92 5cedca1b 2004-05-15 devnull return(1);
93 5cedca1b 2004-05-15 devnull }
94 5cedca1b 2004-05-15 devnull
95 5cedca1b 2004-05-15 devnull
96 5cedca1b 2004-05-15 devnull void caseha(void) /* set hyphenation algorithm */
97 5cedca1b 2004-05-15 devnull {
98 5cedca1b 2004-05-15 devnull hyphalg = HYPHALG;
99 5cedca1b 2004-05-15 devnull if (skip())
100 5cedca1b 2004-05-15 devnull return;
101 5cedca1b 2004-05-15 devnull noscale++;
102 5cedca1b 2004-05-15 devnull hyphalg = atoi0();
103 5cedca1b 2004-05-15 devnull noscale = 0;
104 5cedca1b 2004-05-15 devnull }
105 5cedca1b 2004-05-15 devnull
106 5cedca1b 2004-05-15 devnull
107 5cedca1b 2004-05-15 devnull void caseht(void) /* set hyphenation threshold; not in manual! */
108 5cedca1b 2004-05-15 devnull {
109 5cedca1b 2004-05-15 devnull thresh = THRESH;
110 5cedca1b 2004-05-15 devnull if (skip())
111 5cedca1b 2004-05-15 devnull return;
112 5cedca1b 2004-05-15 devnull noscale++;
113 5cedca1b 2004-05-15 devnull thresh = atoi0();
114 5cedca1b 2004-05-15 devnull noscale = 0;
115 5cedca1b 2004-05-15 devnull }
116 5cedca1b 2004-05-15 devnull
117 5cedca1b 2004-05-15 devnull
118 5cedca1b 2004-05-15 devnull char *growh(char *where)
119 5cedca1b 2004-05-15 devnull {
120 5cedca1b 2004-05-15 devnull char *new;
121 5cedca1b 2004-05-15 devnull
122 5cedca1b 2004-05-15 devnull hexsize += NHEX;
123 5cedca1b 2004-05-15 devnull if ((new = grow(hbufp, hexsize, sizeof(char))) == NULL)
124 5cedca1b 2004-05-15 devnull return NULL;
125 5cedca1b 2004-05-15 devnull if (new == hbufp) {
126 5cedca1b 2004-05-15 devnull return where;
127 5cedca1b 2004-05-15 devnull } else {
128 5cedca1b 2004-05-15 devnull int diff;
129 5cedca1b 2004-05-15 devnull diff = where - hbufp;
130 5cedca1b 2004-05-15 devnull hbufp = new;
131 5cedca1b 2004-05-15 devnull return new + diff;
132 5cedca1b 2004-05-15 devnull }
133 5cedca1b 2004-05-15 devnull }
134 5cedca1b 2004-05-15 devnull
135 5cedca1b 2004-05-15 devnull
136 5cedca1b 2004-05-15 devnull void casehw(void)
137 5cedca1b 2004-05-15 devnull {
138 5cedca1b 2004-05-15 devnull int i, k;
139 5cedca1b 2004-05-15 devnull char *j;
140 5cedca1b 2004-05-15 devnull Tchar t;
141 5cedca1b 2004-05-15 devnull
142 5cedca1b 2004-05-15 devnull if (nexth == NULL) {
143 5cedca1b 2004-05-15 devnull if ((nexth = hbufp = grow(hbufp, NHEX, sizeof(char))) == NULL) {
144 5cedca1b 2004-05-15 devnull ERROR "No space for exception word list." WARN;
145 5cedca1b 2004-05-15 devnull return;
146 5cedca1b 2004-05-15 devnull }
147 5cedca1b 2004-05-15 devnull hexsize = NHEX;
148 5cedca1b 2004-05-15 devnull }
149 5cedca1b 2004-05-15 devnull k = 0;
150 5cedca1b 2004-05-15 devnull while (!skip()) {
151 5cedca1b 2004-05-15 devnull if ((j = nexth) >= hbufp + hexsize - 2)
152 5cedca1b 2004-05-15 devnull if ((j = nexth = growh(j)) == NULL)
153 5cedca1b 2004-05-15 devnull goto full;
154 5cedca1b 2004-05-15 devnull for (;;) {
155 5cedca1b 2004-05-15 devnull if (ismot(t = getch()))
156 5cedca1b 2004-05-15 devnull continue;
157 5cedca1b 2004-05-15 devnull i = cbits(t);
158 5cedca1b 2004-05-15 devnull if (i == ' ' || i == '\n') {
159 5cedca1b 2004-05-15 devnull *j++ = 0;
160 5cedca1b 2004-05-15 devnull nexth = j;
161 5cedca1b 2004-05-15 devnull *j = 0;
162 5cedca1b 2004-05-15 devnull if (i == ' ')
163 5cedca1b 2004-05-15 devnull break;
164 5cedca1b 2004-05-15 devnull else
165 5cedca1b 2004-05-15 devnull return;
166 5cedca1b 2004-05-15 devnull }
167 5cedca1b 2004-05-15 devnull if (i == '-') {
168 5cedca1b 2004-05-15 devnull k = HY_BIT;
169 5cedca1b 2004-05-15 devnull continue;
170 5cedca1b 2004-05-15 devnull }
171 5cedca1b 2004-05-15 devnull *j++ = maplow(i) | k;
172 5cedca1b 2004-05-15 devnull k = 0;
173 5cedca1b 2004-05-15 devnull if (j >= hbufp + hexsize - 2)
174 5cedca1b 2004-05-15 devnull if ((j = growh(j)) == NULL)
175 5cedca1b 2004-05-15 devnull goto full;
176 5cedca1b 2004-05-15 devnull }
177 5cedca1b 2004-05-15 devnull }
178 5cedca1b 2004-05-15 devnull return;
179 5cedca1b 2004-05-15 devnull full:
180 5cedca1b 2004-05-15 devnull ERROR "Cannot grow exception word list." WARN;
181 5cedca1b 2004-05-15 devnull *nexth = 0;
182 5cedca1b 2004-05-15 devnull }
183 5cedca1b 2004-05-15 devnull
184 5cedca1b 2004-05-15 devnull
185 5cedca1b 2004-05-15 devnull int exword(void)
186 5cedca1b 2004-05-15 devnull {
187 5cedca1b 2004-05-15 devnull Tchar *w;
188 5cedca1b 2004-05-15 devnull char *e, *save;
189 5cedca1b 2004-05-15 devnull
190 5cedca1b 2004-05-15 devnull e = hbufp;
191 5cedca1b 2004-05-15 devnull while (1) {
192 5cedca1b 2004-05-15 devnull save = e;
193 5cedca1b 2004-05-15 devnull if (e == NULL || *e == 0)
194 5cedca1b 2004-05-15 devnull return(0);
195 5cedca1b 2004-05-15 devnull w = wdstart;
196 5cedca1b 2004-05-15 devnull while (*e && w <= hyend && (*e & 0177) == maplow(cbits(*w))) {
197 fa325e9b 2020-01-10 cross e++;
198 5cedca1b 2004-05-15 devnull w++;
199 5cedca1b 2004-05-15 devnull }
200 5cedca1b 2004-05-15 devnull if (!*e) {
201 5cedca1b 2004-05-15 devnull if (w-1 == hyend || (w == wdend && maplow(cbits(*w)) == 's')) {
202 5cedca1b 2004-05-15 devnull w = wdstart;
203 5cedca1b 2004-05-15 devnull for (e = save; *e; e++) {
204 5cedca1b 2004-05-15 devnull if (*e & HY_BIT)
205 5cedca1b 2004-05-15 devnull *hyp++ = w;
206 5cedca1b 2004-05-15 devnull if (hyp > hyptr + NHYP - 1)
207 5cedca1b 2004-05-15 devnull hyp = hyptr + NHYP - 1;
208 5cedca1b 2004-05-15 devnull w++;
209 5cedca1b 2004-05-15 devnull }
210 5cedca1b 2004-05-15 devnull return(1);
211 5cedca1b 2004-05-15 devnull } else {
212 fa325e9b 2020-01-10 cross e++;
213 5cedca1b 2004-05-15 devnull continue;
214 5cedca1b 2004-05-15 devnull }
215 fa325e9b 2020-01-10 cross } else
216 5cedca1b 2004-05-15 devnull while (*e++)
217 5cedca1b 2004-05-15 devnull ;
218 5cedca1b 2004-05-15 devnull }
219 5cedca1b 2004-05-15 devnull }
220 5cedca1b 2004-05-15 devnull
221 c5561c23 2004-05-16 devnull int
222 5cedca1b 2004-05-15 devnull suffix(void)
223 5cedca1b 2004-05-15 devnull {
224 5cedca1b 2004-05-15 devnull Tchar *w;
225 5cedca1b 2004-05-15 devnull char *s, *s0;
226 5cedca1b 2004-05-15 devnull Tchar i;
227 5cedca1b 2004-05-15 devnull extern char *suftab[];
228 5cedca1b 2004-05-15 devnull
229 5cedca1b 2004-05-15 devnull again:
230 5cedca1b 2004-05-15 devnull i = cbits(*hyend);
231 5cedca1b 2004-05-15 devnull if (!alpha(i))
232 5cedca1b 2004-05-15 devnull return(0);
233 5cedca1b 2004-05-15 devnull if (i < 'a')
234 5cedca1b 2004-05-15 devnull i -= 'A' - 'a';
235 5cedca1b 2004-05-15 devnull if ((s0 = suftab[i-'a']) == 0)
236 5cedca1b 2004-05-15 devnull return(0);
237 5cedca1b 2004-05-15 devnull for (;;) {
238 5cedca1b 2004-05-15 devnull if ((i = *s0 & 017) == 0)
239 5cedca1b 2004-05-15 devnull return(0);
240 5cedca1b 2004-05-15 devnull s = s0 + i - 1;
241 5cedca1b 2004-05-15 devnull w = hyend - 1;
242 5cedca1b 2004-05-15 devnull while (s > s0 && w >= wdstart && (*s & 0177) == maplow(cbits(*w))) {
243 5cedca1b 2004-05-15 devnull s--;
244 5cedca1b 2004-05-15 devnull w--;
245 5cedca1b 2004-05-15 devnull }
246 5cedca1b 2004-05-15 devnull if (s == s0)
247 5cedca1b 2004-05-15 devnull break;
248 5cedca1b 2004-05-15 devnull s0 += i;
249 5cedca1b 2004-05-15 devnull }
250 5cedca1b 2004-05-15 devnull s = s0 + i - 1;
251 5cedca1b 2004-05-15 devnull w = hyend;
252 fa325e9b 2020-01-10 cross if (*s0 & HY_BIT)
253 5cedca1b 2004-05-15 devnull goto mark;
254 5cedca1b 2004-05-15 devnull while (s > s0) {
255 5cedca1b 2004-05-15 devnull w--;
256 5cedca1b 2004-05-15 devnull if (*s-- & HY_BIT) {
257 5cedca1b 2004-05-15 devnull mark:
258 5cedca1b 2004-05-15 devnull hyend = w - 1;
259 5cedca1b 2004-05-15 devnull if (*s0 & 0100) /* 0100 used in suftab to encode something too */
260 5cedca1b 2004-05-15 devnull continue;
261 5cedca1b 2004-05-15 devnull if (!chkvow(w))
262 5cedca1b 2004-05-15 devnull return(0);
263 5cedca1b 2004-05-15 devnull *hyp++ = w;
264 5cedca1b 2004-05-15 devnull }
265 5cedca1b 2004-05-15 devnull }
266 5cedca1b 2004-05-15 devnull if (*s0 & 040)
267 5cedca1b 2004-05-15 devnull return(0);
268 5cedca1b 2004-05-15 devnull if (exword())
269 5cedca1b 2004-05-15 devnull return(1);
270 5cedca1b 2004-05-15 devnull goto again;
271 5cedca1b 2004-05-15 devnull }
272 5cedca1b 2004-05-15 devnull
/*
 * maplow: return i folded to lower case.
 *
 * Values outside 0..0377 are returned unchanged without consulting
 * <ctype.h>: the ctype functions are undefined for arguments that do
 * not fit in an unsigned char, and callers pass arbitrary character
 * codes.
 */
int
maplow(int i)
{
	if (i >= 0 && i <= 0377 && isupper(i))
		i = tolower(i);
	return(i);
}
280 5cedca1b 2004-05-15 devnull
/*
 * vowel: 1 if i is one of a e i o u y in either case, else 0.
 */
int
vowel(int i)
{
	static const char vowels[] = "aeiouyAEIOUY";
	const char *v;

	for (v = vowels; *v != '\0'; v++)
		if (i == *v)
			return(1);
	return(0);
}
296 5cedca1b 2004-05-15 devnull
297 5cedca1b 2004-05-15 devnull
298 5cedca1b 2004-05-15 devnull Tchar *chkvow(Tchar *w)
299 5cedca1b 2004-05-15 devnull {
300 5cedca1b 2004-05-15 devnull while (--w >= wdstart)
301 5cedca1b 2004-05-15 devnull if (vowel(cbits(*w)))
302 5cedca1b 2004-05-15 devnull return(w);
303 5cedca1b 2004-05-15 devnull return(0);
304 5cedca1b 2004-05-15 devnull }
305 5cedca1b 2004-05-15 devnull
306 5cedca1b 2004-05-15 devnull
307 5cedca1b 2004-05-15 devnull void digram(void)
308 5cedca1b 2004-05-15 devnull {
309 5cedca1b 2004-05-15 devnull Tchar *w;
310 5cedca1b 2004-05-15 devnull int val;
311 5cedca1b 2004-05-15 devnull Tchar *nhyend, *maxw;
312 5cedca1b 2004-05-15 devnull int maxval;
313 5cedca1b 2004-05-15 devnull extern char bxh[26][13], bxxh[26][13], xxh[26][13], xhx[26][13], hxx[26][13];
314 c5561c23 2004-05-16 devnull maxw = 0;
315 5cedca1b 2004-05-15 devnull again:
316 5cedca1b 2004-05-15 devnull if (!(w = chkvow(hyend + 1)))
317 5cedca1b 2004-05-15 devnull return;
318 5cedca1b 2004-05-15 devnull hyend = w;
319 5cedca1b 2004-05-15 devnull if (!(w = chkvow(hyend)))
320 5cedca1b 2004-05-15 devnull return;
321 5cedca1b 2004-05-15 devnull nhyend = w;
322 5cedca1b 2004-05-15 devnull maxval = 0;
323 5cedca1b 2004-05-15 devnull w--;
324 5cedca1b 2004-05-15 devnull while (++w < hyend && w < wdend - 1) {
325 5cedca1b 2004-05-15 devnull val = 1;
326 5cedca1b 2004-05-15 devnull if (w == wdstart)
327 5cedca1b 2004-05-15 devnull val *= dilook('a', cbits(*w), bxh);
328 5cedca1b 2004-05-15 devnull else if (w == wdstart + 1)
329 5cedca1b 2004-05-15 devnull val *= dilook(cbits(*(w-1)), cbits(*w), bxxh);
330 fa325e9b 2020-01-10 cross else
331 5cedca1b 2004-05-15 devnull val *= dilook(cbits(*(w-1)), cbits(*w), xxh);
332 5cedca1b 2004-05-15 devnull val *= dilook(cbits(*w), cbits(*(w+1)), xhx);
333 5cedca1b 2004-05-15 devnull val *= dilook(cbits(*(w+1)), cbits(*(w+2)), hxx);
334 5cedca1b 2004-05-15 devnull if (val > maxval) {
335 5cedca1b 2004-05-15 devnull maxval = val;
336 5cedca1b 2004-05-15 devnull maxw = w + 1;
337 5cedca1b 2004-05-15 devnull }
338 5cedca1b 2004-05-15 devnull }
339 5cedca1b 2004-05-15 devnull hyend = nhyend;
340 5cedca1b 2004-05-15 devnull if (maxval > thresh)
341 5cedca1b 2004-05-15 devnull *hyp++ = maxw;
342 5cedca1b 2004-05-15 devnull goto again;
343 5cedca1b 2004-05-15 devnull }
344 5cedca1b 2004-05-15 devnull
/*
 * dilook: look up the 4-bit score for the letter pair (a, b) in t.
 *
 * t has one row per first letter; each byte of a row packs two scores,
 * the even-numbered second letter in the high nibble and the
 * odd-numbered one in the low nibble.  Case is ignored.  A pair
 * containing anything other than a-z scores 0 instead of indexing
 * outside the table.
 */
int
dilook(int a, int b, char t[26][13])
{
	int row, col, packed;

	row = maplow(a) - 'a';
	col = maplow(b) - 'a';
	if (row < 0 || row >= 26 || col < 0 || col >= 26)
		return(0);
	/* read as unsigned so the shift below never sees a negative value */
	packed = (unsigned char)t[row][col / 2];
	if (!(col & 01))
		packed >>= 4;
	return(packed & 017);
}
355 5cedca1b 2004-05-15 devnull
356 5cedca1b 2004-05-15 devnull
357 5cedca1b 2004-05-15 devnull /* here beginneth the tex hyphenation code, as interpreted freely */
358 5cedca1b 2004-05-15 devnull /* the main difference is that there is no attempt to squeeze space */
359 5cedca1b 2004-05-15 devnull /* as tightly at tex does. */
360 5cedca1b 2004-05-15 devnull
361 5cedca1b 2004-05-15 devnull static int texit(Tchar *, Tchar *);
362 5cedca1b 2004-05-15 devnull static int readpats(void);
363 5cedca1b 2004-05-15 devnull static void install(char *);
364 5cedca1b 2004-05-15 devnull static void fixup(void);
365 5cedca1b 2004-05-15 devnull static int trieindex(int, int);
366 5cedca1b 2004-05-15 devnull
367 5cedca1b 2004-05-15 devnull static char pats[50000]; /* size ought to be computed dynamically */
368 5cedca1b 2004-05-15 devnull static char *nextpat = pats;
369 5cedca1b 2004-05-15 devnull static char *trie[27*27]; /* english-specific sizes */
370 5cedca1b 2004-05-15 devnull
371 5cedca1b 2004-05-15 devnull int texhyphen(void)
372 5cedca1b 2004-05-15 devnull {
373 5cedca1b 2004-05-15 devnull static int loaded = 0; /* -1: couldn't find tex file */
374 5cedca1b 2004-05-15 devnull
375 5cedca1b 2004-05-15 devnull if (hyphalg == 0 || loaded == -1) /* non-zero => tex for now */
376 5cedca1b 2004-05-15 devnull return 0;
377 5cedca1b 2004-05-15 devnull if (loaded == 0) {
378 5cedca1b 2004-05-15 devnull if (readpats())
379 5cedca1b 2004-05-15 devnull loaded = 1;
380 5cedca1b 2004-05-15 devnull else
381 5cedca1b 2004-05-15 devnull loaded = -1;
382 5cedca1b 2004-05-15 devnull }
383 5cedca1b 2004-05-15 devnull return texit(wdstart, wdend);
384 5cedca1b 2004-05-15 devnull }
385 5cedca1b 2004-05-15 devnull
386 5cedca1b 2004-05-15 devnull static int texit(Tchar *start, Tchar *end) /* hyphenate as in tex, return # found */
387 5cedca1b 2004-05-15 devnull {
388 5cedca1b 2004-05-15 devnull int nw, i, k, equal, cnt[500];
389 5cedca1b 2004-05-15 devnull char w[500+1], *np, *pp, *wp, *xpp, *xwp;
390 5cedca1b 2004-05-15 devnull
391 5cedca1b 2004-05-15 devnull w[0] = '.';
392 5cedca1b 2004-05-15 devnull for (nw = 1; start <= end && nw < 500-1; nw++, start++)
393 5cedca1b 2004-05-15 devnull w[nw] = maplow(tolower(cbits(*start)));
394 5cedca1b 2004-05-15 devnull start -= (nw - 1);
395 5cedca1b 2004-05-15 devnull w[nw++] = '.';
396 5cedca1b 2004-05-15 devnull w[nw] = 0;
397 5cedca1b 2004-05-15 devnull /*
398 5cedca1b 2004-05-15 devnull * printf("try %s\n", w);
399 5cedca1b 2004-05-15 devnull */
400 5cedca1b 2004-05-15 devnull for (i = 0; i <= nw; i++)
401 5cedca1b 2004-05-15 devnull cnt[i] = '0';
402 5cedca1b 2004-05-15 devnull
403 4198bd0e 2007-05-08 devnull for (wp = w; wp+1 < w+nw; wp++) {
404 5cedca1b 2004-05-15 devnull for (pp = trie[trieindex(*wp, *(wp+1))]; pp < nextpat; ) {
405 5cedca1b 2004-05-15 devnull if (pp == 0 /* no trie entry */
406 5cedca1b 2004-05-15 devnull || *pp != *wp /* no match on 1st letter */
407 5cedca1b 2004-05-15 devnull || *(pp+1) != *(wp+1)) /* no match on 2nd letter */
408 5cedca1b 2004-05-15 devnull break; /* so move to next letter of word */
409 5cedca1b 2004-05-15 devnull equal = 1;
410 5cedca1b 2004-05-15 devnull for (xpp = pp+2, xwp = wp+2; *xpp; )
411 5cedca1b 2004-05-15 devnull if (*xpp++ != *xwp++) {
412 5cedca1b 2004-05-15 devnull equal = 0;
413 5cedca1b 2004-05-15 devnull break;
414 5cedca1b 2004-05-15 devnull }
415 5cedca1b 2004-05-15 devnull if (equal) {
416 5cedca1b 2004-05-15 devnull np = xpp+1; /* numpat */
417 5cedca1b 2004-05-15 devnull for (k = wp-w; *np; k++, np++)
418 5cedca1b 2004-05-15 devnull if (*np > cnt[k])
419 5cedca1b 2004-05-15 devnull cnt[k] = *np;
420 5cedca1b 2004-05-15 devnull /*
421 5cedca1b 2004-05-15 devnull * printf("match: %s %s\n", pp, xpp+1);
422 5cedca1b 2004-05-15 devnull */
423 5cedca1b 2004-05-15 devnull }
424 5cedca1b 2004-05-15 devnull pp += *(pp-1); /* skip over pattern and numbers to next */
425 5cedca1b 2004-05-15 devnull }
426 5cedca1b 2004-05-15 devnull }
427 5cedca1b 2004-05-15 devnull /*
428 5cedca1b 2004-05-15 devnull * for (i = 0; i < nw; i++) printf("%c", w[i]);
429 5cedca1b 2004-05-15 devnull * printf(" ");
430 5cedca1b 2004-05-15 devnull * for (i = 0; i <= nw; i++) printf("%c", cnt[i]);
431 5cedca1b 2004-05-15 devnull * printf("\n");
432 5cedca1b 2004-05-15 devnull */
433 5cedca1b 2004-05-15 devnull /*
434 5cedca1b 2004-05-15 devnull * for (i = 1; i < nw - 1; i++) {
435 5cedca1b 2004-05-15 devnull * if (i > 2 && i < nw - 3 && cnt[i] % 2)
436 5cedca1b 2004-05-15 devnull * printf("-");
437 5cedca1b 2004-05-15 devnull * if (cbits(start[i-1]) != '.')
438 5cedca1b 2004-05-15 devnull * printf("%c", cbits(start[i-1]));
439 5cedca1b 2004-05-15 devnull * }
440 5cedca1b 2004-05-15 devnull * printf("\n");
441 5cedca1b 2004-05-15 devnull */
442 5cedca1b 2004-05-15 devnull for (i = 1; i < nw -1; i++)
443 5cedca1b 2004-05-15 devnull if (i > 2 && i < nw - 3 && cnt[i] % 2)
444 5cedca1b 2004-05-15 devnull *hyp++ = start + i - 1;
445 5cedca1b 2004-05-15 devnull return hyp - hyptr; /* non-zero if a hyphen was found */
446 5cedca1b 2004-05-15 devnull }
447 5cedca1b 2004-05-15 devnull
448 5cedca1b 2004-05-15 devnull /*
449 5cedca1b 2004-05-15 devnull This code assumes that hyphen.tex looks like
450 5cedca1b 2004-05-15 devnull % some comments
451 5cedca1b 2004-05-15 devnull \patterns{ % more comments
452 5cedca1b 2004-05-15 devnull pat5ter4ns, 1 per line, SORTED, nothing else
453 5cedca1b 2004-05-15 devnull }
454 5cedca1b 2004-05-15 devnull more goo
455 5cedca1b 2004-05-15 devnull \hyphenation{ % more comments
456 5cedca1b 2004-05-15 devnull ex-cep-tions, one per line; i ignore this part for now
457 5cedca1b 2004-05-15 devnull }
458 5cedca1b 2004-05-15 devnull
459 5cedca1b 2004-05-15 devnull this code is NOT robust against variations. unfortunately,
460 5cedca1b 2004-05-15 devnull it looks like every local language version of this file has
461 5cedca1b 2004-05-15 devnull a different format. i have also made no provision for weird
462 5cedca1b 2004-05-15 devnull characters. sigh.
463 5cedca1b 2004-05-15 devnull */
464 c5561c23 2004-05-16 devnull
465 5cedca1b 2004-05-15 devnull static int readpats(void)
466 5cedca1b 2004-05-15 devnull {
467 5cedca1b 2004-05-15 devnull FILE *fp;
468 5cedca1b 2004-05-15 devnull char buf[200], buf1[200];
469 5cedca1b 2004-05-15 devnull
470 1f72bc47 2004-05-17 devnull if ((fp = fopen(unsharp(TEXHYPHENS), "r")) == NULL
471 1f72bc47 2004-05-17 devnull && (fp = fopen(unsharp(DWBalthyphens), "r")) == NULL) {
472 5cedca1b 2004-05-15 devnull ERROR "warning: can't find hyphen.tex" WARN;
473 5cedca1b 2004-05-15 devnull return 0;
474 5cedca1b 2004-05-15 devnull }
475 5cedca1b 2004-05-15 devnull
476 5cedca1b 2004-05-15 devnull while (fgets(buf, sizeof buf, fp) != NULL) {
477 5cedca1b 2004-05-15 devnull sscanf(buf, "%s", buf1);
478 5cedca1b 2004-05-15 devnull if (strcmp(buf1, "\\patterns{") == 0)
479 5cedca1b 2004-05-15 devnull break;
480 5cedca1b 2004-05-15 devnull }
481 5cedca1b 2004-05-15 devnull while (fgets(buf, sizeof buf, fp) != NULL) {
482 5cedca1b 2004-05-15 devnull if (buf[0] == '}')
483 5cedca1b 2004-05-15 devnull break;
484 5cedca1b 2004-05-15 devnull install(buf);
485 5cedca1b 2004-05-15 devnull }
486 5cedca1b 2004-05-15 devnull fclose(fp);
487 5cedca1b 2004-05-15 devnull fixup();
488 5cedca1b 2004-05-15 devnull return 1;
489 5cedca1b 2004-05-15 devnull }
490 5cedca1b 2004-05-15 devnull
491 5cedca1b 2004-05-15 devnull static void install(char *s) /* map ab4c5de to: 12 abcde \0 00405 \0 */
492 5cedca1b 2004-05-15 devnull {
493 5cedca1b 2004-05-15 devnull int npat, lastpat;
494 5cedca1b 2004-05-15 devnull char num[500], *onextpat = nextpat;
495 5cedca1b 2004-05-15 devnull
496 5cedca1b 2004-05-15 devnull num[0] = '0';
497 5cedca1b 2004-05-15 devnull *nextpat++ = ' '; /* fill in with count later */
498 5cedca1b 2004-05-15 devnull for (npat = lastpat = 0; *s != '\n' && *s != '\0'; s++) {
499 74374cc8 2005-10-31 devnull if (isdigit((uchar)*s)) {
500 5cedca1b 2004-05-15 devnull num[npat] = *s;
501 5cedca1b 2004-05-15 devnull lastpat = npat;
502 5cedca1b 2004-05-15 devnull } else {
503 5cedca1b 2004-05-15 devnull *nextpat++ = *s;
504 5cedca1b 2004-05-15 devnull npat++;
505 5cedca1b 2004-05-15 devnull num[npat] = '0';
506 5cedca1b 2004-05-15 devnull }
507 5cedca1b 2004-05-15 devnull }
508 5cedca1b 2004-05-15 devnull *nextpat++ = 0;
509 5cedca1b 2004-05-15 devnull if (nextpat > pats + sizeof(pats)-20) {
510 5cedca1b 2004-05-15 devnull ERROR "tex hyphenation table overflow, tail end ignored" WARN;
511 5cedca1b 2004-05-15 devnull nextpat = onextpat;
512 5cedca1b 2004-05-15 devnull }
513 5cedca1b 2004-05-15 devnull num[lastpat+1] = 0;
514 5cedca1b 2004-05-15 devnull strcat(nextpat, num);
515 5cedca1b 2004-05-15 devnull nextpat += strlen(nextpat) + 1;
516 5cedca1b 2004-05-15 devnull }
517 5cedca1b 2004-05-15 devnull
518 5cedca1b 2004-05-15 devnull static void fixup(void) /* build indexes of where . a b c ... start */
519 5cedca1b 2004-05-15 devnull {
520 5cedca1b 2004-05-15 devnull char *p, *lastc;
521 5cedca1b 2004-05-15 devnull int n;
522 5cedca1b 2004-05-15 devnull
523 5cedca1b 2004-05-15 devnull for (lastc = pats, p = pats+1; p < nextpat; p++)
524 5cedca1b 2004-05-15 devnull if (*p == ' ') {
525 5cedca1b 2004-05-15 devnull *lastc = p - lastc;
526 5cedca1b 2004-05-15 devnull lastc = p;
527 5cedca1b 2004-05-15 devnull }
528 5cedca1b 2004-05-15 devnull *lastc = p - lastc;
529 5cedca1b 2004-05-15 devnull for (p = pats+1; p < nextpat; ) {
530 5cedca1b 2004-05-15 devnull n = trieindex(p[0], p[1]);
531 5cedca1b 2004-05-15 devnull if (trie[n] == 0)
532 5cedca1b 2004-05-15 devnull trie[n] = p;
533 5cedca1b 2004-05-15 devnull p += p[-1];
534 5cedca1b 2004-05-15 devnull }
535 5cedca1b 2004-05-15 devnull /* printf("pats = %d\n", nextpat - pats); */
536 5cedca1b 2004-05-15 devnull }
537 5cedca1b 2004-05-15 devnull
/*
 * trieindex: slot in trie[] for a pattern starting with d1 d2.
 * '.' maps to 0 and 'a'..'z' to 1..26 in each position; the result is
 * asserted to lie inside the 27*27 table.
 */
static int trieindex(int d1, int d2)
{
	int row, col, z;

	row = (d1 == '.') ? 0 : d1 - 'a' + 1;
	col = (d2 == '.') ? 0 : d2 - 'a' + 1;
	z = 27 * row + col;
	assert(z >= 0 && z < 27*27);
	return z;
}