Blame


1 30d57c74 2005-12-27 devnull #include <u.h>
2 30d57c74 2005-12-27 devnull #include <libc.h>
3 30d57c74 2005-12-27 devnull #include <bio.h>
4 30d57c74 2005-12-27 devnull #include "hdr.h"
5 30d57c74 2005-12-27 devnull #include "conv.h"
6 30d57c74 2005-12-27 devnull
7 30d57c74 2005-12-27 devnull typedef struct Hchar Hchar;
8 30d57c74 2005-12-27 devnull struct Hchar
9 30d57c74 2005-12-27 devnull {
10 30d57c74 2005-12-27 devnull char *s;
11 30d57c74 2005-12-27 devnull Rune r;
12 30d57c74 2005-12-27 devnull };
13 30d57c74 2005-12-27 devnull
14 30d57c74 2005-12-27 devnull /* &lt;, &gt;, &quot;, &amp; intentionally omitted */
15 30d57c74 2005-12-27 devnull
16 30d57c74 2005-12-27 devnull static Hchar byname[] =
17 30d57c74 2005-12-27 devnull {
18 30d57c74 2005-12-27 devnull {"AElig", 198},
19 30d57c74 2005-12-27 devnull {"Aacute", 193},
20 30d57c74 2005-12-27 devnull {"Acirc", 194},
21 30d57c74 2005-12-27 devnull {"Agrave", 192},
22 30d57c74 2005-12-27 devnull {"Aring", 197},
23 30d57c74 2005-12-27 devnull {"Atilde", 195},
24 30d57c74 2005-12-27 devnull {"Auml", 196},
25 30d57c74 2005-12-27 devnull {"Ccedil", 199},
26 30d57c74 2005-12-27 devnull {"ETH", 208},
27 30d57c74 2005-12-27 devnull {"Eacute", 201},
28 30d57c74 2005-12-27 devnull {"Ecirc", 202},
29 30d57c74 2005-12-27 devnull {"Egrave", 200},
30 30d57c74 2005-12-27 devnull {"Euml", 203},
31 30d57c74 2005-12-27 devnull {"Iacute", 205},
32 30d57c74 2005-12-27 devnull {"Icirc", 206},
33 30d57c74 2005-12-27 devnull {"Igrave", 204},
34 30d57c74 2005-12-27 devnull {"Iuml", 207},
35 30d57c74 2005-12-27 devnull {"Ntilde", 209},
36 30d57c74 2005-12-27 devnull {"Oacute", 211},
37 30d57c74 2005-12-27 devnull {"Ocirc", 212},
38 30d57c74 2005-12-27 devnull {"Ograve", 210},
39 30d57c74 2005-12-27 devnull {"Oslash", 216},
40 30d57c74 2005-12-27 devnull {"Otilde", 213},
41 30d57c74 2005-12-27 devnull {"Ouml", 214},
42 30d57c74 2005-12-27 devnull {"THORN", 222},
43 30d57c74 2005-12-27 devnull {"Uacute", 218},
44 30d57c74 2005-12-27 devnull {"Ucirc", 219},
45 30d57c74 2005-12-27 devnull {"Ugrave", 217},
46 30d57c74 2005-12-27 devnull {"Uuml", 220},
47 30d57c74 2005-12-27 devnull {"Yacute", 221},
48 30d57c74 2005-12-27 devnull {"aacute", 225},
49 30d57c74 2005-12-27 devnull {"acirc", 226},
50 30d57c74 2005-12-27 devnull {"acute", 180},
51 30d57c74 2005-12-27 devnull {"aelig", 230},
52 30d57c74 2005-12-27 devnull {"agrave", 224},
53 30d57c74 2005-12-27 devnull {"alpha", 945},
54 30d57c74 2005-12-27 devnull {"aring", 229},
55 30d57c74 2005-12-27 devnull {"atilde", 227},
56 30d57c74 2005-12-27 devnull {"auml", 228},
57 30d57c74 2005-12-27 devnull {"beta", 946},
58 30d57c74 2005-12-27 devnull {"brvbar", 166},
59 30d57c74 2005-12-27 devnull {"ccedil", 231},
60 30d57c74 2005-12-27 devnull {"cdots", 8943},
61 30d57c74 2005-12-27 devnull {"cedil", 184},
62 30d57c74 2005-12-27 devnull {"cent", 162},
63 30d57c74 2005-12-27 devnull {"chi", 967},
64 30d57c74 2005-12-27 devnull {"copy", 169},
65 30d57c74 2005-12-27 devnull {"curren", 164},
66 30d57c74 2005-12-27 devnull {"ddots", 8945},
67 30d57c74 2005-12-27 devnull {"deg", 176},
68 30d57c74 2005-12-27 devnull {"delta", 948},
69 30d57c74 2005-12-27 devnull {"divide", 247},
70 30d57c74 2005-12-27 devnull {"eacute", 233},
71 30d57c74 2005-12-27 devnull {"ecirc", 234},
72 30d57c74 2005-12-27 devnull {"egrave", 232},
73 30d57c74 2005-12-27 devnull {"emdash", 8212}, /* non-standard but commonly used */
74 30d57c74 2005-12-27 devnull {"emsp", 8195},
75 30d57c74 2005-12-27 devnull {"endash", 8211}, /* non-standard but commonly used */
76 30d57c74 2005-12-27 devnull {"ensp", 8194},
77 30d57c74 2005-12-27 devnull {"epsilon", 949},
78 30d57c74 2005-12-27 devnull {"eta", 951},
79 30d57c74 2005-12-27 devnull {"eth", 240},
80 30d57c74 2005-12-27 devnull {"euml", 235},
81 30d57c74 2005-12-27 devnull {"frac12", 189},
82 30d57c74 2005-12-27 devnull {"frac14", 188},
83 30d57c74 2005-12-27 devnull {"frac34", 190},
84 30d57c74 2005-12-27 devnull {"gamma", 947},
85 30d57c74 2005-12-27 devnull {"iacute", 237},
86 30d57c74 2005-12-27 devnull {"icirc", 238},
87 30d57c74 2005-12-27 devnull {"iexcl", 161},
88 30d57c74 2005-12-27 devnull {"igrave", 236},
89 30d57c74 2005-12-27 devnull {"iota", 953},
90 30d57c74 2005-12-27 devnull {"iquest", 191},
91 30d57c74 2005-12-27 devnull {"iuml", 239},
92 30d57c74 2005-12-27 devnull {"kappa", 954},
93 30d57c74 2005-12-27 devnull {"lambda", 955},
94 30d57c74 2005-12-27 devnull {"laquo", 171},
95 30d57c74 2005-12-27 devnull {"ldquo", 8220},
96 30d57c74 2005-12-27 devnull {"ldots", 8230},
97 30d57c74 2005-12-27 devnull {"lsquo", 8216},
98 30d57c74 2005-12-27 devnull {"macr", 175},
99 30d57c74 2005-12-27 devnull {"mdash", 8212},
100 30d57c74 2005-12-27 devnull {"micro", 181},
101 30d57c74 2005-12-27 devnull {"middot", 183},
102 30d57c74 2005-12-27 devnull {"mu", 956},
103 30d57c74 2005-12-27 devnull {"nbsp", 160},
104 30d57c74 2005-12-27 devnull {"ndash", 8211},
105 30d57c74 2005-12-27 devnull {"not", 172},
106 30d57c74 2005-12-27 devnull {"ntilde", 241},
107 30d57c74 2005-12-27 devnull {"nu", 957},
108 30d57c74 2005-12-27 devnull {"oacute", 243},
109 30d57c74 2005-12-27 devnull {"ocirc", 244},
110 30d57c74 2005-12-27 devnull {"ograve", 242},
111 30d57c74 2005-12-27 devnull {"omega", 969},
112 30d57c74 2005-12-27 devnull {"omicron", 959},
113 30d57c74 2005-12-27 devnull {"ordf", 170},
114 30d57c74 2005-12-27 devnull {"ordm", 186},
115 30d57c74 2005-12-27 devnull {"oslash", 248},
116 30d57c74 2005-12-27 devnull {"otilde", 245},
117 30d57c74 2005-12-27 devnull {"ouml", 246},
118 30d57c74 2005-12-27 devnull {"para", 182},
119 30d57c74 2005-12-27 devnull {"phi", 966},
120 30d57c74 2005-12-27 devnull {"pi", 960},
121 30d57c74 2005-12-27 devnull {"plusmn", 177},
122 30d57c74 2005-12-27 devnull {"pound", 163},
123 30d57c74 2005-12-27 devnull {"psi", 968},
124 30d57c74 2005-12-27 devnull {"quad", 8193},
125 30d57c74 2005-12-27 devnull {"raquo", 187},
126 30d57c74 2005-12-27 devnull {"rdquo", 8221},
127 30d57c74 2005-12-27 devnull {"reg", 174},
128 30d57c74 2005-12-27 devnull {"rho", 961},
129 30d57c74 2005-12-27 devnull {"rsquo", 8217},
130 30d57c74 2005-12-27 devnull {"sect", 167},
131 30d57c74 2005-12-27 devnull {"shy", 173},
132 30d57c74 2005-12-27 devnull {"sigma", 963},
133 30d57c74 2005-12-27 devnull {"sp", 8194},
134 30d57c74 2005-12-27 devnull {"sup1", 185},
135 30d57c74 2005-12-27 devnull {"sup2", 178},
136 30d57c74 2005-12-27 devnull {"sup3", 179},
137 30d57c74 2005-12-27 devnull {"szlig", 223},
138 30d57c74 2005-12-27 devnull {"tau", 964},
139 30d57c74 2005-12-27 devnull {"theta", 952},
140 30d57c74 2005-12-27 devnull {"thinsp", 8201},
141 30d57c74 2005-12-27 devnull {"thorn", 254},
142 30d57c74 2005-12-27 devnull {"times", 215},
143 30d57c74 2005-12-27 devnull {"trade", 8482},
144 30d57c74 2005-12-27 devnull {"uacute", 250},
145 30d57c74 2005-12-27 devnull {"ucirc", 251},
146 30d57c74 2005-12-27 devnull {"ugrave", 249},
147 30d57c74 2005-12-27 devnull {"uml", 168},
148 30d57c74 2005-12-27 devnull {"upsilon", 965},
149 30d57c74 2005-12-27 devnull {"uuml", 252},
150 30d57c74 2005-12-27 devnull {"varepsilon", 8712},
151 30d57c74 2005-12-27 devnull {"varphi", 981},
152 30d57c74 2005-12-27 devnull {"varpi", 982},
153 30d57c74 2005-12-27 devnull {"varrho", 1009},
154 30d57c74 2005-12-27 devnull {"vdots", 8942},
155 30d57c74 2005-12-27 devnull {"vsigma", 962},
156 30d57c74 2005-12-27 devnull {"vtheta", 977},
157 30d57c74 2005-12-27 devnull {"xi", 958},
158 30d57c74 2005-12-27 devnull {"yacute", 253},
159 30d57c74 2005-12-27 devnull {"yen", 165},
160 30d57c74 2005-12-27 devnull {"yuml", 255},
161 30d57c74 2005-12-27 devnull {"zeta", 950}
162 30d57c74 2005-12-27 devnull };
163 30d57c74 2005-12-27 devnull
164 30d57c74 2005-12-27 devnull static Hchar byrune[nelem(byname)];
165 30d57c74 2005-12-27 devnull
166 30d57c74 2005-12-27 devnull static int
167 30d57c74 2005-12-27 devnull hnamecmp(const void *va, const void *vb)
168 30d57c74 2005-12-27 devnull {
169 30d57c74 2005-12-27 devnull Hchar *a, *b;
170 30d57c74 2005-12-27 devnull
171 30d57c74 2005-12-27 devnull a = (Hchar*)va;
172 30d57c74 2005-12-27 devnull b = (Hchar*)vb;
173 30d57c74 2005-12-27 devnull return strcmp(a->s, b->s);
174 30d57c74 2005-12-27 devnull }
175 30d57c74 2005-12-27 devnull
176 30d57c74 2005-12-27 devnull static int
177 30d57c74 2005-12-27 devnull hrunecmp(const void *va, const void *vb)
178 30d57c74 2005-12-27 devnull {
179 30d57c74 2005-12-27 devnull Hchar *a, *b;
180 30d57c74 2005-12-27 devnull
181 30d57c74 2005-12-27 devnull a = (Hchar*)va;
182 30d57c74 2005-12-27 devnull b = (Hchar*)vb;
183 30d57c74 2005-12-27 devnull return a->r - b->r;
184 30d57c74 2005-12-27 devnull }
185 30d57c74 2005-12-27 devnull
186 30d57c74 2005-12-27 devnull static void
187 30d57c74 2005-12-27 devnull html_init(void)
188 30d57c74 2005-12-27 devnull {
189 30d57c74 2005-12-27 devnull static int init;
190 30d57c74 2005-12-27 devnull
191 30d57c74 2005-12-27 devnull if(init)
192 30d57c74 2005-12-27 devnull return;
193 30d57c74 2005-12-27 devnull init = 1;
194 30d57c74 2005-12-27 devnull memmove(byrune, byname, sizeof byrune);
195 30d57c74 2005-12-27 devnull qsort(byname, nelem(byname), sizeof byname[0], hnamecmp);
196 30d57c74 2005-12-27 devnull qsort(byrune, nelem(byrune), sizeof byrune[0], hrunecmp);
197 30d57c74 2005-12-27 devnull }
198 30d57c74 2005-12-27 devnull
199 30d57c74 2005-12-27 devnull static Rune
200 30d57c74 2005-12-27 devnull findbyname(char *s)
201 30d57c74 2005-12-27 devnull {
202 30d57c74 2005-12-27 devnull Hchar *h;
203 30d57c74 2005-12-27 devnull int n, m, x;
204 30d57c74 2005-12-27 devnull
205 30d57c74 2005-12-27 devnull h = byname;
206 30d57c74 2005-12-27 devnull n = nelem(byname);
207 30d57c74 2005-12-27 devnull while(n > 0){
208 30d57c74 2005-12-27 devnull m = n/2;
209 30d57c74 2005-12-27 devnull x = strcmp(h[m].s, s);
210 30d57c74 2005-12-27 devnull if(x == 0)
211 30d57c74 2005-12-27 devnull return h[m].r;
212 30d57c74 2005-12-27 devnull if(x < 0){
213 30d57c74 2005-12-27 devnull h += m+1;
214 30d57c74 2005-12-27 devnull n -= m+1;
215 30d57c74 2005-12-27 devnull }else
216 30d57c74 2005-12-27 devnull n = m;
217 30d57c74 2005-12-27 devnull }
218 30d57c74 2005-12-27 devnull return Runeerror;
219 30d57c74 2005-12-27 devnull }
220 30d57c74 2005-12-27 devnull
221 30d57c74 2005-12-27 devnull static char*
222 30d57c74 2005-12-27 devnull findbyrune(Rune r)
223 30d57c74 2005-12-27 devnull {
224 30d57c74 2005-12-27 devnull Hchar *h;
225 30d57c74 2005-12-27 devnull int n, m;
226 30d57c74 2005-12-27 devnull
227 30d57c74 2005-12-27 devnull h = byrune;
228 30d57c74 2005-12-27 devnull n = nelem(byrune);
229 30d57c74 2005-12-27 devnull while(n > 0){
230 30d57c74 2005-12-27 devnull m = n/2;
231 30d57c74 2005-12-27 devnull if(h[m].r == r)
232 30d57c74 2005-12-27 devnull return h[m].s;
233 30d57c74 2005-12-27 devnull if(h[m].r < r){
234 30d57c74 2005-12-27 devnull h += m+1;
235 30d57c74 2005-12-27 devnull n -= m+1;
236 30d57c74 2005-12-27 devnull }else
237 30d57c74 2005-12-27 devnull n = m;
238 30d57c74 2005-12-27 devnull }
239 30d57c74 2005-12-27 devnull return nil;
240 30d57c74 2005-12-27 devnull }
241 30d57c74 2005-12-27 devnull
242 30d57c74 2005-12-27 devnull void
243 30d57c74 2005-12-27 devnull html_in(int fd, long *x, struct convert *out)
244 30d57c74 2005-12-27 devnull {
245 30d57c74 2005-12-27 devnull char buf[100], *p;
246 30d57c74 2005-12-27 devnull Biobuf b;
247 30d57c74 2005-12-27 devnull Rune rbuf[N];
248 30d57c74 2005-12-27 devnull Rune *r, *er;
249 30d57c74 2005-12-27 devnull int c, i;
250 30d57c74 2005-12-27 devnull
251 30d57c74 2005-12-27 devnull USED(x);
252 30d57c74 2005-12-27 devnull
253 30d57c74 2005-12-27 devnull html_init();
254 30d57c74 2005-12-27 devnull r = rbuf;
255 30d57c74 2005-12-27 devnull er = rbuf+N;
256 30d57c74 2005-12-27 devnull Binit(&b, fd, OREAD);
257 30d57c74 2005-12-27 devnull while((c = Bgetrune(&b)) != Beof){
258 30d57c74 2005-12-27 devnull if(r >= er){
259 30d57c74 2005-12-27 devnull OUT(out, rbuf, r-rbuf);
260 30d57c74 2005-12-27 devnull r = rbuf;
261 30d57c74 2005-12-27 devnull }
262 30d57c74 2005-12-27 devnull if(c == '&'){
263 30d57c74 2005-12-27 devnull buf[0] = c;
264 30d57c74 2005-12-27 devnull for(i=1; i<nelem(buf)-1;){
265 30d57c74 2005-12-27 devnull c = Bgetc(&b);
266 30d57c74 2005-12-27 devnull if(c == Beof)
267 30d57c74 2005-12-27 devnull break;
268 30d57c74 2005-12-27 devnull buf[i++] = c;
269 30d57c74 2005-12-27 devnull if(strchr("; \t\r\n", c))
270 30d57c74 2005-12-27 devnull break;
271 30d57c74 2005-12-27 devnull }
272 30d57c74 2005-12-27 devnull buf[i] = 0;
273 30d57c74 2005-12-27 devnull if(buf[i-1] == ';'){
274 30d57c74 2005-12-27 devnull buf[i-1] = 0;
275 30d57c74 2005-12-27 devnull if((c = findbyname(buf+1)) != Runeerror){
276 30d57c74 2005-12-27 devnull *r++ = c;
277 30d57c74 2005-12-27 devnull continue;
278 30d57c74 2005-12-27 devnull }
279 30d57c74 2005-12-27 devnull buf[i-1] = ';';
280 30d57c74 2005-12-27 devnull if(buf[1] == '#'){
281 30d57c74 2005-12-27 devnull if(buf[2] == 'x')
282 30d57c74 2005-12-27 devnull c = strtol(buf+3, &p, 16);
283 30d57c74 2005-12-27 devnull else
284 30d57c74 2005-12-27 devnull c = strtol(buf+2, &p, 10);
285 30d57c74 2005-12-27 devnull if(*p != ';' || c >= NRUNE || c < 0)
286 30d57c74 2005-12-27 devnull goto bad;
287 30d57c74 2005-12-27 devnull *r++ = c;
288 30d57c74 2005-12-27 devnull continue;
289 30d57c74 2005-12-27 devnull }
290 30d57c74 2005-12-27 devnull }
291 30d57c74 2005-12-27 devnull bad:
292 30d57c74 2005-12-27 devnull for(p=buf; p<buf+i; ){
293 30d57c74 2005-12-27 devnull p += chartorune(r++, p);
294 30d57c74 2005-12-27 devnull if(r >= er){
295 30d57c74 2005-12-27 devnull OUT(out, rbuf, r-rbuf);
296 30d57c74 2005-12-27 devnull r = rbuf;
297 30d57c74 2005-12-27 devnull }
298 30d57c74 2005-12-27 devnull }
299 30d57c74 2005-12-27 devnull continue;
300 30d57c74 2005-12-27 devnull }
301 30d57c74 2005-12-27 devnull *r++ = c;
302 30d57c74 2005-12-27 devnull }
303 30d57c74 2005-12-27 devnull if(r > rbuf)
304 30d57c74 2005-12-27 devnull OUT(out, rbuf, r-rbuf);
305 30d57c74 2005-12-27 devnull }
306 30d57c74 2005-12-27 devnull
307 30d57c74 2005-12-27 devnull /*
308 30d57c74 2005-12-27 devnull * use biobuf because can use more than UTFmax bytes per rune
309 30d57c74 2005-12-27 devnull */
310 30d57c74 2005-12-27 devnull void
311 30d57c74 2005-12-27 devnull html_out(Rune *r, int n, long *x)
312 30d57c74 2005-12-27 devnull {
313 30d57c74 2005-12-27 devnull char *s;
314 30d57c74 2005-12-27 devnull Biobuf b;
315 30d57c74 2005-12-27 devnull Rune *er;
316 30d57c74 2005-12-27 devnull
317 30d57c74 2005-12-27 devnull html_init();
318 30d57c74 2005-12-27 devnull Binit(&b, 1, OWRITE);
319 30d57c74 2005-12-27 devnull er = r+n;
320 30d57c74 2005-12-27 devnull for(; r<er; r++){
321 30d57c74 2005-12-27 devnull if(*r < Runeself)
322 30d57c74 2005-12-27 devnull Bputrune(&b, *r);
323 30d57c74 2005-12-27 devnull else if((s = findbyrune(*r)) != nil)
324 30d57c74 2005-12-27 devnull Bprint(&b, "&%s;", s);
325 30d57c74 2005-12-27 devnull else
326 1a8bd157 2006-02-14 devnull Bprint(&b, "&#%d;", *r);
327 30d57c74 2005-12-27 devnull }
328 30d57c74 2005-12-27 devnull Bflush(&b);
329 30d57c74 2005-12-27 devnull }
330 30d57c74 2005-12-27 devnull