/* HTML character references: html_in decodes them to runes, html_out encodes runes. */
1 #include <u.h>2 #include <libc.h>3 #include <bio.h>4 #include "hdr.h"5 #include "conv.h"7 typedef struct Hchar Hchar;8 struct Hchar9 {10 char *s;11 Rune r;12 };14 /* <, >, ", & intentionally omitted */16 /*17 * Names beginning with _ are names we recognize18 * (without the underscore) but will not generate,19 * because they are nonstandard.20 */21 static Hchar byname[] =22 {23 {"AElig", 198},24 {"Aacute", 193},25 {"Acirc", 194},26 {"Agrave", 192},27 {"Alpha", 913},28 {"Aring", 197},29 {"Atilde", 195},30 {"Auml", 196},31 {"Beta", 914},32 {"Ccedil", 199},33 {"Chi", 935},34 {"Dagger", 8225},35 {"Delta", 916},36 {"ETH", 208},37 {"Eacute", 201},38 {"Ecirc", 202},39 {"Egrave", 200},40 {"Epsilon", 917},41 {"Eta", 919},42 {"Euml", 203},43 {"Gamma", 915},44 {"Iacute", 205},45 {"Icirc", 206},46 {"Igrave", 204},47 {"Iota", 921},48 {"Iuml", 207},49 {"Kappa", 922},50 {"Lambda", 923},51 {"Mu", 924},52 {"Ntilde", 209},53 {"Nu", 925},54 {"OElig", 338},55 {"Oacute", 211},56 {"Ocirc", 212},57 {"Ograve", 210},58 {"Omega", 937},59 {"Omicron", 927},60 {"Oslash", 216},61 {"Otilde", 213},62 {"Ouml", 214},63 {"Phi", 934},64 {"Pi", 928},65 {"Prime", 8243},66 {"Psi", 936},67 {"Rho", 929},68 {"Scaron", 352},69 {"Sigma", 931},70 {"THORN", 222},71 {"Tau", 932},72 {"Theta", 920},73 {"Uacute", 218},74 {"Ucirc", 219},75 {"Ugrave", 217},76 {"Upsilon", 933},77 {"Uuml", 220},78 {"Xi", 926},79 {"Yacute", 221},80 {"Yuml", 376},81 {"Zeta", 918},82 {"aacute", 225},83 {"acirc", 226},84 {"acute", 180},85 {"aelig", 230},86 {"agrave", 224},87 {"alefsym", 8501},88 {"alpha", 945},89 {"amp", 38},90 {"and", 8743},91 {"ang", 8736},92 {"aring", 229},93 {"asymp", 8776},94 {"atilde", 227},95 {"auml", 228},96 {"bdquo", 8222},97 {"beta", 946},98 {"brvbar", 166},99 {"bull", 8226},100 {"cap", 8745},101 {"ccedil", 231},102 {"cdots", 8943},103 {"cedil", 184},104 {"cent", 162},105 {"chi", 967},106 {"circ", 710},107 {"clubs", 9827},108 {"cong", 8773},109 {"copy", 169},110 {"crarr", 8629},111 {"cup", 8746},112 {"curren", 
164},113 {"dArr", 8659},114 {"dagger", 8224},115 {"darr", 8595},116 {"ddots", 8945},117 {"deg", 176},118 {"delta", 948},119 {"diams", 9830},120 {"divide", 247},121 {"eacute", 233},122 {"ecirc", 234},123 {"egrave", 232},124 {"_emdash", 8212}, /* non-standard but commonly used */125 {"empty", 8709},126 {"emsp", 8195},127 {"_endash", 8211}, /* non-standard but commonly used */128 {"ensp", 8194},129 {"epsilon", 949},130 {"equiv", 8801},131 {"eta", 951},132 {"eth", 240},133 {"euml", 235},134 {"euro", 8364},135 {"exist", 8707},136 {"fnof", 402},137 {"forall", 8704},138 {"frac12", 189},139 {"frac14", 188},140 {"frac34", 190},141 {"frasl", 8260},142 {"gamma", 947},143 {"ge", 8805},144 {"gt", 62},145 {"hArr", 8660},146 {"harr", 8596},147 {"hearts", 9829},148 {"hellip", 8230},149 {"iacute", 237},150 {"icirc", 238},151 {"iexcl", 161},152 {"igrave", 236},153 {"image", 8465},154 {"infin", 8734},155 {"int", 8747},156 {"iota", 953},157 {"iquest", 191},158 {"isin", 8712},159 {"iuml", 239},160 {"kappa", 954},161 {"lArr", 8656},162 {"lambda", 955},163 {"lang", 9001},164 {"laquo", 171},165 {"larr", 8592},166 {"lceil", 8968},167 {"_ldots", 8230},168 {"ldquo", 8220},169 {"le", 8804},170 {"lfloor", 8970},171 {"lowast", 8727},172 {"loz", 9674},173 {"lrm", 8206},174 {"lsaquo", 8249},175 {"lsquo", 8216},176 {"lt", 60},177 {"macr", 175},178 {"mdash", 8212},179 {"micro", 181},180 {"middot", 183},181 {"minus", 8722},182 {"mu", 956},183 {"nabla", 8711},184 {"nbsp", 160},185 {"ndash", 8211},186 {"ne", 8800},187 {"ni", 8715},188 {"not", 172},189 {"notin", 8713},190 {"nsub", 8836},191 {"ntilde", 241},192 {"nu", 957},193 {"oacute", 243},194 {"ocirc", 244},195 {"oelig", 339},196 {"ograve", 242},197 {"oline", 8254},198 {"omega", 969},199 {"omicron", 959},200 {"oplus", 8853},201 {"or", 8744},202 {"ordf", 170},203 {"ordm", 186},204 {"oslash", 248},205 {"otilde", 245},206 {"otimes", 8855},207 {"ouml", 246},208 {"para", 182},209 {"part", 8706},210 {"permil", 8240},211 {"perp", 8869},212 {"phi", 966},213 
{"pi", 960},214 {"piv", 982},215 {"plusmn", 177},216 {"pound", 163},217 {"prime", 8242},218 {"prod", 8719},219 {"prop", 8733},220 {"psi", 968},221 {"quad", 8193},222 {"quot", 34},223 {"rArr", 8658},224 {"radic", 8730},225 {"rang", 9002},226 {"raquo", 187},227 {"rarr", 8594},228 {"rceil", 8969},229 {"rdquo", 8221},230 {"real", 8476},231 {"reg", 174},232 {"rfloor", 8971},233 {"rho", 961},234 {"rlm", 8207},235 {"rsaquo", 8250},236 {"rsquo", 8217},237 {"sbquo", 8218},238 {"scaron", 353},239 {"sdot", 8901},240 {"sect", 167},241 {"shy", 173},242 {"sigma", 963},243 {"sigmaf", 962},244 {"sim", 8764},245 {"_sp", 8194},246 {"spades", 9824},247 {"sub", 8834},248 {"sube", 8838},249 {"sum", 8721},250 {"sup", 8835},251 {"sup1", 185},252 {"sup2", 178},253 {"sup3", 179},254 {"supe", 8839},255 {"szlig", 223},256 {"tau", 964},257 {"there4", 8756},258 {"theta", 952},259 {"thetasym", 977},260 {"thinsp", 8201},261 {"thorn", 254},262 {"tilde", 732},263 {"times", 215},264 {"trade", 8482},265 {"uArr", 8657},266 {"uacute", 250},267 {"uarr", 8593},268 {"ucirc", 251},269 {"ugrave", 249},270 {"uml", 168},271 {"upsih", 978},272 {"upsilon", 965},273 {"uuml", 252},274 {"_varepsilon", 8712},275 {"varphi", 981},276 {"_varpi", 982},277 {"varrho", 1009},278 {"vdots", 8942},279 {"_vsigma", 962},280 {"_vtheta", 977},281 {"weierp", 8472},282 {"xi", 958},283 {"yacute", 253},284 {"yen", 165},285 {"yuml", 255},286 {"zeta", 950},287 {"zwj", 8205},288 {"zwnj", 8204}289 };291 static Hchar byrune[nelem(byname)];293 static int294 hnamecmp(const void *va, const void *vb)295 {296 Hchar *a, *b;298 a = (Hchar*)va;299 b = (Hchar*)vb;300 return strcmp(a->s, b->s);301 }303 static int304 hrunecmp(const void *va, const void *vb)305 {306 Hchar *a, *b;308 a = (Hchar*)va;309 b = (Hchar*)vb;310 return a->r - b->r;311 }313 static void314 html_init(void)315 {316 static int init;317 int i;319 if(init)320 return;321 init = 1;322 memmove(byrune, byname, sizeof byrune);324 /* Eliminate names we aren't allowed to generate. 
*/325 for(i=0; i<nelem(byrune); i++){326 if(byrune[i].s[0] == '_'){327 byrune[i].r = Runeerror;328 byname[i].s++;329 }330 }332 qsort(byname, nelem(byname), sizeof byname[0], hnamecmp);333 qsort(byrune, nelem(byrune), sizeof byrune[0], hrunecmp);334 }336 static Rune337 findbyname(char *s)338 {339 Hchar *h;340 int n, m, x;342 h = byname;343 n = nelem(byname);344 while(n > 0){345 m = n/2;346 x = strcmp(h[m].s, s);347 if(x == 0)348 return h[m].r;349 if(x < 0){350 h += m+1;351 n -= m+1;352 }else353 n = m;354 }355 return Runeerror;356 }358 static char*359 findbyrune(Rune r)360 {361 Hchar *h;362 int n, m;364 if(r == Runeerror)365 return nil;366 h = byrune;367 n = nelem(byrune);368 while(n > 0){369 m = n/2;370 if(h[m].r == r)371 return h[m].s;372 if(h[m].r < r){373 h += m+1;374 n -= m+1;375 }else376 n = m;377 }378 return nil;379 }381 void382 html_in(int fd, long *x, struct convert *out)383 {384 char buf[100], *p;385 Biobuf b;386 Rune rbuf[N];387 Rune *r, *er;388 int c, i;390 USED(x);392 html_init();393 r = rbuf;394 er = rbuf+N;395 Binit(&b, fd, OREAD);396 while((c = Bgetrune(&b)) != Beof){397 if(r >= er){398 OUT(out, rbuf, r-rbuf);399 r = rbuf;400 }401 if(c == '&'){402 buf[0] = c;403 for(i=1; i<nelem(buf)-1;){404 c = Bgetc(&b);405 if(c == Beof)406 break;407 buf[i++] = c;408 if(strchr("; \t\r\n", c))409 break;410 }411 buf[i] = 0;412 if(buf[i-1] == ';'){413 buf[i-1] = 0;414 if((c = findbyname(buf+1)) != Runeerror){415 *r++ = c;416 continue;417 }418 buf[i-1] = ';';419 if(buf[1] == '#'){420 if(buf[2] == 'x')421 c = strtol(buf+3, &p, 16);422 else423 c = strtol(buf+2, &p, 10);424 if(*p != ';' || c >= NRUNE || c < 0)425 goto bad;426 *r++ = c;427 continue;428 }429 }430 bad:431 for(p=buf; p<buf+i; ){432 p += chartorune(r++, p);433 if(r >= er){434 OUT(out, rbuf, r-rbuf);435 r = rbuf;436 }437 }438 continue;439 }440 *r++ = c;441 }442 if(r > rbuf)443 OUT(out, rbuf, r-rbuf);444 OUT(out, rbuf, 0);445 }447 /*448 * use biobuf because can use more than UTFmax bytes per rune449 */450 void451 
html_out(Rune *r, int n, long *x)452 {453 char *s;454 Biobuf b;455 Rune *er;457 USED(x);458 html_init();459 Binit(&b, 1, OWRITE);460 er = r+n;461 for(; r<er; r++){462 if(*r < Runeself)463 Bputrune(&b, *r);464 else if((s = findbyrune(*r)) != nil)465 Bprint(&b, "&%s;", s);466 else467 Bprint(&b, "&#%d;", *r);468 }469 Bflush(&b);470 }