8 Wid = 20 /* tmac.anhtml sets page width to 20" so we can recognize .nf text */
12 typedef struct Troffchar Troffchar;
13 typedef struct Htmlchar Htmlchar;
14 typedef struct Font Font;
15 typedef struct HTMLfont HTMLfont;
17 /* a Char is 32 bits. low 16 bits are the rune. higher are attributes */
27 Anchor = 26 /* must be last */
30 enum /* magic emissions */
36 int attrorder[] = { Indent1, Indent2, Indent3, Heading, Anchor, Italic, Bold, CW };
67 /* R must be first; it's the default representation for fonts we don't recognize */
68 HTMLfont htmlfonts[] =
73 "LuxiSans-Oblique", "i", Italic,
79 #define TABLE "<table border=0 cellpadding=0 cellspacing=0>"
82 onattr[8*sizeof(ulong)] =
84 0, 0, 0, 0, 0, 0, 0, 0,
85 0, 0, 0, 0, 0, 0, 0, 0,
88 "<tt><font size=+1>", /* cw */
89 "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent1 */
90 "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent2 */
91 "<+table border=0 cellpadding=0 cellspacing=0><tr height=2><td><tr><td width=20><td>\n", /* indent3 */
95 "<p><font size=+1><b>", /* heading 25 */
96 "<unused>", /* anchor 26 */
100 offattr[8*sizeof(ulong)] =
102 0, 0, 0, 0, 0, 0, 0, 0,
103 0, 0, 0, 0, 0, 0, 0, 0,
106 "</font></tt>", /* cw */
107 "<-/table>", /* indent1 */
108 "<-/table>", /* indent2 */
109 "<-/table>", /* indent3 */
113 "</b></font>", /* heading 25 */
114 "</a>", /* anchor 26 */
133 ulong attr = 0; /* or'ed into each Char */
138 char** anchors; /* allocated in order */
147 char *title = "Plan 9 man page";
149 void process(Biobuf*, char*);
150 void mountfont(int, char*);
151 void switchfont(int);
163 sysfatal("malloc failed: %r");
168 erealloc(void *p, ulong n)
173 sysfatal("realloc failed: %r");
184 sysfatal("strdup failed: %r");
191 fprint(2, "usage: troff2html [-d] [-t title] [file ...]\n");
196 hccmp(const void *va, const void *vb)
202 return a->value - b->value;
206 main(int argc, char *argv[])
212 for(i=0; i<nelem(htmlchars); i++){
213 chartorune(&r, htmlchars[i].utf);
214 htmlchars[i].value = r;
216 qsort(htmlchars, nelem(htmlchars), sizeof(htmlchars[0]), hccmp);
231 Binit(&bout, 1, OWRITE);
233 Binit(&in, 0, OREAD);
234 process(&in, "<stdin>");
236 for(i=0; i<argc; i++){
237 inp = Bopen(argv[i], OREAD);
239 sysfatal("can't open %s: %r", argv[i]);
240 process(inp, argv[i]);
251 emitul(ulong ul, int special)
255 if(nalloc == nchars){
257 chars = realloc(chars, nalloc*sizeof(chars[0]));
259 sysfatal("malloc failed: %r");
266 * Attr-specific transformations.
268 if((a&(1<<CW)) && c=='-')
279 * Turn single quotes into double quotes.
282 if(c == 0x2018 && (chars[nchars-1]&0xFFFF) == 0x2018
283 && a==(chars[nchars-1]&~0xFFFF)){
284 chars[nchars-1] = (ul&~0xFFFF) | 0x201C;
287 if(c == 0x2019 && (chars[nchars-1]&0xFFFF) == 0x2019
288 && a==(chars[nchars-1]&~0xFFFF)){
289 chars[nchars-1] = (ul&~0xFFFF) | 0x201D;
294 chars[nchars++] = ul;
302 * Close man page references early, so that
304 * doesn't make the comma part of the link.
307 attr &= ~(1<<Anchor);
313 emitul(Estring | attr, 0);
321 iputrune(Biobuf *b, Rune r)
325 if(linelen++ > 60 && r == ' ')
328 Bprint(b, "&#%d;", r);
332 for(i=0; i<indentlevel; i++)
339 iputs(Biobuf *b, char *s)
341 if(s[0]=='<' && s[1]=='+'){
343 Bprint(b, "<%s", s+2);
346 }else if(s[0]=='<' && s[1]=='-'){
349 Bprint(b, "<%s", s+2);
363 /* walk up the nest stack until we reach something we need to turn off. */
364 for(i=0; i<nnest; i++)
368 /* turn off everything above that */
369 for(j=nnest-1; j>=i; j--)
370 iputs(&bout, offattr[nest[j]]);
372 /* turn on everything we just turned off but didn't want to */
373 for(j=i; j<nnest; j++)
375 iputs(&bout, onattr[nest[j]]);
379 /* shift the zeros (turned off things) up */
380 for(i=j=0; i<nnest; i++)
385 /* now turn on the new attributes */
386 for(i=0; i<nelem(attrorder); i++){
390 onattr[j] = anchors[nanchors++];
391 iputs(&bout, onattr[j]);
405 for(i=0; i<nchars; i++){
408 iputrune(&bout, '\n');
409 iputs(&bout, TABLE "<tr height=5><td></table>");
410 iputrune(&bout, '\n');
416 * If we're going to turn something off after a space,
417 * let's just turn it off before.
419 if(c==' ' && i<nchars-1 && (chars[i+1]&0xFFFF) >= 32)
420 a ^= a & ~chars[i+1];
423 /* next word is string to print */
424 iputs(&bout, (char*)chars[++i]);
427 iputrune(&bout, c & 0xFFFF);
436 Bprint(&bout, "<head>\n");
437 if(pagename && section){
439 strecpy(buf, buf+sizeof buf, pagename);
441 *p = tolower((uchar)*p);
442 Bprint(&bout, "<title>%s(%s) - %s</title>\n", buf, section, s);
444 Bprint(&bout, "<title>%s</title>\n", s);
445 Bprint(&bout, "<meta content=\"text/html; charset=utf-8\" http-equiv=Content-Type>\n");
446 Bprint(&bout, "</head>\n");
447 Bprint(&bout, "<body bgcolor=#ffffff>\n");
448 Bprint(&bout, "<table border=0 cellpadding=0 cellspacing=0 width=100%%>\n");
449 Bprint(&bout, "<tr height=10><td>\n");
450 Bprint(&bout, "<tr><td width=20><td>\n");
451 if(pagename && section){
452 Bprint(&bout, "<tr><td width=20><td><b>%s(%s)</b><td align=right><b>%s(%s)</b>\n",
453 pagename, section, pagename, section);
455 Bprint(&bout, "<tr><td width=20><td colspan=2>\n");
461 Bprint(&bout, "<td width=20>\n");
462 Bprint(&bout, "<tr height=20><td>\n");
463 Bprint(&bout, "</table>\n");
469 t = localtime(time(nil));
470 Bprint(&bout, TABLE "<tr height=20><td></table>\n");
471 Bprint(&bout, "<font size=-1><a href=\"http:/*www.lucent.com/copyright.html\">\n"); */
472 Bprint(&bout, "Portions Copyright</A> © %d Lucent Technologies. All rights reserved.</font>\n", t->year+1900);
475 Bprint(&bout, "<!-- TRAILER -->\n");
476 Bprint(&bout, "</body></html>\n");
498 for(i=0; i<sizeof buf; i++){
533 for(i=0; i<sizeof buf; i++){
534 /* must get bytes not runes */
540 if(c == '\n' || c==' ' || c=='\t'){
550 setnum(Biobuf *b, char *name, int min, int max)
556 fprint(2, "set %s = %d\n", name, i);
559 sysfatal("value of %s is %d; min %d max %d at %s:#%d", name, i, min, max, filename, cno);
566 char *p, *fld[16], buf[1024];
572 sysfatal("xcmd error: %r");
574 fprint(2, "x command '%s'\n", p);
575 nfld = tokenize(p, fld, nelem(fld));
585 sysfatal("font %d out of range at %s:#%d", i, filename, cno);
586 mountfont(i, fld[2]);
592 if(nfld<2 || atoi(fld[1])!=res)
593 sysfatal("typesetter has unexpected resolution %s", fld[1]? fld[1] : "<unspecified>");
602 if(nfld!=2 || strcmp(fld[1], "utf")!=0)
603 sysfatal("output for unknown typesetter type %s", fld[1]);
606 if(nfld<3 || strcmp(fld[1], "html")!=0)
608 /* is it a man reference of the form cp(1)? */
609 /* X manref start/end cp (1) */
610 if(nfld==6 && strcmp(fld[2], "manref")==0){
611 /* was the right macro; is it the right form? */
612 if(strlen(fld[5])>=3 &&
613 fld[5][0]=='('/*)*/ && (fld[5][2]==/*(*/')' || (isalpha((uchar)fld[5][2]) && fld[5][3]==/*(*/')')) &&
614 '0'<=fld[5][1] && fld[5][1]<='9'){
615 if(strcmp(fld[3], "start") == 0){
616 /* set anchor attribute and remember string */
619 snprint(buf, sizeof buf,
620 "<a href=\"/magic/man2html/man%c/%s\">",
623 snprint(buf, sizeof buf,
624 "<a href=\"../man%c/%s.html\">", fld[5][1], fld[4]);
626 if('A' <= *p && *p <= 'Z')
630 anchors = erealloc(anchors, nanchors*sizeof(char*));
631 anchors[nanchors-1] = estrdup(buf);
632 }else if(strcmp(fld[3], "end") == 0)
633 attr &= ~(1<<Anchor);
635 }else if(nfld >= 4 && strcmp(fld[2], "href") == 0){
638 anchors = erealloc(anchors, nanchors*sizeof(char*));
639 anchors[nanchors-1] = smprint("<a href=\"%s\">", fld[3]);
640 }else if(strcmp(fld[2], "/href") == 0){
641 attr &= ~(1<<Anchor);
642 }else if(strcmp(fld[2], "manPP") == 0){
645 }else if(nfld>=5 && strcmp(fld[2], "manhead") == 0){
646 pagename = strdup(fld[3]);
647 section = strdup(fld[4]);
648 }else if(nfld<4 || strcmp(fld[2], "manref")!=0){
649 if(nfld>2 && strcmp(fld[2], "<P>")==0){ /* avoid triggering extra <br> */
651 /* clear all font attributes before paragraph */
652 emitul(' ' | (attr & ~(0xFFFF|((1<<Italic)|(1<<Bold)|(1<<CW)))), 0);
654 /* next emitted char will turn font attributes back on */
655 }else if(nfld>2 && strcmp(fld[2], "<H4>")==0)
656 attr |= (1<<Heading);
657 else if(nfld>2 && strcmp(fld[2], "</H4>")==0)
658 attr &= ~(1<<Heading);
660 fprint(2, "unknown in-line html %s... at %s:%#d\n",
661 fld[2], filename, cno);
666 fprint(2, "unknown or badly formatted x command %s\n", fld[0]);
670 lookup(int c, Htmlchar tab[], int ntab)
678 if(c < tab[mid].value)
680 else if(c > tab[mid].value)
685 return -1; /* no match */
693 i = lookup(r, htmlchars, nelem(htmlchars));
695 emitstr(htmlchars[i].name);
705 for(i=0; troffchars[i].name!=nil; i++)
706 if(strcmp(s, troffchars[i].name) == 0)
707 return troffchars[i].value;
720 /* these most peculiar numbers appear in the troff -man output */
721 nind = ((prevlineH-1*res)+323)/324;
722 attr &= ~((1<<Indent1)|(1<<Indent2)|(1<<Indent3));
724 attr |= (1<<Indent1);
726 attr |= (1<<Indent2);
728 attr |= (1<<Indent3);
735 process(Biobuf *b, char *name)
747 /* go to ground state */
753 case '0': case '1': case '2': case '3': case '4':
754 case '5': case '6': case '7': case '8': case '9':
758 sysfatal("illegal character motion at %s:#%d", filename, cno);
761 /* fall through to character case */
768 /* draw line; ignore */
771 while(c!='\n' && c!= Beof);
774 v = setnum(b, "font", 0, Nfont);
778 v = setnum(b, "hpos", -20000, 20000);
779 /* generate spaces if motion is large and within a line */
780 if(!atnewline && v>2*72)
786 setnum(b, "n1", -10000, 10000);
787 /*Bprint(&bout, " N1=%d", v); */
788 getc(b); /* space separates */
789 setnum(b, "n2", -10000, 10000);
791 if(!didP && hp < (Wid-1)*res) /* if line is less than 19" long, probably need a line break */
796 page = setnum(b, "ps", -10000, 10000);
799 ps = setnum(b, "ps", 1, 1000);
802 vp += setnum(b, "vpos", -10000, 10000);
803 /* BUG: ignore motion */
814 emitstr(troffchar(p));
817 hp = setnum(b, "hpos", 0, 20000);
818 /*Bprint(&bout, " H=%d ", hp); */
821 vp = setnum(b, "vpos", 0, 10000);
824 fprint(2, "dhtml: unknown directive %c(0x%.2ux) at %s:#%d\n", c, c, filename, cno);
835 for(i=0; htmlfonts[i].name!=nil; i++)
836 if(strcmp(name, htmlfonts[i].name) == 0)
837 return &htmlfonts[i];
838 return &htmlfonts[0];
842 mountfont(int pos, char *name)
845 fprint(2, "mount font %s on %d\n", name, pos);
846 if(font[pos] != nil){
847 free(font[pos]->name);
850 font[pos] = emalloc(sizeof(Font));
851 font[pos]->name = estrdup(name);
852 font[pos]->htmlfont = htmlfont(name);
861 fprint(2, "font change from %d (%s) to %d (%s)\n", ft, font[ft]->name, pos, font[pos]->name);
864 hf = font[ft]->htmlfont;
866 attr &= ~(1<<hf->bit);
868 hf = font[ft]->htmlfont;
870 attr |= (1<<hf->bit);