Blob
1 #include <u.h>2 #include <libc.h>3 #include <bio.h>5 /*6 * Deroff command -- strip troff, eqn, and tbl sequences from7 * a file. Has three flags argument, -w, to cause output one word per line8 * rather than in the original format.9 * -mm (or -ms) causes the corresponding macro's to be interpreted10 * so that just sentences are output11 * -ml also gets rid of lists.12 * -i causes deroff to ignore .so and .nx commands.13 * Deroff follows .so and .nx commands, removes contents of macro14 * definitions, equations (both .EQ ... .EN and $...$),15 * Tbl command sequences, and Troff backslash vconstructions.16 *17 * All input is through the C macro; the most recently read character is in c.18 */20 /*21 #define C ((c = Bgetrune(infile)) < 0?\22 eof():\23 ((c == ldelim) && (filesp == files)?\24 skeqn():\25 (c == '\n'?\26 (linect++,c):\27 c)))29 #define C1 ((c = Bgetrune(infile)) == Beof?\30 eof():\31 (c == '\n'?\32 (linect++,c):\33 c))34 */36 /* lose those macros! */37 #define C fC()38 #define C1 fC1()40 #define SKIP while(C != '\n')41 #define SKIP1 while(C1 != '\n')42 #define SKIP_TO_COM SKIP;\43 SKIP;\44 pc=c;\45 while(C != '.' || pc != '\n' || C > 'Z')\46 pc=c48 #define YES 149 #define NO 050 #define MS 051 #define MM 152 #define ONE 153 #define TWO 255 #define NOCHAR -256 #define EXTENDED -1 /* All runes above 0x7F */57 #define SPECIAL 058 #define APOS 159 #define PUNCT 260 #define DIGIT 361 #define LETTER 464 int linect = 0;65 int wordflag= NO;66 int underscoreflag = NO;67 int msflag = NO;68 int iflag = NO;69 int mac = MM;70 int disp = 0;71 int inmacro = NO;72 int intable = NO;73 int eqnflag = 0;75 #define MAX_ASCII 0X8077 char chars[MAX_ASCII]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */79 Rune line[30000];80 Rune* lp;82 long c;83 long pc;84 int ldelim = NOCHAR;85 int rdelim = NOCHAR;88 char** argv;90 char fname[50];91 Biobuf* files[15];92 Biobuf**filesp;93 Biobuf* infile;94 char* devnull = "/dev/null";95 Biobuf *infile;96 Biobuf bout;98 long skeqn(void);99 Biobuf* opn(char *p);100 int eof(void);101 int charclass(int);102 void getfname(void);103 void fatal(char *s, char *p);104 void usage(void);105 void work(void);106 void putmac(Rune *rp, int vconst);107 void regline(int macline, int vconst);108 void putwords(void);109 void comline(void);110 void macro(void);111 void eqn(void);112 void tbl(void);113 void stbl(void);114 void sdis(char a1, char a2);115 void sce(void);116 void backsl(void);117 char* copys(char *s);118 void refer(int c1);119 void inpic(void);121 int122 fC(void)123 {124 c = Bgetrune(infile);125 if(c < 0)126 return eof();127 if(c == ldelim && filesp == files)128 return skeqn();129 if(c == '\n')130 linect++;131 return c;132 }134 int135 fC1(void)136 {137 c = Bgetrune(infile);138 if(c == Beof)139 return eof();140 if(c == '\n')141 linect++;142 return c;143 }145 void146 main(int argc, char *av[])147 {148 int i;149 char *f;151 argv = av;152 Binit(&bout, 1, OWRITE);153 ARGBEGIN{154 case 'w':155 wordflag = YES;156 break;157 case '_':158 wordflag = YES;159 underscoreflag = YES;160 break;161 case 'm':162 msflag = YES;163 if(f = ARGF())164 switch(*f)165 {166 case 'm': mac = MM; break;167 case 's': mac = MS; break;168 case 'l': disp = 1; break;169 default: usage();170 }171 else172 usage();173 break;174 case 'i':175 iflag = YES;176 break;177 default:178 usage();179 }ARGEND180 if(*argv)181 infile = opn(*argv++);182 else{183 infile = malloc(sizeof(Biobuf));184 Binit(infile, 0, OREAD);185 }186 files[0] = infile;187 filesp = &files[0];189 for(i='a'; i<='z' ; ++i)190 chars[i] = LETTER;191 for(i='A'; i<='Z'; ++i)192 chars[i] = LETTER;193 for(i='0'; i<='9'; ++i)194 chars[i] = DIGIT;195 chars['\''] = APOS;196 chars['&'] = APOS;197 chars['\b'] = APOS;198 chars['.'] = PUNCT;199 chars[','] = PUNCT;200 chars[';'] = PUNCT;201 chars['?'] = PUNCT;202 chars[':'] = PUNCT;203 work();204 }206 long207 skeqn(void)208 {209 while(C1 != rdelim)210 if(c == '\\')211 c = C1;212 else if(c == '"')213 while(C1 != '"')214 if(c == '\\')215 C1;216 if (msflag)217 eqnflag = 1;218 return(c = ' ');219 }221 Biobuf*222 opn(char *p)223 {224 Biobuf *fd;226 while ((fd = Bopen(p, OREAD)) == 0) {227 if(msflag || p == devnull)228 fatal("Cannot open file %s - quitting\n", p);229 else {230 fprint(2, "Deroff: Cannot open file %s - continuing\n", p);231 p = devnull;232 }233 }234 linect = 0;235 return(fd);236 }238 int239 eof(void)240 {241 if(Bfildes(infile) != 0)242 Bterm(infile);243 if(filesp > files)244 infile = *--filesp;245 else246 if(*argv)247 infile = opn(*argv++);248 else249 exits(0);250 return(C);251 }253 void254 getfname(void)255 {256 char *p;257 Rune r;258 Dir *dir;259 struct chain260 {261 struct chain* nextp;262 char* datap;263 } *q;265 static struct chain *namechain= 0;267 while(C == ' ')268 ;269 for(p = fname; (r=c) != '\n' && r != ' ' && r != '\t' && r != '\\'; C)270 p += runetochar(p, &r);271 *p = '\0';272 while(c != '\n')273 C;274 if(!strcmp(fname, "/sys/lib/tmac/tmac.cs")275 || !strcmp(fname, "/sys/lib/tmac/tmac.s")) {276 fname[0] = '\0';277 return;278 }279 dir = dirstat(fname);280 if(dir!=nil && ((dir->mode & DMDIR) || dir->type != 'M')) {281 free(dir);282 fname[0] = '\0';283 return;284 }285 free(dir);286 /*287 * see if this name has already been used288 */290 for(q = namechain; q; q = q->nextp)291 if( !strcmp(fname, q->datap)) {292 fname[0] = '\0';293 return;294 }295 q = (struct chain*)malloc(sizeof(struct chain));296 q->nextp = namechain;297 q->datap = copys(fname);298 namechain = q;299 }301 void302 usage(void)303 {304 fprint(2,"usage: deroff [-nw_pi] [-m (m s l)] [file ...] \n");305 exits("usage");306 }308 void309 fatal(char *s, char *p)310 {311 fprint(2, "deroff: ");312 fprint(2, s, p);313 exits(s);314 }316 void317 work(void)318 {320 for(;;) {321 eqnflag = 0;322 if(C == '.' || c == '\'')323 comline();324 else325 regline(NO, TWO);326 }327 }329 void330 regline(int macline, int vconst)331 {332 line[0] = c;333 lp = line;334 for(;;) {335 if(c == '\\') {336 *lp = ' ';337 backsl();338 if(c == '%') /* no blank for hyphenation char */339 lp--;340 }341 if(c == '\n')342 break;343 if(intable && c=='T') {344 *++lp = C;345 if(c=='{' || c=='}') {346 lp[-1] = ' ';347 *lp = C;348 }349 } else {350 if(msflag == 1 && eqnflag == 1) {351 eqnflag = 0;352 *++lp = 'x';353 }354 *++lp = C;355 }356 }357 *lp = '\0';358 if(lp != line) {359 if(wordflag)360 putwords();361 else362 if(macline)363 putmac(line,vconst);364 else365 Bprint(&bout, "%S\n", line);366 }367 }369 void370 putmac(Rune *rp, int vconst)371 {372 Rune *t;373 int found;374 Rune last;376 found = 0;377 last = 0;378 while(*rp) {379 while(*rp == ' ' || *rp == '\t')380 Bputrune(&bout, *rp++);381 for(t = rp; *t != ' ' && *t != '\t' && *t != '\0'; t++)382 ;383 if(*rp == '\"')384 rp++;385 if(t > rp+vconst && charclass(*rp) == LETTER386 && charclass(rp[1]) == LETTER) {387 while(rp < t)388 if(*rp == '\"')389 rp++;390 else391 Bputrune(&bout, *rp++);392 last = t[-1];393 found++;394 } else395 if(found && charclass(*rp) == PUNCT && rp[1] == '\0')396 Bputrune(&bout, *rp++);397 else {398 last = t[-1];399 rp = t;400 }401 }402 Bputc(&bout, '\n');403 if(msflag && charclass(last) == PUNCT)404 Bprint(&bout, " %C\n", last);405 }407 /*408 * break into words for -w option409 */410 void411 putwords(void)412 {413 Rune *p, *p1;414 int i, nlet;417 for(p1 = line;;) {418 /*419 * skip initial specials ampersands and apostrophes420 */421 while((i = charclass(*p1)) != EXTENDED && i < DIGIT)422 if(*p1++ == '\0')423 return;424 nlet = 0;425 for(p = p1; (i = charclass(*p)) != SPECIAL || (underscoreflag && *p=='_'); p++)426 if(i == LETTER || (underscoreflag && *p == '_'))427 nlet++;428 /*429 * MDM definition of word430 */431 if(nlet > 1) {432 /*433 * delete trailing ampersands and apostrophes434 */435 while(*--p == '\'' || *p == '&'436 || charclass(*p) == PUNCT)437 ;438 while(p1 <= p)439 Bputrune(&bout, *p1++);440 Bputc(&bout, '\n');441 } else442 p1 = p;443 }444 }446 void447 comline(void)448 {449 long c1, c2;451 while(C==' ' || c=='\t')452 ;453 comx:454 if((c1=c) == '\n')455 return;456 c2 = C;457 if(c1=='.' && c2!='.')458 inmacro = NO;459 if(msflag && c1 == '['){460 refer(c2);461 return;462 }463 if(c2 == '\n')464 return;465 if(c1 == '\\' && c2 == '\"')466 SKIP;467 else468 if (filesp==files && c1=='E' && c2=='Q')469 eqn();470 else471 if(filesp==files && c1=='T' && (c2=='S' || c2=='C' || c2=='&')) {472 if(msflag)473 stbl();474 else475 tbl();476 }477 else478 if(c1=='T' && c2=='E')479 intable = NO;480 else if (!inmacro &&481 ((c1 == 'd' && c2 == 'e') ||482 (c1 == 'i' && c2 == 'g') ||483 (c1 == 'a' && c2 == 'm')))484 macro();485 else486 if(c1=='s' && c2=='o') {487 if(iflag)488 SKIP;489 else {490 getfname();491 if(fname[0]) {492 if(infile = opn(fname))493 *++filesp = infile;494 else infile = *filesp;495 }496 }497 }498 else499 if(c1=='n' && c2=='x')500 if(iflag)501 SKIP;502 else {503 getfname();504 if(fname[0] == '\0')505 exits(0);506 if(Bfildes(infile) != 0)507 Bterm(infile);508 infile = *filesp = opn(fname);509 }510 else511 if(c1 == 't' && c2 == 'm')512 SKIP;513 else514 if(c1=='h' && c2=='w')515 SKIP;516 else517 if(msflag && c1 == 'T' && c2 == 'L') {518 SKIP_TO_COM;519 goto comx;520 }521 else522 if(msflag && c1=='N' && c2 == 'R')523 SKIP;524 else525 if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){526 if(mac==MM)SKIP;527 else {528 SKIP_TO_COM;529 goto comx;530 }531 } else532 if(msflag && c1=='F' && c2=='S') {533 SKIP_TO_COM;534 goto comx;535 }536 else537 if(msflag && (c1=='S' || c1=='N') && c2=='H') {538 SKIP_TO_COM;539 goto comx;540 } else541 if(c1 == 'U' && c2 == 'X') {542 if(wordflag)543 Bprint(&bout, "UNIX\n");544 else545 Bprint(&bout, "UNIX ");546 } else547 if(msflag && c1=='O' && c2=='K') {548 SKIP_TO_COM;549 goto comx;550 } else551 if(msflag && c1=='N' && c2=='D')552 SKIP;553 else554 if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U'))555 SKIP;556 else557 if(msflag && mac==MM && c2=='L') {558 if(disp || c1=='R')559 sdis('L', 'E');560 else {561 SKIP;562 Bprint(&bout, " .");563 }564 } else565 if(!msflag && c1=='P' && c2=='S') {566 inpic();567 } else568 if(msflag && (c1=='D' || c1=='N' || c1=='K'|| c1=='P') && c2=='S') {569 sdis(c1, 'E');570 } else571 if(msflag && (c1 == 'K' && c2 == 'F')) {572 sdis(c1,'E');573 } else574 if(msflag && c1=='n' && c2=='f')575 sdis('f','i');576 else577 if(msflag && c1=='c' && c2=='e')578 sce();579 else {580 if(c1=='.' && c2=='.') {581 if(msflag) {582 SKIP;583 return;584 }585 while(C == '.')586 ;587 }588 inmacro++;589 if(c1 <= 'Z' && msflag)590 regline(YES,ONE);591 else {592 if(wordflag)593 C;594 regline(YES,TWO);595 }596 inmacro--;597 }598 }600 void601 macro(void)602 {603 if(msflag) {604 do {605 SKIP1;606 } while(C1 != '.' || C1 != '.' || C1 == '.');607 if(c != '\n')608 SKIP;609 return;610 }611 SKIP;612 inmacro = YES;613 }615 void616 sdis(char a1, char a2)617 {618 int c1, c2;619 int eqnf;620 int lct;622 if(a1 == 'P'){623 while(C1 == ' ')624 ;625 if(c == '<') {626 SKIP1;627 return;628 }629 }630 lct = 0;631 eqnf = 1;632 if(c != '\n')633 SKIP1;634 for(;;) {635 while(C1 != '.')636 if(c == '\n')637 continue;638 else639 SKIP1;640 if((c1=C1) == '\n')641 continue;642 if((c2=C1) == '\n') {643 if(a1 == 'f' && (c1 == 'P' || c1 == 'H'))644 return;645 continue;646 }647 if(c1==a1 && c2 == a2) {648 SKIP1;649 if(lct != 0){650 lct--;651 continue;652 }653 if(eqnf)654 Bprint(&bout, " .");655 Bputc(&bout, '\n');656 return;657 } else658 if(a1 == 'L' && c2 == 'L') {659 lct++;660 SKIP1;661 } else662 if(a1 == 'D' && c1 == 'E' && c2 == 'Q') {663 eqn();664 eqnf = 0;665 } else666 if(a1 == 'f') {667 if((mac == MS && c2 == 'P') ||668 (mac == MM && c1 == 'H' && c2 == 'U')){669 SKIP1;670 return;671 }672 SKIP1;673 }674 else675 SKIP1;676 }677 }679 void680 tbl(void)681 {682 while(C != '.')683 ;684 SKIP;685 intable = YES;686 }688 void689 stbl(void)690 {691 while(C != '.')692 ;693 SKIP_TO_COM;694 if(c != 'T' || C != 'E') {695 SKIP;696 pc = c;697 while(C != '.' || pc != '\n' || C != 'T' || C != 'E')698 pc = c;699 }700 }702 void703 eqn(void)704 {705 long c1, c2;706 int dflg;707 char last;709 last = 0;710 dflg = 1;711 SKIP;713 for(;;) {714 if(C1 == '.' || c == '\'') {715 while(C1==' ' || c=='\t')716 ;717 if(c=='E' && C1=='N') {718 SKIP;719 if(msflag && dflg) {720 Bputc(&bout, 'x');721 Bputc(&bout, ' ');722 if(last) {723 Bputc(&bout, last);724 Bputc(&bout, '\n');725 }726 }727 return;728 }729 } else730 if(c == 'd') {731 if(C1=='e' && C1=='l')732 if(C1=='i' && C1=='m') {733 while(C1 == ' ')734 ;735 if((c1=c)=='\n' || (c2=C1)=='\n' ||736 (c1=='o' && c2=='f' && C1=='f')) {737 ldelim = NOCHAR;738 rdelim = NOCHAR;739 } else {740 ldelim = c1;741 rdelim = c2;742 }743 }744 dflg = 0;745 }746 if(c != '\n')747 while(C1 != '\n') {748 if(chars[c] == PUNCT)749 last = c;750 else751 if(c != ' ')752 last = 0;753 }754 }755 }757 /*758 * skip over a complete backslash vconstruction759 */760 void761 backsl(void)762 {763 int bdelim;765 sw:766 switch(C1)767 {768 case '"':769 SKIP1;770 return;772 case 's':773 if(C1 == '\\')774 backsl();775 else {776 while(C1>='0' && c<='9')777 ;778 Bungetrune(infile);779 c = '0';780 }781 lp--;782 return;784 case 'f':785 case 'n':786 case '*':787 if(C1 != '(')788 return;790 case '(':791 if(msflag) {792 if(C == 'e') {793 if(C1 == 'm') {794 *lp = '-';795 return;796 }797 } else798 if(c != '\n')799 C1;800 return;801 }802 if(C1 != '\n')803 C1;804 return;806 case '$':807 C1; /* discard argument number */808 return;810 case 'b':811 case 'x':812 case 'v':813 case 'h':814 case 'w':815 case 'o':816 case 'l':817 case 'L':818 if((bdelim=C1) == '\n')819 return;820 while(C1!='\n' && c!=bdelim)821 if(c == '\\')822 backsl();823 return;825 case '\\':826 if(inmacro)827 goto sw;828 default:829 return;830 }831 }833 char*834 copys(char *s)835 {836 char *t, *t0;838 if((t0 = t = malloc((strlen(s)+1))) == 0)839 fatal("Cannot allocate memory", (char*)0);840 while(*t++ = *s++)841 ;842 return(t0);843 }845 void846 sce(void)847 {848 int n = 1;850 while (C != '\n' && !('0' <= c && c <= '9'))851 ;852 if (c != '\n') {853 for (n = c-'0';'0' <= C && c <= '9';)854 n = n*10 + c-'0';855 }856 while(n) {857 if(C == '.') {858 if(C == 'c') {859 if(C == 'e') {860 while(C == ' ')861 ;862 if(c == '0') {863 SKIP;864 break;865 } else866 SKIP;867 } else868 SKIP;869 } else870 if(c == 'P' || C == 'P') {871 if(c != '\n')872 SKIP;873 break;874 } else875 if(c != '\n')876 SKIP;877 } else {878 SKIP;879 n--;880 }881 }882 }884 void885 refer(int c1)886 {887 int c2;889 if(c1 != '\n')890 SKIP;891 c2 = 0;892 for(;;) {893 if(C != '.')894 SKIP;895 else {896 if(C != ']')897 SKIP;898 else {899 while(C != '\n')900 c2 = c;901 if(charclass(c2) == PUNCT)902 Bprint(&bout, " %C",c2);903 return;904 }905 }906 }907 }909 void910 inpic(void)911 {912 int c1;913 Rune *p1;915 /* SKIP1;*/916 while(C1 != '\n')917 if(c == '<'){918 SKIP1;919 return;920 }921 p1 = line;922 c = '\n';923 for(;;) {924 c1 = c;925 if(C1 == '.' && c1 == '\n') {926 if(C1 != 'P' || C1 != 'E') {927 if(c != '\n'){928 SKIP1;929 c = '\n';930 }931 continue;932 }933 SKIP1;934 return;935 } else936 if(c == '\"') {937 while(C1 != '\"') {938 if(c == '\\') {939 if(C1 == '\"')940 continue;941 Bungetrune(infile);942 backsl();943 } else944 *p1++ = c;945 }946 *p1++ = ' ';947 } else948 if(c == '\n' && p1 != line) {949 *p1 = '\0';950 if(wordflag)951 putwords();952 else953 Bprint(&bout, "%S\n\n", line);954 p1 = line;955 }956 }957 }959 int960 charclass(int c)961 {962 if(c < MAX_ASCII)963 return chars[c];964 switch(c){965 case 0x2013: case 0x2014: /* en dash, em dash */966 return SPECIAL;967 }968 return EXTENDED;969 }