Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
5 /*
6 * Deroff command -- strip troff, eqn, and tbl sequences from
7 * a file. Takes several flag arguments: -w causes output of one word per line
8 * rather than in the original format.
9 * -mm (or -ms) causes the corresponding macros to be interpreted
10 * so that just sentences are output
11 * -ml also gets rid of lists.
12 * -i causes deroff to ignore .so and .nx commands.
13 * Deroff follows .so and .nx commands, removes contents of macro
14 * definitions, equations (both .EQ ... .EN and $...$),
15 * Tbl command sequences, and Troff backslash constructions.
16 *
17 * All input is through the C macro; the most recently read character is in c.
18 */
20 /*
21 #define C ((c = Bgetrune(infile)) < 0?\
22 eof():\
23 ((c == ldelim) && (filesp == files)?\
24 skeqn():\
25 (c == '\n'?\
26 (linect++,c):\
27 c)))
29 #define C1 ((c = Bgetrune(infile)) == Beof?\
30 eof():\
31 (c == '\n'?\
32 (linect++,c):\
33 c))
34 */
/*
 * Character input.  C and C1 used to be large expression macros (see the
 * commented-out definitions above); they now just call fC() and fC1().
 * Both leave the character read in the global c.
 */
#define	C	fC()
#define	C1	fC1()

/*
 * SKIP and SKIP1 read up to and including the next newline using C and
 * C1 respectively.  They expand to a bare while header, so they must be
 * written as `SKIP;'.
 */
#define	SKIP	while(C != '\n')
#define	SKIP1	while(C1 != '\n')

/*
 * SKIP_TO_COM discards two lines and then keeps reading until it has
 * seen a '.' immediately after a newline followed by a character that is
 * not above 'Z'.  That following character is left in c.
 * Wrapped in do{}while(0) so that it behaves as a single statement.
 */
#define	SKIP_TO_COM	do{\
		SKIP;\
		SKIP;\
		pc = c;\
		while(C != '.' || pc != '\n' || C > 'Z')\
			pc = c;\
	}while(0)

#define	YES	1
#define	NO	0

/* values of mac */
#define	MS	0
#define	MM	1

/* values passed as the vconst argument of regline/putmac */
#define	ONE	1
#define	TWO	2

/* value of ldelim/rdelim when no eqn delimiter is set */
#define	NOCHAR	(-2)

/* character classes returned by charclass() */
#define	EXTENDED	(-1)	/* All runes above 0x7F */
#define	SPECIAL	0
#define	APOS	1
#define	PUNCT	2
#define	DIGIT	3
#define	LETTER	4
64 int linect = 0;
65 int wordflag= NO;
66 int underscoreflag = NO;
67 int msflag = NO;
68 int iflag = NO;
69 int mac = MM;
70 int disp = 0;
71 int inmacro = NO;
72 int intable = NO;
73 int eqnflag = 0;
75 #define MAX_ASCII 0X80
77 char chars[MAX_ASCII]; /* SPECIAL, PUNCT, APOS, DIGIT, or LETTER */
79 Rune line[30000];
80 Rune* lp;
82 long c;
83 long pc;
84 int ldelim = NOCHAR;
85 int rdelim = NOCHAR;
88 char** argv;
90 char fname[50];
91 Biobuf* files[15];
92 Biobuf**filesp;
93 Biobuf* infile;
94 char* devnull = "/dev/null";
95 Biobuf *infile;
96 Biobuf bout;
98 long skeqn(void);
99 Biobuf* opn(char *p);
100 int eof(void);
101 int charclass(int);
102 void getfname(void);
103 void fatal(char *s, char *p);
104 void usage(void);
105 void work(void);
106 void putmac(Rune *rp, int vconst);
107 void regline(int macline, int vconst);
108 void putwords(void);
109 void comline(void);
110 void macro(void);
111 void eqn(void);
112 void tbl(void);
113 void stbl(void);
114 void sdis(char a1, char a2);
115 void sce(void);
116 void backsl(void);
117 char* copys(char *s);
118 void refer(int c1);
119 void inpic(void);
121 int
122 fC(void)
124 c = Bgetrune(infile);
125 if(c < 0)
126 return eof();
127 if(c == ldelim && filesp == files)
128 return skeqn();
129 if(c == '\n')
130 linect++;
131 return c;
134 int
135 fC1(void)
137 c = Bgetrune(infile);
138 if(c == Beof)
139 return eof();
140 if(c == '\n')
141 linect++;
142 return c;
145 void
146 main(int argc, char *av[])
148 int i;
149 char *f;
151 argv = av;
152 Binit(&bout, 1, OWRITE);
153 ARGBEGIN{
154 case 'w':
155 wordflag = YES;
156 break;
157 case '_':
158 wordflag = YES;
159 underscoreflag = YES;
160 break;
161 case 'm':
162 msflag = YES;
163 if(f = ARGF())
164 switch(*f)
166 case 'm': mac = MM; break;
167 case 's': mac = MS; break;
168 case 'l': disp = 1; break;
169 default: usage();
171 else
172 usage();
173 break;
174 case 'i':
175 iflag = YES;
176 break;
177 default:
178 usage();
179 }ARGEND
180 if(*argv)
181 infile = opn(*argv++);
182 else{
183 infile = malloc(sizeof(Biobuf));
184 Binit(infile, 0, OREAD);
186 files[0] = infile;
187 filesp = &files[0];
189 for(i='a'; i<='z' ; ++i)
190 chars[i] = LETTER;
191 for(i='A'; i<='Z'; ++i)
192 chars[i] = LETTER;
193 for(i='0'; i<='9'; ++i)
194 chars[i] = DIGIT;
195 chars['\''] = APOS;
196 chars['&'] = APOS;
197 chars['\b'] = APOS;
198 chars['.'] = PUNCT;
199 chars[','] = PUNCT;
200 chars[';'] = PUNCT;
201 chars['?'] = PUNCT;
202 chars[':'] = PUNCT;
203 work();
206 long
207 skeqn(void)
209 while(C1 != rdelim)
210 if(c == '\\')
211 c = C1;
212 else if(c == '"')
213 while(C1 != '"')
214 if(c == '\\')
215 C1;
216 if (msflag)
217 eqnflag = 1;
218 return(c = ' ');
221 Biobuf*
222 opn(char *p)
224 Biobuf *fd;
226 while ((fd = Bopen(p, OREAD)) == 0) {
227 if(msflag || p == devnull)
228 fatal("Cannot open file %s - quitting\n", p);
229 else {
230 fprint(2, "Deroff: Cannot open file %s - continuing\n", p);
231 p = devnull;
234 linect = 0;
235 return(fd);
238 int
239 eof(void)
241 if(Bfildes(infile) != 0)
242 Bterm(infile);
243 if(filesp > files)
244 infile = *--filesp;
245 else
246 if(*argv)
247 infile = opn(*argv++);
248 else
249 exits(0);
250 return(C);
253 void
254 getfname(void)
256 char *p;
257 Rune r;
258 Dir *dir;
259 struct chain
261 struct chain* nextp;
262 char* datap;
263 } *q;
265 static struct chain *namechain= 0;
267 while(C == ' ')
269 for(p = fname; (r=c) != '\n' && r != ' ' && r != '\t' && r != '\\'; C)
270 p += runetochar(p, &r);
271 *p = '\0';
272 while(c != '\n')
273 C;
274 if(!strcmp(fname, "/sys/lib/tmac/tmac.cs")
275 || !strcmp(fname, "/sys/lib/tmac/tmac.s")) {
276 fname[0] = '\0';
277 return;
279 dir = dirstat(fname);
280 if(dir!=nil && ((dir->mode & DMDIR) || dir->type != 'M')) {
281 free(dir);
282 fname[0] = '\0';
283 return;
285 free(dir);
286 /*
287 * see if this name has already been used
288 */
290 for(q = namechain; q; q = q->nextp)
291 if( !strcmp(fname, q->datap)) {
292 fname[0] = '\0';
293 return;
295 q = (struct chain*)malloc(sizeof(struct chain));
296 q->nextp = namechain;
297 q->datap = copys(fname);
298 namechain = q;
/*
 * usage: print the synopsis on standard error and exit with status
 * "usage".  Only the flags that main() actually accepts are listed.
 */
void
usage(void)
{
	fprint(2,"usage: deroff [-w_i] [-m (m s l)] [file ...] \n");
	exits("usage");
}
/*
 * fatal: print "deroff: " followed by the message on standard error
 * and exit.  s is used as a print format with p as its single argument,
 * and s itself is passed to exits() as the exit status.
 */
void
fatal(char *s, char *p)
{
	fprint(2, "deroff: ");
	fprint(2, s, p);
	exits(s);
}
316 void
317 work(void)
320 for(;;) {
321 eqnflag = 0;
322 if(C == '.' || c == '\'')
323 comline();
324 else
325 regline(NO, TWO);
329 void
330 regline(int macline, int vconst)
332 line[0] = c;
333 lp = line;
334 for(;;) {
335 if(c == '\\') {
336 *lp = ' ';
337 backsl();
338 if(c == '%') /* no blank for hyphenation char */
339 lp--;
341 if(c == '\n')
342 break;
343 if(intable && c=='T') {
344 *++lp = C;
345 if(c=='{' || c=='}') {
346 lp[-1] = ' ';
347 *lp = C;
349 } else {
350 if(msflag == 1 && eqnflag == 1) {
351 eqnflag = 0;
352 *++lp = 'x';
354 *++lp = C;
357 *lp = '\0';
358 if(lp != line) {
359 if(wordflag)
360 putwords();
361 else
362 if(macline)
363 putmac(line,vconst);
364 else
365 Bprint(&bout, "%S\n", line);
369 void
370 putmac(Rune *rp, int vconst)
372 Rune *t;
373 int found;
374 Rune last;
376 found = 0;
377 last = 0;
378 while(*rp) {
379 while(*rp == ' ' || *rp == '\t')
380 Bputrune(&bout, *rp++);
381 for(t = rp; *t != ' ' && *t != '\t' && *t != '\0'; t++)
383 if(*rp == '\"')
384 rp++;
385 if(t > rp+vconst && charclass(*rp) == LETTER
386 && charclass(rp[1]) == LETTER) {
387 while(rp < t)
388 if(*rp == '\"')
389 rp++;
390 else
391 Bputrune(&bout, *rp++);
392 last = t[-1];
393 found++;
394 } else
395 if(found && charclass(*rp) == PUNCT && rp[1] == '\0')
396 Bputrune(&bout, *rp++);
397 else {
398 last = t[-1];
399 rp = t;
402 Bputc(&bout, '\n');
403 if(msflag && charclass(last) == PUNCT)
404 Bprint(&bout, " %C\n", last);
407 /*
408 * break into words for -w option
409 */
410 void
411 putwords(void)
413 Rune *p, *p1;
414 int i, nlet;
417 for(p1 = line;;) {
418 /*
419 * skip initial specials ampersands and apostrophes
420 */
421 while((i = charclass(*p1)) != EXTENDED && i < DIGIT)
422 if(*p1++ == '\0')
423 return;
424 nlet = 0;
425 for(p = p1; (i = charclass(*p)) != SPECIAL || (underscoreflag && *p=='_'); p++)
426 if(i == LETTER || (underscoreflag && *p == '_'))
427 nlet++;
428 /*
429 * MDM definition of word
430 */
431 if(nlet > 1) {
432 /*
433 * delete trailing ampersands and apostrophes
434 */
435 while(*--p == '\'' || *p == '&'
436 || charclass(*p) == PUNCT)
438 while(p1 <= p)
439 Bputrune(&bout, *p1++);
440 Bputc(&bout, '\n');
441 } else
442 p1 = p;
446 void
447 comline(void)
449 long c1, c2;
451 while(C==' ' || c=='\t')
453 comx:
454 if((c1=c) == '\n')
455 return;
456 c2 = C;
457 if(c1=='.' && c2!='.')
458 inmacro = NO;
459 if(msflag && c1 == '['){
460 refer(c2);
461 return;
463 if(c2 == '\n')
464 return;
465 if(c1 == '\\' && c2 == '\"')
466 SKIP;
467 else
468 if (filesp==files && c1=='E' && c2=='Q')
469 eqn();
470 else
471 if(filesp==files && c1=='T' && (c2=='S' || c2=='C' || c2=='&')) {
472 if(msflag)
473 stbl();
474 else
475 tbl();
477 else
478 if(c1=='T' && c2=='E')
479 intable = NO;
480 else if (!inmacro &&
481 ((c1 == 'd' && c2 == 'e') ||
482 (c1 == 'i' && c2 == 'g') ||
483 (c1 == 'a' && c2 == 'm')))
484 macro();
485 else
486 if(c1=='s' && c2=='o') {
487 if(iflag)
488 SKIP;
489 else {
490 getfname();
491 if(fname[0]) {
492 if(infile = opn(fname))
493 *++filesp = infile;
494 else infile = *filesp;
498 else
499 if(c1=='n' && c2=='x')
500 if(iflag)
501 SKIP;
502 else {
503 getfname();
504 if(fname[0] == '\0')
505 exits(0);
506 if(Bfildes(infile) != 0)
507 Bterm(infile);
508 infile = *filesp = opn(fname);
510 else
511 if(c1 == 't' && c2 == 'm')
512 SKIP;
513 else
514 if(c1=='h' && c2=='w')
515 SKIP;
516 else
517 if(msflag && c1 == 'T' && c2 == 'L') {
518 SKIP_TO_COM;
519 goto comx;
521 else
522 if(msflag && c1=='N' && c2 == 'R')
523 SKIP;
524 else
525 if(msflag && c1 == 'A' && (c2 == 'U' || c2 == 'I')){
526 if(mac==MM)SKIP;
527 else {
528 SKIP_TO_COM;
529 goto comx;
531 } else
532 if(msflag && c1=='F' && c2=='S') {
533 SKIP_TO_COM;
534 goto comx;
536 else
537 if(msflag && (c1=='S' || c1=='N') && c2=='H') {
538 SKIP_TO_COM;
539 goto comx;
540 } else
541 if(c1 == 'U' && c2 == 'X') {
542 if(wordflag)
543 Bprint(&bout, "UNIX\n");
544 else
545 Bprint(&bout, "UNIX ");
546 } else
547 if(msflag && c1=='O' && c2=='K') {
548 SKIP_TO_COM;
549 goto comx;
550 } else
551 if(msflag && c1=='N' && c2=='D')
552 SKIP;
553 else
554 if(msflag && mac==MM && c1=='H' && (c2==' '||c2=='U'))
555 SKIP;
556 else
557 if(msflag && mac==MM && c2=='L') {
558 if(disp || c1=='R')
559 sdis('L', 'E');
560 else {
561 SKIP;
562 Bprint(&bout, " .");
564 } else
565 if(!msflag && c1=='P' && c2=='S') {
566 inpic();
567 } else
568 if(msflag && (c1=='D' || c1=='N' || c1=='K'|| c1=='P') && c2=='S') {
569 sdis(c1, 'E');
570 } else
571 if(msflag && (c1 == 'K' && c2 == 'F')) {
572 sdis(c1,'E');
573 } else
574 if(msflag && c1=='n' && c2=='f')
575 sdis('f','i');
576 else
577 if(msflag && c1=='c' && c2=='e')
578 sce();
579 else {
580 if(c1=='.' && c2=='.') {
581 if(msflag) {
582 SKIP;
583 return;
585 while(C == '.')
588 inmacro++;
589 if(c1 <= 'Z' && msflag)
590 regline(YES,ONE);
591 else {
592 if(wordflag)
593 C;
594 regline(YES,TWO);
596 inmacro--;
600 void
601 macro(void)
603 if(msflag) {
604 do {
605 SKIP1;
606 } while(C1 != '.' || C1 != '.' || C1 == '.');
607 if(c != '\n')
608 SKIP;
609 return;
611 SKIP;
612 inmacro = YES;
615 void
616 sdis(char a1, char a2)
618 int c1, c2;
619 int eqnf;
620 int lct;
622 if(a1 == 'P'){
623 while(C1 == ' ')
625 if(c == '<') {
626 SKIP1;
627 return;
630 lct = 0;
631 eqnf = 1;
632 if(c != '\n')
633 SKIP1;
634 for(;;) {
635 while(C1 != '.')
636 if(c == '\n')
637 continue;
638 else
639 SKIP1;
640 if((c1=C1) == '\n')
641 continue;
642 if((c2=C1) == '\n') {
643 if(a1 == 'f' && (c1 == 'P' || c1 == 'H'))
644 return;
645 continue;
647 if(c1==a1 && c2 == a2) {
648 SKIP1;
649 if(lct != 0){
650 lct--;
651 continue;
653 if(eqnf)
654 Bprint(&bout, " .");
655 Bputc(&bout, '\n');
656 return;
657 } else
658 if(a1 == 'L' && c2 == 'L') {
659 lct++;
660 SKIP1;
661 } else
662 if(a1 == 'D' && c1 == 'E' && c2 == 'Q') {
663 eqn();
664 eqnf = 0;
665 } else
666 if(a1 == 'f') {
667 if((mac == MS && c2 == 'P') ||
668 (mac == MM && c1 == 'H' && c2 == 'U')){
669 SKIP1;
670 return;
672 SKIP1;
674 else
675 SKIP1;
679 void
680 tbl(void)
682 while(C != '.')
684 SKIP;
685 intable = YES;
688 void
689 stbl(void)
691 while(C != '.')
693 SKIP_TO_COM;
694 if(c != 'T' || C != 'E') {
695 SKIP;
696 pc = c;
697 while(C != '.' || pc != '\n' || C != 'T' || C != 'E')
698 pc = c;
702 void
703 eqn(void)
705 long c1, c2;
706 int dflg;
707 char last;
709 last = 0;
710 dflg = 1;
711 SKIP;
713 for(;;) {
714 if(C1 == '.' || c == '\'') {
715 while(C1==' ' || c=='\t')
717 if(c=='E' && C1=='N') {
718 SKIP;
719 if(msflag && dflg) {
720 Bputc(&bout, 'x');
721 Bputc(&bout, ' ');
722 if(last) {
723 Bputc(&bout, last);
724 Bputc(&bout, '\n');
727 return;
729 } else
730 if(c == 'd') {
731 if(C1=='e' && C1=='l')
732 if(C1=='i' && C1=='m') {
733 while(C1 == ' ')
735 if((c1=c)=='\n' || (c2=C1)=='\n' ||
736 (c1=='o' && c2=='f' && C1=='f')) {
737 ldelim = NOCHAR;
738 rdelim = NOCHAR;
739 } else {
740 ldelim = c1;
741 rdelim = c2;
744 dflg = 0;
746 if(c != '\n')
747 while(C1 != '\n') {
748 if(charclass(c) == PUNCT)
749 last = c;
750 else
751 if(c != ' ')
752 last = 0;
757 /*
758 * skip over a complete backslash vconstruction
759 */
760 void
761 backsl(void)
763 int bdelim;
765 sw:
766 switch(C1)
768 case '"':
769 SKIP1;
770 return;
772 case 's':
773 if(C1 == '\\')
774 backsl();
775 else {
776 while(C1>='0' && c<='9')
778 Bungetrune(infile);
779 c = '0';
781 lp--;
782 return;
784 case 'f':
785 case 'n':
786 case '*':
787 if(C1 != '(')
788 return;
790 case '(':
791 if(msflag) {
792 if(C == 'e') {
793 if(C1 == 'm') {
794 *lp = '-';
795 return;
797 } else
798 if(c != '\n')
799 C1;
800 return;
802 if(C1 != '\n')
803 C1;
804 return;
806 case '$':
807 C1; /* discard argument number */
808 return;
810 case 'b':
811 case 'x':
812 case 'v':
813 case 'h':
814 case 'w':
815 case 'o':
816 case 'l':
817 case 'L':
818 if((bdelim=C1) == '\n')
819 return;
820 while(C1!='\n' && c!=bdelim)
821 if(c == '\\')
822 backsl();
823 return;
825 case '\\':
826 if(inmacro)
827 goto sw;
828 default:
829 return;
/*
 * copys: return a freshly allocated copy of the string s.
 * Calls fatal() if memory cannot be allocated.
 */
char*
copys(char *s)
{
	char *dup;

	dup = malloc(strlen(s)+1);
	if(dup == 0)
		fatal("Cannot allocate memory", (char*)0);
	strcpy(dup, s);
	return dup;
}
845 void
846 sce(void)
848 int n = 1;
850 while (C != '\n' && !('0' <= c && c <= '9'))
852 if (c != '\n') {
853 for (n = c-'0';'0' <= C && c <= '9';)
854 n = n*10 + c-'0';
856 while(n) {
857 if(C == '.') {
858 if(C == 'c') {
859 if(C == 'e') {
860 while(C == ' ')
862 if(c == '0') {
863 SKIP;
864 break;
865 } else
866 SKIP;
867 } else
868 SKIP;
869 } else
870 if(c == 'P' || C == 'P') {
871 if(c != '\n')
872 SKIP;
873 break;
874 } else
875 if(c != '\n')
876 SKIP;
877 } else {
878 SKIP;
879 n--;
884 void
885 refer(int c1)
887 int c2;
889 if(c1 != '\n')
890 SKIP;
891 c2 = 0;
892 for(;;) {
893 if(C != '.')
894 SKIP;
895 else {
896 if(C != ']')
897 SKIP;
898 else {
899 while(C != '\n')
900 c2 = c;
901 if(charclass(c2) == PUNCT)
902 Bprint(&bout, " %C",c2);
903 return;
909 void
910 inpic(void)
912 int c1;
913 Rune *p1;
915 /* SKIP1;*/
916 while(C1 != '\n')
917 if(c == '<'){
918 SKIP1;
919 return;
921 p1 = line;
922 c = '\n';
923 for(;;) {
924 c1 = c;
925 if(C1 == '.' && c1 == '\n') {
926 if(C1 != 'P' || C1 != 'E') {
927 if(c != '\n'){
928 SKIP1;
929 c = '\n';
931 continue;
933 SKIP1;
934 return;
935 } else
936 if(c == '\"') {
937 while(C1 != '\"') {
938 if(c == '\\') {
939 if(C1 == '\"')
940 continue;
941 Bungetrune(infile);
942 backsl();
943 } else
944 *p1++ = c;
946 *p1++ = ' ';
947 } else
948 if(c == '\n' && p1 != line) {
949 *p1 = '\0';
950 if(wordflag)
951 putwords();
952 else
953 Bprint(&bout, "%S\n\n", line);
954 p1 = line;
959 int
960 charclass(int c)
962 if(c < MAX_ASCII)
963 return chars[c];
964 switch(c){
965 case 0x2013: case 0x2014: /* en dash, em dash */
966 return SPECIAL;
968 return EXTENDED;