Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
6 #define Bungetrune Bungetc /* ok for now. */
8 /*
9 * all these are 32 bit
10 */
11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n) (((n)+32)/32)
16 char *PARSER = "#9/lib/yaccpar";
17 char *PARSERS = "#9/lib/yaccpars";
19 #define TEMPNAME "y.tmp.XXXXXX"
20 #define ACTNAME "y.acts.XXXXXX"
21 #define OFILE "tab.c"
22 #define FILEU "output"
23 #define FILED "tab.h"
24 #define FILEDEBUG "debug"
26 enum
27 {
28 /*
29 * the following are adjustable
30 * according to memory size
31 */
32 ACTSIZE = 40000,
33 MEMSIZE = 40000,
34 NSTATES = 2000,
35 NTERMS = 511,
36 NPROD = 1600,
37 NNONTERM = 600,
38 TEMPSIZE = 2000,
39 CNAMSZ = 10000,
40 LSETSIZE = 2400,
41 WSETSIZE = 350,
43 NAMESIZE = 50,
44 NTYPES = 63,
45 ISIZE = 400,
47 PRIVATE = 0xE000, /* unicode private use */
49 /* relationships which must hold:
50 TBITSET ints must hold NTERMS+1 bits...
51 WSETSIZE >= NNONTERM
52 LSETSIZE >= NNONTERM
53 TEMPSIZE >= NTERMS + NNONTERM + 1
54 TEMPSIZE >= NSTATES
55 */
57 NTBASE = 010000,
58 ERRCODE = 8190,
59 ACCEPTCODE = 8191,
61 NOASC = 0, /* no assoc. */
62 LASC = 1, /* left assoc. */
63 RASC = 2, /* right assoc. */
64 BASC = 3, /* binary assoc. */
66 /* flags for state generation */
68 DONE = 0,
69 MUSTDO = 1,
70 MUSTLOOKAHEAD = 2,
72 /* flags for a rule having an action, and being reduced */
74 ACTFLAG = 04,
75 REDFLAG = 010,
77 /* output parser flags */
78 YYFLAG1 = -1000,
80 /* parse tokens */
81 IDENTIFIER = PRIVATE,
82 MARK,
83 TERM,
84 LEFT,
85 RIGHT,
86 BINARY,
87 PREC,
88 LCURLY,
89 IDENTCOLON,
90 NUMBER,
91 START,
92 TYPEDEF,
93 TYPENAME,
94 UNION,
96 ENDFILE = 0,
98 EMPTY = 1,
99 WHOKNOWS = 0,
100 OK = 1,
101 NOMORE = -1000
102 };
104 /* macros for getting associativity and precedence levels */
106 #define ASSOC(i) ((i)&03)
107 #define PLEVEL(i) (((i)>>4)&077)
108 #define TYPE(i) (((i)>>10)&077)
110 /* macros for setting associativity and precedence levels */
112 #define SETASC(i,j) i |= j
113 #define SETPLEV(i,j) i |= (j<<4)
114 #define SETTYPE(i,j) i |= (j<<10)
116 /* looping macros */
118 #define TLOOP(i) for(i=1; i<=ntokens; i++)
119 #define NTLOOP(i) for(i=0; i<=nnonter; i++)
120 #define PLOOP(s,i) for(i=s; i<nprod; i++)
121 #define SLOOP(i) for(i=0; i<nstate; i++)
122 #define WSBUMP(x) x++
123 #define WSLOOP(s,j) for(j=s; j<cwp; j++)
124 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
125 #define SETLOOP(i) for(i=0; i<tbitset; i++)
127 /* command to clobber tempfiles after use */
129 #define ZAPFILE(x) if(x) remove(x)
131 /* I/O descriptors */
133 Biobuf* faction; /* file for saving actions */
134 Biobuf* fdefine; /* file for #defines */
135 Biobuf* fdebug; /* y.debug for strings for debugging */
136 Biobuf* ftable; /* y.tab.c file */
137 Biobuf* ftemp; /* tempfile to pass 2 */
138 Biobuf* finput; /* input file */
139 Biobuf* foutput; /* y.output file */
141 /* communication variables between various I/O routines */
143 char* infile; /* input file name */
144 int numbval; /* value of an input number */
145 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */
147 /* structure declarations */
149 typedef
150 struct
152 int lset[TBITSET];
153 } Lkset;
155 typedef
156 struct
158 int* pitem;
159 Lkset* look;
160 } Item;
162 typedef
163 struct
165 char* name;
166 int value;
167 } Symb;
169 typedef
170 struct
172 int* pitem;
173 int flag;
174 Lkset ws;
175 } Wset;
177 /* storage of names */
179 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
180 int cnamsz = CNAMSZ; /* size of cnames */
181 char* cnamp = cnames; /* place where next name is to be put in */
182 int ndefout = 4; /* number of defined symbols output */
183 char* tempname;
184 char* actname;
185 char ttempname[] = TEMPNAME;
186 char tactname[] = ACTNAME;
187 char* parser;
188 char* yydebug;
189 int yyarg;
190 int yyline = 1;
192 /* storage of types */
193 int ntypes; /* number of types defined */
194 char* typeset[NTYPES]; /* pointers to type tags */
196 /* token information */
198 int ntokens = 0 ; /* number of tokens */
199 Symb tokset[NTERMS];
200 int toklev[NTERMS]; /* vector with the precedence of the terminals */
202 /* nonterminal information */
204 int nnonter = -1; /* the number of nonterminals */
205 Symb nontrst[NNONTERM];
206 int start; /* start symbol */
208 /* assigned token type values */
209 int extval = 0;
211 char* ytabc = OFILE; /* name of y.tab.c */
213 /* grammar rule information */
215 int mem0[MEMSIZE] ; /* production storage */
216 int* mem = mem0;
217 int nprod = 1; /* number of productions */
218 int* prdptr[NPROD]; /* pointers to descriptions of productions */
219 int levprd[NPROD]; /* precedence levels for the productions */
220 int rlines[NPROD]; /* line number for this rule */
222 /* state information */
224 int nstate = 0; /* number of states */
225 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
226 int tystate[NSTATES]; /* contains type information about the states */
227 int defact[NSTATES]; /* the default actions of states */
228 int tstates[NTERMS]; /* states generated by terminal gotos */
229 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
230 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
231 int lastred; /* the number of the last reduction of a state */
233 /* lookahead set information */
235 Lkset lkst[LSETSIZE];
236 int nolook; /* flag to turn off lookahead computations */
237 int tbitset; /* size of lookahead sets */
238 int nlset = 0; /* next lookahead set index */
239 int nolook = 0; /* flag to suppress lookahead computations */
240 Lkset clset; /* temporary storage for lookahead computations */
242 /* working set information */
244 Wset wsets[WSETSIZE];
245 Wset* cwp;
247 /* storage for action table */
249 int amem[ACTSIZE]; /* action table storage */
250 int* memp = amem; /* next free action table position */
251 int indgo[NSTATES]; /* index to the stored goto table */
253 /* temporary vector, indexable by states, terms, or ntokens */
255 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
256 int lineno = 1; /* current input line number */
257 int fatfl = 1; /* if on, error is fatal */
258 int nerrors = 0; /* number of errors */
260 /* statistics collection variables */
262 int zzgoent;
263 int zzgobest;
264 int zzacent;
265 int zzexcp;
266 int zzclose;
267 int zzrrconf;
268 int zzsrconf;
270 int* ggreed = lkst[0].lset;
271 int* pgo = wsets[0].ws.lset;
272 int* yypgo = &nontrst[0].value;
274 int maxspr = 0; /* maximum spread of any entry */
275 int maxoff = 0; /* maximum offset into a array */
276 int* pmem = mem0;
277 int* maxa;
278 int nxdb = 0;
279 int adb = 0;
282 /* storage for information about the nonterminals */
284 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
285 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
286 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
288 /* random stuff picked out from between functions */
290 int indebug = 0;
291 Wset* zzcwp = wsets;
292 int zzgoent = 0;
293 int zzgobest = 0;
294 int zzacent = 0;
295 int zzexcp = 0;
296 int zzclose = 0;
297 int zzsrconf = 0;
298 int* zzmemsz = mem0;
299 int zzrrconf = 0;
300 int pidebug = 0; /* debugging flag for putitem */
301 int gsdebug = 0;
302 int cldebug = 0; /* debugging flag for closure */
303 int pkdebug = 0;
304 int g2debug = 0;
306 struct
308 char* name;
309 long value;
310 } resrv[] =
312 "binary", BINARY,
313 "left", LEFT,
314 "nonassoc", BINARY,
315 "prec", PREC,
316 "right", RIGHT,
317 "start", START,
318 "term", TERM,
319 "token", TERM,
320 "type", TYPEDEF,
321 "union", UNION,
322 0,
323 };
325 /* define functions */
327 void main(int, char**);
328 void others(void);
329 char* chcopy(char*, char*);
330 char* writem(int*);
331 char* symnam(int);
332 void summary(void);
333 void error(char*, ...);
334 void aryfil(int*, int, int);
335 int setunion(int*, int*);
336 void prlook(Lkset*);
337 void cpres(void);
338 void cpfir(void);
339 int state(int);
340 void putitem(int*, Lkset*);
341 void cempty(void);
342 void stagen(void);
343 void closure(int);
344 Lkset* flset(Lkset*);
345 void cleantmp(void);
346 void intr(void);
347 void setup(int, char**);
348 void finact(void);
349 int defin(int, char*);
350 void defout(int);
351 char* cstash(char*);
352 long gettok(void);
353 int fdtype(int);
354 int chfind(int, char*);
355 void cpyunion(void);
356 void cpycode(void);
357 int skipcom(void);
358 void cpyact(int);
359 void openup(char*, int, int, int, char*);
360 void output(void);
361 int apack(int*, int);
362 void go2out(void);
363 void go2gen(int);
364 void precftn(int, int, int);
365 void wract(int);
366 void wrstate(int);
367 void warray(char*, int*, int);
368 void hideprod(void);
369 void callopt(void);
370 void gin(int);
371 void stin(int);
372 int nxti(void);
373 void osummary(void);
374 void aoutput(void);
375 void arout(char*, int*, int);
376 int gtnm(void);
378 void
379 main(int argc, char *argv[])
381 PARSER = unsharp(PARSER);
382 PARSERS = unsharp(PARSERS);
383 parser = PARSER;
385 setup(argc, argv); /* initialize and read productions */
386 tbitset = NWORDS(ntokens);
387 cpres(); /* make table of which productions yield a given nonterminal */
388 cempty(); /* make a table of which nonterminals can match the empty string */
389 cpfir(); /* make a table of firsts of nonterminals */
390 stagen(); /* generate the states */
391 output(); /* write the states and the tables */
392 go2out();
393 hideprod();
394 summary();
395 callopt();
396 others();
397 exits(0);
400 /*
401 * put out other arrays, copy the parsers
402 */
403 void
404 others(void)
406 int c, i, j;
408 finput = Bopen(parser, OREAD);
409 if(finput == 0)
410 error("cannot open parser %s: %r", parser);
411 warray("yyr1", levprd, nprod);
412 aryfil(temp1, nprod, 0);
413 PLOOP(1, i)
414 temp1[i] = prdptr[i+1]-prdptr[i]-2;
415 warray("yyr2", temp1, nprod);
417 aryfil(temp1, nstate, -1000);
418 TLOOP(i)
419 for(j=tstates[i]; j!=0; j=mstates[j])
420 temp1[j] = i;
421 NTLOOP(i)
422 for(j=ntstates[i]; j!=0; j=mstates[j])
423 temp1[j] = -i;
424 warray("yychk", temp1, nstate);
425 warray("yydef", defact, nstate);
427 /* put out token translation tables */
428 /* table 1 has 0-256 */
429 aryfil(temp1, 256, 0);
430 c = 0;
431 TLOOP(i) {
432 j = tokset[i].value;
433 if(j >= 0 && j < 256) {
434 if(temp1[j]) {
435 print("yacc bug -- cant have 2 different Ts with same value\n");
436 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
437 nerrors++;
439 temp1[j] = i;
440 if(j > c)
441 c = j;
444 warray("yytok1", temp1, c+1);
446 /* table 2 has PRIVATE-PRIVATE+256 */
447 aryfil(temp1, 256, 0);
448 c = 0;
449 TLOOP(i) {
450 j = tokset[i].value - PRIVATE;
451 if(j >= 0 && j < 256) {
452 if(temp1[j]) {
453 print("yacc bug -- cant have 2 different Ts with same value\n");
454 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
455 nerrors++;
457 temp1[j] = i;
458 if(j > c)
459 c = j;
462 warray("yytok2", temp1, c+1);
464 /* table 3 has everything else */
465 Bprint(ftable, "static\tconst\tlong yytok3[] =\n{\n");
466 c = 0;
467 TLOOP(i) {
468 j = tokset[i].value;
469 if(j >= 0 && j < 256)
470 continue;
471 if(j >= PRIVATE && j < 256+PRIVATE)
472 continue;
474 Bprint(ftable, "%4d,%4d,", j, i);
475 c++;
476 if(c%5 == 0)
477 Bprint(ftable, "\n");
479 Bprint(ftable, "%4d\n};\n", 0);
481 /* copy parser text */
482 while((c=Bgetrune(finput)) != Beof) {
483 if(c == '$') {
484 if((c = Bgetrune(finput)) != 'A')
485 Bputrune(ftable, '$');
486 else { /* copy actions */
487 faction = Bopen(actname, OREAD);
488 if(faction == 0)
489 error("cannot reopen action tempfile");
490 while((c=Bgetrune(faction)) != Beof)
491 Bputrune(ftable, c);
492 Bterm(faction);
493 ZAPFILE(actname);
494 c = Bgetrune(finput);
497 Bputrune(ftable, c);
499 Bterm(ftable);
502 /*
503 * copies string q into p, returning next free char ptr
504 */
505 char*
506 chcopy(char* p, char* q)
508 int c;
510 while(c = *q) {
511 if(c == '"')
512 *p++ = '\\';
513 *p++ = c;
514 q++;
516 *p = 0;
517 return p;
520 /*
521 * creates output string for item pointed to by pp
522 */
523 char*
524 writem(int *pp)
526 int i,*p;
527 static char sarr[ISIZE];
528 char* q;
530 for(p=pp; *p>0; p++)
532 p = prdptr[-*p];
533 q = chcopy(sarr, nontrst[*p-NTBASE].name);
534 q = chcopy(q, ": ");
535 for(;;) {
536 *q = ' ';
537 p++;
538 if(p == pp)
539 *q = '.';
540 q++;
541 *q = '\0';
542 i = *p;
543 if(i <= 0)
544 break;
545 q = chcopy(q, symnam(i));
546 if(q > &sarr[ISIZE-30])
547 error("item too big");
550 /* an item calling for a reduction */
551 i = *pp;
552 if(i < 0 ) {
553 q = chcopy(q, " (");
554 sprint(q, "%d)", -i);
556 return sarr;
559 /*
560 * return a pointer to the name of symbol i
561 */
562 char*
563 symnam(int i)
565 char* cp;
567 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
568 if(*cp == ' ')
569 cp++;
570 return cp;
573 /*
574 * output the summary on y.output
575 */
576 void
577 summary(void)
580 if(foutput != 0) {
581 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
582 ntokens, NTERMS, nnonter, NNONTERM);
583 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
584 nprod, NPROD, nstate, NSTATES);
585 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
586 zzsrconf, zzrrconf);
587 Bprint(foutput, "%d/%d working sets used\n",
588 (int)(zzcwp-wsets), WSETSIZE);
589 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
590 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
591 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
592 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
593 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
594 Bprint(foutput, "%d goto entries\n", zzgoent);
595 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
597 if(zzsrconf != 0 || zzrrconf != 0) {
598 print("\nconflicts: ");
599 if(zzsrconf)
600 print("%d shift/reduce", zzsrconf);
601 if(zzsrconf && zzrrconf)
602 print(", ");
603 if(zzrrconf)
604 print("%d reduce/reduce", zzrrconf);
605 print("\n");
607 if(ftemp != 0) {
608 Bterm(ftemp);
609 ftemp = 0;
611 if(fdefine != 0) {
612 Bterm(fdefine);
613 fdefine = 0;
617 /*
618 * write out error comment -- NEEDS WORK
619 */
620 void
621 error(char *s, ...)
623 va_list arg;
625 nerrors++;
626 fprint(2, "\n fatal error:");
627 va_start(arg, s);
628 vfprint(2, s, arg);
629 va_end(arg);
630 fprint(2, ", %s:%d\n", infile, lineno);
631 if(!fatfl)
632 return;
633 summary();
634 cleantmp();
635 exits("error");
638 /*
639 * set elements 0 through n-1 to c
640 */
641 void
642 aryfil(int *v, int n, int c)
644 int i;
646 for(i=0; i<n; i++)
647 v[i] = c;
650 /*
651 * set a to the union of a and b
652 * return 1 if b is not a subset of a, 0 otherwise
653 */
654 int
655 setunion(int *a, int *b)
657 int i, x, sub;
659 sub = 0;
660 SETLOOP(i) {
661 x = *a;
662 *a |= *b;
663 if(*a != x)
664 sub = 1;
665 a++;
666 b++;
668 return sub;
671 void
672 prlook(Lkset* p)
674 int j, *pp;
676 pp = p->lset;
677 if(pp == 0)
678 Bprint(foutput, "\tNULL");
679 else {
680 Bprint(foutput, " { ");
681 TLOOP(j)
682 if(BIT(pp,j))
683 Bprint(foutput, "%s ", symnam(j));
684 Bprint(foutput, "}");
688 /*
689 * compute an array with the beginnings of productions yielding given nonterminals
690 * The array pres points to these lists
691 * the array pyield has the lists: the total size is only NPROD+1
692 */
693 void
694 cpres(void)
696 int c, j, i, **pmem;
697 static int *pyield[NPROD];
699 pmem = pyield;
700 NTLOOP(i) {
701 c = i+NTBASE;
702 pres[i] = pmem;
703 fatfl = 0; /* make undefined symbols nonfatal */
704 PLOOP(0, j)
705 if(*prdptr[j] == c)
706 *pmem++ = prdptr[j]+1;
707 if(pres[i] == pmem)
708 error("nonterminal %s not defined!", nontrst[i].name);
710 pres[i] = pmem;
711 fatfl = 1;
712 if(nerrors) {
713 summary();
714 cleantmp();
715 exits("error");
717 if(pmem != &pyield[nprod])
718 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
721 /*
722 * compute an array with the first of nonterminals
723 */
724 void
725 cpfir(void)
727 int *p, **s, i, **t, ch, changes;
729 zzcwp = &wsets[nnonter];
730 NTLOOP(i) {
731 aryfil(wsets[i].ws.lset, tbitset, 0);
732 t = pres[i+1];
733 /* initially fill the sets */
734 for(s=pres[i]; s<t; ++s)
735 for(p = *s; (ch = *p) > 0; ++p) {
736 if(ch < NTBASE) {
737 SETBIT(wsets[i].ws.lset, ch);
738 break;
740 if(!pempty[ch-NTBASE])
741 break;
745 /* now, reflect transitivity */
746 changes = 1;
747 while(changes) {
748 changes = 0;
749 NTLOOP(i) {
750 t = pres[i+1];
751 for(s = pres[i]; s < t; ++s)
752 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
753 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
754 if(!pempty[ch])
755 break;
760 NTLOOP(i)
761 pfirst[i] = flset(&wsets[i].ws);
762 if(!indebug)
763 return;
764 if(foutput != 0)
765 NTLOOP(i) {
766 Bprint(foutput, "\n%s: ", nontrst[i].name);
767 prlook(pfirst[i]);
768 Bprint(foutput, " %d\n", pempty[i]);
772 /*
773 * sorts last state,and sees if it equals earlier ones. returns state number
774 */
775 int
776 state(int c)
778 Item *p1, *p2, *k, *l, *q1, *q2;
779 int size1, size2, i;
781 p1 = pstate[nstate];
782 p2 = pstate[nstate+1];
783 if(p1 == p2)
784 return 0; /* null state */
785 /* sort the items */
786 for(k = p2-1; k > p1; k--) /* make k the biggest */
787 for(l = k-1; l >= p1; --l)
788 if(l->pitem > k->pitem) {
789 int *s;
790 Lkset *ss;
792 s = k->pitem;
793 k->pitem = l->pitem;
794 l->pitem = s;
795 ss = k->look;
796 k->look = l->look;
797 l->look = ss;
799 size1 = p2 - p1; /* size of state */
801 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
802 /* get ith state */
803 q1 = pstate[i];
804 q2 = pstate[i+1];
805 size2 = q2 - q1;
806 if(size1 != size2)
807 continue;
808 k = p1;
809 for(l = q1; l < q2; l++) {
810 if(l->pitem != k->pitem)
811 break;
812 k++;
814 if(l != q2)
815 continue;
816 /* found it */
817 pstate[nstate+1] = pstate[nstate]; /* delete last state */
818 /* fix up lookaheads */
819 if(nolook)
820 return i;
821 for(l = q1, k = p1; l < q2; ++l, ++k ) {
822 int s;
824 SETLOOP(s)
825 clset.lset[s] = l->look->lset[s];
826 if(setunion(clset.lset, k->look->lset)) {
827 tystate[i] = MUSTDO;
828 /* register the new set */
829 l->look = flset( &clset );
832 return i;
834 /* state is new */
835 if(nolook)
836 error("yacc state/nolook error");
837 pstate[nstate+2] = p2;
838 if(nstate+1 >= NSTATES)
839 error("too many states");
840 if(c >= NTBASE) {
841 mstates[nstate] = ntstates[c-NTBASE];
842 ntstates[c-NTBASE] = nstate;
843 } else {
844 mstates[nstate] = tstates[c];
845 tstates[c] = nstate;
847 tystate[nstate] = MUSTDO;
848 return nstate++;
851 void
852 putitem(int *ptr, Lkset *lptr)
854 Item *j;
856 if(pidebug && foutput != 0)
857 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
858 j = pstate[nstate+1];
859 j->pitem = ptr;
860 if(!nolook)
861 j->look = flset(lptr);
862 pstate[nstate+1] = ++j;
863 if((int*)j > zzmemsz) {
864 zzmemsz = (int*)j;
865 if(zzmemsz >= &mem0[MEMSIZE])
866 error("out of state space");
870 /*
871 * mark nonterminals which derive the empty string
872 * also, look for nonterminals which don't derive any token strings
873 */
874 void
875 cempty(void)
878 int i, *p;
880 /* first, use the array pempty to detect productions that can never be reduced */
881 /* set pempty to WHONOWS */
882 aryfil(pempty, nnonter+1, WHOKNOWS);
884 /* now, look at productions, marking nonterminals which derive something */
885 more:
886 PLOOP(0, i) {
887 if(pempty[*prdptr[i] - NTBASE])
888 continue;
889 for(p = prdptr[i]+1; *p >= 0; ++p)
890 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
891 break;
892 /* production can be derived */
893 if(*p < 0) {
894 pempty[*prdptr[i]-NTBASE] = OK;
895 goto more;
899 /* now, look at the nonterminals, to see if they are all OK */
900 NTLOOP(i) {
901 /* the added production rises or falls as the start symbol ... */
902 if(i == 0)
903 continue;
904 if(pempty[i] != OK) {
905 fatfl = 0;
906 error("nonterminal %s never derives any token string", nontrst[i].name);
910 if(nerrors) {
911 summary();
912 cleantmp();
913 exits("error");
916 /* now, compute the pempty array, to see which nonterminals derive the empty string */
917 /* set pempty to WHOKNOWS */
918 aryfil( pempty, nnonter+1, WHOKNOWS);
920 /* loop as long as we keep finding empty nonterminals */
922 again:
923 PLOOP(1, i) {
924 /* not known to be empty */
925 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
926 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
928 /* we have a nontrivially empty nonterminal */
929 if(*p < 0) {
930 pempty[*prdptr[i]-NTBASE] = EMPTY;
931 /* got one ... try for another */
932 goto again;
938 /*
939 * generate the states
940 */
941 void
942 stagen(void)
945 int c, i, j, more;
946 Wset *p, *q;
948 /* initialize */
949 nstate = 0;
951 /* THIS IS FUNNY from the standpoint of portability
952 * it represents the magic moment when the mem0 array, which has
953 * been holding the productions, starts to hold item pointers, of a
954 * different type...
955 * someday, alloc should be used to allocate all this stuff... for now, we
956 * accept that if pointers don't fit in integers, there is a problem...
957 */
959 pstate[0] = pstate[1] = (Item*)mem;
960 aryfil(clset.lset, tbitset, 0);
961 putitem(prdptr[0]+1, &clset);
962 tystate[0] = MUSTDO;
963 nstate = 1;
964 pstate[2] = pstate[1];
966 aryfil(amem, ACTSIZE, 0);
968 /* now, the main state generation loop */
969 for(more=1; more;) {
970 more = 0;
971 SLOOP(i) {
972 if(tystate[i] != MUSTDO)
973 continue;
974 tystate[i] = DONE;
975 aryfil(temp1, nnonter+1, 0);
976 /* take state i, close it, and do gotos */
977 closure(i);
978 /* generate goto's */
979 WSLOOP(wsets, p) {
980 if(p->flag)
981 continue;
982 p->flag = 1;
983 c = *(p->pitem);
984 if(c <= 1) {
985 if(pstate[i+1]-pstate[i] <= p-wsets)
986 tystate[i] = MUSTLOOKAHEAD;
987 continue;
989 /* do a goto on c */
990 WSLOOP(p, q)
991 /* this item contributes to the goto */
992 if(c == *(q->pitem)) {
993 putitem(q->pitem+1, &q->ws);
994 q->flag = 1;
996 if(c < NTBASE)
997 state(c); /* register new state */
998 else
999 temp1[c-NTBASE] = state(c);
1001 if(gsdebug && foutput != 0) {
1002 Bprint(foutput, "%d: ", i);
1003 NTLOOP(j)
1004 if(temp1[j])
1005 Bprint(foutput, "%s %d, ",
1006 nontrst[j].name, temp1[j]);
1007 Bprint(foutput, "\n");
1009 indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1010 /* do some more */
1011 more = 1;
1017 * generate the closure of state i
1019 void
1020 closure(int i)
1023 Wset *u, *v;
1024 Item *p, *q;
1025 int c, ch, work, k, *pi, **s, **t;
1027 zzclose++;
1029 /* first, copy kernel of state i to wsets */
1030 cwp = wsets;
1031 ITMLOOP(i, p, q) {
1032 cwp->pitem = p->pitem;
1033 cwp->flag = 1; /* this item must get closed */
1034 SETLOOP(k)
1035 cwp->ws.lset[k] = p->look->lset[k];
1036 WSBUMP(cwp);
1039 /* now, go through the loop, closing each item */
1040 work = 1;
1041 while(work) {
1042 work = 0;
1043 WSLOOP(wsets, u) {
1044 if(u->flag == 0)
1045 continue;
1046 /* dot is before c */
1047 c = *(u->pitem);
1048 if(c < NTBASE) {
1049 u->flag = 0;
1050 /* only interesting case is where . is before nonterminal */
1051 continue;
1054 /* compute the lookahead */
1055 aryfil(clset.lset, tbitset, 0);
1057 /* find items involving c */
1058 WSLOOP(u, v)
1059 if(v->flag == 1 && *(pi=v->pitem) == c) {
1060 v->flag = 0;
1061 if(nolook)
1062 continue;
1063 while((ch = *++pi) > 0) {
1064 /* terminal symbol */
1065 if(ch < NTBASE) {
1066 SETBIT(clset.lset, ch);
1067 break;
1069 /* nonterminal symbol */
1070 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1071 if(!pempty[ch-NTBASE])
1072 break;
1074 if(ch <= 0)
1075 setunion(clset.lset, v->ws.lset);
1079 * now loop over productions derived from c
1080 * c is now nonterminal number
1082 c -= NTBASE;
1083 t = pres[c+1];
1084 for(s = pres[c]; s < t; ++s) {
1086 * put these items into the closure
1087 * is the item there
1089 WSLOOP(wsets, v)
1090 /* yes, it is there */
1091 if(v->pitem == *s) {
1092 if(nolook)
1093 goto nexts;
1094 if(setunion(v->ws.lset, clset.lset))
1095 v->flag = work = 1;
1096 goto nexts;
1099 /* not there; make a new entry */
1100 if(cwp-wsets+1 >= WSETSIZE)
1101 error( "working set overflow");
1102 cwp->pitem = *s;
1103 cwp->flag = 1;
1104 if(!nolook) {
1105 work = 1;
1106 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1108 WSBUMP(cwp);
1110 nexts:;
1115 /* have computed closure; flags are reset; return */
1116 if(cwp > zzcwp)
1117 zzcwp = cwp;
1118 if(cldebug && foutput != 0) {
1119 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1120 WSLOOP(wsets, u) {
1121 if(u->flag)
1122 Bprint(foutput, "flag set!\n");
1123 u->flag = 0;
1124 Bprint(foutput, "\t%s", writem(u->pitem));
1125 prlook(&u->ws);
1126 Bprint(foutput, "\n");
1132 * decide if the lookahead set pointed to by p is known
1133 * return pointer to a perminent location for the set
1135 Lkset*
1136 flset(Lkset *p)
1138 Lkset *q;
1139 int *u, *v, *w, j;
1141 for(q = &lkst[nlset]; q-- > lkst;) {
1142 u = p->lset;
1143 v = q->lset;
1144 w = &v[tbitset];
1145 while(v < w)
1146 if(*u++ != *v++)
1147 goto more;
1148 /* we have matched */
1149 return q;
1150 more:;
1152 /* add a new one */
1153 q = &lkst[nlset++];
1154 if(nlset >= LSETSIZE)
1155 error("too many lookahead sets");
1156 SETLOOP(j)
1157 q->lset[j] = p->lset[j];
1158 return q;
1161 void
1162 cleantmp(void)
1164 ZAPFILE(actname);
1165 ZAPFILE(tempname);
1168 void
1169 intr(void)
1171 cleantmp();
1172 exits("interrupted");
1175 void
1176 setup(int argc, char *argv[])
1178 long c, t;
1179 int i, j, fd, lev, ty, ytab, *p;
1180 int vflag, dflag, stem;
1181 char actnm[8], *stemc, *s, dirbuf[128];
1182 Biobuf *fout;
1184 ytab = 0;
1185 vflag = 0;
1186 dflag = 0;
1187 stem = 0;
1188 stemc = "y";
1189 foutput = 0;
1190 fdefine = 0;
1191 fdebug = 0;
1192 ARGBEGIN{
1193 case 'v':
1194 case 'V':
1195 vflag++;
1196 break;
1197 case 'D':
1198 yydebug = ARGF();
1199 break;
1200 case 'a':
1201 yyarg = 1;
1202 break;
1203 case 'd':
1204 dflag++;
1205 break;
1206 case 'l':
1207 yyline = 0;
1208 break;
1209 case 'o':
1210 ytab++;
1211 ytabc = ARGF();
1212 break;
1213 case 's':
1214 stem++;
1215 stemc = ARGF();
1216 break;
1217 case 'S':
1218 parser = PARSERS;
1219 break;
1220 default:
1221 error("illegal option: %c", ARGC());
1222 }ARGEND
1223 openup(stemc, dflag, vflag, ytab, ytabc);
1224 fout = dflag?fdefine:ftable;
1225 if(yyarg){
1226 Bprint(ftable, "#define\tYYARG\t1\n\n");
1228 if((fd = mkstemp(ttempname)) >= 0){
1229 tempname = ttempname;
1230 ftemp = Bfdopen(fd, OWRITE);
1232 if((fd = mkstemp(tactname)) >= 0){
1233 actname = tactname;
1234 faction = Bfdopen(fd, OWRITE);
1236 if(ftemp == 0 || faction == 0)
1237 error("cannot open temp file");
1238 if(argc < 1)
1239 error("no input file");
1240 infile = argv[0];
1241 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1242 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1243 s = malloc(i);
1244 if(s != nil){
1245 snprint(s, i, "%s/%s", dirbuf, infile);
1246 cleanname(s);
1247 infile = s;
1250 finput = Bopen(infile, OREAD);
1251 if(finput == 0)
1252 error("cannot open '%s'", argv[0]);
1253 cnamp = cnames;
1255 defin(0, "$end");
1256 extval = PRIVATE; /* tokens start in unicode 'private use' */
1257 defin(0, "error");
1258 defin(1, "$accept");
1259 defin(0, "$unk");
1260 mem = mem0;
1261 i = 0;
1263 for(t = gettok(); t != MARK && t != ENDFILE;)
1264 switch(t) {
1265 case ';':
1266 t = gettok();
1267 break;
1269 case START:
1270 if(gettok() != IDENTIFIER)
1271 error("bad %%start construction");
1272 start = chfind(1, tokname);
1273 t = gettok();
1274 continue;
1276 case TYPEDEF:
1277 if(gettok() != TYPENAME)
1278 error("bad syntax in %%type");
1279 ty = numbval;
1280 for(;;) {
1281 t = gettok();
1282 switch(t) {
1283 case IDENTIFIER:
1284 if((t=chfind(1, tokname)) < NTBASE) {
1285 j = TYPE(toklev[t]);
1286 if(j != 0 && j != ty)
1287 error("type redeclaration of token %s",
1288 tokset[t].name);
1289 else
1290 SETTYPE(toklev[t], ty);
1291 } else {
1292 j = nontrst[t-NTBASE].value;
1293 if(j != 0 && j != ty)
1294 error("type redeclaration of nonterminal %s",
1295 nontrst[t-NTBASE].name );
1296 else
1297 nontrst[t-NTBASE].value = ty;
1299 case ',':
1300 continue;
1301 case ';':
1302 t = gettok();
1303 default:
1304 break;
1306 break;
1308 continue;
1310 case UNION:
1311 /* copy the union declaration to the output */
1312 cpyunion();
1313 t = gettok();
1314 continue;
1316 case LEFT:
1317 case BINARY:
1318 case RIGHT:
1319 i++;
1321 case TERM:
1322 /* nonzero means new prec. and assoc. */
1323 lev = t-TERM;
1324 ty = 0;
1326 /* get identifiers so defined */
1327 t = gettok();
1329 /* there is a type defined */
1330 if(t == TYPENAME) {
1331 ty = numbval;
1332 t = gettok();
1334 for(;;) {
1335 switch(t) {
1336 case ',':
1337 t = gettok();
1338 continue;
1340 case ';':
1341 break;
1343 case IDENTIFIER:
1344 j = chfind(0, tokname);
1345 if(j >= NTBASE)
1346 error("%s defined earlier as nonterminal", tokname);
1347 if(lev) {
1348 if(ASSOC(toklev[j]))
1349 error("redeclaration of precedence of %s", tokname);
1350 SETASC(toklev[j], lev);
1351 SETPLEV(toklev[j], i);
1353 if(ty) {
1354 if(TYPE(toklev[j]))
1355 error("redeclaration of type of %s", tokname);
1356 SETTYPE(toklev[j],ty);
1358 t = gettok();
1359 if(t == NUMBER) {
1360 tokset[j].value = numbval;
1361 if(j < ndefout && j > 3)
1362 error("please define type number of %s earlier",
1363 tokset[j].name);
1364 t = gettok();
1366 continue;
1368 break;
1370 continue;
1372 case LCURLY:
1373 defout(0);
1374 cpycode();
1375 t = gettok();
1376 continue;
1378 default:
1379 error("syntax error");
1381 if(t == ENDFILE)
1382 error("unexpected EOF before %%");
1384 /* t is MARK */
1385 if(!yyarg)
1386 Bprint(ftable, "extern int yyerrflag;\n");
1387 Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1388 Bprint(ftable, "#define YYMAXDEPTH 150\n");
1389 Bprint(ftable, "#endif\n" );
1390 if(!ntypes) {
1391 Bprint(ftable, "#ifndef YYSTYPE\n");
1392 Bprint(ftable, "#define YYSTYPE int\n");
1393 Bprint(ftable, "#endif\n");
1395 if(!yyarg){
1396 Bprint(ftable, "YYSTYPE yylval;\n");
1397 Bprint(ftable, "YYSTYPE yyval;\n");
1398 }else{
1399 if(dflag)
1400 Bprint(ftable, "#include \"%s.%s\"\n\n", stemc, FILED);
1401 Bprint(fout, "struct Yyarg {\n");
1402 Bprint(fout, "\tint\tyynerrs;\n");
1403 Bprint(fout, "\tint\tyyerrflag;\n");
1404 Bprint(fout, "\tvoid*\targ;\n");
1405 Bprint(fout, "\tYYSTYPE\tyyval;\n");
1406 Bprint(fout, "\tYYSTYPE\tyylval;\n");
1407 Bprint(fout, "};\n\n");
1409 prdptr[0] = mem;
1411 /* added production */
1412 *mem++ = NTBASE;
1414 /* if start is 0, we will overwrite with the lhs of the first rule */
1415 *mem++ = start;
1416 *mem++ = 1;
1417 *mem++ = 0;
1418 prdptr[1] = mem;
1419 while((t=gettok()) == LCURLY)
1420 cpycode();
1421 if(t != IDENTCOLON)
1422 error("bad syntax on first rule");
1424 if(!start)
1425 prdptr[0][1] = chfind(1, tokname);
1427 /* read rules */
1428 while(t != MARK && t != ENDFILE) {
1429 /* process a rule */
1430 rlines[nprod] = lineno;
1431 if(t == '|')
1432 *mem++ = *prdptr[nprod-1];
1433 else
1434 if(t == IDENTCOLON) {
1435 *mem = chfind(1, tokname);
1436 if(*mem < NTBASE)
1437 error("token illegal on LHS of grammar rule");
1438 mem++;
1439 } else
1440 error("illegal rule: missing semicolon or | ?");
1441 /* read rule body */
1442 t = gettok();
1444 more_rule:
1445 while(t == IDENTIFIER) {
1446 *mem = chfind(1, tokname);
1447 if(*mem < NTBASE)
1448 levprd[nprod] = toklev[*mem];
1449 mem++;
1450 t = gettok();
1452 if(t == PREC) {
1453 if(gettok() != IDENTIFIER)
1454 error("illegal %%prec syntax");
1455 j = chfind(2, tokname);
1456 if(j >= NTBASE)
1457 error("nonterminal %s illegal after %%prec",
1458 nontrst[j-NTBASE].name);
1459 levprd[nprod] = toklev[j];
1460 t = gettok();
1462 if(t == '=') {
1463 levprd[nprod] |= ACTFLAG;
1464 Bprint(faction, "\ncase %d:", nprod);
1465 cpyact(mem-prdptr[nprod]-1);
1466 Bprint(faction, " break;");
1467 if((t=gettok()) == IDENTIFIER) {
1469 /* action within rule... */
1470 sprint(actnm, "$$%d", nprod);
1472 /* make it a nonterminal */
1473 j = chfind(1, actnm);
1476 * the current rule will become rule number nprod+1
1477 * move the contents down, and make room for the null
1479 for(p = mem; p >= prdptr[nprod]; --p)
1480 p[2] = *p;
1481 mem += 2;
1483 /* enter null production for action */
1484 p = prdptr[nprod];
1485 *p++ = j;
1486 *p++ = -nprod;
1488 /* update the production information */
1489 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1490 levprd[nprod] = ACTFLAG;
1491 if(++nprod >= NPROD)
1492 error("more than %d rules", NPROD);
1493 prdptr[nprod] = p;
1495 /* make the action appear in the original rule */
1496 *mem++ = j;
1498 /* get some more of the rule */
1499 goto more_rule;
1503 while(t == ';')
1504 t = gettok();
1505 *mem++ = -nprod;
1507 /* check that default action is reasonable */
1508 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1510 /* no explicit action, LHS has value */
1511 int tempty;
1513 tempty = prdptr[nprod][1];
1514 if(tempty < 0)
1515 error("must return a value, since LHS has a type");
1516 else
1517 if(tempty >= NTBASE)
1518 tempty = nontrst[tempty-NTBASE].value;
1519 else
1520 tempty = TYPE(toklev[tempty]);
1521 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1522 error("default action causes potential type clash");
1524 nprod++;
1525 if(nprod >= NPROD)
1526 error("more than %d rules", NPROD);
1527 prdptr[nprod] = mem;
1528 levprd[nprod] = 0;
1531 /* end of all rules */
1532 defout(1);
1534 finact();
1535 if(t == MARK) {
1536 Bprint(ftable, "\n");
1537 if(yyline)
1538 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
1539 while((c=Bgetrune(finput)) != Beof)
1540 Bputrune(ftable, c);
1542 Bterm(finput);
1546 * finish action routine
1548 void
1549 finact(void)
1552 Bterm(faction);
1553 Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1554 Bprint(ftable, "#define YYERRCODE %d\n", 2);
1558 * define s to be a terminal if t=0
1559 * or a nonterminal if t=1
1561 int
1562 defin(int nt, char *s)
1564 int val;
1565 Rune rune;
1567 val = 0;
1568 if(nt) {
1569 nnonter++;
1570 if(nnonter >= NNONTERM)
1571 error("too many nonterminals, limit %d",NNONTERM);
1572 nontrst[nnonter].name = cstash(s);
1573 return NTBASE + nnonter;
1576 /* must be a token */
1577 ntokens++;
1578 if(ntokens >= NTERMS)
1579 error("too many terminals, limit %d", NTERMS);
1580 tokset[ntokens].name = cstash(s);
1582 /* establish value for token */
1583 /* single character literal */
1584 if(s[0] == ' ') {
1585 val = chartorune(&rune, &s[1]);
1586 if(s[val+1] == 0) {
1587 val = rune;
1588 goto out;
1592 /* escape sequence */
1593 if(s[0] == ' ' && s[1] == '\\') {
1594 if(s[3] == 0) {
1595 /* single character escape sequence */
1596 switch(s[2]) {
1597 case 'n': val = '\n'; break;
1598 case 'r': val = '\r'; break;
1599 case 'b': val = '\b'; break;
1600 case 't': val = '\t'; break;
1601 case 'f': val = '\f'; break;
1602 case '\'': val = '\''; break;
1603 case '"': val = '"'; break;
1604 case '\\': val = '\\'; break;
1605 default: error("invalid escape");
1607 goto out;
1610 /* \nnn sequence */
1611 if(s[2] >= '0' && s[2] <= '7') {
1612 if(s[3] < '0' ||
1613 s[3] > '7' ||
1614 s[4] < '0' ||
1615 s[4] > '7' ||
1616 s[5] != 0)
1617 error("illegal \\nnn construction");
1618 val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1619 if(val == 0)
1620 error("'\\000' is illegal");
1621 goto out;
1623 error("unknown escape");
1625 val = extval++;
1627 out:
1628 tokset[ntokens].value = val;
1629 toklev[ntokens] = 0;
1630 return ntokens;
1634 * write out the defines (at the end of the declaration section)
1636 void
1637 defout(int last)
1639 int i, c;
1640 char sar[NAMESIZE+10];
1642 for(i=ndefout; i<=ntokens; i++) {
1643 /* non-literals */
1644 c = tokset[i].name[0];
1645 if(c != ' ' && c != '$') {
1646 Bprint(ftable, "#define %s %d\n",
1647 tokset[i].name, tokset[i].value);
1648 if(fdefine)
1649 Bprint(fdefine, "#define\t%s\t%d\n",
1650 tokset[i].name, tokset[i].value);
1653 ndefout = ntokens+1;
1654 if(last && fdebug) {
1655 Bprint(fdebug, "static char* yytoknames[] =\n{\n");
1656 TLOOP(i) {
1657 if(tokset[i].name) {
1658 chcopy(sar, tokset[i].name);
1659 Bprint(fdebug, "\t\"%s\",\n", sar);
1660 continue;
1662 Bprint(fdebug, "\t0,\n");
1664 Bprint(fdebug, "};\n");
1668 char*
1669 cstash(char *s)
1671 char *temp;
1673 temp = cnamp;
1674 do {
1675 if(cnamp >= &cnames[cnamsz])
1676 error("too many characters in id's and literals");
1677 else
1678 *cnamp++ = *s;
1679 } while(*s++);
1680 return temp;
1683 long
1684 gettok(void)
1686 long c;
1687 Rune rune;
1688 int i, base, match, reserve;
1689 static int peekline;
1691 begin:
1692 reserve = 0;
1693 lineno += peekline;
1694 peekline = 0;
1695 c = Bgetrune(finput);
1696 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1697 if(c == '\n')
1698 lineno++;
1699 c = Bgetrune(finput);
1702 /* skip comment */
1703 if(c == '/') {
1704 lineno += skipcom();
1705 goto begin;
1707 switch(c) {
1708 case Beof:
1709 return ENDFILE;
1711 case '{':
1712 Bungetrune(finput);
1713 return '=';
1715 case '<':
1716 /* get, and look up, a type name (union member name) */
1717 i = 0;
1718 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1719 rune = c;
1720 c = runetochar(&tokname[i], &rune);
1721 if(i < NAMESIZE)
1722 i += c;
1724 if(c != '>')
1725 error("unterminated < ... > clause");
1726 tokname[i] = 0;
1727 for(i=1; i<=ntypes; i++)
1728 if(!strcmp(typeset[i], tokname)) {
1729 numbval = i;
1730 return TYPENAME;
1732 ntypes++;
1733 numbval = ntypes;
1734 typeset[numbval] = cstash(tokname);
1735 return TYPENAME;
1737 case '"':
1738 case '\'':
1739 match = c;
1740 tokname[0] = ' ';
1741 i = 1;
1742 for(;;) {
1743 c = Bgetrune(finput);
1744 if(c == '\n' || c <= 0)
1745 error("illegal or missing ' or \"" );
1746 if(c == '\\') {
1747 tokname[i] = '\\';
1748 if(i < NAMESIZE)
1749 i++;
1750 c = Bgetrune(finput);
1751 } else
1752 if(c == match)
1753 break;
1754 rune = c;
1755 c = runetochar(&tokname[i], &rune);
1756 if(i < NAMESIZE)
1757 i += c;
1759 break;
1761 case '%':
1762 case '\\':
1763 switch(c = Bgetrune(finput)) {
1764 case '0': return TERM;
1765 case '<': return LEFT;
1766 case '2': return BINARY;
1767 case '>': return RIGHT;
1768 case '%':
1769 case '\\': return MARK;
1770 case '=': return PREC;
1771 case '{': return LCURLY;
1772 default: reserve = 1;
1775 default:
1776 /* number */
1777 if(isdigit(c)) {
1778 numbval = c-'0';
1779 base = (c=='0')? 8: 10;
1780 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1781 numbval = numbval*base + (c-'0');
1782 Bungetrune(finput);
1783 return NUMBER;
1785 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
1786 i = 0;
1787 while(islower(c) || isupper(c) || isdigit(c) ||
1788 c == '-' || c=='_' || c=='.' || c=='$') {
1789 if(reserve && isupper(c))
1790 c += 'a'-'A';
1791 rune = c;
1792 c = runetochar(&tokname[i], &rune);
1793 if(i < NAMESIZE)
1794 i += c;
1795 c = Bgetrune(finput);
1797 } else
1798 return c;
1799 Bungetrune(finput);
1801 tokname[i] = 0;
1803 /* find a reserved word */
1804 if(reserve) {
1805 for(c=0; resrv[c].name; c++)
1806 if(strcmp(tokname, resrv[c].name) == 0)
1807 return resrv[c].value;
1808 error("invalid escape, or illegal reserved word: %s", tokname);
1811 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1812 c = Bgetrune(finput);
1813 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1814 if(c == '\n')
1815 peekline++;
1816 /* look for comments */
1817 if(c == '/')
1818 peekline += skipcom();
1819 c = Bgetrune(finput);
1821 if(c == ':')
1822 return IDENTCOLON;
1823 Bungetrune(finput);
1824 return IDENTIFIER;
1828 * determine the type of a symbol
1830 int
1831 fdtype(int t)
1833 int v;
1835 if(t >= NTBASE)
1836 v = nontrst[t-NTBASE].value;
1837 else
1838 v = TYPE(toklev[t]);
1839 if(v <= 0)
1840 error("must specify type for %s", (t>=NTBASE)?
1841 nontrst[t-NTBASE].name: tokset[t].name);
1842 return v;
1845 int
1846 chfind(int t, char *s)
1848 int i;
1850 if(s[0] == ' ')
1851 t = 0;
1852 TLOOP(i)
1853 if(!strcmp(s, tokset[i].name))
1854 return i;
1855 NTLOOP(i)
1856 if(!strcmp(s, nontrst[i].name))
1857 return NTBASE+i;
1859 /* cannot find name */
1860 if(t > 1)
1861 error("%s should have been defined earlier", s);
1862 return defin(t, s);
1866 * copy the union declaration to the output, and the define file if present
1868 void
1869 cpyunion(void)
1871 long c;
1872 int level;
1874 Bprint(ftable, "\n");
1875 if(yyline)
1876 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
1877 Bprint(ftable, "typedef union ");
1878 if(fdefine != 0)
1879 Bprint(fdefine, "\ntypedef union ");
1881 level = 0;
1882 for(;;) {
1883 if((c=Bgetrune(finput)) == Beof)
1884 error("EOF encountered while processing %%union");
1885 Bputrune(ftable, c);
1886 if(fdefine != 0)
1887 Bputrune(fdefine, c);
1888 switch(c) {
1889 case '\n':
1890 lineno++;
1891 break;
1892 case '{':
1893 level++;
1894 break;
1895 case '}':
1896 level--;
1898 /* we are finished copying */
1899 if(level == 0) {
1900 Bprint(ftable, " YYSTYPE;\n");
1901 if(fdefine != 0){
1902 Bprint(fdefine, "\tYYSTYPE;\n");
1903 if(!yyarg)
1904 Bprint(fdefine, "extern\tYYSTYPE\tyylval;\n");
1906 return;
1913 * copies code between \{ and \}
1915 void
1916 cpycode(void)
1918 long c;
1920 c = Bgetrune(finput);
1921 if(c == '\n') {
1922 c = Bgetrune(finput);
1923 lineno++;
1925 Bprint(ftable, "\n");
1926 if(yyline)
1927 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
1928 while(c != Beof) {
1929 if(c == '\\') {
1930 if((c=Bgetrune(finput)) == '}')
1931 return;
1932 Bputc(ftable, '\\');
1934 if(c == '%') {
1935 if((c=Bgetrune(finput)) == '}')
1936 return;
1937 Bputc(ftable, '%');
1939 Bputrune(ftable, c);
1940 if(c == '\n')
1941 lineno++;
1942 c = Bgetrune(finput);
1944 error("eof before %%}");
1948 * skip over comments
1949 * skipcom is called after reading a '/'
1951 int
1952 skipcom(void)
1954 long c;
1955 int i;
1957 /* i is the number of lines skipped */
1958 i = 0;
1959 if(Bgetrune(finput) != '*')
1960 error("illegal comment");
1961 c = Bgetrune(finput);
1962 while(c != Beof) {
1963 while(c == '*')
1964 if((c=Bgetrune(finput)) == '/')
1965 return i;
1966 if(c == '\n')
1967 i++;
1968 c = Bgetrune(finput);
1970 error("EOF inside comment");
1971 return 0;
1975 * copy C action to the next ; or closing }
1977 void
1978 cpyact(int offset)
1980 long c;
1981 int brac, match, j, s, fnd, tok;
1983 Bprint(faction, "\n");
1984 if(yyline)
1985 Bprint(faction, "#line\t%d\t\"%s\"\n", lineno, infile);
1986 brac = 0;
1988 loop:
1989 c = Bgetrune(finput);
1990 swt:
1991 switch(c) {
1992 case ';':
1993 if(brac == 0) {
1994 Bputrune(faction, c);
1995 return;
1997 goto lcopy;
1999 case '{':
2000 brac++;
2001 goto lcopy;
2003 case '$':
2004 s = 1;
2005 tok = -1;
2006 c = Bgetrune(finput);
2008 /* type description */
2009 if(c == '<') {
2010 Bungetrune(finput);
2011 if(gettok() != TYPENAME)
2012 error("bad syntax on $<ident> clause");
2013 tok = numbval;
2014 c = Bgetrune(finput);
2016 if(c == '$') {
2017 Bprint(faction, "yyval");
2019 /* put out the proper tag... */
2020 if(ntypes) {
2021 if(tok < 0)
2022 tok = fdtype(*prdptr[nprod]);
2023 Bprint(faction, ".%s", typeset[tok]);
2025 goto loop;
2027 if(c == '-') {
2028 s = -s;
2029 c = Bgetrune(finput);
2031 if(isdigit(c)) {
2032 j = 0;
2033 while(isdigit(c)) {
2034 j = j*10 + (c-'0');
2035 c = Bgetrune(finput);
2037 Bungetrune(finput);
2038 j = j*s - offset;
2039 if(j > 0)
2040 error("Illegal use of $%d", j+offset);
2042 dollar:
2043 Bprint(faction, "yypt[-%d].yyv", -j);
2045 /* put out the proper tag */
2046 if(ntypes) {
2047 if(j+offset <= 0 && tok < 0)
2048 error("must specify type of $%d", j+offset);
2049 if(tok < 0)
2050 tok = fdtype(prdptr[nprod][j+offset]);
2051 Bprint(faction, ".%s", typeset[tok]);
2053 goto loop;
2055 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2056 int tok; /* tok used oustide for type info */
2058 /* look for $name */
2059 Bungetrune(finput);
2060 if(gettok() != IDENTIFIER)
2061 error("$ must be followed by an identifier");
2062 tok = chfind(2, tokname);
2063 if((c = Bgetrune(finput)) != '#') {
2064 Bungetrune(finput);
2065 fnd = -1;
2066 } else
2067 if(gettok() != NUMBER) {
2068 error("# must be followed by number");
2069 fnd = -1;
2070 } else
2071 fnd = numbval;
2072 for(j=1; j<=offset; ++j)
2073 if(tok == prdptr[nprod][j]) {
2074 if(--fnd <= 0) {
2075 j -= offset;
2076 goto dollar;
2079 error("$name or $name#number not found");
2081 Bputc(faction, '$');
2082 if(s < 0 )
2083 Bputc(faction, '-');
2084 goto swt;
2086 case '}':
2087 brac--;
2088 if(brac)
2089 goto lcopy;
2090 Bputrune(faction, c);
2091 return;
2093 case '/':
2094 /* look for comments */
2095 Bputrune(faction, c);
2096 c = Bgetrune(finput);
2097 if(c != '*')
2098 goto swt;
2100 /* it really is a comment */
2101 Bputrune(faction, c);
2102 c = Bgetrune(finput);
2103 while(c >= 0) {
2104 while(c == '*') {
2105 Bputrune(faction, c);
2106 if((c=Bgetrune(finput)) == '/')
2107 goto lcopy;
2109 Bputrune(faction, c);
2110 if(c == '\n')
2111 lineno++;
2112 c = Bgetrune(finput);
2114 error("EOF inside comment");
2116 case '\'':
2117 /* character constant */
2118 match = '\'';
2119 goto string;
2121 case '"':
2122 /* character string */
2123 match = '"';
2125 string:
2126 Bputrune(faction, c);
2127 while(c = Bgetrune(finput)) {
2128 if(c == '\\') {
2129 Bputrune(faction, c);
2130 c = Bgetrune(finput);
2131 if(c == '\n')
2132 lineno++;
2133 } else
2134 if(c == match)
2135 goto lcopy;
2136 if(c == '\n')
2137 error("newline in string or char. const.");
2138 Bputrune(faction, c);
2140 error("EOF in string or character constant");
2142 case Beof:
2143 error("action does not terminate");
2145 case '\n':
2146 lineno++;
2147 goto lcopy;
2150 lcopy:
2151 Bputrune(faction, c);
2152 goto loop;
2155 void
2156 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2158 char buf[256];
2160 if(vflag) {
2161 sprint(buf, "%s.%s", stem, FILEU);
2162 foutput = Bopen(buf, OWRITE);
2163 if(foutput == 0)
2164 error("cannot open %s", buf);
2166 if(yydebug) {
2167 sprint(buf, "%s.%s", stem, FILEDEBUG);
2168 if((fdebug = Bopen(buf, OWRITE)) == 0)
2169 error("can't open %s", buf);
2171 if(dflag) {
2172 sprint(buf, "%s.%s", stem, FILED);
2173 fdefine = Bopen(buf, OWRITE);
2174 if(fdefine == 0)
2175 error("can't create %s", buf);
2177 if(ytab == 0)
2178 sprint(buf, "%s.%s", stem, OFILE);
2179 else
2180 strcpy(buf, ytabc);
2181 ftable = Bopen(buf, OWRITE);
2182 if(ftable == 0)
2183 error("cannot open table file %s", buf);
2187 * print the output for the states
2189 void
2190 output(void)
2192 int i, k, c;
2193 Wset *u, *v;
2195 Bprint(ftable, "static\tconst\tshort yyexca[] =\n{");
2196 if(fdebug)
2197 Bprint(fdebug, "static\tconst\tchar* yystates[] =\n{\n");
2199 /* output the stuff for state i */
2200 SLOOP(i) {
2201 nolook = tystate[i]!=MUSTLOOKAHEAD;
2202 closure(i);
2204 /* output actions */
2205 nolook = 1;
2206 aryfil(temp1, ntokens+nnonter+1, 0);
2207 WSLOOP(wsets, u) {
2208 c = *(u->pitem);
2209 if(c > 1 && c < NTBASE && temp1[c] == 0) {
2210 WSLOOP(u, v)
2211 if(c == *(v->pitem))
2212 putitem(v->pitem+1, (Lkset*)0);
2213 temp1[c] = state(c);
2214 } else
2215 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2216 temp1[c+ntokens] = amem[indgo[i]+c];
2218 if(i == 1)
2219 temp1[1] = ACCEPTCODE;
2221 /* now, we have the shifts; look at the reductions */
2222 lastred = 0;
2223 WSLOOP(wsets, u) {
2224 c = *u->pitem;
2226 /* reduction */
2227 if(c <= 0) {
2228 lastred = -c;
2229 TLOOP(k)
2230 if(BIT(u->ws.lset, k)) {
2231 if(temp1[k] == 0)
2232 temp1[k] = c;
2233 else
2234 if(temp1[k] < 0) { /* reduce/reduce conflict */
2235 if(foutput)
2236 Bprint(foutput,
2237 "\n%d: reduce/reduce conflict"
2238 " (red'ns %d and %d ) on %s",
2239 i, -temp1[k], lastred,
2240 symnam(k));
2241 if(-temp1[k] > lastred)
2242 temp1[k] = -lastred;
2243 zzrrconf++;
2244 } else
2245 /* potential shift/reduce conflict */
2246 precftn( lastred, k, i );
2250 wract(i);
2253 if(fdebug)
2254 Bprint(fdebug, "};\n");
2255 Bprint(ftable, "};\n");
2256 Bprint(ftable, "#define YYNPROD %d\n", nprod);
2257 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2258 if(yydebug)
2259 Bprint(ftable, "#define yydebug %s\n", yydebug);
2263 * pack state i from temp1 into amem
2265 int
2266 apack(int *p, int n)
2268 int *pp, *qq, *rr, off, *q, *r;
2270 /* we don't need to worry about checking because
2271 * we will only look at entries known to be there...
2272 * eliminate leading and trailing 0's
2275 q = p+n;
2276 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2278 /* no actions */
2279 if(pp > q)
2280 return 0;
2281 p = pp;
2283 /* now, find a place for the elements from p to q, inclusive */
2284 r = &amem[ACTSIZE-1];
2285 for(rr = amem; rr <= r; rr++, off++) {
2286 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2287 if(*pp != 0)
2288 if(*pp != *qq && *qq != 0)
2289 goto nextk;
2291 /* we have found an acceptable k */
2292 if(pkdebug && foutput != 0)
2293 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2294 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2295 if(*pp) {
2296 if(qq > r)
2297 error("action table overflow");
2298 if(qq > memp)
2299 memp = qq;
2300 *qq = *pp;
2302 if(pkdebug && foutput != 0)
2303 for(pp = amem; pp <= memp; pp += 10) {
2304 Bprint(foutput, "\t");
2305 for(qq = pp; qq <= pp+9; qq++)
2306 Bprint(foutput, "%d ", *qq);
2307 Bprint(foutput, "\n");
2309 return(off);
2310 nextk:;
2312 error("no space in action table");
2313 return 0;
2317 * output the gotos for the nontermninals
2319 void
2320 go2out(void)
2322 int i, j, k, best, count, cbest, times;
2324 /* mark begining of gotos */
2325 Bprint(ftemp, "$\n");
2326 for(i = 1; i <= nnonter; i++) {
2327 go2gen(i);
2329 /* find the best one to make default */
2330 best = -1;
2331 times = 0;
2333 /* is j the most frequent */
2334 for(j = 0; j <= nstate; j++) {
2335 if(tystate[j] == 0)
2336 continue;
2337 if(tystate[j] == best)
2338 continue;
2340 /* is tystate[j] the most frequent */
2341 count = 0;
2342 cbest = tystate[j];
2343 for(k = j; k <= nstate; k++)
2344 if(tystate[k] == cbest)
2345 count++;
2346 if(count > times) {
2347 best = cbest;
2348 times = count;
2352 /* best is now the default entry */
2353 zzgobest += times-1;
2354 for(j = 0; j <= nstate; j++)
2355 if(tystate[j] != 0 && tystate[j] != best) {
2356 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2357 zzgoent++;
2360 /* now, the default */
2361 if(best == -1)
2362 best = 0;
2363 zzgoent++;
2364 Bprint(ftemp, "%d\n", best);
2369 * output the gotos for nonterminal c
2371 void
2372 go2gen(int c)
2374 int i, work, cc;
2375 Item *p, *q;
2378 /* first, find nonterminals with gotos on c */
2379 aryfil(temp1, nnonter+1, 0);
2380 temp1[c] = 1;
2381 work = 1;
2382 while(work) {
2383 work = 0;
2384 PLOOP(0, i)
2386 /* cc is a nonterminal */
2387 if((cc=prdptr[i][1]-NTBASE) >= 0)
2388 /* cc has a goto on c */
2389 if(temp1[cc] != 0) {
2391 /* thus, the left side of production i does too */
2392 cc = *prdptr[i]-NTBASE;
2393 if(temp1[cc] == 0) {
2394 work = 1;
2395 temp1[cc] = 1;
2400 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2401 if(g2debug && foutput != 0) {
2402 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2403 NTLOOP(i)
2404 if(temp1[i])
2405 Bprint(foutput, "%s ", nontrst[i].name);
2406 Bprint(foutput, "\n");
2409 /* now, go through and put gotos into tystate */
2410 aryfil(tystate, nstate, 0);
2411 SLOOP(i)
2412 ITMLOOP(i, p, q)
2413 if((cc = *p->pitem) >= NTBASE)
2414 /* goto on c is possible */
2415 if(temp1[cc-NTBASE]) {
2416 tystate[i] = amem[indgo[i]+c];
2417 break;
2422 * decide a shift/reduce conflict by precedence.
2423 * r is a rule number, t a token number
2424 * the conflict is in state s
2425 * temp1[t] is changed to reflect the action
2427 void
2428 precftn(int r, int t, int s)
2430 int lp, lt, action;
2432 lp = levprd[r];
2433 lt = toklev[t];
2434 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2436 /* conflict */
2437 if(foutput != 0)
2438 Bprint(foutput,
2439 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2440 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2441 zzsrconf++;
2442 return;
2444 if(PLEVEL(lt) == PLEVEL(lp))
2445 action = ASSOC(lt);
2446 else
2447 if(PLEVEL(lt) > PLEVEL(lp))
2448 action = RASC; /* shift */
2449 else
2450 action = LASC; /* reduce */
2451 switch(action) {
2452 case BASC: /* error action */
2453 temp1[t] = ERRCODE;
2454 break;
2455 case LASC: /* reduce */
2456 temp1[t] = -r;
2457 break;
2462 * output state i
2463 * temp1 has the actions, lastred the default
2465 void
2466 wract(int i)
2468 int p, p0, p1, ntimes, tred, count, j, flag;
2470 /* find the best choice for lastred */
2471 lastred = 0;
2472 ntimes = 0;
2473 TLOOP(j) {
2474 if(temp1[j] >= 0)
2475 continue;
2476 if(temp1[j]+lastred == 0)
2477 continue;
2478 /* count the number of appearances of temp1[j] */
2479 count = 0;
2480 tred = -temp1[j];
2481 levprd[tred] |= REDFLAG;
2482 TLOOP(p)
2483 if(temp1[p]+tred == 0)
2484 count++;
2485 if(count > ntimes) {
2486 lastred = tred;
2487 ntimes = count;
2492 * for error recovery, arrange that, if there is a shift on the
2493 * error recovery token, `error', that the default be the error action
2495 if(temp1[2] > 0)
2496 lastred = 0;
2498 /* clear out entries in temp1 which equal lastred */
2499 TLOOP(p)
2500 if(temp1[p]+lastred == 0)
2501 temp1[p] = 0;
2503 wrstate(i);
2504 defact[i] = lastred;
2505 flag = 0;
2506 TLOOP(p0)
2507 if((p1=temp1[p0]) != 0) {
2508 if(p1 < 0) {
2509 p1 = -p1;
2510 goto exc;
2512 if(p1 == ACCEPTCODE) {
2513 p1 = -1;
2514 goto exc;
2516 if(p1 == ERRCODE) {
2517 p1 = 0;
2518 exc:
2519 if(flag++ == 0)
2520 Bprint(ftable, "-1, %d,\n", i);
2521 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2522 zzexcp++;
2523 continue;
2525 Bprint(ftemp, "%d,%d,", p0, p1);
2526 zzacent++;
2528 if(flag) {
2529 defact[i] = -2;
2530 Bprint(ftable, "\t-2, %d,\n", lastred);
2532 Bprint(ftemp, "\n");
2536 * writes state i
2538 void
2539 wrstate(int i)
2541 int j0, j1;
2542 Item *pp, *qq;
2543 Wset *u;
2545 if(fdebug) {
2546 if(lastred) {
2547 Bprint(fdebug, " 0, /*%d*/\n", i);
2548 } else {
2549 Bprint(fdebug, " \"");
2550 ITMLOOP(i, pp, qq)
2551 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2552 if(tystate[i] == MUSTLOOKAHEAD)
2553 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2554 if(*u->pitem < 0)
2555 Bprint(fdebug, "%s\\n", writem(u->pitem));
2556 Bprint(fdebug, "\", /*%d*/\n", i);
2559 if(foutput == 0)
2560 return;
2561 Bprint(foutput, "\nstate %d\n", i);
2562 ITMLOOP(i, pp, qq)
2563 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2564 if(tystate[i] == MUSTLOOKAHEAD)
2565 /* print out empty productions in closure */
2566 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2567 if(*u->pitem < 0)
2568 Bprint(foutput, "\t%s\n", writem(u->pitem));
2570 /* check for state equal to another */
2571 TLOOP(j0)
2572 if((j1=temp1[j0]) != 0) {
2573 Bprint(foutput, "\n\t%s ", symnam(j0));
2574 /* shift, error, or accept */
2575 if(j1 > 0) {
2576 if(j1 == ACCEPTCODE)
2577 Bprint(foutput, "accept");
2578 else
2579 if(j1 == ERRCODE)
2580 Bprint(foutput, "error");
2581 else
2582 Bprint(foutput, "shift %d", j1);
2583 } else
2584 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2587 /* output the final production */
2588 if(lastred)
2589 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
2590 lastred, rlines[lastred]);
2591 else
2592 Bprint(foutput, "\n\t. error\n\n");
2594 /* now, output nonterminal actions */
2595 j1 = ntokens;
2596 for(j0 = 1; j0 <= nnonter; j0++) {
2597 j1++;
2598 if(temp1[j1])
2599 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2603 void
2604 warray(char *s, int *v, int n)
2606 int i;
2608 Bprint(ftable, "static\tconst\tshort %s[] =\n{", s);
2609 for(i=0;;) {
2610 if(i%10 == 0)
2611 Bprint(ftable, "\n");
2612 Bprint(ftable, "%4d", v[i]);
2613 i++;
2614 if(i >= n) {
2615 Bprint(ftable, "\n};\n");
2616 break;
2618 Bprint(ftable, ",");
2623 * in order to free up the mem and amem arrays for the optimizer,
2624 * and still be able to output yyr1, etc., after the sizes of
2625 * the action array is known, we hide the nonterminals
2626 * derived by productions in levprd.
2629 void
2630 hideprod(void)
2632 int i, j;
2634 j = 0;
2635 levprd[0] = 0;
2636 PLOOP(1, i) {
2637 if(!(levprd[i] & REDFLAG)) {
2638 j++;
2639 if(foutput != 0)
2640 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
2642 levprd[i] = *prdptr[i] - NTBASE;
2644 if(j)
2645 print("%d rules never reduced\n", j);
2648 void
2649 callopt(void)
2651 int i, *p, j, k, *q;
2653 /* read the arrays from tempfile and set parameters */
2654 finput = Bopen(tempname, OREAD);
2655 if(finput == 0)
2656 error("optimizer cannot open tempfile");
2658 pgo[0] = 0;
2659 temp1[0] = 0;
2660 nstate = 0;
2661 nnonter = 0;
2662 for(;;) {
2663 switch(gtnm()) {
2664 case '\n':
2665 nstate++;
2666 pmem--;
2667 temp1[nstate] = pmem - mem0;
2668 case ',':
2669 continue;
2670 case '$':
2671 break;
2672 default:
2673 error("bad tempfile %s", tempname);
2675 break;
2678 pmem--;
2679 temp1[nstate] = yypgo[0] = pmem - mem0;
2680 for(;;) {
2681 switch(gtnm()) {
2682 case '\n':
2683 nnonter++;
2684 yypgo[nnonter] = pmem-mem0;
2685 case ',':
2686 continue;
2687 case -1:
2688 break;
2689 default:
2690 error("bad tempfile");
2692 break;
2694 pmem--;
2695 yypgo[nnonter--] = pmem - mem0;
2696 for(i = 0; i < nstate; i++) {
2697 k = 32000;
2698 j = 0;
2699 q = mem0 + temp1[i+1];
2700 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2701 if(*p > j)
2702 j = *p;
2703 if(*p < k)
2704 k = *p;
2706 /* nontrivial situation */
2707 if(k <= j) {
2708 /* j is now the range */
2709 /* j -= k; *//* call scj */
2710 if(k > maxoff)
2711 maxoff = k;
2713 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2714 if(j > maxspr)
2715 maxspr = j;
2718 /* initialize ggreed table */
2719 for(i = 1; i <= nnonter; i++) {
2720 ggreed[i] = 1;
2721 j = 0;
2723 /* minimum entry index is always 0 */
2724 q = mem0 + yypgo[i+1] - 1;
2725 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2726 ggreed[i] += 2;
2727 if(*p > j)
2728 j = *p;
2730 ggreed[i] = ggreed[i] + 2*j;
2731 if(j > maxoff)
2732 maxoff = j;
2735 /* now, prepare to put the shift actions into the amem array */
2736 for(i = 0; i < ACTSIZE; i++)
2737 amem[i] = 0;
2738 maxa = amem;
2739 for(i = 0; i < nstate; i++) {
2740 if(tystate[i] == 0 && adb > 1)
2741 Bprint(ftable, "State %d: null\n", i);
2742 indgo[i] = YYFLAG1;
2744 while((i = nxti()) != NOMORE)
2745 if(i >= 0)
2746 stin(i);
2747 else
2748 gin(-i);
2750 /* print amem array */
2751 if(adb > 2 )
2752 for(p = amem; p <= maxa; p += 10) {
2753 Bprint(ftable, "%4d ", (int)(p-amem));
2754 for(i = 0; i < 10; ++i)
2755 Bprint(ftable, "%4d ", p[i]);
2756 Bprint(ftable, "\n");
2759 /* write out the output appropriate to the language */
2760 aoutput();
2761 osummary();
2762 ZAPFILE(tempname);
2765 void
2766 gin(int i)
2768 int *p, *r, *s, *q1, *q2;
2770 /* enter gotos on nonterminal i into array amem */
2771 ggreed[i] = 0;
2773 q2 = mem0+ yypgo[i+1] - 1;
2774 q1 = mem0 + yypgo[i];
2776 /* now, find amem place for it */
2777 for(p = amem; p < &amem[ACTSIZE]; p++) {
2778 if(*p)
2779 continue;
2780 for(r = q1; r < q2; r += 2) {
2781 s = p + *r + 1;
2782 if(*s)
2783 goto nextgp;
2784 if(s > maxa)
2785 if((maxa = s) > &amem[ACTSIZE])
2786 error("a array overflow");
2788 /* we have found amem spot */
2789 *p = *q2;
2790 if(p > maxa)
2791 if((maxa = p) > &amem[ACTSIZE])
2792 error("a array overflow");
2793 for(r = q1; r < q2; r += 2) {
2794 s = p + *r + 1;
2795 *s = r[1];
2797 pgo[i] = p-amem;
2798 if(adb > 1)
2799 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2800 return;
2802 nextgp:;
2804 error("cannot place goto %d\n", i);
2807 void
2808 stin(int i)
2810 int *r, *s, n, flag, j, *q1, *q2;
2812 tystate[i] = 0;
2814 /* enter state i into the amem array */
2815 q2 = mem0+temp1[i+1];
2816 q1 = mem0+temp1[i];
2817 /* find an acceptable place */
2818 for(n = -maxoff; n < ACTSIZE; n++) {
2819 flag = 0;
2820 for(r = q1; r < q2; r += 2) {
2821 if((s = *r + n + amem) < amem)
2822 goto nextn;
2823 if(*s == 0)
2824 flag++;
2825 else
2826 if(*s != r[1])
2827 goto nextn;
2830 /* check that the position equals another only if the states are identical */
2831 for(j=0; j<nstate; j++) {
2832 if(indgo[j] == n) {
2834 /* we have some disagreement */
2835 if(flag)
2836 goto nextn;
2837 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2839 /* states are equal */
2840 indgo[i] = n;
2841 if(adb > 1)
2842 Bprint(ftable,
2843 "State %d: entry at %d equals state %d\n",
2844 i, n, j);
2845 return;
2848 /* we have some disagreement */
2849 goto nextn;
2853 for(r = q1; r < q2; r += 2) {
2854 if((s = *r+n+amem) >= &amem[ACTSIZE])
2855 error("out of space in optimizer a array");
2856 if(s > maxa)
2857 maxa = s;
2858 if(*s != 0 && *s != r[1])
2859 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2860 *s = r[1];
2862 indgo[i] = n;
2863 if(adb > 1)
2864 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2865 return;
2866 nextn:;
2868 error("Error; failure to place state %d\n", i);
2872 * finds the next i
2874 int
2875 nxti(void)
2877 int i, max, maxi;
2879 max = 0;
2880 maxi = 0;
2881 for(i = 1; i <= nnonter; i++)
2882 if(ggreed[i] >= max) {
2883 max = ggreed[i];
2884 maxi = -i;
2886 for(i = 0; i < nstate; ++i)
2887 if(tystate[i] >= max) {
2888 max = tystate[i];
2889 maxi = i;
2891 if(nxdb)
2892 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2893 if(max == 0)
2894 return NOMORE;
2895 return maxi;
2899 * write summary
2901 void
2902 osummary(void)
2905 int i, *p;
2907 if(foutput == 0)
2908 return;
2909 i = 0;
2910 for(p = maxa; p >= amem; p--)
2911 if(*p == 0)
2912 i++;
2914 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2915 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2916 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2917 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2921 * this version is for C
2922 * write out the optimized parser
2924 void
2925 aoutput(void)
2927 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2928 arout("yyact", amem, (maxa-amem)+1);
2929 arout("yypact", indgo, nstate);
2930 arout("yypgo", pgo, nnonter+1);
2933 void
2934 arout(char *s, int *v, int n)
2936 int i;
2938 Bprint(ftable, "static\tconst\tshort %s[] =\n{", s);
2939 for(i = 0; i < n;) {
2940 if(i%10 == 0)
2941 Bprint(ftable, "\n");
2942 Bprint(ftable, "%4d", v[i]);
2943 i++;
2944 if(i == n)
2945 Bprint(ftable, "\n};\n");
2946 else
2947 Bprint(ftable, ",");
2952 * read and convert an integer from the standard input
2953 * return the terminating character
2954 * blanks, tabs, and newlines are ignored
2956 int
2957 gtnm(void)
2959 int sign, val, c;
2961 sign = 0;
2962 val = 0;
2963 while((c=Bgetrune(finput)) != Beof) {
2964 if(isdigit(c)) {
2965 val = val*10 + c-'0';
2966 continue;
2968 if(c == '-') {
2969 sign = 1;
2970 continue;
2972 break;
2974 if(sign)
2975 val = -val;
2976 *pmem++ = val;
2977 if(pmem >= &mem0[MEMSIZE])
2978 error("out of space");
2979 return c;