Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
6 #define Bungetrune Bungetc /* ok for now. */
8 /*
9 * all these are 32 bit
10 */
11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n) (((n)+32)/32)
16 char *PARSER = "#9/lib/yaccpar";
17 char *PARSERS = "#9/lib/yaccpars";
19 #define TEMPNAME "y.tmp.XXXXXX"
20 #define ACTNAME "y.acts.XXXXXX"
21 #define OFILE "tab.c"
22 #define FILEU "output"
23 #define FILED "tab.h"
24 #define FILEDEBUG "debug"
26 enum
27 {
28 /*
29 * the following are adjustable
30 * according to memory size
31 */
32 ACTSIZE = 40000,
33 MEMSIZE = 40000,
34 NSTATES = 2000,
35 NTERMS = 511,
36 NPROD = 1600,
37 NNONTERM = 600,
38 TEMPSIZE = 2000,
39 CNAMSZ = 10000,
40 LSETSIZE = 2400,
41 WSETSIZE = 350,
43 NAMESIZE = 50,
44 NTYPES = 63,
45 ISIZE = 400,
47 PRIVATE = 0xE000, /* unicode private use */
49 /* relationships which must hold:
50 TBITSET ints must hold NTERMS+1 bits...
51 WSETSIZE >= NNONTERM
52 LSETSIZE >= NNONTERM
53 TEMPSIZE >= NTERMS + NNONTERM + 1
54 TEMPSIZE >= NSTATES
55 */
57 NTBASE = 010000,
58 ERRCODE = 8190,
59 ACCEPTCODE = 8191,
61 NOASC = 0, /* no assoc. */
62 LASC = 1, /* left assoc. */
63 RASC = 2, /* right assoc. */
64 BASC = 3, /* binary assoc. */
66 /* flags for state generation */
68 DONE = 0,
69 MUSTDO = 1,
70 MUSTLOOKAHEAD = 2,
72 /* flags for a rule having an action, and being reduced */
74 ACTFLAG = 04,
75 REDFLAG = 010,
77 /* output parser flags */
78 YYFLAG1 = -1000,
80 /* parse tokens */
81 IDENTIFIER = PRIVATE,
82 MARK,
83 TERM,
84 LEFT,
85 RIGHT,
86 BINARY,
87 PREC,
88 LCURLY,
89 IDENTCOLON,
90 NUMBER,
91 START,
92 TYPEDEF,
93 TYPENAME,
94 UNION,
96 ENDFILE = 0,
98 EMPTY = 1,
99 WHOKNOWS = 0,
100 OK = 1,
101 NOMORE = -1000
102 };
104 /* macros for getting associativity and precedence levels */
106 #define ASSOC(i) ((i)&03)
107 #define PLEVEL(i) (((i)>>4)&077)
108 #define TYPE(i) (((i)>>10)&077)
110 /* macros for setting associativity and precedence levels */
112 #define SETASC(i,j) i |= j
113 #define SETPLEV(i,j) i |= (j<<4)
114 #define SETTYPE(i,j) i |= (j<<10)
116 /* looping macros */
118 #define TLOOP(i) for(i=1; i<=ntokens; i++)
119 #define NTLOOP(i) for(i=0; i<=nnonter; i++)
120 #define PLOOP(s,i) for(i=s; i<nprod; i++)
121 #define SLOOP(i) for(i=0; i<nstate; i++)
122 #define WSBUMP(x) x++
123 #define WSLOOP(s,j) for(j=s; j<cwp; j++)
124 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
125 #define SETLOOP(i) for(i=0; i<tbitset; i++)
127 /* command to clobber tempfiles after use */
129 #define ZAPFILE(x) if(x) remove(x)
131 /* I/O descriptors */
133 Biobuf* faction; /* file for saving actions */
134 Biobuf* fdefine; /* file for #defines */
135 Biobuf* fdebug; /* y.debug for strings for debugging */
136 Biobuf* ftable; /* y.tab.c file */
137 Biobuf* ftemp; /* tempfile to pass 2 */
138 Biobuf* finput; /* input file */
139 Biobuf* foutput; /* y.output file */
141 /* communication variables between various I/O routines */
143 char* infile; /* input file name */
144 int numbval; /* value of an input number */
145 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */
147 /* structure declarations */
149 typedef
150 struct
152 int lset[TBITSET];
153 } Lkset;
155 typedef
156 struct
158 int* pitem;
159 Lkset* look;
160 } Item;
162 typedef
163 struct
165 char* name;
166 int value;
167 } Symb;
169 typedef
170 struct
172 int* pitem;
173 int flag;
174 Lkset ws;
175 } Wset;
177 /* storage of names */
179 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
180 int cnamsz = CNAMSZ; /* size of cnames */
181 char* cnamp = cnames; /* place where next name is to be put in */
182 int ndefout = 4; /* number of defined symbols output */
183 char* tempname;
184 char* actname;
185 char ttempname[] = TEMPNAME;
186 char tactname[] = ACTNAME;
187 char* parser;
188 char* yydebug;
189 int yyarg;
190 int yyline = 1;
192 /* storage of types */
193 int ntypes; /* number of types defined */
194 char* typeset[NTYPES]; /* pointers to type tags */
196 /* token information */
198 int ntokens = 0 ; /* number of tokens */
199 Symb tokset[NTERMS];
200 int toklev[NTERMS]; /* vector with the precedence of the terminals */
202 /* nonterminal information */
204 int nnonter = -1; /* the number of nonterminals */
205 Symb nontrst[NNONTERM];
206 int start; /* start symbol */
208 /* assigned token type values */
209 int extval = 0;
211 char* ytabc = OFILE; /* name of y.tab.c */
213 /* grammar rule information */
215 int mem0[MEMSIZE] ; /* production storage */
216 int* mem = mem0;
217 int nprod = 1; /* number of productions */
218 int* prdptr[NPROD]; /* pointers to descriptions of productions */
219 int levprd[NPROD]; /* precedence levels for the productions */
220 int rlines[NPROD]; /* line number for this rule */
222 /* state information */
224 int nstate = 0; /* number of states */
225 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
226 int tystate[NSTATES]; /* contains type information about the states */
227 int defact[NSTATES]; /* the default actions of states */
228 int tstates[NTERMS]; /* states generated by terminal gotos */
229 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
230 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
231 int lastred; /* the number of the last reduction of a state */
233 /* lookahead set information */
235 Lkset lkst[LSETSIZE];
236 int nolook; /* flag to turn off lookahead computations */
237 int tbitset; /* size of lookahead sets */
238 int nlset = 0; /* next lookahead set index */
239 int nolook = 0; /* flag to suppress lookahead computations */
240 Lkset clset; /* temporary storage for lookahead computations */
242 /* working set information */
244 Wset wsets[WSETSIZE];
245 Wset* cwp;
247 /* storage for action table */
249 int amem[ACTSIZE]; /* action table storage */
250 int* memp = amem; /* next free action table position */
251 int indgo[NSTATES]; /* index to the stored goto table */
253 /* temporary vector, indexable by states, terms, or ntokens */
255 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
256 int lineno = 1; /* current input line number */
257 int fatfl = 1; /* if on, error is fatal */
258 int nerrors = 0; /* number of errors */
260 /* statistics collection variables */
262 int zzgoent;
263 int zzgobest;
264 int zzacent;
265 int zzexcp;
266 int zzclose;
267 int zzrrconf;
268 int zzsrconf;
270 int* ggreed = lkst[0].lset;
271 int* pgo = wsets[0].ws.lset;
272 int* yypgo = &nontrst[0].value;
274 int maxspr = 0; /* maximum spread of any entry */
275 int maxoff = 0; /* maximum offset into a array */
276 int* pmem = mem0;
277 int* maxa;
278 int nxdb = 0;
279 int adb = 0;
282 /* storage for information about the nonterminals */
284 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
285 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
286 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
288 /* random stuff picked out from between functions */
290 int indebug = 0;
291 Wset* zzcwp = wsets;
292 int zzgoent = 0;
293 int zzgobest = 0;
294 int zzacent = 0;
295 int zzexcp = 0;
296 int zzclose = 0;
297 int zzsrconf = 0;
298 int* zzmemsz = mem0;
299 int zzrrconf = 0;
300 int pidebug = 0; /* debugging flag for putitem */
301 int gsdebug = 0;
302 int cldebug = 0; /* debugging flag for closure */
303 int pkdebug = 0;
304 int g2debug = 0;
306 struct
308 char* name;
309 long value;
310 } resrv[] =
312 "binary", BINARY,
313 "left", LEFT,
314 "nonassoc", BINARY,
315 "prec", PREC,
316 "right", RIGHT,
317 "start", START,
318 "term", TERM,
319 "token", TERM,
320 "type", TYPEDEF,
321 "union", UNION,
322 0,
323 };
325 /* define functions */
327 void main(int, char**);
328 void others(void);
329 char* chcopy(char*, char*);
330 char* writem(int*);
331 char* symnam(int);
332 void summary(void);
333 void error(char*, ...);
334 void aryfil(int*, int, int);
335 int setunion(int*, int*);
336 void prlook(Lkset*);
337 void cpres(void);
338 void cpfir(void);
339 int state(int);
340 void putitem(int*, Lkset*);
341 void cempty(void);
342 void stagen(void);
343 void closure(int);
344 Lkset* flset(Lkset*);
345 void cleantmp(void);
346 void intr(void);
347 void setup(int, char**);
348 void finact(void);
349 int defin(int, char*);
350 void defout(int);
351 char* cstash(char*);
352 long gettok(void);
353 int fdtype(int);
354 int chfind(int, char*);
355 void cpyunion(void);
356 void cpycode(void);
357 int skipcom(void);
358 void cpyact(int);
359 void openup(char*, int, int, int, char*);
360 void output(void);
361 int apack(int*, int);
362 void go2out(void);
363 void go2gen(int);
364 void precftn(int, int, int);
365 void wract(int);
366 void wrstate(int);
367 void warray(char*, int*, int);
368 void hideprod(void);
369 void callopt(void);
370 void gin(int);
371 void stin(int);
372 int nxti(void);
373 void osummary(void);
374 void aoutput(void);
375 void arout(char*, int*, int);
376 int gtnm(void);
378 void
379 main(int argc, char *argv[])
381 PARSER = unsharp(PARSER);
382 PARSERS = unsharp(PARSERS);
383 parser = PARSER;
385 setup(argc, argv); /* initialize and read productions */
386 tbitset = NWORDS(ntokens);
387 cpres(); /* make table of which productions yield a given nonterminal */
388 cempty(); /* make a table of which nonterminals can match the empty string */
389 cpfir(); /* make a table of firsts of nonterminals */
390 stagen(); /* generate the states */
391 output(); /* write the states and the tables */
392 go2out();
393 hideprod();
394 summary();
395 callopt();
396 others();
397 exits(0);
400 /*
401 * put out other arrays, copy the parsers
402 */
403 void
404 others(void)
406 int c, i, j;
408 finput = Bopen(parser, OREAD);
409 if(finput == 0)
410 error("cannot open parser %s: %r", parser);
411 warray("yyr1", levprd, nprod);
412 aryfil(temp1, nprod, 0);
413 PLOOP(1, i)
414 temp1[i] = prdptr[i+1]-prdptr[i]-2;
415 warray("yyr2", temp1, nprod);
417 aryfil(temp1, nstate, -1000);
418 TLOOP(i)
419 for(j=tstates[i]; j!=0; j=mstates[j])
420 temp1[j] = i;
421 NTLOOP(i)
422 for(j=ntstates[i]; j!=0; j=mstates[j])
423 temp1[j] = -i;
424 warray("yychk", temp1, nstate);
425 warray("yydef", defact, nstate);
427 /* put out token translation tables */
428 /* table 1 has 0-256 */
429 aryfil(temp1, 256, 0);
430 c = 0;
431 TLOOP(i) {
432 j = tokset[i].value;
433 if(j >= 0 && j < 256) {
434 if(temp1[j]) {
435 print("yacc bug -- cant have 2 different Ts with same value\n");
436 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
437 nerrors++;
439 temp1[j] = i;
440 if(j > c)
441 c = j;
444 warray("yytok1", temp1, c+1);
446 /* table 2 has PRIVATE-PRIVATE+256 */
447 aryfil(temp1, 256, 0);
448 c = 0;
449 TLOOP(i) {
450 j = tokset[i].value - PRIVATE;
451 if(j >= 0 && j < 256) {
452 if(temp1[j]) {
453 print("yacc bug -- cant have 2 different Ts with same value\n");
454 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
455 nerrors++;
457 temp1[j] = i;
458 if(j > c)
459 c = j;
462 warray("yytok2", temp1, c+1);
464 /* table 3 has everything else */
465 Bprint(ftable, "static\tconst\tlong yytok3[] =\n{\n");
466 c = 0;
467 TLOOP(i) {
468 j = tokset[i].value;
469 if(j >= 0 && j < 256)
470 continue;
471 if(j >= PRIVATE && j < 256+PRIVATE)
472 continue;
474 Bprint(ftable, "%4d,%4d,", j, i);
475 c++;
476 if(c%5 == 0)
477 Bprint(ftable, "\n");
479 Bprint(ftable, "%4d\n};\n", 0);
481 /* copy parser text */
482 while((c=Bgetrune(finput)) != Beof) {
483 if(c == '$') {
484 if((c = Bgetrune(finput)) != 'A')
485 Bputrune(ftable, '$');
486 else { /* copy actions */
487 faction = Bopen(actname, OREAD);
488 if(faction == 0)
489 error("cannot reopen action tempfile");
490 while((c=Bgetrune(faction)) != Beof)
491 Bputrune(ftable, c);
492 Bterm(faction);
493 ZAPFILE(actname);
494 c = Bgetrune(finput);
497 Bputrune(ftable, c);
499 Bterm(ftable);
502 /*
503 * copies string q into p, returning next free char ptr
504 */
505 char*
506 chcopy(char* p, char* q)
508 int c;
510 while(c = *q) {
511 if(c == '"')
512 *p++ = '\\';
513 *p++ = c;
514 q++;
516 *p = 0;
517 return p;
520 /*
521 * creates output string for item pointed to by pp
522 */
523 char*
524 writem(int *pp)
526 int i,*p;
527 static char sarr[ISIZE];
528 char* q;
530 for(p=pp; *p>0; p++)
532 p = prdptr[-*p];
533 q = chcopy(sarr, nontrst[*p-NTBASE].name);
534 q = chcopy(q, ": ");
535 for(;;) {
536 *q = ' ';
537 p++;
538 if(p == pp)
539 *q = '.';
540 q++;
541 *q = '\0';
542 i = *p;
543 if(i <= 0)
544 break;
545 q = chcopy(q, symnam(i));
546 if(q > &sarr[ISIZE-30])
547 error("item too big");
550 /* an item calling for a reduction */
551 i = *pp;
552 if(i < 0 ) {
553 q = chcopy(q, " (");
554 sprint(q, "%d)", -i);
556 return sarr;
559 /*
560 * return a pointer to the name of symbol i
561 */
562 char*
563 symnam(int i)
565 char* cp;
567 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
568 if(*cp == ' ')
569 cp++;
570 return cp;
573 /*
574 * output the summary on y.output
575 */
576 void
577 summary(void)
580 if(foutput != 0) {
581 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
582 ntokens, NTERMS, nnonter, NNONTERM);
583 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
584 nprod, NPROD, nstate, NSTATES);
585 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
586 zzsrconf, zzrrconf);
587 Bprint(foutput, "%d/%d working sets used\n",
588 (int)(zzcwp-wsets), WSETSIZE);
589 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
590 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
591 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
592 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
593 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
594 Bprint(foutput, "%d goto entries\n", zzgoent);
595 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
597 if(zzsrconf != 0 || zzrrconf != 0) {
598 print("\nconflicts: ");
599 if(zzsrconf)
600 print("%d shift/reduce", zzsrconf);
601 if(zzsrconf && zzrrconf)
602 print(", ");
603 if(zzrrconf)
604 print("%d reduce/reduce", zzrrconf);
605 print("\n");
607 if(ftemp != 0) {
608 Bterm(ftemp);
609 ftemp = 0;
611 if(fdefine != 0) {
612 Bterm(fdefine);
613 fdefine = 0;
617 /*
618 * write out error comment -- NEEDS WORK
619 */
620 void
621 error(char *s, ...)
623 va_list arg;
625 nerrors++;
626 fprint(2, "\n fatal error:");
627 va_start(arg, s);
628 vfprint(2, s, arg);
629 va_end(arg);
630 fprint(2, ", %s:%d\n", infile, lineno);
631 if(!fatfl)
632 return;
633 summary();
634 cleantmp();
635 exits("error");
638 /*
639 * set elements 0 through n-1 to c
640 */
641 void
642 aryfil(int *v, int n, int c)
644 int i;
646 for(i=0; i<n; i++)
647 v[i] = c;
650 /*
651 * set a to the union of a and b
652 * return 1 if b is not a subset of a, 0 otherwise
653 */
654 int
655 setunion(int *a, int *b)
657 int i, x, sub;
659 sub = 0;
660 SETLOOP(i) {
661 x = *a;
662 *a |= *b;
663 if(*a != x)
664 sub = 1;
665 a++;
666 b++;
668 return sub;
671 void
672 prlook(Lkset* p)
674 int j, *pp;
676 pp = p->lset;
677 if(pp == 0)
678 Bprint(foutput, "\tNULL");
679 else {
680 Bprint(foutput, " { ");
681 TLOOP(j)
682 if(BIT(pp,j))
683 Bprint(foutput, "%s ", symnam(j));
684 Bprint(foutput, "}");
688 /*
689 * compute an array with the beginnings of productions yielding given nonterminals
690 * The array pres points to these lists
691 * the array pyield has the lists: the total size is only NPROD+1
692 */
693 void
694 cpres(void)
696 int c, j, i, **pmem;
697 static int *pyield[NPROD];
699 pmem = pyield;
700 NTLOOP(i) {
701 c = i+NTBASE;
702 pres[i] = pmem;
703 fatfl = 0; /* make undefined symbols nonfatal */
704 PLOOP(0, j)
705 if(*prdptr[j] == c)
706 *pmem++ = prdptr[j]+1;
707 if(pres[i] == pmem)
708 error("nonterminal %s not defined!", nontrst[i].name);
710 pres[i] = pmem;
711 fatfl = 1;
712 if(nerrors) {
713 summary();
714 cleantmp();
715 exits("error");
717 if(pmem != &pyield[nprod])
718 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
721 /*
722 * compute an array with the first of nonterminals
723 */
724 void
725 cpfir(void)
727 int *p, **s, i, **t, ch, changes;
729 zzcwp = &wsets[nnonter];
730 NTLOOP(i) {
731 aryfil(wsets[i].ws.lset, tbitset, 0);
732 t = pres[i+1];
733 /* initially fill the sets */
734 for(s=pres[i]; s<t; ++s)
735 for(p = *s; (ch = *p) > 0; ++p) {
736 if(ch < NTBASE) {
737 SETBIT(wsets[i].ws.lset, ch);
738 break;
740 if(!pempty[ch-NTBASE])
741 break;
745 /* now, reflect transitivity */
746 changes = 1;
747 while(changes) {
748 changes = 0;
749 NTLOOP(i) {
750 t = pres[i+1];
751 for(s = pres[i]; s < t; ++s)
752 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
753 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
754 if(!pempty[ch])
755 break;
760 NTLOOP(i)
761 pfirst[i] = flset(&wsets[i].ws);
762 if(!indebug)
763 return;
764 if(foutput != 0)
765 NTLOOP(i) {
766 Bprint(foutput, "\n%s: ", nontrst[i].name);
767 prlook(pfirst[i]);
768 Bprint(foutput, " %d\n", pempty[i]);
772 /*
773 * sorts last state,and sees if it equals earlier ones. returns state number
774 */
775 int
776 state(int c)
778 Item *p1, *p2, *k, *l, *q1, *q2;
779 int size1, size2, i;
781 p1 = pstate[nstate];
782 p2 = pstate[nstate+1];
783 if(p1 == p2)
784 return 0; /* null state */
785 /* sort the items */
786 for(k = p2-1; k > p1; k--) /* make k the biggest */
787 for(l = k-1; l >= p1; --l)
788 if(l->pitem > k->pitem) {
789 int *s;
790 Lkset *ss;
792 s = k->pitem;
793 k->pitem = l->pitem;
794 l->pitem = s;
795 ss = k->look;
796 k->look = l->look;
797 l->look = ss;
799 size1 = p2 - p1; /* size of state */
801 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
802 /* get ith state */
803 q1 = pstate[i];
804 q2 = pstate[i+1];
805 size2 = q2 - q1;
806 if(size1 != size2)
807 continue;
808 k = p1;
809 for(l = q1; l < q2; l++) {
810 if(l->pitem != k->pitem)
811 break;
812 k++;
814 if(l != q2)
815 continue;
816 /* found it */
817 pstate[nstate+1] = pstate[nstate]; /* delete last state */
818 /* fix up lookaheads */
819 if(nolook)
820 return i;
821 for(l = q1, k = p1; l < q2; ++l, ++k ) {
822 int s;
824 SETLOOP(s)
825 clset.lset[s] = l->look->lset[s];
826 if(setunion(clset.lset, k->look->lset)) {
827 tystate[i] = MUSTDO;
828 /* register the new set */
829 l->look = flset( &clset );
832 return i;
834 /* state is new */
835 if(nolook)
836 error("yacc state/nolook error");
837 pstate[nstate+2] = p2;
838 if(nstate+1 >= NSTATES)
839 error("too many states");
840 if(c >= NTBASE) {
841 mstates[nstate] = ntstates[c-NTBASE];
842 ntstates[c-NTBASE] = nstate;
843 } else {
844 mstates[nstate] = tstates[c];
845 tstates[c] = nstate;
847 tystate[nstate] = MUSTDO;
848 return nstate++;
851 void
852 putitem(int *ptr, Lkset *lptr)
854 Item *j;
856 if(pidebug && foutput != 0)
857 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
858 j = pstate[nstate+1];
859 j->pitem = ptr;
860 if(!nolook)
861 j->look = flset(lptr);
862 pstate[nstate+1] = ++j;
863 if((int*)j > zzmemsz) {
864 zzmemsz = (int*)j;
865 if(zzmemsz >= &mem0[MEMSIZE])
866 error("out of state space");
870 /*
871 * mark nonterminals which derive the empty string
872 * also, look for nonterminals which don't derive any token strings
873 */
874 void
875 cempty(void)
878 int i, *p;
880 /* first, use the array pempty to detect productions that can never be reduced */
881 /* set pempty to WHONOWS */
882 aryfil(pempty, nnonter+1, WHOKNOWS);
884 /* now, look at productions, marking nonterminals which derive something */
885 more:
886 PLOOP(0, i) {
887 if(pempty[*prdptr[i] - NTBASE])
888 continue;
889 for(p = prdptr[i]+1; *p >= 0; ++p)
890 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
891 break;
892 /* production can be derived */
893 if(*p < 0) {
894 pempty[*prdptr[i]-NTBASE] = OK;
895 goto more;
899 /* now, look at the nonterminals, to see if they are all OK */
900 NTLOOP(i) {
901 /* the added production rises or falls as the start symbol ... */
902 if(i == 0)
903 continue;
904 if(pempty[i] != OK) {
905 fatfl = 0;
906 error("nonterminal %s never derives any token string", nontrst[i].name);
910 if(nerrors) {
911 summary();
912 cleantmp();
913 exits("error");
916 /* now, compute the pempty array, to see which nonterminals derive the empty string */
917 /* set pempty to WHOKNOWS */
918 aryfil( pempty, nnonter+1, WHOKNOWS);
920 /* loop as long as we keep finding empty nonterminals */
922 again:
923 PLOOP(1, i) {
924 /* not known to be empty */
925 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
926 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
928 /* we have a nontrivially empty nonterminal */
929 if(*p < 0) {
930 pempty[*prdptr[i]-NTBASE] = EMPTY;
931 /* got one ... try for another */
932 goto again;
938 /*
939 * generate the states
940 */
941 void
942 stagen(void)
945 int c, i, j, more;
946 Wset *p, *q;
948 /* initialize */
949 nstate = 0;
951 /* THIS IS FUNNY from the standpoint of portability
952 * it represents the magic moment when the mem0 array, which has
953 * been holding the productions, starts to hold item pointers, of a
954 * different type...
955 * someday, alloc should be used to allocate all this stuff... for now, we
956 * accept that if pointers don't fit in integers, there is a problem...
957 */
959 pstate[0] = pstate[1] = (Item*)mem;
960 aryfil(clset.lset, tbitset, 0);
961 putitem(prdptr[0]+1, &clset);
962 tystate[0] = MUSTDO;
963 nstate = 1;
964 pstate[2] = pstate[1];
966 aryfil(amem, ACTSIZE, 0);
968 /* now, the main state generation loop */
969 for(more=1; more;) {
970 more = 0;
971 SLOOP(i) {
972 if(tystate[i] != MUSTDO)
973 continue;
974 tystate[i] = DONE;
975 aryfil(temp1, nnonter+1, 0);
976 /* take state i, close it, and do gotos */
977 closure(i);
978 /* generate goto's */
979 WSLOOP(wsets, p) {
980 if(p->flag)
981 continue;
982 p->flag = 1;
983 c = *(p->pitem);
984 if(c <= 1) {
985 if(pstate[i+1]-pstate[i] <= p-wsets)
986 tystate[i] = MUSTLOOKAHEAD;
987 continue;
989 /* do a goto on c */
990 WSLOOP(p, q)
991 /* this item contributes to the goto */
992 if(c == *(q->pitem)) {
993 putitem(q->pitem+1, &q->ws);
994 q->flag = 1;
996 if(c < NTBASE)
997 state(c); /* register new state */
998 else
999 temp1[c-NTBASE] = state(c);
1001 if(gsdebug && foutput != 0) {
1002 Bprint(foutput, "%d: ", i);
1003 NTLOOP(j)
1004 if(temp1[j])
1005 Bprint(foutput, "%s %d, ",
1006 nontrst[j].name, temp1[j]);
1007 Bprint(foutput, "\n");
1009 indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1010 /* do some more */
1011 more = 1;
1017 * generate the closure of state i
1019 void
1020 closure(int i)
1023 Wset *u, *v;
1024 Item *p, *q;
1025 int c, ch, work, k, *pi, **s, **t;
1027 zzclose++;
1029 /* first, copy kernel of state i to wsets */
1030 cwp = wsets;
1031 ITMLOOP(i, p, q) {
1032 cwp->pitem = p->pitem;
1033 cwp->flag = 1; /* this item must get closed */
1034 SETLOOP(k)
1035 cwp->ws.lset[k] = p->look->lset[k];
1036 WSBUMP(cwp);
1039 /* now, go through the loop, closing each item */
1040 work = 1;
1041 while(work) {
1042 work = 0;
1043 WSLOOP(wsets, u) {
1044 if(u->flag == 0)
1045 continue;
1046 /* dot is before c */
1047 c = *(u->pitem);
1048 if(c < NTBASE) {
1049 u->flag = 0;
1050 /* only interesting case is where . is before nonterminal */
1051 continue;
1054 /* compute the lookahead */
1055 aryfil(clset.lset, tbitset, 0);
1057 /* find items involving c */
1058 WSLOOP(u, v)
1059 if(v->flag == 1 && *(pi=v->pitem) == c) {
1060 v->flag = 0;
1061 if(nolook)
1062 continue;
1063 while((ch = *++pi) > 0) {
1064 /* terminal symbol */
1065 if(ch < NTBASE) {
1066 SETBIT(clset.lset, ch);
1067 break;
1069 /* nonterminal symbol */
1070 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1071 if(!pempty[ch-NTBASE])
1072 break;
1074 if(ch <= 0)
1075 setunion(clset.lset, v->ws.lset);
1079 * now loop over productions derived from c
1080 * c is now nonterminal number
1082 c -= NTBASE;
1083 t = pres[c+1];
1084 for(s = pres[c]; s < t; ++s) {
1086 * put these items into the closure
1087 * is the item there
1089 WSLOOP(wsets, v)
1090 /* yes, it is there */
1091 if(v->pitem == *s) {
1092 if(nolook)
1093 goto nexts;
1094 if(setunion(v->ws.lset, clset.lset))
1095 v->flag = work = 1;
1096 goto nexts;
1099 /* not there; make a new entry */
1100 if(cwp-wsets+1 >= WSETSIZE)
1101 error( "working set overflow");
1102 cwp->pitem = *s;
1103 cwp->flag = 1;
1104 if(!nolook) {
1105 work = 1;
1106 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1108 WSBUMP(cwp);
1110 nexts:;
1115 /* have computed closure; flags are reset; return */
1116 if(cwp > zzcwp)
1117 zzcwp = cwp;
1118 if(cldebug && foutput != 0) {
1119 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1120 WSLOOP(wsets, u) {
1121 if(u->flag)
1122 Bprint(foutput, "flag set!\n");
1123 u->flag = 0;
1124 Bprint(foutput, "\t%s", writem(u->pitem));
1125 prlook(&u->ws);
1126 Bprint(foutput, "\n");
1132 * decide if the lookahead set pointed to by p is known
1133 * return pointer to a perminent location for the set
1135 Lkset*
1136 flset(Lkset *p)
1138 Lkset *q;
1139 int *u, *v, *w, j;
1141 for(q = &lkst[nlset]; q-- > lkst;) {
1142 u = p->lset;
1143 v = q->lset;
1144 w = &v[tbitset];
1145 while(v < w)
1146 if(*u++ != *v++)
1147 goto more;
1148 /* we have matched */
1149 return q;
1150 more:;
1152 /* add a new one */
1153 q = &lkst[nlset++];
1154 if(nlset >= LSETSIZE)
1155 error("too many lookahead sets");
1156 SETLOOP(j)
1157 q->lset[j] = p->lset[j];
1158 return q;
1161 void
1162 cleantmp(void)
1164 ZAPFILE(actname);
1165 ZAPFILE(tempname);
1168 void
1169 intr(void)
1171 cleantmp();
1172 exits("interrupted");
1175 void
1176 setup(int argc, char *argv[])
1178 long c, t;
1179 int i, j, fd, lev, ty, ytab, *p;
1180 int vflag, dflag, stem;
1181 char actnm[8], *stemc, *s, dirbuf[128];
1182 Biobuf *fout;
1184 ytab = 0;
1185 vflag = 0;
1186 dflag = 0;
1187 stem = 0;
1188 stemc = "y";
1189 foutput = 0;
1190 fdefine = 0;
1191 fdebug = 0;
1192 ARGBEGIN{
1193 case 'v':
1194 case 'V':
1195 vflag++;
1196 break;
1197 case 'D':
1198 yydebug = ARGF();
1199 break;
1200 case 'a':
1201 yyarg = 1;
1202 break;
1203 case 'd':
1204 dflag++;
1205 break;
1206 case 'l':
1207 yyline = 0;
1208 break;
1209 case 'o':
1210 ytab++;
1211 ytabc = ARGF();
1212 break;
1213 case 's':
1214 stem++;
1215 stemc = ARGF();
1216 break;
1217 case 'S':
1218 parser = PARSERS;
1219 break;
1220 default:
1221 error("illegal option: %c", ARGC());
1222 }ARGEND
1223 openup(stemc, dflag, vflag, ytab, ytabc);
1224 fout = dflag?fdefine:ftable;
1225 if(yyarg){
1226 Bprint(ftable, "#define\tYYARG\t1\n\n");
1228 if((fd = mkstemp(ttempname)) >= 0){
1229 tempname = ttempname;
1230 ftemp = Bfdopen(fd, OWRITE);
1232 if((fd = mkstemp(tactname)) >= 0){
1233 actname = tactname;
1234 faction = Bfdopen(fd, OWRITE);
1236 if(ftemp == 0 || faction == 0)
1237 error("cannot open temp file");
1238 if(argc < 1)
1239 error("no input file");
1240 infile = argv[0];
1241 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1242 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1243 s = malloc(i);
1244 if(s != nil){
1245 snprint(s, i, "%s/%s", dirbuf, infile);
1246 cleanname(s);
1247 infile = s;
1250 finput = Bopen(infile, OREAD);
1251 if(finput == 0)
1252 error("cannot open '%s'", argv[0]);
1253 cnamp = cnames;
1255 defin(0, "$end");
1256 extval = PRIVATE; /* tokens start in unicode 'private use' */
1257 defin(0, "error");
1258 defin(1, "$accept");
1259 defin(0, "$unk");
1260 mem = mem0;
1261 i = 0;
1263 for(t = gettok(); t != MARK && t != ENDFILE;)
1264 switch(t) {
1265 case ';':
1266 t = gettok();
1267 break;
1269 case START:
1270 if(gettok() != IDENTIFIER)
1271 error("bad %%start construction");
1272 start = chfind(1, tokname);
1273 t = gettok();
1274 continue;
1276 case TYPEDEF:
1277 if(gettok() != TYPENAME)
1278 error("bad syntax in %%type");
1279 ty = numbval;
1280 for(;;) {
1281 t = gettok();
1282 switch(t) {
1283 case IDENTIFIER:
1284 if((t=chfind(1, tokname)) < NTBASE) {
1285 j = TYPE(toklev[t]);
1286 if(j != 0 && j != ty)
1287 error("type redeclaration of token %s",
1288 tokset[t].name);
1289 else
1290 SETTYPE(toklev[t], ty);
1291 } else {
1292 j = nontrst[t-NTBASE].value;
1293 if(j != 0 && j != ty)
1294 error("type redeclaration of nonterminal %s",
1295 nontrst[t-NTBASE].name );
1296 else
1297 nontrst[t-NTBASE].value = ty;
1299 case ',':
1300 continue;
1301 case ';':
1302 t = gettok();
1303 default:
1304 break;
1306 break;
1308 continue;
1310 case UNION:
1311 /* copy the union declaration to the output */
1312 cpyunion();
1313 t = gettok();
1314 continue;
1316 case LEFT:
1317 case BINARY:
1318 case RIGHT:
1319 i++;
1321 case TERM:
1322 /* nonzero means new prec. and assoc. */
1323 lev = t-TERM;
1324 ty = 0;
1326 /* get identifiers so defined */
1327 t = gettok();
1329 /* there is a type defined */
1330 if(t == TYPENAME) {
1331 ty = numbval;
1332 t = gettok();
1334 for(;;) {
1335 switch(t) {
1336 case ',':
1337 t = gettok();
1338 continue;
1340 case ';':
1341 break;
1343 case IDENTIFIER:
1344 j = chfind(0, tokname);
1345 if(j >= NTBASE)
1346 error("%s defined earlier as nonterminal", tokname);
1347 if(lev) {
1348 if(ASSOC(toklev[j]))
1349 error("redeclaration of precedence of %s", tokname);
1350 SETASC(toklev[j], lev);
1351 SETPLEV(toklev[j], i);
1353 if(ty) {
1354 if(TYPE(toklev[j]))
1355 error("redeclaration of type of %s", tokname);
1356 SETTYPE(toklev[j],ty);
1358 t = gettok();
1359 if(t == NUMBER) {
1360 tokset[j].value = numbval;
1361 if(j < ndefout && j > 3)
1362 error("please define type number of %s earlier",
1363 tokset[j].name);
1364 t = gettok();
1366 continue;
1368 break;
1370 continue;
1372 case LCURLY:
1373 defout(0);
1374 cpycode();
1375 t = gettok();
1376 continue;
1378 default:
1379 error("syntax error");
1381 if(t == ENDFILE)
1382 error("unexpected EOF before %%");
1384 /* t is MARK */
1385 if(!yyarg)
1386 Bprint(ftable, "extern int yyerrflag;\n");
1387 Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1388 Bprint(ftable, "#define YYMAXDEPTH 150\n");
1389 Bprint(ftable, "#endif\n" );
1390 if(!ntypes) {
1391 Bprint(ftable, "#ifndef YYSTYPE\n");
1392 Bprint(ftable, "#define YYSTYPE int\n");
1393 Bprint(ftable, "#endif\n");
1395 if(!yyarg){
1396 Bprint(ftable, "YYSTYPE yylval;\n");
1397 Bprint(ftable, "YYSTYPE yyval;\n");
1398 }else{
1399 if(dflag)
1400 Bprint(ftable, "#include \"%s.%s\"\n\n", stemc, FILED);
1401 Bprint(fout, "struct Yyarg {\n");
1402 Bprint(fout, "\tint\tyynerrs;\n");
1403 Bprint(fout, "\tint\tyyerrflag;\n");
1404 Bprint(fout, "\tvoid*\targ;\n");
1405 Bprint(fout, "\tYYSTYPE\tyyval;\n");
1406 Bprint(fout, "\tYYSTYPE\tyylval;\n");
1407 Bprint(fout, "};\n\n");
1409 prdptr[0] = mem;
1411 /* added production */
1412 *mem++ = NTBASE;
1414 /* if start is 0, we will overwrite with the lhs of the first rule */
1415 *mem++ = start;
1416 *mem++ = 1;
1417 *mem++ = 0;
1418 prdptr[1] = mem;
1419 while((t=gettok()) == LCURLY)
1420 cpycode();
1421 if(t != IDENTCOLON)
1422 error("bad syntax on first rule");
1424 if(!start)
1425 prdptr[0][1] = chfind(1, tokname);
1427 /* read rules */
1428 while(t != MARK && t != ENDFILE) {
1429 /* process a rule */
1430 rlines[nprod] = lineno;
1431 if(t == '|')
1432 *mem++ = *prdptr[nprod-1];
1433 else
1434 if(t == IDENTCOLON) {
1435 *mem = chfind(1, tokname);
1436 if(*mem < NTBASE)
1437 error("token illegal on LHS of grammar rule");
1438 mem++;
1439 } else
1440 error("illegal rule: missing semicolon or | ?");
1441 /* read rule body */
1442 t = gettok();
1444 more_rule:
1445 while(t == IDENTIFIER) {
1446 *mem = chfind(1, tokname);
1447 if(*mem < NTBASE)
1448 levprd[nprod] = toklev[*mem];
1449 mem++;
1450 t = gettok();
1452 if(t == PREC) {
1453 if(gettok() != IDENTIFIER)
1454 error("illegal %%prec syntax");
1455 j = chfind(2, tokname);
1456 if(j >= NTBASE)
1457 error("nonterminal %s illegal after %%prec",
1458 nontrst[j-NTBASE].name);
1459 levprd[nprod] = toklev[j];
1460 t = gettok();
1462 if(t == '=') {
1463 levprd[nprod] |= ACTFLAG;
1464 Bprint(faction, "\ncase %d:", nprod);
1465 cpyact(mem-prdptr[nprod]-1);
1466 Bprint(faction, " break;");
1467 if((t=gettok()) == IDENTIFIER) {
1469 /* action within rule... */
1470 sprint(actnm, "$$%d", nprod);
1472 /* make it a nonterminal */
1473 j = chfind(1, actnm);
1476 * the current rule will become rule number nprod+1
1477 * move the contents down, and make room for the null
1479 for(p = mem; p >= prdptr[nprod]; --p)
1480 p[2] = *p;
1481 mem += 2;
1483 /* enter null production for action */
1484 p = prdptr[nprod];
1485 *p++ = j;
1486 *p++ = -nprod;
1488 /* update the production information */
1489 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1490 levprd[nprod] = ACTFLAG;
1491 if(++nprod >= NPROD)
1492 error("more than %d rules", NPROD);
1493 prdptr[nprod] = p;
1495 /* make the action appear in the original rule */
1496 *mem++ = j;
1498 /* get some more of the rule */
1499 goto more_rule;
1503 while(t == ';')
1504 t = gettok();
1505 *mem++ = -nprod;
1507 /* check that default action is reasonable */
1508 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1510 /* no explicit action, LHS has value */
1511 int tempty;
1513 tempty = prdptr[nprod][1];
1514 if(tempty < 0)
1515 error("must return a value, since LHS has a type");
1516 else
1517 if(tempty >= NTBASE)
1518 tempty = nontrst[tempty-NTBASE].value;
1519 else
1520 tempty = TYPE(toklev[tempty]);
1521 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1522 error("default action causes potential type clash");
1524 nprod++;
1525 if(nprod >= NPROD)
1526 error("more than %d rules", NPROD);
1527 prdptr[nprod] = mem;
1528 levprd[nprod] = 0;
1531 /* end of all rules */
1532 defout(1);
1534 finact();
1535 if(t == MARK) {
1536 Bprint(ftable, "\n");
1537 if(yyline)
1538 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
1539 while((c=Bgetrune(finput)) != Beof)
1540 Bputrune(ftable, c);
1542 Bterm(finput);
1546 * finish action routine
1548 void
1549 finact(void)
1552 Bterm(faction);
1553 Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1554 Bprint(ftable, "#define YYERRCODE %d\n", 2);
1558 * define s to be a terminal if t=0
1559 * or a nonterminal if t=1
1561 int
1562 defin(int nt, char *s)
1564 int val;
1565 Rune rune;
1567 val = 0;
1568 if(nt) {
1569 nnonter++;
1570 if(nnonter >= NNONTERM)
1571 error("too many nonterminals, limit %d",NNONTERM);
1572 nontrst[nnonter].name = cstash(s);
1573 return NTBASE + nnonter;
1576 /* must be a token */
1577 ntokens++;
1578 if(ntokens >= NTERMS)
1579 error("too many terminals, limit %d", NTERMS);
1580 tokset[ntokens].name = cstash(s);
1582 /* establish value for token */
1583 /* single character literal */
1584 if(s[0] == ' ') {
1585 val = chartorune(&rune, &s[1]);
1586 if(s[val+1] == 0) {
1587 val = rune;
1588 goto out;
1592 /* escape sequence */
1593 if(s[0] == ' ' && s[1] == '\\') {
1594 if(s[3] == 0) {
1595 /* single character escape sequence */
1596 switch(s[2]) {
1597 case 'n': val = '\n'; break;
1598 case 'r': val = '\r'; break;
1599 case 'b': val = '\b'; break;
1600 case 't': val = '\t'; break;
1601 case 'f': val = '\f'; break;
1602 case '\'': val = '\''; break;
1603 case '"': val = '"'; break;
1604 case '\\': val = '\\'; break;
1605 default: error("invalid escape");
1607 goto out;
1610 /* \nnn sequence */
1611 if(s[2] >= '0' && s[2] <= '7') {
1612 if(s[3] < '0' ||
1613 s[3] > '7' ||
1614 s[4] < '0' ||
1615 s[4] > '7' ||
1616 s[5] != 0)
1617 error("illegal \\nnn construction");
1618 val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1619 if(val == 0)
1620 error("'\\000' is illegal");
1621 goto out;
1623 error("unknown escape");
1625 val = extval++;
1627 out:
1628 tokset[ntokens].value = val;
1629 toklev[ntokens] = 0;
1630 return ntokens;
1634 * write out the defines (at the end of the declaration section)
1636 void
1637 defout(int last)
1639 int i, c;
1640 char sar[NAMESIZE+10];
1642 for(i=ndefout; i<=ntokens; i++) {
1643 /* non-literals */
1644 c = tokset[i].name[0];
1645 if(c != ' ' && c != '$') {
1646 Bprint(ftable, "#define %s %d\n",
1647 tokset[i].name, tokset[i].value);
1648 if(fdefine)
1649 Bprint(fdefine, "#define\t%s\t%d\n",
1650 tokset[i].name, tokset[i].value);
1653 ndefout = ntokens+1;
1654 if(last && fdebug) {
1655 Bprint(fdebug, "static char* yytoknames[] =\n{\n");
1656 TLOOP(i) {
1657 if(tokset[i].name) {
1658 chcopy(sar, tokset[i].name);
1659 Bprint(fdebug, "\t\"%s\",\n", sar);
1660 continue;
1662 Bprint(fdebug, "\t0,\n");
1664 Bprint(fdebug, "};\n");
1668 char*
1669 cstash(char *s)
1671 char *temp;
1673 temp = cnamp;
1674 do {
1675 if(cnamp >= &cnames[cnamsz])
1676 error("too many characters in id's and literals");
1677 else
1678 *cnamp++ = *s;
1679 } while(*s++);
1680 return temp;
1683 long
1684 gettok(void)
1686 long c;
1687 Rune rune;
1688 int i, base, match, reserve;
1689 static int peekline;
1691 begin:
1692 reserve = 0;
1693 lineno += peekline;
1694 peekline = 0;
1695 c = Bgetrune(finput);
1696 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1697 if(c == '\n')
1698 lineno++;
1699 c = Bgetrune(finput);
1702 /* skip comment */
1703 if(c == '/') {
1704 lineno += skipcom();
1705 goto begin;
1707 switch(c) {
1708 case Beof:
1709 return ENDFILE;
1711 case '{':
1712 Bungetrune(finput);
1713 return '=';
1715 case '<':
1716 /* get, and look up, a type name (union member name) */
1717 i = 0;
1718 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1719 rune = c;
1720 c = runetochar(&tokname[i], &rune);
1721 if(i < NAMESIZE)
1722 i += c;
1724 if(c != '>')
1725 error("unterminated < ... > clause");
1726 tokname[i] = 0;
1727 for(i=1; i<=ntypes; i++)
1728 if(!strcmp(typeset[i], tokname)) {
1729 numbval = i;
1730 return TYPENAME;
1732 ntypes++;
1733 numbval = ntypes;
1734 typeset[numbval] = cstash(tokname);
1735 return TYPENAME;
1737 case '"':
1738 case '\'':
1739 match = c;
1740 tokname[0] = ' ';
1741 i = 1;
1742 for(;;) {
1743 c = Bgetrune(finput);
1744 if(c == '\n' || c <= 0)
1745 error("illegal or missing ' or \"" );
1746 if(c == '\\') {
1747 tokname[i] = '\\';
1748 if(i < NAMESIZE)
1749 i++;
1750 c = Bgetrune(finput);
1751 } else
1752 if(c == match)
1753 break;
1754 rune = c;
1755 c = runetochar(&tokname[i], &rune);
1756 if(i < NAMESIZE)
1757 i += c;
1759 break;
1761 case '%':
1762 case '\\':
1763 switch(c = Bgetrune(finput)) {
1764 case '0': return TERM;
1765 case '<': return LEFT;
1766 case '2': return BINARY;
1767 case '>': return RIGHT;
1768 case '%':
1769 case '\\': return MARK;
1770 case '=': return PREC;
1771 case '{': return LCURLY;
1772 default: reserve = 1;
1775 default:
1776 /* number */
1777 if(isdigit(c)) {
1778 numbval = c-'0';
1779 base = (c=='0')? 8: 10;
1780 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1781 numbval = numbval*base + (c-'0');
1782 Bungetrune(finput);
1783 return NUMBER;
1785 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
1786 i = 0;
1787 while(islower(c) || isupper(c) || isdigit(c) ||
1788 c == '-' || c=='_' || c=='.' || c=='$') {
1789 if(reserve && isupper(c))
1790 c += 'a'-'A';
1791 rune = c;
1792 c = runetochar(&tokname[i], &rune);
1793 if(i < NAMESIZE)
1794 i += c;
1795 c = Bgetrune(finput);
1797 } else
1798 return c;
1799 Bungetrune(finput);
1801 tokname[i] = 0;
1803 /* find a reserved word */
1804 if(reserve) {
1805 for(c=0; resrv[c].name; c++)
1806 if(strcmp(tokname, resrv[c].name) == 0)
1807 return resrv[c].value;
1808 error("invalid escape, or illegal reserved word: %s", tokname);
1811 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1812 c = Bgetrune(finput);
1813 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1814 if(c == '\n')
1815 peekline++;
1816 /* look for comments */
1817 if(c == '/')
1818 peekline += skipcom();
1819 c = Bgetrune(finput);
1821 if(c == ':')
1822 return IDENTCOLON;
1823 Bungetrune(finput);
1824 return IDENTIFIER;
1828 * determine the type of a symbol
1830 int
1831 fdtype(int t)
1833 int v;
1835 if(t >= NTBASE)
1836 v = nontrst[t-NTBASE].value;
1837 else
1838 v = TYPE(toklev[t]);
1839 if(v <= 0)
1840 error("must specify type for %s", (t>=NTBASE)?
1841 nontrst[t-NTBASE].name: tokset[t].name);
1842 return v;
1845 int
1846 chfind(int t, char *s)
1848 int i;
1850 if(s[0] == ' ')
1851 t = 0;
1852 TLOOP(i)
1853 if(!strcmp(s, tokset[i].name))
1854 return i;
1855 NTLOOP(i)
1856 if(!strcmp(s, nontrst[i].name))
1857 return NTBASE+i;
1859 /* cannot find name */
1860 if(t > 1)
1861 error("%s should have been defined earlier", s);
1862 return defin(t, s);
1866 * copy the union declaration to the output, and the define file if present
1868 void
1869 cpyunion(void)
1871 long c;
1872 int level;
1874 Bprint(ftable, "\n");
1875 if(yyline)
1876 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
1877 Bprint(ftable, "typedef union ");
1878 if(fdefine != 0)
1879 Bprint(fdefine, "\ntypedef union ");
1881 level = 0;
1882 for(;;) {
1883 if((c=Bgetrune(finput)) == Beof)
1884 error("EOF encountered while processing %%union");
1885 Bputrune(ftable, c);
1886 if(fdefine != 0)
1887 Bputrune(fdefine, c);
1888 switch(c) {
1889 case '\n':
1890 lineno++;
1891 break;
1892 case '{':
1893 level++;
1894 break;
1895 case '}':
1896 level--;
1898 /* we are finished copying */
1899 if(level == 0) {
1900 Bprint(ftable, " YYSTYPE;\n");
1901 if(fdefine != 0){
1902 Bprint(fdefine, "\tYYSTYPE;\n");
1903 if(!yyarg)
1904 Bprint(fdefine, "extern\tYYSTYPE\tyylval;\n");
1906 return;
1913 * copies code between \{ and \}
1915 void
1916 cpycode(void)
1918 long c;
1920 c = Bgetrune(finput);
1921 if(c == '\n') {
1922 c = Bgetrune(finput);
1923 lineno++;
1925 Bprint(ftable, "\n");
1926 if(yyline)
1927 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
1928 while(c != Beof) {
1929 if(c == '\\') {
1930 if((c=Bgetrune(finput)) == '}')
1931 return;
1932 Bputc(ftable, '\\');
1934 if(c == '%') {
1935 if((c=Bgetrune(finput)) == '}')
1936 return;
1937 Bputc(ftable, '%');
1939 Bputrune(ftable, c);
1940 if(c == '\n')
1941 lineno++;
1942 c = Bgetrune(finput);
1944 error("eof before %%}");
1948 * skip over comments
1949 * skipcom is called after reading a '/'
1951 int
1952 skipcom(void)
1954 long c;
1955 int i;
1957 /* i is the number of lines skipped */
1958 i = 0;
1959 if(Bgetrune(finput) != '*')
1960 error("illegal comment");
1961 c = Bgetrune(finput);
1962 while(c != Beof) {
1963 while(c == '*')
1964 if((c=Bgetrune(finput)) == '/')
1965 return i;
1966 if(c == '\n')
1967 i++;
1968 c = Bgetrune(finput);
1970 error("EOF inside comment");
1971 return 0;
1975 * copy C action to the next ; or closing }
1977 void
1978 cpyact(int offset)
1980 long c;
1981 int brac, match, j, s, fnd, tok;
1983 Bprint(faction, "\n");
1984 if(yyline)
1985 Bprint(faction, "#line\t%d\t\"%s\"\n", lineno, infile);
1986 brac = 0;
1988 loop:
1989 c = Bgetrune(finput);
1990 swt:
1991 switch(c) {
1992 case ';':
1993 if(brac == 0) {
1994 Bputrune(faction, c);
1995 return;
1997 goto lcopy;
1999 case '{':
2000 brac++;
2001 goto lcopy;
2003 case '$':
2004 s = 1;
2005 tok = -1;
2006 c = Bgetrune(finput);
2008 /* type description */
2009 if(c == '<') {
2010 Bungetrune(finput);
2011 if(gettok() != TYPENAME)
2012 error("bad syntax on $<ident> clause");
2013 tok = numbval;
2014 c = Bgetrune(finput);
2016 if(c == '$') {
2017 Bprint(faction, "yyval");
2019 /* put out the proper tag... */
2020 if(ntypes) {
2021 if(tok < 0)
2022 tok = fdtype(*prdptr[nprod]);
2023 Bprint(faction, ".%s", typeset[tok]);
2025 goto loop;
2027 if(c == '-') {
2028 s = -s;
2029 c = Bgetrune(finput);
2031 if(isdigit(c)) {
2032 j = 0;
2033 while(isdigit(c)) {
2034 j = j*10 + (c-'0');
2035 c = Bgetrune(finput);
2037 Bungetrune(finput);
2038 j = j*s - offset;
2039 if(j > 0)
2040 error("Illegal use of $%d", j+offset);
2042 dollar:
2043 Bprint(faction, "yypt[-%d].yyv", -j);
2045 /* put out the proper tag */
2046 if(ntypes) {
2047 if(j+offset <= 0 && tok < 0)
2048 error("must specify type of $%d", j+offset);
2049 if(tok < 0)
2050 tok = fdtype(prdptr[nprod][j+offset]);
2051 Bprint(faction, ".%s", typeset[tok]);
2053 goto loop;
2055 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2056 int tok; /* tok used oustide for type info */
2058 /* look for $name */
2059 Bungetrune(finput);
2060 if(gettok() != IDENTIFIER)
2061 error("$ must be followed by an identifier");
2062 tok = chfind(2, tokname);
2063 if((c = Bgetrune(finput)) != '#') {
2064 Bungetrune(finput);
2065 fnd = -1;
2066 } else
2067 if(gettok() != NUMBER) {
2068 error("# must be followed by number");
2069 fnd = -1;
2070 } else
2071 fnd = numbval;
2072 for(j=1; j<=offset; ++j)
2073 if(tok == prdptr[nprod][j]) {
2074 if(--fnd <= 0) {
2075 j -= offset;
2076 goto dollar;
2079 error("$name or $name#number not found");
2081 Bputc(faction, '$');
2082 if(s < 0 )
2083 Bputc(faction, '-');
2084 goto swt;
2086 case '}':
2087 brac--;
2088 if(brac)
2089 goto lcopy;
2090 Bputrune(faction, c);
2091 return;
2093 case '/':
2094 /* look for comments */
2095 Bputrune(faction, c);
2096 c = Bgetrune(finput);
2097 if(c != '*')
2098 goto swt;
2100 /* it really is a comment */
2101 Bputrune(faction, c);
2102 c = Bgetrune(finput);
2103 while(c >= 0) {
2104 while(c == '*') {
2105 Bputrune(faction, c);
2106 if((c=Bgetrune(finput)) == '/')
2107 goto lcopy;
2109 Bputrune(faction, c);
2110 if(c == '\n')
2111 lineno++;
2112 c = Bgetrune(finput);
2114 error("EOF inside comment");
2116 case '\'':
2117 /* character constant */
2118 match = '\'';
2119 goto string;
2121 case '"':
2122 /* character string */
2123 match = '"';
2125 string:
2126 Bputrune(faction, c);
2127 while(c = Bgetrune(finput)) {
2128 if(c == '\\') {
2129 Bputrune(faction, c);
2130 c = Bgetrune(finput);
2131 if(c == '\n')
2132 lineno++;
2133 } else {
2134 if(c == match)
2135 goto lcopy;
2136 if(c == '\n')
2137 error("newline in string or char. const.");
2139 Bputrune(faction, c);
2141 error("EOF in string or character constant");
2143 case Beof:
2144 error("action does not terminate");
2146 case '\n':
2147 lineno++;
2148 goto lcopy;
2151 lcopy:
2152 Bputrune(faction, c);
2153 goto loop;
2156 void
2157 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2159 char buf[256];
2161 if(vflag) {
2162 sprint(buf, "%s.%s", stem, FILEU);
2163 foutput = Bopen(buf, OWRITE);
2164 if(foutput == 0)
2165 error("cannot open %s", buf);
2167 if(yydebug) {
2168 sprint(buf, "%s.%s", stem, FILEDEBUG);
2169 if((fdebug = Bopen(buf, OWRITE)) == 0)
2170 error("can't open %s", buf);
2172 if(dflag) {
2173 sprint(buf, "%s.%s", stem, FILED);
2174 fdefine = Bopen(buf, OWRITE);
2175 if(fdefine == 0)
2176 error("can't create %s", buf);
2178 if(ytab == 0)
2179 sprint(buf, "%s.%s", stem, OFILE);
2180 else
2181 strcpy(buf, ytabc);
2182 ftable = Bopen(buf, OWRITE);
2183 if(ftable == 0)
2184 error("cannot open table file %s", buf);
2188 * print the output for the states
2190 void
2191 output(void)
2193 int i, k, c;
2194 Wset *u, *v;
2196 Bprint(ftable, "static\tconst\tshort yyexca[] =\n{");
2197 if(fdebug)
2198 Bprint(fdebug, "static\tconst\tchar* yystates[] =\n{\n");
2200 /* output the stuff for state i */
2201 SLOOP(i) {
2202 nolook = tystate[i]!=MUSTLOOKAHEAD;
2203 closure(i);
2205 /* output actions */
2206 nolook = 1;
2207 aryfil(temp1, ntokens+nnonter+1, 0);
2208 WSLOOP(wsets, u) {
2209 c = *(u->pitem);
2210 if(c > 1 && c < NTBASE && temp1[c] == 0) {
2211 WSLOOP(u, v)
2212 if(c == *(v->pitem))
2213 putitem(v->pitem+1, (Lkset*)0);
2214 temp1[c] = state(c);
2215 } else
2216 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2217 temp1[c+ntokens] = amem[indgo[i]+c];
2219 if(i == 1)
2220 temp1[1] = ACCEPTCODE;
2222 /* now, we have the shifts; look at the reductions */
2223 lastred = 0;
2224 WSLOOP(wsets, u) {
2225 c = *u->pitem;
2227 /* reduction */
2228 if(c <= 0) {
2229 lastred = -c;
2230 TLOOP(k)
2231 if(BIT(u->ws.lset, k)) {
2232 if(temp1[k] == 0)
2233 temp1[k] = c;
2234 else
2235 if(temp1[k] < 0) { /* reduce/reduce conflict */
2236 if(foutput)
2237 Bprint(foutput,
2238 "\n%d: reduce/reduce conflict"
2239 " (red'ns %d and %d ) on %s",
2240 i, -temp1[k], lastred,
2241 symnam(k));
2242 if(-temp1[k] > lastred)
2243 temp1[k] = -lastred;
2244 zzrrconf++;
2245 } else
2246 /* potential shift/reduce conflict */
2247 precftn( lastred, k, i );
2251 wract(i);
2254 if(fdebug)
2255 Bprint(fdebug, "};\n");
2256 Bprint(ftable, "};\n");
2257 Bprint(ftable, "#define YYNPROD %d\n", nprod);
2258 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2259 if(yydebug)
2260 Bprint(ftable, "#define yydebug %s\n", yydebug);
2264 * pack state i from temp1 into amem
2266 int
2267 apack(int *p, int n)
2269 int *pp, *qq, *rr, off, *q, *r;
2271 /* we don't need to worry about checking because
2272 * we will only look at entries known to be there...
2273 * eliminate leading and trailing 0's
2276 q = p+n;
2277 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2279 /* no actions */
2280 if(pp > q)
2281 return 0;
2282 p = pp;
2284 /* now, find a place for the elements from p to q, inclusive */
2285 r = &amem[ACTSIZE-1];
2286 for(rr = amem; rr <= r; rr++, off++) {
2287 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2288 if(*pp != 0)
2289 if(*pp != *qq && *qq != 0)
2290 goto nextk;
2292 /* we have found an acceptable k */
2293 if(pkdebug && foutput != 0)
2294 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2295 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2296 if(*pp) {
2297 if(qq > r)
2298 error("action table overflow");
2299 if(qq > memp)
2300 memp = qq;
2301 *qq = *pp;
2303 if(pkdebug && foutput != 0)
2304 for(pp = amem; pp <= memp; pp += 10) {
2305 Bprint(foutput, "\t");
2306 for(qq = pp; qq <= pp+9; qq++)
2307 Bprint(foutput, "%d ", *qq);
2308 Bprint(foutput, "\n");
2310 return(off);
2311 nextk:;
2313 error("no space in action table");
2314 return 0;
2318 * output the gotos for the nontermninals
2320 void
2321 go2out(void)
2323 int i, j, k, best, count, cbest, times;
2325 /* mark begining of gotos */
2326 Bprint(ftemp, "$\n");
2327 for(i = 1; i <= nnonter; i++) {
2328 go2gen(i);
2330 /* find the best one to make default */
2331 best = -1;
2332 times = 0;
2334 /* is j the most frequent */
2335 for(j = 0; j <= nstate; j++) {
2336 if(tystate[j] == 0)
2337 continue;
2338 if(tystate[j] == best)
2339 continue;
2341 /* is tystate[j] the most frequent */
2342 count = 0;
2343 cbest = tystate[j];
2344 for(k = j; k <= nstate; k++)
2345 if(tystate[k] == cbest)
2346 count++;
2347 if(count > times) {
2348 best = cbest;
2349 times = count;
2353 /* best is now the default entry */
2354 zzgobest += times-1;
2355 for(j = 0; j <= nstate; j++)
2356 if(tystate[j] != 0 && tystate[j] != best) {
2357 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2358 zzgoent++;
2361 /* now, the default */
2362 if(best == -1)
2363 best = 0;
2364 zzgoent++;
2365 Bprint(ftemp, "%d\n", best);
2370 * output the gotos for nonterminal c
2372 void
2373 go2gen(int c)
2375 int i, work, cc;
2376 Item *p, *q;
2379 /* first, find nonterminals with gotos on c */
2380 aryfil(temp1, nnonter+1, 0);
2381 temp1[c] = 1;
2382 work = 1;
2383 while(work) {
2384 work = 0;
2385 PLOOP(0, i)
2387 /* cc is a nonterminal */
2388 if((cc=prdptr[i][1]-NTBASE) >= 0)
2389 /* cc has a goto on c */
2390 if(temp1[cc] != 0) {
2392 /* thus, the left side of production i does too */
2393 cc = *prdptr[i]-NTBASE;
2394 if(temp1[cc] == 0) {
2395 work = 1;
2396 temp1[cc] = 1;
2401 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2402 if(g2debug && foutput != 0) {
2403 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2404 NTLOOP(i)
2405 if(temp1[i])
2406 Bprint(foutput, "%s ", nontrst[i].name);
2407 Bprint(foutput, "\n");
2410 /* now, go through and put gotos into tystate */
2411 aryfil(tystate, nstate, 0);
2412 SLOOP(i)
2413 ITMLOOP(i, p, q)
2414 if((cc = *p->pitem) >= NTBASE)
2415 /* goto on c is possible */
2416 if(temp1[cc-NTBASE]) {
2417 tystate[i] = amem[indgo[i]+c];
2418 break;
2423 * decide a shift/reduce conflict by precedence.
2424 * r is a rule number, t a token number
2425 * the conflict is in state s
2426 * temp1[t] is changed to reflect the action
2428 void
2429 precftn(int r, int t, int s)
2431 int lp, lt, action;
2433 lp = levprd[r];
2434 lt = toklev[t];
2435 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2437 /* conflict */
2438 if(foutput != 0)
2439 Bprint(foutput,
2440 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2441 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2442 zzsrconf++;
2443 return;
2445 if(PLEVEL(lt) == PLEVEL(lp))
2446 action = ASSOC(lt);
2447 else
2448 if(PLEVEL(lt) > PLEVEL(lp))
2449 action = RASC; /* shift */
2450 else
2451 action = LASC; /* reduce */
2452 switch(action) {
2453 case BASC: /* error action */
2454 temp1[t] = ERRCODE;
2455 break;
2456 case LASC: /* reduce */
2457 temp1[t] = -r;
2458 break;
2463 * output state i
2464 * temp1 has the actions, lastred the default
2466 void
2467 wract(int i)
2469 int p, p0, p1, ntimes, tred, count, j, flag;
2471 /* find the best choice for lastred */
2472 lastred = 0;
2473 ntimes = 0;
2474 TLOOP(j) {
2475 if(temp1[j] >= 0)
2476 continue;
2477 if(temp1[j]+lastred == 0)
2478 continue;
2479 /* count the number of appearances of temp1[j] */
2480 count = 0;
2481 tred = -temp1[j];
2482 levprd[tred] |= REDFLAG;
2483 TLOOP(p)
2484 if(temp1[p]+tred == 0)
2485 count++;
2486 if(count > ntimes) {
2487 lastred = tred;
2488 ntimes = count;
2493 * for error recovery, arrange that, if there is a shift on the
2494 * error recovery token, `error', that the default be the error action
2496 if(temp1[2] > 0)
2497 lastred = 0;
2499 /* clear out entries in temp1 which equal lastred */
2500 TLOOP(p)
2501 if(temp1[p]+lastred == 0)
2502 temp1[p] = 0;
2504 wrstate(i);
2505 defact[i] = lastred;
2506 flag = 0;
2507 TLOOP(p0)
2508 if((p1=temp1[p0]) != 0) {
2509 if(p1 < 0) {
2510 p1 = -p1;
2511 goto exc;
2513 if(p1 == ACCEPTCODE) {
2514 p1 = -1;
2515 goto exc;
2517 if(p1 == ERRCODE) {
2518 p1 = 0;
2519 exc:
2520 if(flag++ == 0)
2521 Bprint(ftable, "-1, %d,\n", i);
2522 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2523 zzexcp++;
2524 continue;
2526 Bprint(ftemp, "%d,%d,", p0, p1);
2527 zzacent++;
2529 if(flag) {
2530 defact[i] = -2;
2531 Bprint(ftable, "\t-2, %d,\n", lastred);
2533 Bprint(ftemp, "\n");
2537 * writes state i
2539 void
2540 wrstate(int i)
2542 int j0, j1;
2543 Item *pp, *qq;
2544 Wset *u;
2546 if(fdebug) {
2547 if(lastred) {
2548 Bprint(fdebug, " 0, /*%d*/\n", i);
2549 } else {
2550 Bprint(fdebug, " \"");
2551 ITMLOOP(i, pp, qq)
2552 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2553 if(tystate[i] == MUSTLOOKAHEAD)
2554 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2555 if(*u->pitem < 0)
2556 Bprint(fdebug, "%s\\n", writem(u->pitem));
2557 Bprint(fdebug, "\", /*%d*/\n", i);
2560 if(foutput == 0)
2561 return;
2562 Bprint(foutput, "\nstate %d\n", i);
2563 ITMLOOP(i, pp, qq)
2564 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2565 if(tystate[i] == MUSTLOOKAHEAD)
2566 /* print out empty productions in closure */
2567 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2568 if(*u->pitem < 0)
2569 Bprint(foutput, "\t%s\n", writem(u->pitem));
2571 /* check for state equal to another */
2572 TLOOP(j0)
2573 if((j1=temp1[j0]) != 0) {
2574 Bprint(foutput, "\n\t%s ", symnam(j0));
2575 /* shift, error, or accept */
2576 if(j1 > 0) {
2577 if(j1 == ACCEPTCODE)
2578 Bprint(foutput, "accept");
2579 else
2580 if(j1 == ERRCODE)
2581 Bprint(foutput, "error");
2582 else
2583 Bprint(foutput, "shift %d", j1);
2584 } else
2585 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2588 /* output the final production */
2589 if(lastred)
2590 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
2591 lastred, rlines[lastred]);
2592 else
2593 Bprint(foutput, "\n\t. error\n\n");
2595 /* now, output nonterminal actions */
2596 j1 = ntokens;
2597 for(j0 = 1; j0 <= nnonter; j0++) {
2598 j1++;
2599 if(temp1[j1])
2600 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2604 void
2605 warray(char *s, int *v, int n)
2607 int i;
2609 Bprint(ftable, "static\tconst\tshort %s[] =\n{", s);
2610 for(i=0;;) {
2611 if(i%10 == 0)
2612 Bprint(ftable, "\n");
2613 Bprint(ftable, "%4d", v[i]);
2614 i++;
2615 if(i >= n) {
2616 Bprint(ftable, "\n};\n");
2617 break;
2619 Bprint(ftable, ",");
2624 * in order to free up the mem and amem arrays for the optimizer,
2625 * and still be able to output yyr1, etc., after the sizes of
2626 * the action array is known, we hide the nonterminals
2627 * derived by productions in levprd.
2630 void
2631 hideprod(void)
2633 int i, j;
2635 j = 0;
2636 levprd[0] = 0;
2637 PLOOP(1, i) {
2638 if(!(levprd[i] & REDFLAG)) {
2639 j++;
2640 if(foutput != 0)
2641 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
2643 levprd[i] = *prdptr[i] - NTBASE;
2645 if(j)
2646 print("%d rules never reduced\n", j);
2649 void
2650 callopt(void)
2652 int i, *p, j, k, *q;
2654 /* read the arrays from tempfile and set parameters */
2655 finput = Bopen(tempname, OREAD);
2656 if(finput == 0)
2657 error("optimizer cannot open tempfile");
2659 pgo[0] = 0;
2660 temp1[0] = 0;
2661 nstate = 0;
2662 nnonter = 0;
2663 for(;;) {
2664 switch(gtnm()) {
2665 case '\n':
2666 nstate++;
2667 pmem--;
2668 temp1[nstate] = pmem - mem0;
2669 case ',':
2670 continue;
2671 case '$':
2672 break;
2673 default:
2674 error("bad tempfile %s", tempname);
2676 break;
2679 pmem--;
2680 temp1[nstate] = yypgo[0] = pmem - mem0;
2681 for(;;) {
2682 switch(gtnm()) {
2683 case '\n':
2684 nnonter++;
2685 yypgo[nnonter] = pmem-mem0;
2686 case ',':
2687 continue;
2688 case -1:
2689 break;
2690 default:
2691 error("bad tempfile");
2693 break;
2695 pmem--;
2696 yypgo[nnonter--] = pmem - mem0;
2697 for(i = 0; i < nstate; i++) {
2698 k = 32000;
2699 j = 0;
2700 q = mem0 + temp1[i+1];
2701 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2702 if(*p > j)
2703 j = *p;
2704 if(*p < k)
2705 k = *p;
2707 /* nontrivial situation */
2708 if(k <= j) {
2709 /* j is now the range */
2710 /* j -= k; *//* call scj */
2711 if(k > maxoff)
2712 maxoff = k;
2714 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2715 if(j > maxspr)
2716 maxspr = j;
2719 /* initialize ggreed table */
2720 for(i = 1; i <= nnonter; i++) {
2721 ggreed[i] = 1;
2722 j = 0;
2724 /* minimum entry index is always 0 */
2725 q = mem0 + yypgo[i+1] - 1;
2726 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2727 ggreed[i] += 2;
2728 if(*p > j)
2729 j = *p;
2731 ggreed[i] = ggreed[i] + 2*j;
2732 if(j > maxoff)
2733 maxoff = j;
2736 /* now, prepare to put the shift actions into the amem array */
2737 for(i = 0; i < ACTSIZE; i++)
2738 amem[i] = 0;
2739 maxa = amem;
2740 for(i = 0; i < nstate; i++) {
2741 if(tystate[i] == 0 && adb > 1)
2742 Bprint(ftable, "State %d: null\n", i);
2743 indgo[i] = YYFLAG1;
2745 while((i = nxti()) != NOMORE)
2746 if(i >= 0)
2747 stin(i);
2748 else
2749 gin(-i);
2751 /* print amem array */
2752 if(adb > 2 )
2753 for(p = amem; p <= maxa; p += 10) {
2754 Bprint(ftable, "%4d ", (int)(p-amem));
2755 for(i = 0; i < 10; ++i)
2756 Bprint(ftable, "%4d ", p[i]);
2757 Bprint(ftable, "\n");
2760 /* write out the output appropriate to the language */
2761 aoutput();
2762 osummary();
2763 ZAPFILE(tempname);
2766 void
2767 gin(int i)
2769 int *p, *r, *s, *q1, *q2;
2771 /* enter gotos on nonterminal i into array amem */
2772 ggreed[i] = 0;
2774 q2 = mem0+ yypgo[i+1] - 1;
2775 q1 = mem0 + yypgo[i];
2777 /* now, find amem place for it */
2778 for(p = amem; p < &amem[ACTSIZE]; p++) {
2779 if(*p)
2780 continue;
2781 for(r = q1; r < q2; r += 2) {
2782 s = p + *r + 1;
2783 if(*s)
2784 goto nextgp;
2785 if(s > maxa)
2786 if((maxa = s) > &amem[ACTSIZE])
2787 error("a array overflow");
2789 /* we have found amem spot */
2790 *p = *q2;
2791 if(p > maxa)
2792 if((maxa = p) > &amem[ACTSIZE])
2793 error("a array overflow");
2794 for(r = q1; r < q2; r += 2) {
2795 s = p + *r + 1;
2796 *s = r[1];
2798 pgo[i] = p-amem;
2799 if(adb > 1)
2800 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2801 return;
2803 nextgp:;
2805 error("cannot place goto %d\n", i);
2808 void
2809 stin(int i)
2811 int *r, *s, n, flag, j, *q1, *q2;
2813 tystate[i] = 0;
2815 /* enter state i into the amem array */
2816 q2 = mem0+temp1[i+1];
2817 q1 = mem0+temp1[i];
2818 /* find an acceptable place */
2819 for(n = -maxoff; n < ACTSIZE; n++) {
2820 flag = 0;
2821 for(r = q1; r < q2; r += 2) {
2822 if((s = *r + n + amem) < amem)
2823 goto nextn;
2824 if(*s == 0)
2825 flag++;
2826 else
2827 if(*s != r[1])
2828 goto nextn;
2831 /* check that the position equals another only if the states are identical */
2832 for(j=0; j<nstate; j++) {
2833 if(indgo[j] == n) {
2835 /* we have some disagreement */
2836 if(flag)
2837 goto nextn;
2838 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2840 /* states are equal */
2841 indgo[i] = n;
2842 if(adb > 1)
2843 Bprint(ftable,
2844 "State %d: entry at %d equals state %d\n",
2845 i, n, j);
2846 return;
2849 /* we have some disagreement */
2850 goto nextn;
2854 for(r = q1; r < q2; r += 2) {
2855 if((s = *r+n+amem) >= &amem[ACTSIZE])
2856 error("out of space in optimizer a array");
2857 if(s > maxa)
2858 maxa = s;
2859 if(*s != 0 && *s != r[1])
2860 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2861 *s = r[1];
2863 indgo[i] = n;
2864 if(adb > 1)
2865 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2866 return;
2867 nextn:;
2869 error("Error; failure to place state %d\n", i);
2873 * finds the next i
2875 int
2876 nxti(void)
2878 int i, max, maxi;
2880 max = 0;
2881 maxi = 0;
2882 for(i = 1; i <= nnonter; i++)
2883 if(ggreed[i] >= max) {
2884 max = ggreed[i];
2885 maxi = -i;
2887 for(i = 0; i < nstate; ++i)
2888 if(tystate[i] >= max) {
2889 max = tystate[i];
2890 maxi = i;
2892 if(nxdb)
2893 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2894 if(max == 0)
2895 return NOMORE;
2896 return maxi;
2900 * write summary
2902 void
2903 osummary(void)
2906 int i, *p;
2908 if(foutput == 0)
2909 return;
2910 i = 0;
2911 for(p = maxa; p >= amem; p--)
2912 if(*p == 0)
2913 i++;
2915 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2916 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2917 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2918 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2922 * this version is for C
2923 * write out the optimized parser
2925 void
2926 aoutput(void)
2928 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2929 arout("yyact", amem, (maxa-amem)+1);
2930 arout("yypact", indgo, nstate);
2931 arout("yypgo", pgo, nnonter+1);
2934 void
2935 arout(char *s, int *v, int n)
2937 int i;
2939 Bprint(ftable, "static\tconst\tshort %s[] =\n{", s);
2940 for(i = 0; i < n;) {
2941 if(i%10 == 0)
2942 Bprint(ftable, "\n");
2943 Bprint(ftable, "%4d", v[i]);
2944 i++;
2945 if(i == n)
2946 Bprint(ftable, "\n};\n");
2947 else
2948 Bprint(ftable, ",");
2953 * read and convert an integer from the standard input
2954 * return the terminating character
2955 * blanks, tabs, and newlines are ignored
2957 int
2958 gtnm(void)
2960 int sign, val, c;
2962 sign = 0;
2963 val = 0;
2964 while((c=Bgetrune(finput)) != Beof) {
2965 if(isdigit(c)) {
2966 val = val*10 + c-'0';
2967 continue;
2969 if(c == '-') {
2970 sign = 1;
2971 continue;
2973 break;
2975 if(sign)
2976 val = -val;
2977 *pmem++ = val;
2978 if(pmem >= &mem0[MEMSIZE])
2979 error("out of space");
2980 return c;