Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
6 #define Bungetrune Bungetc /* ok for now. */
8 /*
9 * all these are 32 bit
10 */
11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n) (((n)+32)/32)
16 #define PARSER "#9/lib/yaccpar"
17 #define PARSERS "#9/lib/yaccpars"
18 #define TEMPNAME "y.tmp.XXXXXX"
19 #define ACTNAME "y.acts.XXXXXX"
20 #define OFILE "tab.c"
21 #define FILEU "output"
22 #define FILED "tab.h"
23 #define FILEDEBUG "debug"
25 enum
26 {
27 /*
28 * the following are adjustable
29 * according to memory size
30 */
31 ACTSIZE = 40000,
32 MEMSIZE = 40000,
33 NSTATES = 2000,
34 NTERMS = 511,
35 NPROD = 1600,
36 NNONTERM = 600,
37 TEMPSIZE = 2000,
38 CNAMSZ = 10000,
39 LSETSIZE = 2400,
40 WSETSIZE = 350,
42 NAMESIZE = 50,
43 NTYPES = 63,
44 ISIZE = 400,
46 PRIVATE = 0xE000, /* unicode private use */
48 /* relationships which must hold:
49 TBITSET ints must hold NTERMS+1 bits...
50 WSETSIZE >= NNONTERM
51 LSETSIZE >= NNONTERM
52 TEMPSIZE >= NTERMS + NNONTERM + 1
53 TEMPSIZE >= NSTATES
54 */
56 NTBASE = 010000,
57 ERRCODE = 8190,
58 ACCEPTCODE = 8191,
60 NOASC = 0, /* no assoc. */
61 LASC = 1, /* left assoc. */
62 RASC = 2, /* right assoc. */
63 BASC = 3, /* binary assoc. */
65 /* flags for state generation */
67 DONE = 0,
68 MUSTDO = 1,
69 MUSTLOOKAHEAD = 2,
71 /* flags for a rule having an action, and being reduced */
73 ACTFLAG = 04,
74 REDFLAG = 010,
76 /* output parser flags */
77 YYFLAG1 = -1000,
79 /* parse tokens */
80 IDENTIFIER = PRIVATE,
81 MARK,
82 TERM,
83 LEFT,
84 RIGHT,
85 BINARY,
86 PREC,
87 LCURLY,
88 IDENTCOLON,
89 NUMBER,
90 START,
91 TYPEDEF,
92 TYPENAME,
93 UNION,
95 ENDFILE = 0,
97 EMPTY = 1,
98 WHOKNOWS = 0,
99 OK = 1,
100 NOMORE = -1000,
101 };
103 /* macros for getting associativity and precedence levels */
105 #define ASSOC(i) ((i)&03)
106 #define PLEVEL(i) (((i)>>4)&077)
107 #define TYPE(i) (((i)>>10)&077)
109 /* macros for setting associativity and precedence levels */
111 #define SETASC(i,j) i |= j
112 #define SETPLEV(i,j) i |= (j<<4)
113 #define SETTYPE(i,j) i |= (j<<10)
115 /* looping macros */
117 #define TLOOP(i) for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i) for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i) for(i=s; i<nprod; i++)
120 #define SLOOP(i) for(i=0; i<nstate; i++)
121 #define WSBUMP(x) x++
122 #define WSLOOP(s,j) for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i) for(i=0; i<tbitset; i++)
126 /* command to clobber tempfiles after use */
128 #define ZAPFILE(x) if(x) remove(x)
130 /* I/O descriptors */
132 Biobuf* faction; /* file for saving actions */
133 Biobuf* fdefine; /* file for #defines */
134 Biobuf* fdebug; /* y.debug for strings for debugging */
135 Biobuf* ftable; /* y.tab.c file */
136 Biobuf* ftemp; /* tempfile to pass 2 */
137 Biobuf* finput; /* input file */
138 Biobuf* foutput; /* y.output file */
140 /* communication variables between various I/O routines */
142 char* infile; /* input file name */
143 int numbval; /* value of an input number */
144 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */
146 /* structure declarations */
148 typedef
149 struct
151 int lset[TBITSET];
152 } Lkset;
154 typedef
155 struct
157 int* pitem;
158 Lkset* look;
159 } Item;
161 typedef
162 struct
164 char* name;
165 int value;
166 } Symb;
168 typedef
169 struct
171 int* pitem;
172 int flag;
173 Lkset ws;
174 } Wset;
176 /* storage of names */
178 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
179 int cnamsz = CNAMSZ; /* size of cnames */
180 char* cnamp = cnames; /* place where next name is to be put in */
181 int ndefout = 4; /* number of defined symbols output */
182 char* tempname;
183 char* actname;
184 char ttempname[] = TEMPNAME;
185 char tactname[] = ACTNAME;
186 char* parser = PARSER;
187 char* yydebug;
189 /* storage of types */
190 int ntypes; /* number of types defined */
191 char* typeset[NTYPES]; /* pointers to type tags */
193 /* token information */
195 int ntokens = 0 ; /* number of tokens */
196 Symb tokset[NTERMS];
197 int toklev[NTERMS]; /* vector with the precedence of the terminals */
199 /* nonterminal information */
201 int nnonter = -1; /* the number of nonterminals */
202 Symb nontrst[NNONTERM];
203 int start; /* start symbol */
205 /* assigned token type values */
206 int extval = 0;
208 char* ytabc = OFILE; /* name of y.tab.c */
210 /* grammar rule information */
212 int mem0[MEMSIZE] ; /* production storage */
213 int* mem = mem0;
214 int nprod = 1; /* number of productions */
215 int* prdptr[NPROD]; /* pointers to descriptions of productions */
216 int levprd[NPROD]; /* precedence levels for the productions */
217 int rlines[NPROD]; /* line number for this rule */
219 /* state information */
221 int nstate = 0; /* number of states */
222 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
223 int tystate[NSTATES]; /* contains type information about the states */
224 int defact[NSTATES]; /* the default actions of states */
225 int tstates[NTERMS]; /* states generated by terminal gotos */
226 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
227 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
228 int lastred; /* the number of the last reduction of a state */
230 /* lookahead set information */
232 Lkset lkst[LSETSIZE];
233 int nolook; /* flag to turn off lookahead computations */
234 int tbitset; /* size of lookahead sets */
235 int nlset = 0; /* next lookahead set index */
236 int nolook = 0; /* flag to suppress lookahead computations */
237 Lkset clset; /* temporary storage for lookahead computations */
239 /* working set information */
241 Wset wsets[WSETSIZE];
242 Wset* cwp;
244 /* storage for action table */
246 int amem[ACTSIZE]; /* action table storage */
247 int* memp = amem; /* next free action table position */
248 int indgo[NSTATES]; /* index to the stored goto table */
250 /* temporary vector, indexable by states, terms, or ntokens */
252 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
253 int lineno = 1; /* current input line number */
254 int fatfl = 1; /* if on, error is fatal */
255 int nerrors = 0; /* number of errors */
257 /* statistics collection variables */
259 int zzgoent;
260 int zzgobest;
261 int zzacent;
262 int zzexcp;
263 int zzclose;
264 int zzrrconf;
265 int zzsrconf;
267 int* ggreed = lkst[0].lset;
268 int* pgo = wsets[0].ws.lset;
269 int* yypgo = &nontrst[0].value;
271 int maxspr = 0; /* maximum spread of any entry */
272 int maxoff = 0; /* maximum offset into a array */
273 int* pmem = mem0;
274 int* maxa;
275 int nxdb = 0;
276 int adb = 0;
279 /* storage for information about the nonterminals */
281 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
282 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
283 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
285 /* random stuff picked out from between functions */
287 int indebug = 0;
288 Wset* zzcwp = wsets;
289 int zzgoent = 0;
290 int zzgobest = 0;
291 int zzacent = 0;
292 int zzexcp = 0;
293 int zzclose = 0;
294 int zzsrconf = 0;
295 int* zzmemsz = mem0;
296 int zzrrconf = 0;
297 int pidebug = 0; /* debugging flag for putitem */
298 int gsdebug = 0;
299 int cldebug = 0; /* debugging flag for closure */
300 int pkdebug = 0;
301 int g2debug = 0;
303 struct
305 char* name;
306 long value;
307 } resrv[] =
309 "binary", BINARY,
310 "left", LEFT,
311 "nonassoc", BINARY,
312 "prec", PREC,
313 "right", RIGHT,
314 "start", START,
315 "term", TERM,
316 "token", TERM,
317 "type", TYPEDEF,
318 "union", UNION,
319 0,
320 };
322 /* define functions */
324 void main(int, char**);
325 void others(void);
326 char* chcopy(char*, char*);
327 char* writem(int*);
328 char* symnam(int);
329 void summary(void);
330 void error(char*, ...);
331 void aryfil(int*, int, int);
332 int setunion(int*, int*);
333 void prlook(Lkset*);
334 void cpres(void);
335 void cpfir(void);
336 int state(int);
337 void putitem(int*, Lkset*);
338 void cempty(void);
339 void stagen(void);
340 void closure(int);
341 Lkset* flset(Lkset*);
342 void cleantmp(void);
343 void intr(void);
344 void setup(int, char**);
345 void finact(void);
346 int defin(int, char*);
347 void defout(int);
348 char* cstash(char*);
349 long gettok(void);
350 int fdtype(int);
351 int chfind(int, char*);
352 void cpyunion(void);
353 void cpycode(void);
354 int skipcom(void);
355 void cpyact(int);
356 void openup(char*, int, int, int, char*);
357 void output(void);
358 int apack(int*, int);
359 void go2out(void);
360 void go2gen(int);
361 void precftn(int, int, int);
362 void wract(int);
363 void wrstate(int);
364 void warray(char*, int*, int);
365 void hideprod(void);
366 void callopt(void);
367 void gin(int);
368 void stin(int);
369 int nxti(void);
370 void osummary(void);
371 void aoutput(void);
372 void arout(char*, int*, int);
373 int gtnm(void);
375 void
376 main(int argc, char *argv[])
379 setup(argc, argv); /* initialize and read productions */
380 tbitset = NWORDS(ntokens);
381 cpres(); /* make table of which productions yield a given nonterminal */
382 cempty(); /* make a table of which nonterminals can match the empty string */
383 cpfir(); /* make a table of firsts of nonterminals */
384 stagen(); /* generate the states */
385 output(); /* write the states and the tables */
386 go2out();
387 hideprod();
388 summary();
389 callopt();
390 others();
391 exits(0);
394 /*
395 * put out other arrays, copy the parsers
396 */
397 void
398 others(void)
400 int c, i, j;
402 finput = Bopen(unsharp(parser), OREAD);
403 if(finput == 0)
404 error("cannot open parser %s: %r", parser);
405 warray("yyr1", levprd, nprod);
406 aryfil(temp1, nprod, 0);
407 PLOOP(1, i)
408 temp1[i] = prdptr[i+1]-prdptr[i]-2;
409 warray("yyr2", temp1, nprod);
411 aryfil(temp1, nstate, -1000);
412 TLOOP(i)
413 for(j=tstates[i]; j!=0; j=mstates[j])
414 temp1[j] = i;
415 NTLOOP(i)
416 for(j=ntstates[i]; j!=0; j=mstates[j])
417 temp1[j] = -i;
418 warray("yychk", temp1, nstate);
419 warray("yydef", defact, nstate);
421 /* put out token translation tables */
422 /* table 1 has 0-256 */
423 aryfil(temp1, 256, 0);
424 c = 0;
425 TLOOP(i) {
426 j = tokset[i].value;
427 if(j >= 0 && j < 256) {
428 if(temp1[j]) {
429 print("yacc bug -- cant have 2 different Ts with same value\n");
430 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
431 nerrors++;
433 temp1[j] = i;
434 if(j > c)
435 c = j;
438 warray("yytok1", temp1, c+1);
440 /* table 2 has PRIVATE-PRIVATE+256 */
441 aryfil(temp1, 256, 0);
442 c = 0;
443 TLOOP(i) {
444 j = tokset[i].value - PRIVATE;
445 if(j >= 0 && j < 256) {
446 if(temp1[j]) {
447 print("yacc bug -- cant have 2 different Ts with same value\n");
448 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
449 nerrors++;
451 temp1[j] = i;
452 if(j > c)
453 c = j;
456 warray("yytok2", temp1, c+1);
458 /* table 3 has everything else */
459 Bprint(ftable, "long yytok3[] =\n{\n");
460 c = 0;
461 TLOOP(i) {
462 j = tokset[i].value;
463 if(j >= 0 && j < 256)
464 continue;
465 if(j >= PRIVATE && j < 256+PRIVATE)
466 continue;
468 Bprint(ftable, "%4d,%4d,", j, i);
469 c++;
470 if(c%5 == 0)
471 Bprint(ftable, "\n");
473 Bprint(ftable, "%4d\n};\n", 0);
475 /* copy parser text */
476 while((c=Bgetrune(finput)) != Beof) {
477 if(c == '$') {
478 if((c = Bgetrune(finput)) != 'A')
479 Bputrune(ftable, '$');
480 else { /* copy actions */
481 faction = Bopen(actname, OREAD);
482 if(faction == 0)
483 error("cannot reopen action tempfile");
484 while((c=Bgetrune(faction)) != Beof)
485 Bputrune(ftable, c);
486 Bterm(faction);
487 ZAPFILE(actname);
488 c = Bgetrune(finput);
491 Bputrune(ftable, c);
493 Bterm(ftable);
496 /*
497 * copies string q into p, returning next free char ptr
498 */
499 char*
500 chcopy(char* p, char* q)
502 int c;
504 while(c = *q) {
505 if(c == '"')
506 *p++ = '\\';
507 *p++ = c;
508 q++;
510 *p = 0;
511 return p;
514 /*
515 * creates output string for item pointed to by pp
516 */
517 char*
518 writem(int *pp)
520 int i,*p;
521 static char sarr[ISIZE];
522 char* q;
524 for(p=pp; *p>0; p++)
526 p = prdptr[-*p];
527 q = chcopy(sarr, nontrst[*p-NTBASE].name);
528 q = chcopy(q, ": ");
529 for(;;) {
530 *q = ' ';
531 p++;
532 if(p == pp)
533 *q = '.';
534 q++;
535 *q = '\0';
536 i = *p;
537 if(i <= 0)
538 break;
539 q = chcopy(q, symnam(i));
540 if(q > &sarr[ISIZE-30])
541 error("item too big");
544 /* an item calling for a reduction */
545 i = *pp;
546 if(i < 0 ) {
547 q = chcopy(q, " (");
548 sprint(q, "%d)", -i);
550 return sarr;
553 /*
554 * return a pointer to the name of symbol i
555 */
556 char*
557 symnam(int i)
559 char* cp;
561 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
562 if(*cp == ' ')
563 cp++;
564 return cp;
567 /*
568 * output the summary on y.output
569 */
570 void
571 summary(void)
574 if(foutput != 0) {
575 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
576 ntokens, NTERMS, nnonter, NNONTERM);
577 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
578 nprod, NPROD, nstate, NSTATES);
579 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
580 zzsrconf, zzrrconf);
581 Bprint(foutput, "%d/%d working sets used\n",
582 (int)(zzcwp-wsets), WSETSIZE);
583 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
584 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
585 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
586 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
587 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
588 Bprint(foutput, "%d goto entries\n", zzgoent);
589 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
591 if(zzsrconf != 0 || zzrrconf != 0) {
592 print("\nconflicts: ");
593 if(zzsrconf)
594 print("%d shift/reduce", zzsrconf);
595 if(zzsrconf && zzrrconf)
596 print(", ");
597 if(zzrrconf)
598 print("%d reduce/reduce", zzrrconf);
599 print("\n");
601 if(ftemp != 0) {
602 Bterm(ftemp);
603 ftemp = 0;
605 if(fdefine != 0) {
606 Bterm(fdefine);
607 fdefine = 0;
611 /*
612 * write out error comment -- NEEDS WORK
613 */
614 void
615 error(char *s, ...)
618 nerrors++;
619 fprint(2, "\n fatal error:");
620 fprint(2, s, (&s)[1]);
621 fprint(2, ", %s:%d\n", infile, lineno);
622 if(!fatfl)
623 return;
624 summary();
625 cleantmp();
626 exits("error");
629 /*
630 * set elements 0 through n-1 to c
631 */
632 void
633 aryfil(int *v, int n, int c)
635 int i;
637 for(i=0; i<n; i++)
638 v[i] = c;
641 /*
642 * set a to the union of a and b
643 * return 1 if b is not a subset of a, 0 otherwise
644 */
645 int
646 setunion(int *a, int *b)
648 int i, x, sub;
650 sub = 0;
651 SETLOOP(i) {
652 x = *a;
653 *a |= *b;
654 if(*a != x)
655 sub = 1;
656 a++;
657 b++;
659 return sub;
662 void
663 prlook(Lkset* p)
665 int j, *pp;
667 pp = p->lset;
668 if(pp == 0)
669 Bprint(foutput, "\tNULL");
670 else {
671 Bprint(foutput, " { ");
672 TLOOP(j)
673 if(BIT(pp,j))
674 Bprint(foutput, "%s ", symnam(j));
675 Bprint(foutput, "}");
679 /*
680 * compute an array with the beginnings of productions yielding given nonterminals
681 * The array pres points to these lists
682 * the array pyield has the lists: the total size is only NPROD+1
683 */
684 void
685 cpres(void)
687 int c, j, i, **pmem;
688 static int *pyield[NPROD];
690 pmem = pyield;
691 NTLOOP(i) {
692 c = i+NTBASE;
693 pres[i] = pmem;
694 fatfl = 0; /* make undefined symbols nonfatal */
695 PLOOP(0, j)
696 if(*prdptr[j] == c)
697 *pmem++ = prdptr[j]+1;
698 if(pres[i] == pmem)
699 error("nonterminal %s not defined!", nontrst[i].name);
701 pres[i] = pmem;
702 fatfl = 1;
703 if(nerrors) {
704 summary();
705 cleantmp();
706 exits("error");
708 if(pmem != &pyield[nprod])
709 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
712 /*
713 * compute an array with the first of nonterminals
714 */
715 void
716 cpfir(void)
718 int *p, **s, i, **t, ch, changes;
720 zzcwp = &wsets[nnonter];
721 NTLOOP(i) {
722 aryfil(wsets[i].ws.lset, tbitset, 0);
723 t = pres[i+1];
724 /* initially fill the sets */
725 for(s=pres[i]; s<t; ++s)
726 for(p = *s; (ch = *p) > 0; ++p) {
727 if(ch < NTBASE) {
728 SETBIT(wsets[i].ws.lset, ch);
729 break;
731 if(!pempty[ch-NTBASE])
732 break;
736 /* now, reflect transitivity */
737 changes = 1;
738 while(changes) {
739 changes = 0;
740 NTLOOP(i) {
741 t = pres[i+1];
742 for(s = pres[i]; s < t; ++s)
743 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
744 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
745 if(!pempty[ch])
746 break;
751 NTLOOP(i)
752 pfirst[i] = flset(&wsets[i].ws);
753 if(!indebug)
754 return;
755 if(foutput != 0)
756 NTLOOP(i) {
757 Bprint(foutput, "\n%s: ", nontrst[i].name);
758 prlook(pfirst[i]);
759 Bprint(foutput, " %d\n", pempty[i]);
763 /*
764 * sorts last state,and sees if it equals earlier ones. returns state number
765 */
766 int
767 state(int c)
769 Item *p1, *p2, *k, *l, *q1, *q2;
770 int size1, size2, i;
772 p1 = pstate[nstate];
773 p2 = pstate[nstate+1];
774 if(p1 == p2)
775 return 0; /* null state */
776 /* sort the items */
777 for(k = p2-1; k > p1; k--) /* make k the biggest */
778 for(l = k-1; l >= p1; --l)
779 if(l->pitem > k->pitem) {
780 int *s;
781 Lkset *ss;
783 s = k->pitem;
784 k->pitem = l->pitem;
785 l->pitem = s;
786 ss = k->look;
787 k->look = l->look;
788 l->look = ss;
790 size1 = p2 - p1; /* size of state */
792 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
793 /* get ith state */
794 q1 = pstate[i];
795 q2 = pstate[i+1];
796 size2 = q2 - q1;
797 if(size1 != size2)
798 continue;
799 k = p1;
800 for(l = q1; l < q2; l++) {
801 if(l->pitem != k->pitem)
802 break;
803 k++;
805 if(l != q2)
806 continue;
807 /* found it */
808 pstate[nstate+1] = pstate[nstate]; /* delete last state */
809 /* fix up lookaheads */
810 if(nolook)
811 return i;
812 for(l = q1, k = p1; l < q2; ++l, ++k ) {
813 int s;
815 SETLOOP(s)
816 clset.lset[s] = l->look->lset[s];
817 if(setunion(clset.lset, k->look->lset)) {
818 tystate[i] = MUSTDO;
819 /* register the new set */
820 l->look = flset( &clset );
823 return i;
825 /* state is new */
826 if(nolook)
827 error("yacc state/nolook error");
828 pstate[nstate+2] = p2;
829 if(nstate+1 >= NSTATES)
830 error("too many states");
831 if(c >= NTBASE) {
832 mstates[nstate] = ntstates[c-NTBASE];
833 ntstates[c-NTBASE] = nstate;
834 } else {
835 mstates[nstate] = tstates[c];
836 tstates[c] = nstate;
838 tystate[nstate] = MUSTDO;
839 return nstate++;
842 void
843 putitem(int *ptr, Lkset *lptr)
845 Item *j;
847 if(pidebug && foutput != 0)
848 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
849 j = pstate[nstate+1];
850 j->pitem = ptr;
851 if(!nolook)
852 j->look = flset(lptr);
853 pstate[nstate+1] = ++j;
854 if((int*)j > zzmemsz) {
855 zzmemsz = (int*)j;
856 if(zzmemsz >= &mem0[MEMSIZE])
857 error("out of state space");
861 /*
862 * mark nonterminals which derive the empty string
863 * also, look for nonterminals which don't derive any token strings
864 */
865 void
866 cempty(void)
869 int i, *p;
871 /* first, use the array pempty to detect productions that can never be reduced */
872 /* set pempty to WHONOWS */
873 aryfil(pempty, nnonter+1, WHOKNOWS);
875 /* now, look at productions, marking nonterminals which derive something */
876 more:
877 PLOOP(0, i) {
878 if(pempty[*prdptr[i] - NTBASE])
879 continue;
880 for(p = prdptr[i]+1; *p >= 0; ++p)
881 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
882 break;
883 /* production can be derived */
884 if(*p < 0) {
885 pempty[*prdptr[i]-NTBASE] = OK;
886 goto more;
890 /* now, look at the nonterminals, to see if they are all OK */
891 NTLOOP(i) {
892 /* the added production rises or falls as the start symbol ... */
893 if(i == 0)
894 continue;
895 if(pempty[i] != OK) {
896 fatfl = 0;
897 error("nonterminal %s never derives any token string", nontrst[i].name);
901 if(nerrors) {
902 summary();
903 cleantmp();
904 exits("error");
907 /* now, compute the pempty array, to see which nonterminals derive the empty string */
908 /* set pempty to WHOKNOWS */
909 aryfil( pempty, nnonter+1, WHOKNOWS);
911 /* loop as long as we keep finding empty nonterminals */
913 again:
914 PLOOP(1, i) {
915 /* not known to be empty */
916 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
917 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
919 /* we have a nontrivially empty nonterminal */
920 if(*p < 0) {
921 pempty[*prdptr[i]-NTBASE] = EMPTY;
922 /* got one ... try for another */
923 goto again;
929 /*
930 * generate the states
931 */
932 void
933 stagen(void)
936 int c, i, j, more;
937 Wset *p, *q;
939 /* initialize */
940 nstate = 0;
942 /* THIS IS FUNNY from the standpoint of portability
943 * it represents the magic moment when the mem0 array, which has
944 * been holding the productions, starts to hold item pointers, of a
945 * different type...
946 * someday, alloc should be used to allocate all this stuff... for now, we
947 * accept that if pointers don't fit in integers, there is a problem...
948 */
950 pstate[0] = pstate[1] = (Item*)mem;
951 aryfil(clset.lset, tbitset, 0);
952 putitem(prdptr[0]+1, &clset);
953 tystate[0] = MUSTDO;
954 nstate = 1;
955 pstate[2] = pstate[1];
957 aryfil(amem, ACTSIZE, 0);
959 /* now, the main state generation loop */
960 for(more=1; more;) {
961 more = 0;
962 SLOOP(i) {
963 if(tystate[i] != MUSTDO)
964 continue;
965 tystate[i] = DONE;
966 aryfil(temp1, nnonter+1, 0);
967 /* take state i, close it, and do gotos */
968 closure(i);
969 /* generate goto's */
970 WSLOOP(wsets, p) {
971 if(p->flag)
972 continue;
973 p->flag = 1;
974 c = *(p->pitem);
975 if(c <= 1) {
976 if(pstate[i+1]-pstate[i] <= p-wsets)
977 tystate[i] = MUSTLOOKAHEAD;
978 continue;
980 /* do a goto on c */
981 WSLOOP(p, q)
982 /* this item contributes to the goto */
983 if(c == *(q->pitem)) {
984 putitem(q->pitem+1, &q->ws);
985 q->flag = 1;
987 if(c < NTBASE)
988 state(c); /* register new state */
989 else
990 temp1[c-NTBASE] = state(c);
992 if(gsdebug && foutput != 0) {
993 Bprint(foutput, "%d: ", i);
994 NTLOOP(j)
995 if(temp1[j])
996 Bprint(foutput, "%s %d, ",
997 nontrst[j].name, temp1[j]);
998 Bprint(foutput, "\n");
1000 indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1001 /* do some more */
1002 more = 1;
1008 * generate the closure of state i
1010 void
1011 closure(int i)
1014 Wset *u, *v;
1015 Item *p, *q;
1016 int c, ch, work, k, *pi, **s, **t;
1018 zzclose++;
1020 /* first, copy kernel of state i to wsets */
1021 cwp = wsets;
1022 ITMLOOP(i, p, q) {
1023 cwp->pitem = p->pitem;
1024 cwp->flag = 1; /* this item must get closed */
1025 SETLOOP(k)
1026 cwp->ws.lset[k] = p->look->lset[k];
1027 WSBUMP(cwp);
1030 /* now, go through the loop, closing each item */
1031 work = 1;
1032 while(work) {
1033 work = 0;
1034 WSLOOP(wsets, u) {
1035 if(u->flag == 0)
1036 continue;
1037 /* dot is before c */
1038 c = *(u->pitem);
1039 if(c < NTBASE) {
1040 u->flag = 0;
1041 /* only interesting case is where . is before nonterminal */
1042 continue;
1045 /* compute the lookahead */
1046 aryfil(clset.lset, tbitset, 0);
1048 /* find items involving c */
1049 WSLOOP(u, v)
1050 if(v->flag == 1 && *(pi=v->pitem) == c) {
1051 v->flag = 0;
1052 if(nolook)
1053 continue;
1054 while((ch = *++pi) > 0) {
1055 /* terminal symbol */
1056 if(ch < NTBASE) {
1057 SETBIT(clset.lset, ch);
1058 break;
1060 /* nonterminal symbol */
1061 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1062 if(!pempty[ch-NTBASE])
1063 break;
1065 if(ch <= 0)
1066 setunion(clset.lset, v->ws.lset);
1070 * now loop over productions derived from c
1071 * c is now nonterminal number
1073 c -= NTBASE;
1074 t = pres[c+1];
1075 for(s = pres[c]; s < t; ++s) {
1077 * put these items into the closure
1078 * is the item there
1080 WSLOOP(wsets, v)
1081 /* yes, it is there */
1082 if(v->pitem == *s) {
1083 if(nolook)
1084 goto nexts;
1085 if(setunion(v->ws.lset, clset.lset))
1086 v->flag = work = 1;
1087 goto nexts;
1090 /* not there; make a new entry */
1091 if(cwp-wsets+1 >= WSETSIZE)
1092 error( "working set overflow");
1093 cwp->pitem = *s;
1094 cwp->flag = 1;
1095 if(!nolook) {
1096 work = 1;
1097 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1099 WSBUMP(cwp);
1101 nexts:;
1106 /* have computed closure; flags are reset; return */
1107 if(cwp > zzcwp)
1108 zzcwp = cwp;
1109 if(cldebug && foutput != 0) {
1110 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1111 WSLOOP(wsets, u) {
1112 if(u->flag)
1113 Bprint(foutput, "flag set!\n");
1114 u->flag = 0;
1115 Bprint(foutput, "\t%s", writem(u->pitem));
1116 prlook(&u->ws);
1117 Bprint(foutput, "\n");
1123 * decide if the lookahead set pointed to by p is known
1124 * return pointer to a perminent location for the set
1126 Lkset*
1127 flset(Lkset *p)
1129 Lkset *q;
1130 int *u, *v, *w, j;
1132 for(q = &lkst[nlset]; q-- > lkst;) {
1133 u = p->lset;
1134 v = q->lset;
1135 w = &v[tbitset];
1136 while(v < w)
1137 if(*u++ != *v++)
1138 goto more;
1139 /* we have matched */
1140 return q;
1141 more:;
1143 /* add a new one */
1144 q = &lkst[nlset++];
1145 if(nlset >= LSETSIZE)
1146 error("too many lookahead sets");
1147 SETLOOP(j)
1148 q->lset[j] = p->lset[j];
1149 return q;
1152 void
1153 cleantmp(void)
1155 ZAPFILE(actname);
1156 ZAPFILE(tempname);
1159 void
1160 intr(void)
1162 cleantmp();
1163 exits("interrupted");
1166 void
1167 setup(int argc, char *argv[])
1169 long c, t;
1170 int i, j, fd, lev, ty, ytab, *p;
1171 int vflag, dflag, stem;
1172 char actnm[8], *stemc, *s, dirbuf[128];
1174 ytab = 0;
1175 vflag = 0;
1176 dflag = 0;
1177 stem = 0;
1178 stemc = "y";
1179 foutput = 0;
1180 fdefine = 0;
1181 fdebug = 0;
1182 ARGBEGIN{
1183 case 'v':
1184 case 'V':
1185 vflag++;
1186 break;
1187 case 'D':
1188 yydebug = ARGF();
1189 break;
1190 case 'd':
1191 dflag++;
1192 break;
1193 case 'o':
1194 ytab++;
1195 ytabc = ARGF();
1196 break;
1197 case 's':
1198 stem++;
1199 stemc = ARGF();
1200 break;
1201 case 'S':
1202 parser = PARSERS;
1203 break;
1204 default:
1205 error("illegal option: %c", ARGC());
1206 }ARGEND
1207 openup(stemc, dflag, vflag, ytab, ytabc);
1209 if((fd = mkstemp(ttempname)) >= 0){
1210 tempname = ttempname;
1211 ftemp = Bfdopen(fd, OWRITE);
1213 if((fd = mkstemp(tactname)) >= 0){
1214 actname = tactname;
1215 faction = Bfdopen(fd, OWRITE);
1217 if(ftemp == 0 || faction == 0)
1218 error("cannot open temp file");
1219 if(argc < 1)
1220 error("no input file");
1221 infile = argv[0];
1222 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1223 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1224 s = malloc(i);
1225 if(s != nil){
1226 snprint(s, i, "%s/%s", dirbuf, infile);
1227 cleanname(s);
1228 infile = s;
1231 finput = Bopen(infile, OREAD);
1232 if(finput == 0)
1233 error("cannot open '%s'", argv[0]);
1234 cnamp = cnames;
1236 defin(0, "$end");
1237 extval = PRIVATE; /* tokens start in unicode 'private use' */
1238 defin(0, "error");
1239 defin(1, "$accept");
1240 defin(0, "$unk");
1241 mem = mem0;
1242 i = 0;
1244 for(t = gettok(); t != MARK && t != ENDFILE;)
1245 switch(t) {
1246 case ';':
1247 t = gettok();
1248 break;
1250 case START:
1251 if(gettok() != IDENTIFIER)
1252 error("bad %%start construction");
1253 start = chfind(1, tokname);
1254 t = gettok();
1255 continue;
1257 case TYPEDEF:
1258 if(gettok() != TYPENAME)
1259 error("bad syntax in %%type");
1260 ty = numbval;
1261 for(;;) {
1262 t = gettok();
1263 switch(t) {
1264 case IDENTIFIER:
1265 if((t=chfind(1, tokname)) < NTBASE) {
1266 j = TYPE(toklev[t]);
1267 if(j != 0 && j != ty)
1268 error("type redeclaration of token %s",
1269 tokset[t].name);
1270 else
1271 SETTYPE(toklev[t], ty);
1272 } else {
1273 j = nontrst[t-NTBASE].value;
1274 if(j != 0 && j != ty)
1275 error("type redeclaration of nonterminal %s",
1276 nontrst[t-NTBASE].name );
1277 else
1278 nontrst[t-NTBASE].value = ty;
1280 case ',':
1281 continue;
1282 case ';':
1283 t = gettok();
1284 default:
1285 break;
1287 break;
1289 continue;
1291 case UNION:
1292 /* copy the union declaration to the output */
1293 cpyunion();
1294 t = gettok();
1295 continue;
1297 case LEFT:
1298 case BINARY:
1299 case RIGHT:
1300 i++;
1302 case TERM:
1303 /* nonzero means new prec. and assoc. */
1304 lev = t-TERM;
1305 ty = 0;
1307 /* get identifiers so defined */
1308 t = gettok();
1310 /* there is a type defined */
1311 if(t == TYPENAME) {
1312 ty = numbval;
1313 t = gettok();
1315 for(;;) {
1316 switch(t) {
1317 case ',':
1318 t = gettok();
1319 continue;
1321 case ';':
1322 break;
1324 case IDENTIFIER:
1325 j = chfind(0, tokname);
1326 if(j >= NTBASE)
1327 error("%s defined earlier as nonterminal", tokname);
1328 if(lev) {
1329 if(ASSOC(toklev[j]))
1330 error("redeclaration of precedence of %s", tokname);
1331 SETASC(toklev[j], lev);
1332 SETPLEV(toklev[j], i);
1334 if(ty) {
1335 if(TYPE(toklev[j]))
1336 error("redeclaration of type of %s", tokname);
1337 SETTYPE(toklev[j],ty);
1339 t = gettok();
1340 if(t == NUMBER) {
1341 tokset[j].value = numbval;
1342 if(j < ndefout && j > 3)
1343 error("please define type number of %s earlier",
1344 tokset[j].name);
1345 t = gettok();
1347 continue;
1349 break;
1351 continue;
1353 case LCURLY:
1354 defout(0);
1355 cpycode();
1356 t = gettok();
1357 continue;
1359 default:
1360 error("syntax error");
1362 if(t == ENDFILE)
1363 error("unexpected EOF before %%");
1365 /* t is MARK */
1366 Bprint(ftable, "extern int yyerrflag;\n");
1367 Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1368 Bprint(ftable, "#define YYMAXDEPTH 150\n");
1369 Bprint(ftable, "#endif\n" );
1370 if(!ntypes) {
1371 Bprint(ftable, "#ifndef YYSTYPE\n");
1372 Bprint(ftable, "#define YYSTYPE int\n");
1373 Bprint(ftable, "#endif\n");
1375 Bprint(ftable, "YYSTYPE yylval;\n");
1376 Bprint(ftable, "YYSTYPE yyval;\n");
1378 prdptr[0] = mem;
1380 /* added production */
1381 *mem++ = NTBASE;
1383 /* if start is 0, we will overwrite with the lhs of the first rule */
1384 *mem++ = start;
1385 *mem++ = 1;
1386 *mem++ = 0;
1387 prdptr[1] = mem;
1388 while((t=gettok()) == LCURLY)
1389 cpycode();
1390 if(t != IDENTCOLON)
1391 error("bad syntax on first rule");
1393 if(!start)
1394 prdptr[0][1] = chfind(1, tokname);
1396 /* read rules */
1397 while(t != MARK && t != ENDFILE) {
1398 /* process a rule */
1399 rlines[nprod] = lineno;
1400 if(t == '|')
1401 *mem++ = *prdptr[nprod-1];
1402 else
1403 if(t == IDENTCOLON) {
1404 *mem = chfind(1, tokname);
1405 if(*mem < NTBASE)
1406 error("token illegal on LHS of grammar rule");
1407 mem++;
1408 } else
1409 error("illegal rule: missing semicolon or | ?");
1410 /* read rule body */
1411 t = gettok();
1413 more_rule:
1414 while(t == IDENTIFIER) {
1415 *mem = chfind(1, tokname);
1416 if(*mem < NTBASE)
1417 levprd[nprod] = toklev[*mem];
1418 mem++;
1419 t = gettok();
1421 if(t == PREC) {
1422 if(gettok() != IDENTIFIER)
1423 error("illegal %%prec syntax");
1424 j = chfind(2, tokname);
1425 if(j >= NTBASE)
1426 error("nonterminal %s illegal after %%prec",
1427 nontrst[j-NTBASE].name);
1428 levprd[nprod] = toklev[j];
1429 t = gettok();
1431 if(t == '=') {
1432 levprd[nprod] |= ACTFLAG;
1433 Bprint(faction, "\ncase %d:", nprod);
1434 cpyact(mem-prdptr[nprod]-1);
1435 Bprint(faction, " break;");
1436 if((t=gettok()) == IDENTIFIER) {
1438 /* action within rule... */
1439 sprint(actnm, "$$%d", nprod);
1441 /* make it a nonterminal */
1442 j = chfind(1, actnm);
1445 * the current rule will become rule number nprod+1
1446 * move the contents down, and make room for the null
1448 for(p = mem; p >= prdptr[nprod]; --p)
1449 p[2] = *p;
1450 mem += 2;
1452 /* enter null production for action */
1453 p = prdptr[nprod];
1454 *p++ = j;
1455 *p++ = -nprod;
1457 /* update the production information */
1458 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1459 levprd[nprod] = ACTFLAG;
1460 if(++nprod >= NPROD)
1461 error("more than %d rules", NPROD);
1462 prdptr[nprod] = p;
1464 /* make the action appear in the original rule */
1465 *mem++ = j;
1467 /* get some more of the rule */
1468 goto more_rule;
1472 while(t == ';')
1473 t = gettok();
1474 *mem++ = -nprod;
1476 /* check that default action is reasonable */
1477 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1479 /* no explicit action, LHS has value */
1480 int tempty;
1482 tempty = prdptr[nprod][1];
1483 if(tempty < 0)
1484 error("must return a value, since LHS has a type");
1485 else
1486 if(tempty >= NTBASE)
1487 tempty = nontrst[tempty-NTBASE].value;
1488 else
1489 tempty = TYPE(toklev[tempty]);
1490 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1491 error("default action causes potential type clash");
1493 nprod++;
1494 if(nprod >= NPROD)
1495 error("more than %d rules", NPROD);
1496 prdptr[nprod] = mem;
1497 levprd[nprod] = 0;
1500 /* end of all rules */
1501 defout(1);
1503 finact();
1504 if(t == MARK) {
1505 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1506 while((c=Bgetrune(finput)) != Beof)
1507 Bputrune(ftable, c);
1509 Bterm(finput);
1513 * finish action routine
1515 void
1516 finact(void)
1519 Bterm(faction);
1520 Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1521 Bprint(ftable, "#define YYERRCODE %d\n", 2);
1525 * define s to be a terminal if t=0
1526 * or a nonterminal if t=1
1528 int
1529 defin(int nt, char *s)
1531 int val;
1532 Rune rune;
1534 val = 0;
1535 if(nt) {
1536 nnonter++;
1537 if(nnonter >= NNONTERM)
1538 error("too many nonterminals, limit %d",NNONTERM);
1539 nontrst[nnonter].name = cstash(s);
1540 return NTBASE + nnonter;
1543 /* must be a token */
1544 ntokens++;
1545 if(ntokens >= NTERMS)
1546 error("too many terminals, limit %d", NTERMS);
1547 tokset[ntokens].name = cstash(s);
1549 /* establish value for token */
1550 /* single character literal */
1551 if(s[0] == ' ') {
1552 val = chartorune(&rune, &s[1]);
1553 if(s[val+1] == 0) {
1554 val = rune;
1555 goto out;
1559 /* escape sequence */
1560 if(s[0] == ' ' && s[1] == '\\') {
1561 if(s[3] == 0) {
1562 /* single character escape sequence */
1563 switch(s[2]) {
1564 case 'n': val = '\n'; break;
1565 case 'r': val = '\r'; break;
1566 case 'b': val = '\b'; break;
1567 case 't': val = '\t'; break;
1568 case 'f': val = '\f'; break;
1569 case '\'': val = '\''; break;
1570 case '"': val = '"'; break;
1571 case '\\': val = '\\'; break;
1572 default: error("invalid escape");
1574 goto out;
1577 /* \nnn sequence */
1578 if(s[2] >= '0' && s[2] <= '7') {
1579 if(s[3] < '0' ||
1580 s[3] > '7' ||
1581 s[4] < '0' ||
1582 s[4] > '7' ||
1583 s[5] != 0)
1584 error("illegal \\nnn construction");
1585 val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1586 if(val == 0)
1587 error("'\\000' is illegal");
1588 goto out;
1590 error("unknown escape");
1592 val = extval++;
1594 out:
1595 tokset[ntokens].value = val;
1596 toklev[ntokens] = 0;
1597 return ntokens;
1601 * write out the defines (at the end of the declaration section)
1603 void
1604 defout(int last)
1606 int i, c;
1607 char sar[NAMESIZE+10];
1609 for(i=ndefout; i<=ntokens; i++) {
1610 /* non-literals */
1611 c = tokset[i].name[0];
1612 if(c != ' ' && c != '$') {
1613 Bprint(ftable, "#define %s %d\n",
1614 tokset[i].name, tokset[i].value);
1615 if(fdefine)
1616 Bprint(fdefine, "#define\t%s\t%d\n",
1617 tokset[i].name, tokset[i].value);
1620 ndefout = ntokens+1;
1621 if(last && fdebug) {
1622 Bprint(fdebug, "char* yytoknames[] =\n{\n");
1623 TLOOP(i) {
1624 if(tokset[i].name) {
1625 chcopy(sar, tokset[i].name);
1626 Bprint(fdebug, "\t\"%s\",\n", sar);
1627 continue;
1629 Bprint(fdebug, "\t0,\n");
1631 Bprint(fdebug, "};\n");
1635 char*
1636 cstash(char *s)
1638 char *temp;
1640 temp = cnamp;
1641 do {
1642 if(cnamp >= &cnames[cnamsz])
1643 error("too many characters in id's and literals");
1644 else
1645 *cnamp++ = *s;
1646 } while(*s++);
1647 return temp;
1650 long
1651 gettok(void)
1653 long c;
1654 Rune rune;
1655 int i, base, match, reserve;
1656 static int peekline;
1658 begin:
1659 reserve = 0;
1660 lineno += peekline;
1661 peekline = 0;
1662 c = Bgetrune(finput);
1663 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1664 if(c == '\n')
1665 lineno++;
1666 c = Bgetrune(finput);
1669 /* skip comment */
1670 if(c == '/') {
1671 lineno += skipcom();
1672 goto begin;
1674 switch(c) {
1675 case Beof:
1676 return ENDFILE;
1678 case '{':
1679 Bungetrune(finput);
1680 return '=';
1682 case '<':
1683 /* get, and look up, a type name (union member name) */
1684 i = 0;
1685 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1686 rune = c;
1687 c = runetochar(&tokname[i], &rune);
1688 if(i < NAMESIZE)
1689 i += c;
1691 if(c != '>')
1692 error("unterminated < ... > clause");
1693 tokname[i] = 0;
1694 for(i=1; i<=ntypes; i++)
1695 if(!strcmp(typeset[i], tokname)) {
1696 numbval = i;
1697 return TYPENAME;
1699 ntypes++;
1700 numbval = ntypes;
1701 typeset[numbval] = cstash(tokname);
1702 return TYPENAME;
1704 case '"':
1705 case '\'':
1706 match = c;
1707 tokname[0] = ' ';
1708 i = 1;
1709 for(;;) {
1710 c = Bgetrune(finput);
1711 if(c == '\n' || c <= 0)
1712 error("illegal or missing ' or \"" );
1713 if(c == '\\') {
1714 tokname[i] = '\\';
1715 if(i < NAMESIZE)
1716 i++;
1717 c = Bgetrune(finput);
1718 } else
1719 if(c == match)
1720 break;
1721 rune = c;
1722 c = runetochar(&tokname[i], &rune);
1723 if(i < NAMESIZE)
1724 i += c;
1726 break;
1728 case '%':
1729 case '\\':
1730 switch(c = Bgetrune(finput)) {
1731 case '0': return TERM;
1732 case '<': return LEFT;
1733 case '2': return BINARY;
1734 case '>': return RIGHT;
1735 case '%':
1736 case '\\': return MARK;
1737 case '=': return PREC;
1738 case '{': return LCURLY;
1739 default: reserve = 1;
1742 default:
1743 /* number */
1744 if(isdigit(c)) {
1745 numbval = c-'0';
1746 base = (c=='0')? 8: 10;
1747 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1748 numbval = numbval*base + (c-'0');
1749 Bungetrune(finput);
1750 return NUMBER;
1752 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
1753 i = 0;
1754 while(islower(c) || isupper(c) || isdigit(c) ||
1755 c == '-' || c=='_' || c=='.' || c=='$') {
1756 if(reserve && isupper(c))
1757 c += 'a'-'A';
1758 rune = c;
1759 c = runetochar(&tokname[i], &rune);
1760 if(i < NAMESIZE)
1761 i += c;
1762 c = Bgetrune(finput);
1764 } else
1765 return c;
1766 Bungetrune(finput);
1768 tokname[i] = 0;
1770 /* find a reserved word */
1771 if(reserve) {
1772 for(c=0; resrv[c].name; c++)
1773 if(strcmp(tokname, resrv[c].name) == 0)
1774 return resrv[c].value;
1775 error("invalid escape, or illegal reserved word: %s", tokname);
1778 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1779 c = Bgetrune(finput);
1780 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1781 if(c == '\n')
1782 peekline++;
1783 /* look for comments */
1784 if(c == '/')
1785 peekline += skipcom();
1786 c = Bgetrune(finput);
1788 if(c == ':')
1789 return IDENTCOLON;
1790 Bungetrune(finput);
1791 return IDENTIFIER;
1795 * determine the type of a symbol
1797 int
1798 fdtype(int t)
1800 int v;
1802 if(t >= NTBASE)
1803 v = nontrst[t-NTBASE].value;
1804 else
1805 v = TYPE(toklev[t]);
1806 if(v <= 0)
1807 error("must specify type for %s", (t>=NTBASE)?
1808 nontrst[t-NTBASE].name: tokset[t].name);
1809 return v;
1812 int
1813 chfind(int t, char *s)
1815 int i;
1817 if(s[0] == ' ')
1818 t = 0;
1819 TLOOP(i)
1820 if(!strcmp(s, tokset[i].name))
1821 return i;
1822 NTLOOP(i)
1823 if(!strcmp(s, nontrst[i].name))
1824 return NTBASE+i;
1826 /* cannot find name */
1827 if(t > 1)
1828 error("%s should have been defined earlier", s);
1829 return defin(t, s);
1833 * copy the union declaration to the output, and the define file if present
1835 void
1836 cpyunion(void)
1838 long c;
1839 int level;
1841 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1842 Bprint(ftable, "typedef union ");
1843 if(fdefine != 0)
1844 Bprint(fdefine, "\ntypedef union ");
1846 level = 0;
1847 for(;;) {
1848 if((c=Bgetrune(finput)) == Beof)
1849 error("EOF encountered while processing %%union");
1850 Bputrune(ftable, c);
1851 if(fdefine != 0)
1852 Bputrune(fdefine, c);
1853 switch(c) {
1854 case '\n':
1855 lineno++;
1856 break;
1857 case '{':
1858 level++;
1859 break;
1860 case '}':
1861 level--;
1863 /* we are finished copying */
1864 if(level == 0) {
1865 Bprint(ftable, " YYSTYPE;\n");
1866 if(fdefine != 0)
1867 Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1868 return;
1875 * copies code between \{ and \}
1877 void
1878 cpycode(void)
1881 long c;
1883 c = Bgetrune(finput);
1884 if(c == '\n') {
1885 c = Bgetrune(finput);
1886 lineno++;
1888 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1889 while(c != Beof) {
1890 if(c == '\\') {
1891 if((c=Bgetrune(finput)) == '}')
1892 return;
1893 Bputc(ftable, '\\');
1895 if(c == '%') {
1896 if((c=Bgetrune(finput)) == '}')
1897 return;
1898 Bputc(ftable, '%');
1900 Bputrune(ftable, c);
1901 if(c == '\n')
1902 lineno++;
1903 c = Bgetrune(finput);
1905 error("eof before %%}");
1909 * skip over comments
1910 * skipcom is called after reading a '/'
1912 int
1913 skipcom(void)
1915 long c;
1916 int i;
1918 /* i is the number of lines skipped */
1919 i = 0;
1920 if(Bgetrune(finput) != '*')
1921 error("illegal comment");
1922 c = Bgetrune(finput);
1923 while(c != Beof) {
1924 while(c == '*')
1925 if((c=Bgetrune(finput)) == '/')
1926 return i;
1927 if(c == '\n')
1928 i++;
1929 c = Bgetrune(finput);
1931 error("EOF inside comment");
1932 return 0;
1936 * copy C action to the next ; or closing }
1938 void
1939 cpyact(int offset)
1941 long c;
1942 int brac, match, j, s, fnd, tok;
1944 Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1945 brac = 0;
1947 loop:
1948 c = Bgetrune(finput);
1949 swt:
1950 switch(c) {
1951 case ';':
1952 if(brac == 0) {
1953 Bputrune(faction, c);
1954 return;
1956 goto lcopy;
1958 case '{':
1959 brac++;
1960 goto lcopy;
1962 case '$':
1963 s = 1;
1964 tok = -1;
1965 c = Bgetrune(finput);
1967 /* type description */
1968 if(c == '<') {
1969 Bungetrune(finput);
1970 if(gettok() != TYPENAME)
1971 error("bad syntax on $<ident> clause");
1972 tok = numbval;
1973 c = Bgetrune(finput);
1975 if(c == '$') {
1976 Bprint(faction, "yyval");
1978 /* put out the proper tag... */
1979 if(ntypes) {
1980 if(tok < 0)
1981 tok = fdtype(*prdptr[nprod]);
1982 Bprint(faction, ".%s", typeset[tok]);
1984 goto loop;
1986 if(c == '-') {
1987 s = -s;
1988 c = Bgetrune(finput);
1990 if(isdigit(c)) {
1991 j = 0;
1992 while(isdigit(c)) {
1993 j = j*10 + (c-'0');
1994 c = Bgetrune(finput);
1996 Bungetrune(finput);
1997 j = j*s - offset;
1998 if(j > 0)
1999 error("Illegal use of $%d", j+offset);
2001 dollar:
2002 Bprint(faction, "yypt[-%d].yyv", -j);
2004 /* put out the proper tag */
2005 if(ntypes) {
2006 if(j+offset <= 0 && tok < 0)
2007 error("must specify type of $%d", j+offset);
2008 if(tok < 0)
2009 tok = fdtype(prdptr[nprod][j+offset]);
2010 Bprint(faction, ".%s", typeset[tok]);
2012 goto loop;
2014 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2015 int tok; /* tok used oustide for type info */
2017 /* look for $name */
2018 Bungetrune(finput);
2019 if(gettok() != IDENTIFIER)
2020 error("$ must be followed by an identifier");
2021 tok = chfind(2, tokname);
2022 if((c = Bgetrune(finput)) != '#') {
2023 Bungetrune(finput);
2024 fnd = -1;
2025 } else
2026 if(gettok() != NUMBER) {
2027 error("# must be followed by number");
2028 fnd = -1;
2029 } else
2030 fnd = numbval;
2031 for(j=1; j<=offset; ++j)
2032 if(tok == prdptr[nprod][j]) {
2033 if(--fnd <= 0) {
2034 j -= offset;
2035 goto dollar;
2038 error("$name or $name#number not found");
2040 Bputc(faction, '$');
2041 if(s < 0 )
2042 Bputc(faction, '-');
2043 goto swt;
2045 case '}':
2046 brac--;
2047 if(brac)
2048 goto lcopy;
2049 Bputrune(faction, c);
2050 return;
2052 case '/':
2053 /* look for comments */
2054 Bputrune(faction, c);
2055 c = Bgetrune(finput);
2056 if(c != '*')
2057 goto swt;
2059 /* it really is a comment */
2060 Bputrune(faction, c);
2061 c = Bgetrune(finput);
2062 while(c >= 0) {
2063 while(c == '*') {
2064 Bputrune(faction, c);
2065 if((c=Bgetrune(finput)) == '/')
2066 goto lcopy;
2068 Bputrune(faction, c);
2069 if(c == '\n')
2070 lineno++;
2071 c = Bgetrune(finput);
2073 error("EOF inside comment");
2075 case '\'':
2076 /* character constant */
2077 match = '\'';
2078 goto string;
2080 case '"':
2081 /* character string */
2082 match = '"';
2084 string:
2085 Bputrune(faction, c);
2086 while(c = Bgetrune(finput)) {
2087 if(c == '\\') {
2088 Bputrune(faction, c);
2089 c = Bgetrune(finput);
2090 if(c == '\n')
2091 lineno++;
2092 } else
2093 if(c == match)
2094 goto lcopy;
2095 if(c == '\n')
2096 error("newline in string or char. const.");
2097 Bputrune(faction, c);
2099 error("EOF in string or character constant");
2101 case Beof:
2102 error("action does not terminate");
2104 case '\n':
2105 lineno++;
2106 goto lcopy;
2109 lcopy:
2110 Bputrune(faction, c);
2111 goto loop;
2114 void
2115 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2117 char buf[256];
2119 if(vflag) {
2120 sprint(buf, "%s.%s", stem, FILEU);
2121 foutput = Bopen(buf, OWRITE);
2122 if(foutput == 0)
2123 error("cannot open %s", buf);
2125 if(yydebug) {
2126 sprint(buf, "%s.%s", stem, FILEDEBUG);
2127 if((fdebug = Bopen(buf, OWRITE)) == 0)
2128 error("can't open %s", buf);
2130 if(dflag) {
2131 sprint(buf, "%s.%s", stem, FILED);
2132 fdefine = Bopen(buf, OWRITE);
2133 if(fdefine == 0)
2134 error("can't create %s", buf);
2136 if(ytab == 0)
2137 sprint(buf, "%s.%s", stem, OFILE);
2138 else
2139 strcpy(buf, ytabc);
2140 ftable = Bopen(buf, OWRITE);
2141 if(ftable == 0)
2142 error("cannot open table file %s", buf);
2146 * print the output for the states
2148 void
2149 output(void)
2151 int i, k, c;
2152 Wset *u, *v;
2154 Bprint(ftable, "short yyexca[] =\n{");
2155 if(fdebug)
2156 Bprint(fdebug, "char* yystates[] =\n{\n");
2158 /* output the stuff for state i */
2159 SLOOP(i) {
2160 nolook = tystate[i]!=MUSTLOOKAHEAD;
2161 closure(i);
2163 /* output actions */
2164 nolook = 1;
2165 aryfil(temp1, ntokens+nnonter+1, 0);
2166 WSLOOP(wsets, u) {
2167 c = *(u->pitem);
2168 if(c > 1 && c < NTBASE && temp1[c] == 0) {
2169 WSLOOP(u, v)
2170 if(c == *(v->pitem))
2171 putitem(v->pitem+1, (Lkset*)0);
2172 temp1[c] = state(c);
2173 } else
2174 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2175 temp1[c+ntokens] = amem[indgo[i]+c];
2177 if(i == 1)
2178 temp1[1] = ACCEPTCODE;
2180 /* now, we have the shifts; look at the reductions */
2181 lastred = 0;
2182 WSLOOP(wsets, u) {
2183 c = *u->pitem;
2185 /* reduction */
2186 if(c <= 0) {
2187 lastred = -c;
2188 TLOOP(k)
2189 if(BIT(u->ws.lset, k)) {
2190 if(temp1[k] == 0)
2191 temp1[k] = c;
2192 else
2193 if(temp1[k] < 0) { /* reduce/reduce conflict */
2194 if(foutput)
2195 Bprint(foutput,
2196 "\n%d: reduce/reduce conflict"
2197 " (red'ns %d and %d ) on %s",
2198 i, -temp1[k], lastred,
2199 symnam(k));
2200 if(-temp1[k] > lastred)
2201 temp1[k] = -lastred;
2202 zzrrconf++;
2203 } else
2204 /* potential shift/reduce conflict */
2205 precftn( lastred, k, i );
2209 wract(i);
2212 if(fdebug)
2213 Bprint(fdebug, "};\n");
2214 Bprint(ftable, "};\n");
2215 Bprint(ftable, "#define YYNPROD %d\n", nprod);
2216 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2217 if(yydebug)
2218 Bprint(ftable, "#define yydebug %s\n", yydebug);
2222 * pack state i from temp1 into amem
2224 int
2225 apack(int *p, int n)
2227 int *pp, *qq, *rr, off, *q, *r;
2229 /* we don't need to worry about checking because
2230 * we will only look at entries known to be there...
2231 * eliminate leading and trailing 0's
2234 q = p+n;
2235 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2237 /* no actions */
2238 if(pp > q)
2239 return 0;
2240 p = pp;
2242 /* now, find a place for the elements from p to q, inclusive */
2243 r = &amem[ACTSIZE-1];
2244 for(rr = amem; rr <= r; rr++, off++) {
2245 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2246 if(*pp != 0)
2247 if(*pp != *qq && *qq != 0)
2248 goto nextk;
2250 /* we have found an acceptable k */
2251 if(pkdebug && foutput != 0)
2252 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2253 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2254 if(*pp) {
2255 if(qq > r)
2256 error("action table overflow");
2257 if(qq > memp)
2258 memp = qq;
2259 *qq = *pp;
2261 if(pkdebug && foutput != 0)
2262 for(pp = amem; pp <= memp; pp += 10) {
2263 Bprint(foutput, "\t");
2264 for(qq = pp; qq <= pp+9; qq++)
2265 Bprint(foutput, "%d ", *qq);
2266 Bprint(foutput, "\n");
2268 return(off);
2269 nextk:;
2271 error("no space in action table");
2272 return 0;
2276 * output the gotos for the nontermninals
2278 void
2279 go2out(void)
2281 int i, j, k, best, count, cbest, times;
2283 /* mark begining of gotos */
2284 Bprint(ftemp, "$\n");
2285 for(i = 1; i <= nnonter; i++) {
2286 go2gen(i);
2288 /* find the best one to make default */
2289 best = -1;
2290 times = 0;
2292 /* is j the most frequent */
2293 for(j = 0; j <= nstate; j++) {
2294 if(tystate[j] == 0)
2295 continue;
2296 if(tystate[j] == best)
2297 continue;
2299 /* is tystate[j] the most frequent */
2300 count = 0;
2301 cbest = tystate[j];
2302 for(k = j; k <= nstate; k++)
2303 if(tystate[k] == cbest)
2304 count++;
2305 if(count > times) {
2306 best = cbest;
2307 times = count;
2311 /* best is now the default entry */
2312 zzgobest += times-1;
2313 for(j = 0; j <= nstate; j++)
2314 if(tystate[j] != 0 && tystate[j] != best) {
2315 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2316 zzgoent++;
2319 /* now, the default */
2320 if(best == -1)
2321 best = 0;
2322 zzgoent++;
2323 Bprint(ftemp, "%d\n", best);
2328 * output the gotos for nonterminal c
2330 void
2331 go2gen(int c)
2333 int i, work, cc;
2334 Item *p, *q;
2337 /* first, find nonterminals with gotos on c */
2338 aryfil(temp1, nnonter+1, 0);
2339 temp1[c] = 1;
2340 work = 1;
2341 while(work) {
2342 work = 0;
2343 PLOOP(0, i)
2345 /* cc is a nonterminal */
2346 if((cc=prdptr[i][1]-NTBASE) >= 0)
2347 /* cc has a goto on c */
2348 if(temp1[cc] != 0) {
2350 /* thus, the left side of production i does too */
2351 cc = *prdptr[i]-NTBASE;
2352 if(temp1[cc] == 0) {
2353 work = 1;
2354 temp1[cc] = 1;
2359 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2360 if(g2debug && foutput != 0) {
2361 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2362 NTLOOP(i)
2363 if(temp1[i])
2364 Bprint(foutput, "%s ", nontrst[i].name);
2365 Bprint(foutput, "\n");
2368 /* now, go through and put gotos into tystate */
2369 aryfil(tystate, nstate, 0);
2370 SLOOP(i)
2371 ITMLOOP(i, p, q)
2372 if((cc = *p->pitem) >= NTBASE)
2373 /* goto on c is possible */
2374 if(temp1[cc-NTBASE]) {
2375 tystate[i] = amem[indgo[i]+c];
2376 break;
2381 * decide a shift/reduce conflict by precedence.
2382 * r is a rule number, t a token number
2383 * the conflict is in state s
2384 * temp1[t] is changed to reflect the action
2386 void
2387 precftn(int r, int t, int s)
2389 int lp, lt, action;
2391 lp = levprd[r];
2392 lt = toklev[t];
2393 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2395 /* conflict */
2396 if(foutput != 0)
2397 Bprint(foutput,
2398 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2399 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2400 zzsrconf++;
2401 return;
2403 if(PLEVEL(lt) == PLEVEL(lp))
2404 action = ASSOC(lt);
2405 else
2406 if(PLEVEL(lt) > PLEVEL(lp))
2407 action = RASC; /* shift */
2408 else
2409 action = LASC; /* reduce */
2410 switch(action) {
2411 case BASC: /* error action */
2412 temp1[t] = ERRCODE;
2413 break;
2414 case LASC: /* reduce */
2415 temp1[t] = -r;
2416 break;
2421 * output state i
2422 * temp1 has the actions, lastred the default
2424 void
2425 wract(int i)
2427 int p, p0, p1, ntimes, tred, count, j, flag;
2429 /* find the best choice for lastred */
2430 lastred = 0;
2431 ntimes = 0;
2432 TLOOP(j) {
2433 if(temp1[j] >= 0)
2434 continue;
2435 if(temp1[j]+lastred == 0)
2436 continue;
2437 /* count the number of appearances of temp1[j] */
2438 count = 0;
2439 tred = -temp1[j];
2440 levprd[tred] |= REDFLAG;
2441 TLOOP(p)
2442 if(temp1[p]+tred == 0)
2443 count++;
2444 if(count > ntimes) {
2445 lastred = tred;
2446 ntimes = count;
2451 * for error recovery, arrange that, if there is a shift on the
2452 * error recovery token, `error', that the default be the error action
2454 if(temp1[2] > 0)
2455 lastred = 0;
2457 /* clear out entries in temp1 which equal lastred */
2458 TLOOP(p)
2459 if(temp1[p]+lastred == 0)
2460 temp1[p] = 0;
2462 wrstate(i);
2463 defact[i] = lastred;
2464 flag = 0;
2465 TLOOP(p0)
2466 if((p1=temp1[p0]) != 0) {
2467 if(p1 < 0) {
2468 p1 = -p1;
2469 goto exc;
2471 if(p1 == ACCEPTCODE) {
2472 p1 = -1;
2473 goto exc;
2475 if(p1 == ERRCODE) {
2476 p1 = 0;
2477 exc:
2478 if(flag++ == 0)
2479 Bprint(ftable, "-1, %d,\n", i);
2480 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2481 zzexcp++;
2482 continue;
2484 Bprint(ftemp, "%d,%d,", p0, p1);
2485 zzacent++;
2487 if(flag) {
2488 defact[i] = -2;
2489 Bprint(ftable, "\t-2, %d,\n", lastred);
2491 Bprint(ftemp, "\n");
2495 * writes state i
2497 void
2498 wrstate(int i)
2500 int j0, j1;
2501 Item *pp, *qq;
2502 Wset *u;
2504 if(fdebug) {
2505 if(lastred) {
2506 Bprint(fdebug, " 0, /*%d*/\n", i);
2507 } else {
2508 Bprint(fdebug, " \"");
2509 ITMLOOP(i, pp, qq)
2510 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2511 if(tystate[i] == MUSTLOOKAHEAD)
2512 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2513 if(*u->pitem < 0)
2514 Bprint(fdebug, "%s\\n", writem(u->pitem));
2515 Bprint(fdebug, "\", /*%d*/\n", i);
2518 if(foutput == 0)
2519 return;
2520 Bprint(foutput, "\nstate %d\n", i);
2521 ITMLOOP(i, pp, qq)
2522 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2523 if(tystate[i] == MUSTLOOKAHEAD)
2524 /* print out empty productions in closure */
2525 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2526 if(*u->pitem < 0)
2527 Bprint(foutput, "\t%s\n", writem(u->pitem));
2529 /* check for state equal to another */
2530 TLOOP(j0)
2531 if((j1=temp1[j0]) != 0) {
2532 Bprint(foutput, "\n\t%s ", symnam(j0));
2533 /* shift, error, or accept */
2534 if(j1 > 0) {
2535 if(j1 == ACCEPTCODE)
2536 Bprint(foutput, "accept");
2537 else
2538 if(j1 == ERRCODE)
2539 Bprint(foutput, "error");
2540 else
2541 Bprint(foutput, "shift %d", j1);
2542 } else
2543 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2546 /* output the final production */
2547 if(lastred)
2548 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
2549 lastred, rlines[lastred]);
2550 else
2551 Bprint(foutput, "\n\t. error\n\n");
2553 /* now, output nonterminal actions */
2554 j1 = ntokens;
2555 for(j0 = 1; j0 <= nnonter; j0++) {
2556 j1++;
2557 if(temp1[j1])
2558 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2562 void
2563 warray(char *s, int *v, int n)
2565 int i;
2567 Bprint(ftable, "short %s[] =\n{", s);
2568 for(i=0;;) {
2569 if(i%10 == 0)
2570 Bprint(ftable, "\n");
2571 Bprint(ftable, "%4d", v[i]);
2572 i++;
2573 if(i >= n) {
2574 Bprint(ftable, "\n};\n");
2575 break;
2577 Bprint(ftable, ",");
2582 * in order to free up the mem and amem arrays for the optimizer,
2583 * and still be able to output yyr1, etc., after the sizes of
2584 * the action array is known, we hide the nonterminals
2585 * derived by productions in levprd.
2588 void
2589 hideprod(void)
2591 int i, j;
2593 j = 0;
2594 levprd[0] = 0;
2595 PLOOP(1, i) {
2596 if(!(levprd[i] & REDFLAG)) {
2597 j++;
2598 if(foutput != 0)
2599 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
2601 levprd[i] = *prdptr[i] - NTBASE;
2603 if(j)
2604 print("%d rules never reduced\n", j);
2607 void
2608 callopt(void)
2610 int i, *p, j, k, *q;
2612 /* read the arrays from tempfile and set parameters */
2613 finput = Bopen(tempname, OREAD);
2614 if(finput == 0)
2615 error("optimizer cannot open tempfile");
2617 pgo[0] = 0;
2618 temp1[0] = 0;
2619 nstate = 0;
2620 nnonter = 0;
2621 for(;;) {
2622 switch(gtnm()) {
2623 case '\n':
2624 nstate++;
2625 pmem--;
2626 temp1[nstate] = pmem - mem0;
2627 case ',':
2628 continue;
2629 case '$':
2630 break;
2631 default:
2632 error("bad tempfile");
2634 break;
2637 pmem--;
2638 temp1[nstate] = yypgo[0] = pmem - mem0;
2639 for(;;) {
2640 switch(gtnm()) {
2641 case '\n':
2642 nnonter++;
2643 yypgo[nnonter] = pmem-mem0;
2644 case ',':
2645 continue;
2646 case -1:
2647 break;
2648 default:
2649 error("bad tempfile");
2651 break;
2653 pmem--;
2654 yypgo[nnonter--] = pmem - mem0;
2655 for(i = 0; i < nstate; i++) {
2656 k = 32000;
2657 j = 0;
2658 q = mem0 + temp1[i+1];
2659 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2660 if(*p > j)
2661 j = *p;
2662 if(*p < k)
2663 k = *p;
2665 /* nontrivial situation */
2666 if(k <= j) {
2667 /* j is now the range */
2668 /* j -= k; *//* call scj */
2669 if(k > maxoff)
2670 maxoff = k;
2672 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2673 if(j > maxspr)
2674 maxspr = j;
2677 /* initialize ggreed table */
2678 for(i = 1; i <= nnonter; i++) {
2679 ggreed[i] = 1;
2680 j = 0;
2682 /* minimum entry index is always 0 */
2683 q = mem0 + yypgo[i+1] - 1;
2684 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2685 ggreed[i] += 2;
2686 if(*p > j)
2687 j = *p;
2689 ggreed[i] = ggreed[i] + 2*j;
2690 if(j > maxoff)
2691 maxoff = j;
2694 /* now, prepare to put the shift actions into the amem array */
2695 for(i = 0; i < ACTSIZE; i++)
2696 amem[i] = 0;
2697 maxa = amem;
2698 for(i = 0; i < nstate; i++) {
2699 if(tystate[i] == 0 && adb > 1)
2700 Bprint(ftable, "State %d: null\n", i);
2701 indgo[i] = YYFLAG1;
2703 while((i = nxti()) != NOMORE)
2704 if(i >= 0)
2705 stin(i);
2706 else
2707 gin(-i);
2709 /* print amem array */
2710 if(adb > 2 )
2711 for(p = amem; p <= maxa; p += 10) {
2712 Bprint(ftable, "%4d ", (int)(p-amem));
2713 for(i = 0; i < 10; ++i)
2714 Bprint(ftable, "%4d ", p[i]);
2715 Bprint(ftable, "\n");
2718 /* write out the output appropriate to the language */
2719 aoutput();
2720 osummary();
2721 ZAPFILE(tempname);
2724 void
2725 gin(int i)
2727 int *p, *r, *s, *q1, *q2;
2729 /* enter gotos on nonterminal i into array amem */
2730 ggreed[i] = 0;
2732 q2 = mem0+ yypgo[i+1] - 1;
2733 q1 = mem0 + yypgo[i];
2735 /* now, find amem place for it */
2736 for(p = amem; p < &amem[ACTSIZE]; p++) {
2737 if(*p)
2738 continue;
2739 for(r = q1; r < q2; r += 2) {
2740 s = p + *r + 1;
2741 if(*s)
2742 goto nextgp;
2743 if(s > maxa)
2744 if((maxa = s) > &amem[ACTSIZE])
2745 error("a array overflow");
2747 /* we have found amem spot */
2748 *p = *q2;
2749 if(p > maxa)
2750 if((maxa = p) > &amem[ACTSIZE])
2751 error("a array overflow");
2752 for(r = q1; r < q2; r += 2) {
2753 s = p + *r + 1;
2754 *s = r[1];
2756 pgo[i] = p-amem;
2757 if(adb > 1)
2758 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2759 return;
2761 nextgp:;
2763 error("cannot place goto %d\n", i);
2766 void
2767 stin(int i)
2769 int *r, *s, n, flag, j, *q1, *q2;
2771 tystate[i] = 0;
2773 /* enter state i into the amem array */
2774 q2 = mem0+temp1[i+1];
2775 q1 = mem0+temp1[i];
2776 /* find an acceptable place */
2777 for(n = -maxoff; n < ACTSIZE; n++) {
2778 flag = 0;
2779 for(r = q1; r < q2; r += 2) {
2780 if((s = *r + n + amem) < amem)
2781 goto nextn;
2782 if(*s == 0)
2783 flag++;
2784 else
2785 if(*s != r[1])
2786 goto nextn;
2789 /* check that the position equals another only if the states are identical */
2790 for(j=0; j<nstate; j++) {
2791 if(indgo[j] == n) {
2793 /* we have some disagreement */
2794 if(flag)
2795 goto nextn;
2796 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2798 /* states are equal */
2799 indgo[i] = n;
2800 if(adb > 1)
2801 Bprint(ftable,
2802 "State %d: entry at %d equals state %d\n",
2803 i, n, j);
2804 return;
2807 /* we have some disagreement */
2808 goto nextn;
2812 for(r = q1; r < q2; r += 2) {
2813 if((s = *r+n+amem) >= &amem[ACTSIZE])
2814 error("out of space in optimizer a array");
2815 if(s > maxa)
2816 maxa = s;
2817 if(*s != 0 && *s != r[1])
2818 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2819 *s = r[1];
2821 indgo[i] = n;
2822 if(adb > 1)
2823 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2824 return;
2825 nextn:;
2827 error("Error; failure to place state %d\n", i);
2831 * finds the next i
2833 int
2834 nxti(void)
2836 int i, max, maxi;
2838 max = 0;
2839 maxi = 0;
2840 for(i = 1; i <= nnonter; i++)
2841 if(ggreed[i] >= max) {
2842 max = ggreed[i];
2843 maxi = -i;
2845 for(i = 0; i < nstate; ++i)
2846 if(tystate[i] >= max) {
2847 max = tystate[i];
2848 maxi = i;
2850 if(nxdb)
2851 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2852 if(max == 0)
2853 return NOMORE;
2854 return maxi;
2858 * write summary
2860 void
2861 osummary(void)
2864 int i, *p;
2866 if(foutput == 0)
2867 return;
2868 i = 0;
2869 for(p = maxa; p >= amem; p--)
2870 if(*p == 0)
2871 i++;
2873 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2874 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2875 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2876 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2880 * this version is for C
2881 * write out the optimized parser
2883 void
2884 aoutput(void)
2886 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2887 arout("yyact", amem, (maxa-amem)+1);
2888 arout("yypact", indgo, nstate);
2889 arout("yypgo", pgo, nnonter+1);
2892 void
2893 arout(char *s, int *v, int n)
2895 int i;
2897 Bprint(ftable, "short %s[] =\n{", s);
2898 for(i = 0; i < n;) {
2899 if(i%10 == 0)
2900 Bprint(ftable, "\n");
2901 Bprint(ftable, "%4d", v[i]);
2902 i++;
2903 if(i == n)
2904 Bprint(ftable, "\n};\n");
2905 else
2906 Bprint(ftable, ",");
2911 * read and convert an integer from the standard input
2912 * return the terminating character
2913 * blanks, tabs, and newlines are ignored
2915 int
2916 gtnm(void)
2918 int sign, val, c;
2920 sign = 0;
2921 val = 0;
2922 while((c=Bgetrune(finput)) != Beof) {
2923 if(isdigit(c)) {
2924 val = val*10 + c-'0';
2925 continue;
2927 if(c == '-') {
2928 sign = 1;
2929 continue;
2931 break;
2933 if(sign)
2934 val = -val;
2935 *pmem++ = val;
2936 if(pmem >= &mem0[MEMSIZE])
2937 error("out of space");
2938 return c;