Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
6 #define Bungetrune Bungetc /* ok for now. */
8 /*
9 * all these are 32 bit
10 */
11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n) (((n)+32)/32)
16 #define PARSER "#9/lib/yaccpar"
17 #define PARSERS "#9/lib/yaccpars"
18 #define TEMPNAME "y.tmp.XXXXXX"
19 #define ACTNAME "y.acts.XXXXXX"
20 #define OFILE "tab.c"
21 #define FILEU "output"
22 #define FILED "tab.h"
23 #define FILEDEBUG "debug"
25 enum
26 {
27 /*
28 * the following are adjustable
29 * according to memory size
30 */
31 ACTSIZE = 40000,
32 MEMSIZE = 40000,
33 NSTATES = 2000,
34 NTERMS = 511,
35 NPROD = 1600,
36 NNONTERM = 600,
37 TEMPSIZE = 2000,
38 CNAMSZ = 10000,
39 LSETSIZE = 2400,
40 WSETSIZE = 350,
42 NAMESIZE = 50,
43 NTYPES = 63,
44 ISIZE = 400,
46 PRIVATE = 0xE000, /* unicode private use */
48 /* relationships which must hold:
49 TBITSET ints must hold NTERMS+1 bits...
50 WSETSIZE >= NNONTERM
51 LSETSIZE >= NNONTERM
52 TEMPSIZE >= NTERMS + NNONTERM + 1
53 TEMPSIZE >= NSTATES
54 */
56 NTBASE = 010000,
57 ERRCODE = 8190,
58 ACCEPTCODE = 8191,
60 NOASC = 0, /* no assoc. */
61 LASC = 1, /* left assoc. */
62 RASC = 2, /* right assoc. */
63 BASC = 3, /* binary assoc. */
65 /* flags for state generation */
67 DONE = 0,
68 MUSTDO = 1,
69 MUSTLOOKAHEAD = 2,
71 /* flags for a rule having an action, and being reduced */
73 ACTFLAG = 04,
74 REDFLAG = 010,
76 /* output parser flags */
77 YYFLAG1 = -1000,
79 /* parse tokens */
80 IDENTIFIER = PRIVATE,
81 MARK,
82 TERM,
83 LEFT,
84 RIGHT,
85 BINARY,
86 PREC,
87 LCURLY,
88 IDENTCOLON,
89 NUMBER,
90 START,
91 TYPEDEF,
92 TYPENAME,
93 UNION,
95 ENDFILE = 0,
97 EMPTY = 1,
98 WHOKNOWS = 0,
99 OK = 1,
100 NOMORE = -1000,
101 };
103 /* macros for getting associativity and precedence levels */
105 #define ASSOC(i) ((i)&03)
106 #define PLEVEL(i) (((i)>>4)&077)
107 #define TYPE(i) (((i)>>10)&077)
109 /* macros for setting associativity and precedence levels */
111 #define SETASC(i,j) i |= j
112 #define SETPLEV(i,j) i |= (j<<4)
113 #define SETTYPE(i,j) i |= (j<<10)
115 /* looping macros */
117 #define TLOOP(i) for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i) for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i) for(i=s; i<nprod; i++)
120 #define SLOOP(i) for(i=0; i<nstate; i++)
121 #define WSBUMP(x) x++
122 #define WSLOOP(s,j) for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i) for(i=0; i<tbitset; i++)
126 /* command to clobber tempfiles after use */
128 #define ZAPFILE(x) if(x) remove(x)
130 /* I/O descriptors */
132 Biobuf* faction; /* file for saving actions */
133 Biobuf* fdefine; /* file for #defines */
134 Biobuf* fdebug; /* y.debug for strings for debugging */
135 Biobuf* ftable; /* y.tab.c file */
136 Biobuf* ftemp; /* tempfile to pass 2 */
137 Biobuf* finput; /* input file */
138 Biobuf* foutput; /* y.output file */
140 /* communication variables between various I/O routines */
142 char* infile; /* input file name */
143 int numbval; /* value of an input number */
144 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */
146 /* structure declarations */
148 typedef
149 struct
151 int lset[TBITSET];
152 } Lkset;
154 typedef
155 struct
157 int* pitem;
158 Lkset* look;
159 } Item;
161 typedef
162 struct
164 char* name;
165 int value;
166 } Symb;
168 typedef
169 struct
171 int* pitem;
172 int flag;
173 Lkset ws;
174 } Wset;
176 /* storage of names */
178 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
179 int cnamsz = CNAMSZ; /* size of cnames */
180 char* cnamp = cnames; /* place where next name is to be put in */
181 int ndefout = 4; /* number of defined symbols output */
182 char* tempname;
183 char* actname;
184 char ttempname[] = TEMPNAME;
185 char tactname[] = ACTNAME;
186 char* parser = PARSER;
187 char* yydebug;
189 /* storage of types */
190 int ntypes; /* number of types defined */
191 char* typeset[NTYPES]; /* pointers to type tags */
193 /* token information */
195 int ntokens = 0 ; /* number of tokens */
196 Symb tokset[NTERMS];
197 int toklev[NTERMS]; /* vector with the precedence of the terminals */
199 /* nonterminal information */
201 int nnonter = -1; /* the number of nonterminals */
202 Symb nontrst[NNONTERM];
203 int start; /* start symbol */
205 /* assigned token type values */
206 int extval = 0;
208 char* ytabc = OFILE; /* name of y.tab.c */
210 /* grammar rule information */
212 int mem0[MEMSIZE] ; /* production storage */
213 int* mem = mem0;
214 int nprod = 1; /* number of productions */
215 int* prdptr[NPROD]; /* pointers to descriptions of productions */
216 int levprd[NPROD]; /* precedence levels for the productions */
217 int rlines[NPROD]; /* line number for this rule */
219 /* state information */
221 int nstate = 0; /* number of states */
222 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
223 int tystate[NSTATES]; /* contains type information about the states */
224 int defact[NSTATES]; /* the default actions of states */
225 int tstates[NTERMS]; /* states generated by terminal gotos */
226 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
227 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
228 int lastred; /* the number of the last reduction of a state */
230 /* lookahead set information */
232 Lkset lkst[LSETSIZE];
233 int nolook; /* flag to turn off lookahead computations */
234 int tbitset; /* size of lookahead sets */
235 int nlset = 0; /* next lookahead set index */
236 int nolook = 0; /* flag to suppress lookahead computations */
237 Lkset clset; /* temporary storage for lookahead computations */
239 /* working set information */
241 Wset wsets[WSETSIZE];
242 Wset* cwp;
244 /* storage for action table */
246 int amem[ACTSIZE]; /* action table storage */
247 int* memp = amem; /* next free action table position */
248 int indgo[NSTATES]; /* index to the stored goto table */
250 /* temporary vector, indexable by states, terms, or ntokens */
252 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
253 int lineno = 1; /* current input line number */
254 int fatfl = 1; /* if on, error is fatal */
255 int nerrors = 0; /* number of errors */
257 /* statistics collection variables */
259 int zzgoent;
260 int zzgobest;
261 int zzacent;
262 int zzexcp;
263 int zzclose;
264 int zzrrconf;
265 int zzsrconf;
267 int* ggreed = lkst[0].lset;
268 int* pgo = wsets[0].ws.lset;
269 int* yypgo = &nontrst[0].value;
271 int maxspr = 0; /* maximum spread of any entry */
272 int maxoff = 0; /* maximum offset into a array */
273 int* pmem = mem0;
274 int* maxa;
275 int nxdb = 0;
276 int adb = 0;
279 /* storage for information about the nonterminals */
281 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
282 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
283 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
285 /* random stuff picked out from between functions */
287 int indebug = 0;
288 Wset* zzcwp = wsets;
289 int zzgoent = 0;
290 int zzgobest = 0;
291 int zzacent = 0;
292 int zzexcp = 0;
293 int zzclose = 0;
294 int zzsrconf = 0;
295 int* zzmemsz = mem0;
296 int zzrrconf = 0;
297 int pidebug = 0; /* debugging flag for putitem */
298 int gsdebug = 0;
299 int cldebug = 0; /* debugging flag for closure */
300 int pkdebug = 0;
301 int g2debug = 0;
303 struct
305 char* name;
306 long value;
307 } resrv[] =
309 "binary", BINARY,
310 "left", LEFT,
311 "nonassoc", BINARY,
312 "prec", PREC,
313 "right", RIGHT,
314 "start", START,
315 "term", TERM,
316 "token", TERM,
317 "type", TYPEDEF,
318 "union", UNION,
319 0,
320 };
322 /* define functions */
324 void main(int, char**);
325 void others(void);
326 char* chcopy(char*, char*);
327 char* writem(int*);
328 char* symnam(int);
329 void summary(void);
330 void error(char*, ...);
331 void aryfil(int*, int, int);
332 int setunion(int*, int*);
333 void prlook(Lkset*);
334 void cpres(void);
335 void cpfir(void);
336 int state(int);
337 void putitem(int*, Lkset*);
338 void cempty(void);
339 void stagen(void);
340 void closure(int);
341 Lkset* flset(Lkset*);
342 void cleantmp(void);
343 void intr(void);
344 void setup(int, char**);
345 void finact(void);
346 int defin(int, char*);
347 void defout(int);
348 char* cstash(char*);
349 long gettok(void);
350 int fdtype(int);
351 int chfind(int, char*);
352 void cpyunion(void);
353 void cpycode(void);
354 int skipcom(void);
355 void cpyact(int);
356 void openup(char*, int, int, int, char*);
357 void output(void);
358 int apack(int*, int);
359 void go2out(void);
360 void go2gen(int);
361 void precftn(int, int, int);
362 void wract(int);
363 void wrstate(int);
364 void warray(char*, int*, int);
365 void hideprod(void);
366 void callopt(void);
367 void gin(int);
368 void stin(int);
369 int nxti(void);
370 void osummary(void);
371 void aoutput(void);
372 void arout(char*, int*, int);
373 int gtnm(void);
375 void
376 main(int argc, char *argv[])
379 setup(argc, argv); /* initialize and read productions */
380 tbitset = NWORDS(ntokens);
381 cpres(); /* make table of which productions yield a given nonterminal */
382 cempty(); /* make a table of which nonterminals can match the empty string */
383 cpfir(); /* make a table of firsts of nonterminals */
384 stagen(); /* generate the states */
385 output(); /* write the states and the tables */
386 go2out();
387 hideprod();
388 summary();
389 callopt();
390 others();
391 exits(0);
394 /*
395 * put out other arrays, copy the parsers
396 */
397 void
398 others(void)
400 int c, i, j;
402 finput = Bopen(unsharp(parser), OREAD);
403 if(finput == 0)
404 error("cannot open parser %s: %r", parser);
405 warray("yyr1", levprd, nprod);
406 aryfil(temp1, nprod, 0);
407 PLOOP(1, i)
408 temp1[i] = prdptr[i+1]-prdptr[i]-2;
409 warray("yyr2", temp1, nprod);
411 aryfil(temp1, nstate, -1000);
412 TLOOP(i)
413 for(j=tstates[i]; j!=0; j=mstates[j])
414 temp1[j] = i;
415 NTLOOP(i)
416 for(j=ntstates[i]; j!=0; j=mstates[j])
417 temp1[j] = -i;
418 warray("yychk", temp1, nstate);
419 warray("yydef", defact, nstate);
421 /* put out token translation tables */
422 /* table 1 has 0-256 */
423 aryfil(temp1, 256, 0);
424 c = 0;
425 TLOOP(i) {
426 j = tokset[i].value;
427 if(j >= 0 && j < 256) {
428 if(temp1[j]) {
429 print("yacc bug -- cant have 2 different Ts with same value\n");
430 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
431 nerrors++;
433 temp1[j] = i;
434 if(j > c)
435 c = j;
438 warray("yytok1", temp1, c+1);
440 /* table 2 has PRIVATE-PRIVATE+256 */
441 aryfil(temp1, 256, 0);
442 c = 0;
443 TLOOP(i) {
444 j = tokset[i].value - PRIVATE;
445 if(j >= 0 && j < 256) {
446 if(temp1[j]) {
447 print("yacc bug -- cant have 2 different Ts with same value\n");
448 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
449 nerrors++;
451 temp1[j] = i;
452 if(j > c)
453 c = j;
456 warray("yytok2", temp1, c+1);
458 /* table 3 has everything else */
459 Bprint(ftable, "long yytok3[] =\n{\n");
460 c = 0;
461 TLOOP(i) {
462 j = tokset[i].value;
463 if(j >= 0 && j < 256)
464 continue;
465 if(j >= PRIVATE && j < 256+PRIVATE)
466 continue;
468 Bprint(ftable, "%4d,%4d,", j, i);
469 c++;
470 if(c%5 == 0)
471 Bprint(ftable, "\n");
473 Bprint(ftable, "%4d\n};\n", 0);
475 /* copy parser text */
476 while((c=Bgetrune(finput)) != Beof) {
477 if(c == '$') {
478 if((c = Bgetrune(finput)) != 'A')
479 Bputrune(ftable, '$');
480 else { /* copy actions */
481 faction = Bopen(actname, OREAD);
482 if(faction == 0)
483 error("cannot reopen action tempfile");
484 while((c=Bgetrune(faction)) != Beof)
485 Bputrune(ftable, c);
486 Bterm(faction);
487 ZAPFILE(actname);
488 c = Bgetrune(finput);
491 Bputrune(ftable, c);
493 Bterm(ftable);
496 /*
497 * copies string q into p, returning next free char ptr
498 */
499 char*
500 chcopy(char* p, char* q)
502 int c;
504 while(c = *q) {
505 if(c == '"')
506 *p++ = '\\';
507 *p++ = c;
508 q++;
510 *p = 0;
511 return p;
514 /*
515 * creates output string for item pointed to by pp
516 */
517 char*
518 writem(int *pp)
520 int i,*p;
521 static char sarr[ISIZE];
522 char* q;
524 for(p=pp; *p>0; p++)
526 p = prdptr[-*p];
527 q = chcopy(sarr, nontrst[*p-NTBASE].name);
528 q = chcopy(q, ": ");
529 for(;;) {
530 *q = ' ';
531 p++;
532 if(p == pp)
533 *q = '.';
534 q++;
535 *q = '\0';
536 i = *p;
537 if(i <= 0)
538 break;
539 q = chcopy(q, symnam(i));
540 if(q > &sarr[ISIZE-30])
541 error("item too big");
544 /* an item calling for a reduction */
545 i = *pp;
546 if(i < 0 ) {
547 q = chcopy(q, " (");
548 sprint(q, "%d)", -i);
550 return sarr;
553 /*
554 * return a pointer to the name of symbol i
555 */
556 char*
557 symnam(int i)
559 char* cp;
561 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
562 if(*cp == ' ')
563 cp++;
564 return cp;
567 /*
568 * output the summary on y.output
569 */
570 void
571 summary(void)
574 if(foutput != 0) {
575 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
576 ntokens, NTERMS, nnonter, NNONTERM);
577 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
578 nprod, NPROD, nstate, NSTATES);
579 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
580 zzsrconf, zzrrconf);
581 Bprint(foutput, "%d/%d working sets used\n",
582 (int)(zzcwp-wsets), WSETSIZE);
583 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
584 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
585 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
586 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
587 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
588 Bprint(foutput, "%d goto entries\n", zzgoent);
589 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
591 if(zzsrconf != 0 || zzrrconf != 0) {
592 print("\nconflicts: ");
593 if(zzsrconf)
594 print("%d shift/reduce", zzsrconf);
595 if(zzsrconf && zzrrconf)
596 print(", ");
597 if(zzrrconf)
598 print("%d reduce/reduce", zzrrconf);
599 print("\n");
601 if(ftemp != 0) {
602 Bterm(ftemp);
603 ftemp = 0;
605 if(fdefine != 0) {
606 Bterm(fdefine);
607 fdefine = 0;
611 /*
612 * write out error comment -- NEEDS WORK
613 */
614 void
615 error(char *s, ...)
617 va_list arg;
619 nerrors++;
620 fprint(2, "\n fatal error:");
621 va_start(arg, s);
622 vfprint(2, s, arg);
623 va_end(arg);
624 fprint(2, ", %s:%d\n", infile, lineno);
625 if(!fatfl)
626 return;
627 summary();
628 cleantmp();
629 exits("error");
632 /*
633 * set elements 0 through n-1 to c
634 */
635 void
636 aryfil(int *v, int n, int c)
638 int i;
640 for(i=0; i<n; i++)
641 v[i] = c;
644 /*
645 * set a to the union of a and b
646 * return 1 if b is not a subset of a, 0 otherwise
647 */
648 int
649 setunion(int *a, int *b)
651 int i, x, sub;
653 sub = 0;
654 SETLOOP(i) {
655 x = *a;
656 *a |= *b;
657 if(*a != x)
658 sub = 1;
659 a++;
660 b++;
662 return sub;
665 void
666 prlook(Lkset* p)
668 int j, *pp;
670 pp = p->lset;
671 if(pp == 0)
672 Bprint(foutput, "\tNULL");
673 else {
674 Bprint(foutput, " { ");
675 TLOOP(j)
676 if(BIT(pp,j))
677 Bprint(foutput, "%s ", symnam(j));
678 Bprint(foutput, "}");
682 /*
683 * compute an array with the beginnings of productions yielding given nonterminals
684 * The array pres points to these lists
685 * the array pyield has the lists: the total size is only NPROD+1
686 */
687 void
688 cpres(void)
690 int c, j, i, **pmem;
691 static int *pyield[NPROD];
693 pmem = pyield;
694 NTLOOP(i) {
695 c = i+NTBASE;
696 pres[i] = pmem;
697 fatfl = 0; /* make undefined symbols nonfatal */
698 PLOOP(0, j)
699 if(*prdptr[j] == c)
700 *pmem++ = prdptr[j]+1;
701 if(pres[i] == pmem)
702 error("nonterminal %s not defined!", nontrst[i].name);
704 pres[i] = pmem;
705 fatfl = 1;
706 if(nerrors) {
707 summary();
708 cleantmp();
709 exits("error");
711 if(pmem != &pyield[nprod])
712 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
715 /*
716 * compute an array with the first of nonterminals
717 */
718 void
719 cpfir(void)
721 int *p, **s, i, **t, ch, changes;
723 zzcwp = &wsets[nnonter];
724 NTLOOP(i) {
725 aryfil(wsets[i].ws.lset, tbitset, 0);
726 t = pres[i+1];
727 /* initially fill the sets */
728 for(s=pres[i]; s<t; ++s)
729 for(p = *s; (ch = *p) > 0; ++p) {
730 if(ch < NTBASE) {
731 SETBIT(wsets[i].ws.lset, ch);
732 break;
734 if(!pempty[ch-NTBASE])
735 break;
739 /* now, reflect transitivity */
740 changes = 1;
741 while(changes) {
742 changes = 0;
743 NTLOOP(i) {
744 t = pres[i+1];
745 for(s = pres[i]; s < t; ++s)
746 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
747 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
748 if(!pempty[ch])
749 break;
754 NTLOOP(i)
755 pfirst[i] = flset(&wsets[i].ws);
756 if(!indebug)
757 return;
758 if(foutput != 0)
759 NTLOOP(i) {
760 Bprint(foutput, "\n%s: ", nontrst[i].name);
761 prlook(pfirst[i]);
762 Bprint(foutput, " %d\n", pempty[i]);
766 /*
767 * sorts last state,and sees if it equals earlier ones. returns state number
768 */
769 int
770 state(int c)
772 Item *p1, *p2, *k, *l, *q1, *q2;
773 int size1, size2, i;
775 p1 = pstate[nstate];
776 p2 = pstate[nstate+1];
777 if(p1 == p2)
778 return 0; /* null state */
779 /* sort the items */
780 for(k = p2-1; k > p1; k--) /* make k the biggest */
781 for(l = k-1; l >= p1; --l)
782 if(l->pitem > k->pitem) {
783 int *s;
784 Lkset *ss;
786 s = k->pitem;
787 k->pitem = l->pitem;
788 l->pitem = s;
789 ss = k->look;
790 k->look = l->look;
791 l->look = ss;
793 size1 = p2 - p1; /* size of state */
795 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
796 /* get ith state */
797 q1 = pstate[i];
798 q2 = pstate[i+1];
799 size2 = q2 - q1;
800 if(size1 != size2)
801 continue;
802 k = p1;
803 for(l = q1; l < q2; l++) {
804 if(l->pitem != k->pitem)
805 break;
806 k++;
808 if(l != q2)
809 continue;
810 /* found it */
811 pstate[nstate+1] = pstate[nstate]; /* delete last state */
812 /* fix up lookaheads */
813 if(nolook)
814 return i;
815 for(l = q1, k = p1; l < q2; ++l, ++k ) {
816 int s;
818 SETLOOP(s)
819 clset.lset[s] = l->look->lset[s];
820 if(setunion(clset.lset, k->look->lset)) {
821 tystate[i] = MUSTDO;
822 /* register the new set */
823 l->look = flset( &clset );
826 return i;
828 /* state is new */
829 if(nolook)
830 error("yacc state/nolook error");
831 pstate[nstate+2] = p2;
832 if(nstate+1 >= NSTATES)
833 error("too many states");
834 if(c >= NTBASE) {
835 mstates[nstate] = ntstates[c-NTBASE];
836 ntstates[c-NTBASE] = nstate;
837 } else {
838 mstates[nstate] = tstates[c];
839 tstates[c] = nstate;
841 tystate[nstate] = MUSTDO;
842 return nstate++;
845 void
846 putitem(int *ptr, Lkset *lptr)
848 Item *j;
850 if(pidebug && foutput != 0)
851 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
852 j = pstate[nstate+1];
853 j->pitem = ptr;
854 if(!nolook)
855 j->look = flset(lptr);
856 pstate[nstate+1] = ++j;
857 if((int*)j > zzmemsz) {
858 zzmemsz = (int*)j;
859 if(zzmemsz >= &mem0[MEMSIZE])
860 error("out of state space");
864 /*
865 * mark nonterminals which derive the empty string
866 * also, look for nonterminals which don't derive any token strings
867 */
868 void
869 cempty(void)
872 int i, *p;
874 /* first, use the array pempty to detect productions that can never be reduced */
875 /* set pempty to WHONOWS */
876 aryfil(pempty, nnonter+1, WHOKNOWS);
878 /* now, look at productions, marking nonterminals which derive something */
879 more:
880 PLOOP(0, i) {
881 if(pempty[*prdptr[i] - NTBASE])
882 continue;
883 for(p = prdptr[i]+1; *p >= 0; ++p)
884 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
885 break;
886 /* production can be derived */
887 if(*p < 0) {
888 pempty[*prdptr[i]-NTBASE] = OK;
889 goto more;
893 /* now, look at the nonterminals, to see if they are all OK */
894 NTLOOP(i) {
895 /* the added production rises or falls as the start symbol ... */
896 if(i == 0)
897 continue;
898 if(pempty[i] != OK) {
899 fatfl = 0;
900 error("nonterminal %s never derives any token string", nontrst[i].name);
904 if(nerrors) {
905 summary();
906 cleantmp();
907 exits("error");
910 /* now, compute the pempty array, to see which nonterminals derive the empty string */
911 /* set pempty to WHOKNOWS */
912 aryfil( pempty, nnonter+1, WHOKNOWS);
914 /* loop as long as we keep finding empty nonterminals */
916 again:
917 PLOOP(1, i) {
918 /* not known to be empty */
919 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
920 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
922 /* we have a nontrivially empty nonterminal */
923 if(*p < 0) {
924 pempty[*prdptr[i]-NTBASE] = EMPTY;
925 /* got one ... try for another */
926 goto again;
932 /*
933 * generate the states
934 */
935 void
936 stagen(void)
939 int c, i, j, more;
940 Wset *p, *q;
942 /* initialize */
943 nstate = 0;
945 /* THIS IS FUNNY from the standpoint of portability
946 * it represents the magic moment when the mem0 array, which has
947 * been holding the productions, starts to hold item pointers, of a
948 * different type...
949 * someday, alloc should be used to allocate all this stuff... for now, we
950 * accept that if pointers don't fit in integers, there is a problem...
951 */
953 pstate[0] = pstate[1] = (Item*)mem;
954 aryfil(clset.lset, tbitset, 0);
955 putitem(prdptr[0]+1, &clset);
956 tystate[0] = MUSTDO;
957 nstate = 1;
958 pstate[2] = pstate[1];
960 aryfil(amem, ACTSIZE, 0);
962 /* now, the main state generation loop */
963 for(more=1; more;) {
964 more = 0;
965 SLOOP(i) {
966 if(tystate[i] != MUSTDO)
967 continue;
968 tystate[i] = DONE;
969 aryfil(temp1, nnonter+1, 0);
970 /* take state i, close it, and do gotos */
971 closure(i);
972 /* generate goto's */
973 WSLOOP(wsets, p) {
974 if(p->flag)
975 continue;
976 p->flag = 1;
977 c = *(p->pitem);
978 if(c <= 1) {
979 if(pstate[i+1]-pstate[i] <= p-wsets)
980 tystate[i] = MUSTLOOKAHEAD;
981 continue;
983 /* do a goto on c */
984 WSLOOP(p, q)
985 /* this item contributes to the goto */
986 if(c == *(q->pitem)) {
987 putitem(q->pitem+1, &q->ws);
988 q->flag = 1;
990 if(c < NTBASE)
991 state(c); /* register new state */
992 else
993 temp1[c-NTBASE] = state(c);
995 if(gsdebug && foutput != 0) {
996 Bprint(foutput, "%d: ", i);
997 NTLOOP(j)
998 if(temp1[j])
999 Bprint(foutput, "%s %d, ",
1000 nontrst[j].name, temp1[j]);
1001 Bprint(foutput, "\n");
1003 indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1004 /* do some more */
1005 more = 1;
1011 * generate the closure of state i
1013 void
1014 closure(int i)
1017 Wset *u, *v;
1018 Item *p, *q;
1019 int c, ch, work, k, *pi, **s, **t;
1021 zzclose++;
1023 /* first, copy kernel of state i to wsets */
1024 cwp = wsets;
1025 ITMLOOP(i, p, q) {
1026 cwp->pitem = p->pitem;
1027 cwp->flag = 1; /* this item must get closed */
1028 SETLOOP(k)
1029 cwp->ws.lset[k] = p->look->lset[k];
1030 WSBUMP(cwp);
1033 /* now, go through the loop, closing each item */
1034 work = 1;
1035 while(work) {
1036 work = 0;
1037 WSLOOP(wsets, u) {
1038 if(u->flag == 0)
1039 continue;
1040 /* dot is before c */
1041 c = *(u->pitem);
1042 if(c < NTBASE) {
1043 u->flag = 0;
1044 /* only interesting case is where . is before nonterminal */
1045 continue;
1048 /* compute the lookahead */
1049 aryfil(clset.lset, tbitset, 0);
1051 /* find items involving c */
1052 WSLOOP(u, v)
1053 if(v->flag == 1 && *(pi=v->pitem) == c) {
1054 v->flag = 0;
1055 if(nolook)
1056 continue;
1057 while((ch = *++pi) > 0) {
1058 /* terminal symbol */
1059 if(ch < NTBASE) {
1060 SETBIT(clset.lset, ch);
1061 break;
1063 /* nonterminal symbol */
1064 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1065 if(!pempty[ch-NTBASE])
1066 break;
1068 if(ch <= 0)
1069 setunion(clset.lset, v->ws.lset);
1073 * now loop over productions derived from c
1074 * c is now nonterminal number
1076 c -= NTBASE;
1077 t = pres[c+1];
1078 for(s = pres[c]; s < t; ++s) {
1080 * put these items into the closure
1081 * is the item there
1083 WSLOOP(wsets, v)
1084 /* yes, it is there */
1085 if(v->pitem == *s) {
1086 if(nolook)
1087 goto nexts;
1088 if(setunion(v->ws.lset, clset.lset))
1089 v->flag = work = 1;
1090 goto nexts;
1093 /* not there; make a new entry */
1094 if(cwp-wsets+1 >= WSETSIZE)
1095 error( "working set overflow");
1096 cwp->pitem = *s;
1097 cwp->flag = 1;
1098 if(!nolook) {
1099 work = 1;
1100 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1102 WSBUMP(cwp);
1104 nexts:;
1109 /* have computed closure; flags are reset; return */
1110 if(cwp > zzcwp)
1111 zzcwp = cwp;
1112 if(cldebug && foutput != 0) {
1113 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1114 WSLOOP(wsets, u) {
1115 if(u->flag)
1116 Bprint(foutput, "flag set!\n");
1117 u->flag = 0;
1118 Bprint(foutput, "\t%s", writem(u->pitem));
1119 prlook(&u->ws);
1120 Bprint(foutput, "\n");
1126 * decide if the lookahead set pointed to by p is known
1127 * return pointer to a perminent location for the set
1129 Lkset*
1130 flset(Lkset *p)
1132 Lkset *q;
1133 int *u, *v, *w, j;
1135 for(q = &lkst[nlset]; q-- > lkst;) {
1136 u = p->lset;
1137 v = q->lset;
1138 w = &v[tbitset];
1139 while(v < w)
1140 if(*u++ != *v++)
1141 goto more;
1142 /* we have matched */
1143 return q;
1144 more:;
1146 /* add a new one */
1147 q = &lkst[nlset++];
1148 if(nlset >= LSETSIZE)
1149 error("too many lookahead sets");
1150 SETLOOP(j)
1151 q->lset[j] = p->lset[j];
1152 return q;
1155 void
1156 cleantmp(void)
1158 ZAPFILE(actname);
1159 ZAPFILE(tempname);
1162 void
1163 intr(void)
1165 cleantmp();
1166 exits("interrupted");
1169 void
1170 setup(int argc, char *argv[])
1172 long c, t;
1173 int i, j, fd, lev, ty, ytab, *p;
1174 int vflag, dflag, stem;
1175 char actnm[8], *stemc, *s, dirbuf[128];
1177 ytab = 0;
1178 vflag = 0;
1179 dflag = 0;
1180 stem = 0;
1181 stemc = "y";
1182 foutput = 0;
1183 fdefine = 0;
1184 fdebug = 0;
1185 ARGBEGIN{
1186 case 'v':
1187 case 'V':
1188 vflag++;
1189 break;
1190 case 'D':
1191 yydebug = ARGF();
1192 break;
1193 case 'd':
1194 dflag++;
1195 break;
1196 case 'o':
1197 ytab++;
1198 ytabc = ARGF();
1199 break;
1200 case 's':
1201 stem++;
1202 stemc = ARGF();
1203 break;
1204 case 'S':
1205 parser = PARSERS;
1206 break;
1207 default:
1208 error("illegal option: %c", ARGC());
1209 }ARGEND
1210 openup(stemc, dflag, vflag, ytab, ytabc);
1212 if((fd = mkstemp(ttempname)) >= 0){
1213 tempname = ttempname;
1214 ftemp = Bfdopen(fd, OWRITE);
1216 if((fd = mkstemp(tactname)) >= 0){
1217 actname = tactname;
1218 faction = Bfdopen(fd, OWRITE);
1220 if(ftemp == 0 || faction == 0)
1221 error("cannot open temp file");
1222 if(argc < 1)
1223 error("no input file");
1224 infile = argv[0];
1225 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1226 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1227 s = malloc(i);
1228 if(s != nil){
1229 snprint(s, i, "%s/%s", dirbuf, infile);
1230 cleanname(s);
1231 infile = s;
1234 finput = Bopen(infile, OREAD);
1235 if(finput == 0)
1236 error("cannot open '%s'", argv[0]);
1237 cnamp = cnames;
1239 defin(0, "$end");
1240 extval = PRIVATE; /* tokens start in unicode 'private use' */
1241 defin(0, "error");
1242 defin(1, "$accept");
1243 defin(0, "$unk");
1244 mem = mem0;
1245 i = 0;
1247 for(t = gettok(); t != MARK && t != ENDFILE;)
1248 switch(t) {
1249 case ';':
1250 t = gettok();
1251 break;
1253 case START:
1254 if(gettok() != IDENTIFIER)
1255 error("bad %%start construction");
1256 start = chfind(1, tokname);
1257 t = gettok();
1258 continue;
1260 case TYPEDEF:
1261 if(gettok() != TYPENAME)
1262 error("bad syntax in %%type");
1263 ty = numbval;
1264 for(;;) {
1265 t = gettok();
1266 switch(t) {
1267 case IDENTIFIER:
1268 if((t=chfind(1, tokname)) < NTBASE) {
1269 j = TYPE(toklev[t]);
1270 if(j != 0 && j != ty)
1271 error("type redeclaration of token %s",
1272 tokset[t].name);
1273 else
1274 SETTYPE(toklev[t], ty);
1275 } else {
1276 j = nontrst[t-NTBASE].value;
1277 if(j != 0 && j != ty)
1278 error("type redeclaration of nonterminal %s",
1279 nontrst[t-NTBASE].name );
1280 else
1281 nontrst[t-NTBASE].value = ty;
1283 case ',':
1284 continue;
1285 case ';':
1286 t = gettok();
1287 default:
1288 break;
1290 break;
1292 continue;
1294 case UNION:
1295 /* copy the union declaration to the output */
1296 cpyunion();
1297 t = gettok();
1298 continue;
1300 case LEFT:
1301 case BINARY:
1302 case RIGHT:
1303 i++;
1305 case TERM:
1306 /* nonzero means new prec. and assoc. */
1307 lev = t-TERM;
1308 ty = 0;
1310 /* get identifiers so defined */
1311 t = gettok();
1313 /* there is a type defined */
1314 if(t == TYPENAME) {
1315 ty = numbval;
1316 t = gettok();
1318 for(;;) {
1319 switch(t) {
1320 case ',':
1321 t = gettok();
1322 continue;
1324 case ';':
1325 break;
1327 case IDENTIFIER:
1328 j = chfind(0, tokname);
1329 if(j >= NTBASE)
1330 error("%s defined earlier as nonterminal", tokname);
1331 if(lev) {
1332 if(ASSOC(toklev[j]))
1333 error("redeclaration of precedence of %s", tokname);
1334 SETASC(toklev[j], lev);
1335 SETPLEV(toklev[j], i);
1337 if(ty) {
1338 if(TYPE(toklev[j]))
1339 error("redeclaration of type of %s", tokname);
1340 SETTYPE(toklev[j],ty);
1342 t = gettok();
1343 if(t == NUMBER) {
1344 tokset[j].value = numbval;
1345 if(j < ndefout && j > 3)
1346 error("please define type number of %s earlier",
1347 tokset[j].name);
1348 t = gettok();
1350 continue;
1352 break;
1354 continue;
1356 case LCURLY:
1357 defout(0);
1358 cpycode();
1359 t = gettok();
1360 continue;
1362 default:
1363 error("syntax error");
1365 if(t == ENDFILE)
1366 error("unexpected EOF before %%");
1368 /* t is MARK */
1369 Bprint(ftable, "extern int yyerrflag;\n");
1370 Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1371 Bprint(ftable, "#define YYMAXDEPTH 150\n");
1372 Bprint(ftable, "#endif\n" );
1373 if(!ntypes) {
1374 Bprint(ftable, "#ifndef YYSTYPE\n");
1375 Bprint(ftable, "#define YYSTYPE int\n");
1376 Bprint(ftable, "#endif\n");
1378 Bprint(ftable, "YYSTYPE yylval;\n");
1379 Bprint(ftable, "YYSTYPE yyval;\n");
1381 prdptr[0] = mem;
1383 /* added production */
1384 *mem++ = NTBASE;
1386 /* if start is 0, we will overwrite with the lhs of the first rule */
1387 *mem++ = start;
1388 *mem++ = 1;
1389 *mem++ = 0;
1390 prdptr[1] = mem;
1391 while((t=gettok()) == LCURLY)
1392 cpycode();
1393 if(t != IDENTCOLON)
1394 error("bad syntax on first rule");
1396 if(!start)
1397 prdptr[0][1] = chfind(1, tokname);
1399 /* read rules */
1400 while(t != MARK && t != ENDFILE) {
1401 /* process a rule */
1402 rlines[nprod] = lineno;
1403 if(t == '|')
1404 *mem++ = *prdptr[nprod-1];
1405 else
1406 if(t == IDENTCOLON) {
1407 *mem = chfind(1, tokname);
1408 if(*mem < NTBASE)
1409 error("token illegal on LHS of grammar rule");
1410 mem++;
1411 } else
1412 error("illegal rule: missing semicolon or | ?");
1413 /* read rule body */
1414 t = gettok();
1416 more_rule:
1417 while(t == IDENTIFIER) {
1418 *mem = chfind(1, tokname);
1419 if(*mem < NTBASE)
1420 levprd[nprod] = toklev[*mem];
1421 mem++;
1422 t = gettok();
1424 if(t == PREC) {
1425 if(gettok() != IDENTIFIER)
1426 error("illegal %%prec syntax");
1427 j = chfind(2, tokname);
1428 if(j >= NTBASE)
1429 error("nonterminal %s illegal after %%prec",
1430 nontrst[j-NTBASE].name);
1431 levprd[nprod] = toklev[j];
1432 t = gettok();
1434 if(t == '=') {
1435 levprd[nprod] |= ACTFLAG;
1436 Bprint(faction, "\ncase %d:", nprod);
1437 cpyact(mem-prdptr[nprod]-1);
1438 Bprint(faction, " break;");
1439 if((t=gettok()) == IDENTIFIER) {
1441 /* action within rule... */
1442 sprint(actnm, "$$%d", nprod);
1444 /* make it a nonterminal */
1445 j = chfind(1, actnm);
1448 * the current rule will become rule number nprod+1
1449 * move the contents down, and make room for the null
1451 for(p = mem; p >= prdptr[nprod]; --p)
1452 p[2] = *p;
1453 mem += 2;
1455 /* enter null production for action */
1456 p = prdptr[nprod];
1457 *p++ = j;
1458 *p++ = -nprod;
1460 /* update the production information */
1461 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1462 levprd[nprod] = ACTFLAG;
1463 if(++nprod >= NPROD)
1464 error("more than %d rules", NPROD);
1465 prdptr[nprod] = p;
1467 /* make the action appear in the original rule */
1468 *mem++ = j;
1470 /* get some more of the rule */
1471 goto more_rule;
1475 while(t == ';')
1476 t = gettok();
1477 *mem++ = -nprod;
1479 /* check that default action is reasonable */
1480 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1482 /* no explicit action, LHS has value */
1483 int tempty;
1485 tempty = prdptr[nprod][1];
1486 if(tempty < 0)
1487 error("must return a value, since LHS has a type");
1488 else
1489 if(tempty >= NTBASE)
1490 tempty = nontrst[tempty-NTBASE].value;
1491 else
1492 tempty = TYPE(toklev[tempty]);
1493 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1494 error("default action causes potential type clash");
1496 nprod++;
1497 if(nprod >= NPROD)
1498 error("more than %d rules", NPROD);
1499 prdptr[nprod] = mem;
1500 levprd[nprod] = 0;
1503 /* end of all rules */
1504 defout(1);
1506 finact();
1507 if(t == MARK) {
1508 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1509 while((c=Bgetrune(finput)) != Beof)
1510 Bputrune(ftable, c);
1512 Bterm(finput);
1516 * finish action routine
1518 void
1519 finact(void)
1522 Bterm(faction);
1523 Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1524 Bprint(ftable, "#define YYERRCODE %d\n", 2);
1528 * define s to be a terminal if t=0
1529 * or a nonterminal if t=1
1531 int
1532 defin(int nt, char *s)
1534 int val;
1535 Rune rune;
1537 val = 0;
1538 if(nt) {
1539 nnonter++;
1540 if(nnonter >= NNONTERM)
1541 error("too many nonterminals, limit %d",NNONTERM);
1542 nontrst[nnonter].name = cstash(s);
1543 return NTBASE + nnonter;
1546 /* must be a token */
1547 ntokens++;
1548 if(ntokens >= NTERMS)
1549 error("too many terminals, limit %d", NTERMS);
1550 tokset[ntokens].name = cstash(s);
1552 /* establish value for token */
1553 /* single character literal */
1554 if(s[0] == ' ') {
1555 val = chartorune(&rune, &s[1]);
1556 if(s[val+1] == 0) {
1557 val = rune;
1558 goto out;
1562 /* escape sequence */
1563 if(s[0] == ' ' && s[1] == '\\') {
1564 if(s[3] == 0) {
1565 /* single character escape sequence */
1566 switch(s[2]) {
1567 case 'n': val = '\n'; break;
1568 case 'r': val = '\r'; break;
1569 case 'b': val = '\b'; break;
1570 case 't': val = '\t'; break;
1571 case 'f': val = '\f'; break;
1572 case '\'': val = '\''; break;
1573 case '"': val = '"'; break;
1574 case '\\': val = '\\'; break;
1575 default: error("invalid escape");
1577 goto out;
1580 /* \nnn sequence */
1581 if(s[2] >= '0' && s[2] <= '7') {
1582 if(s[3] < '0' ||
1583 s[3] > '7' ||
1584 s[4] < '0' ||
1585 s[4] > '7' ||
1586 s[5] != 0)
1587 error("illegal \\nnn construction");
1588 val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1589 if(val == 0)
1590 error("'\\000' is illegal");
1591 goto out;
1593 error("unknown escape");
1595 val = extval++;
1597 out:
1598 tokset[ntokens].value = val;
1599 toklev[ntokens] = 0;
1600 return ntokens;
1604 * write out the defines (at the end of the declaration section)
1606 void
1607 defout(int last)
1609 int i, c;
1610 char sar[NAMESIZE+10];
1612 for(i=ndefout; i<=ntokens; i++) {
1613 /* non-literals */
1614 c = tokset[i].name[0];
1615 if(c != ' ' && c != '$') {
1616 Bprint(ftable, "#define %s %d\n",
1617 tokset[i].name, tokset[i].value);
1618 if(fdefine)
1619 Bprint(fdefine, "#define\t%s\t%d\n",
1620 tokset[i].name, tokset[i].value);
1623 ndefout = ntokens+1;
1624 if(last && fdebug) {
1625 Bprint(fdebug, "char* yytoknames[] =\n{\n");
1626 TLOOP(i) {
1627 if(tokset[i].name) {
1628 chcopy(sar, tokset[i].name);
1629 Bprint(fdebug, "\t\"%s\",\n", sar);
1630 continue;
1632 Bprint(fdebug, "\t0,\n");
1634 Bprint(fdebug, "};\n");
1638 char*
1639 cstash(char *s)
1641 char *temp;
1643 temp = cnamp;
1644 do {
1645 if(cnamp >= &cnames[cnamsz])
1646 error("too many characters in id's and literals");
1647 else
1648 *cnamp++ = *s;
1649 } while(*s++);
1650 return temp;
1653 long
1654 gettok(void)
1656 long c;
1657 Rune rune;
1658 int i, base, match, reserve;
1659 static int peekline;
1661 begin:
1662 reserve = 0;
1663 lineno += peekline;
1664 peekline = 0;
1665 c = Bgetrune(finput);
1666 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1667 if(c == '\n')
1668 lineno++;
1669 c = Bgetrune(finput);
1672 /* skip comment */
1673 if(c == '/') {
1674 lineno += skipcom();
1675 goto begin;
1677 switch(c) {
1678 case Beof:
1679 return ENDFILE;
1681 case '{':
1682 Bungetrune(finput);
1683 return '=';
1685 case '<':
1686 /* get, and look up, a type name (union member name) */
1687 i = 0;
1688 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1689 rune = c;
1690 c = runetochar(&tokname[i], &rune);
1691 if(i < NAMESIZE)
1692 i += c;
1694 if(c != '>')
1695 error("unterminated < ... > clause");
1696 tokname[i] = 0;
1697 for(i=1; i<=ntypes; i++)
1698 if(!strcmp(typeset[i], tokname)) {
1699 numbval = i;
1700 return TYPENAME;
1702 ntypes++;
1703 numbval = ntypes;
1704 typeset[numbval] = cstash(tokname);
1705 return TYPENAME;
1707 case '"':
1708 case '\'':
1709 match = c;
1710 tokname[0] = ' ';
1711 i = 1;
1712 for(;;) {
1713 c = Bgetrune(finput);
1714 if(c == '\n' || c <= 0)
1715 error("illegal or missing ' or \"" );
1716 if(c == '\\') {
1717 tokname[i] = '\\';
1718 if(i < NAMESIZE)
1719 i++;
1720 c = Bgetrune(finput);
1721 } else
1722 if(c == match)
1723 break;
1724 rune = c;
1725 c = runetochar(&tokname[i], &rune);
1726 if(i < NAMESIZE)
1727 i += c;
1729 break;
1731 case '%':
1732 case '\\':
1733 switch(c = Bgetrune(finput)) {
1734 case '0': return TERM;
1735 case '<': return LEFT;
1736 case '2': return BINARY;
1737 case '>': return RIGHT;
1738 case '%':
1739 case '\\': return MARK;
1740 case '=': return PREC;
1741 case '{': return LCURLY;
1742 default: reserve = 1;
1745 default:
1746 /* number */
1747 if(isdigit(c)) {
1748 numbval = c-'0';
1749 base = (c=='0')? 8: 10;
1750 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1751 numbval = numbval*base + (c-'0');
1752 Bungetrune(finput);
1753 return NUMBER;
1755 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
1756 i = 0;
1757 while(islower(c) || isupper(c) || isdigit(c) ||
1758 c == '-' || c=='_' || c=='.' || c=='$') {
1759 if(reserve && isupper(c))
1760 c += 'a'-'A';
1761 rune = c;
1762 c = runetochar(&tokname[i], &rune);
1763 if(i < NAMESIZE)
1764 i += c;
1765 c = Bgetrune(finput);
1767 } else
1768 return c;
1769 Bungetrune(finput);
1771 tokname[i] = 0;
1773 /* find a reserved word */
1774 if(reserve) {
1775 for(c=0; resrv[c].name; c++)
1776 if(strcmp(tokname, resrv[c].name) == 0)
1777 return resrv[c].value;
1778 error("invalid escape, or illegal reserved word: %s", tokname);
1781 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1782 c = Bgetrune(finput);
1783 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1784 if(c == '\n')
1785 peekline++;
1786 /* look for comments */
1787 if(c == '/')
1788 peekline += skipcom();
1789 c = Bgetrune(finput);
1791 if(c == ':')
1792 return IDENTCOLON;
1793 Bungetrune(finput);
1794 return IDENTIFIER;
1798 * determine the type of a symbol
1800 int
1801 fdtype(int t)
1803 int v;
1805 if(t >= NTBASE)
1806 v = nontrst[t-NTBASE].value;
1807 else
1808 v = TYPE(toklev[t]);
1809 if(v <= 0)
1810 error("must specify type for %s", (t>=NTBASE)?
1811 nontrst[t-NTBASE].name: tokset[t].name);
1812 return v;
1815 int
1816 chfind(int t, char *s)
1818 int i;
1820 if(s[0] == ' ')
1821 t = 0;
1822 TLOOP(i)
1823 if(!strcmp(s, tokset[i].name))
1824 return i;
1825 NTLOOP(i)
1826 if(!strcmp(s, nontrst[i].name))
1827 return NTBASE+i;
1829 /* cannot find name */
1830 if(t > 1)
1831 error("%s should have been defined earlier", s);
1832 return defin(t, s);
1836 * copy the union declaration to the output, and the define file if present
1838 void
1839 cpyunion(void)
1841 long c;
1842 int level;
1844 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1845 Bprint(ftable, "typedef union ");
1846 if(fdefine != 0)
1847 Bprint(fdefine, "\ntypedef union ");
1849 level = 0;
1850 for(;;) {
1851 if((c=Bgetrune(finput)) == Beof)
1852 error("EOF encountered while processing %%union");
1853 Bputrune(ftable, c);
1854 if(fdefine != 0)
1855 Bputrune(fdefine, c);
1856 switch(c) {
1857 case '\n':
1858 lineno++;
1859 break;
1860 case '{':
1861 level++;
1862 break;
1863 case '}':
1864 level--;
1866 /* we are finished copying */
1867 if(level == 0) {
1868 Bprint(ftable, " YYSTYPE;\n");
1869 if(fdefine != 0)
1870 Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1871 return;
1878 * copies code between \{ and \}
1880 void
1881 cpycode(void)
1884 long c;
1886 c = Bgetrune(finput);
1887 if(c == '\n') {
1888 c = Bgetrune(finput);
1889 lineno++;
1891 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1892 while(c != Beof) {
1893 if(c == '\\') {
1894 if((c=Bgetrune(finput)) == '}')
1895 return;
1896 Bputc(ftable, '\\');
1898 if(c == '%') {
1899 if((c=Bgetrune(finput)) == '}')
1900 return;
1901 Bputc(ftable, '%');
1903 Bputrune(ftable, c);
1904 if(c == '\n')
1905 lineno++;
1906 c = Bgetrune(finput);
1908 error("eof before %%}");
1912 * skip over comments
1913 * skipcom is called after reading a '/'
1915 int
1916 skipcom(void)
1918 long c;
1919 int i;
1921 /* i is the number of lines skipped */
1922 i = 0;
1923 if(Bgetrune(finput) != '*')
1924 error("illegal comment");
1925 c = Bgetrune(finput);
1926 while(c != Beof) {
1927 while(c == '*')
1928 if((c=Bgetrune(finput)) == '/')
1929 return i;
1930 if(c == '\n')
1931 i++;
1932 c = Bgetrune(finput);
1934 error("EOF inside comment");
1935 return 0;
1939 * copy C action to the next ; or closing }
1941 void
1942 cpyact(int offset)
1944 long c;
1945 int brac, match, j, s, fnd, tok;
1947 Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1948 brac = 0;
1950 loop:
1951 c = Bgetrune(finput);
1952 swt:
1953 switch(c) {
1954 case ';':
1955 if(brac == 0) {
1956 Bputrune(faction, c);
1957 return;
1959 goto lcopy;
1961 case '{':
1962 brac++;
1963 goto lcopy;
1965 case '$':
1966 s = 1;
1967 tok = -1;
1968 c = Bgetrune(finput);
1970 /* type description */
1971 if(c == '<') {
1972 Bungetrune(finput);
1973 if(gettok() != TYPENAME)
1974 error("bad syntax on $<ident> clause");
1975 tok = numbval;
1976 c = Bgetrune(finput);
1978 if(c == '$') {
1979 Bprint(faction, "yyval");
1981 /* put out the proper tag... */
1982 if(ntypes) {
1983 if(tok < 0)
1984 tok = fdtype(*prdptr[nprod]);
1985 Bprint(faction, ".%s", typeset[tok]);
1987 goto loop;
1989 if(c == '-') {
1990 s = -s;
1991 c = Bgetrune(finput);
1993 if(isdigit(c)) {
1994 j = 0;
1995 while(isdigit(c)) {
1996 j = j*10 + (c-'0');
1997 c = Bgetrune(finput);
1999 Bungetrune(finput);
2000 j = j*s - offset;
2001 if(j > 0)
2002 error("Illegal use of $%d", j+offset);
2004 dollar:
2005 Bprint(faction, "yypt[-%d].yyv", -j);
2007 /* put out the proper tag */
2008 if(ntypes) {
2009 if(j+offset <= 0 && tok < 0)
2010 error("must specify type of $%d", j+offset);
2011 if(tok < 0)
2012 tok = fdtype(prdptr[nprod][j+offset]);
2013 Bprint(faction, ".%s", typeset[tok]);
2015 goto loop;
2017 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2018 int tok; /* tok used oustide for type info */
2020 /* look for $name */
2021 Bungetrune(finput);
2022 if(gettok() != IDENTIFIER)
2023 error("$ must be followed by an identifier");
2024 tok = chfind(2, tokname);
2025 if((c = Bgetrune(finput)) != '#') {
2026 Bungetrune(finput);
2027 fnd = -1;
2028 } else
2029 if(gettok() != NUMBER) {
2030 error("# must be followed by number");
2031 fnd = -1;
2032 } else
2033 fnd = numbval;
2034 for(j=1; j<=offset; ++j)
2035 if(tok == prdptr[nprod][j]) {
2036 if(--fnd <= 0) {
2037 j -= offset;
2038 goto dollar;
2041 error("$name or $name#number not found");
2043 Bputc(faction, '$');
2044 if(s < 0 )
2045 Bputc(faction, '-');
2046 goto swt;
2048 case '}':
2049 brac--;
2050 if(brac)
2051 goto lcopy;
2052 Bputrune(faction, c);
2053 return;
2055 case '/':
2056 /* look for comments */
2057 Bputrune(faction, c);
2058 c = Bgetrune(finput);
2059 if(c != '*')
2060 goto swt;
2062 /* it really is a comment */
2063 Bputrune(faction, c);
2064 c = Bgetrune(finput);
2065 while(c >= 0) {
2066 while(c == '*') {
2067 Bputrune(faction, c);
2068 if((c=Bgetrune(finput)) == '/')
2069 goto lcopy;
2071 Bputrune(faction, c);
2072 if(c == '\n')
2073 lineno++;
2074 c = Bgetrune(finput);
2076 error("EOF inside comment");
2078 case '\'':
2079 /* character constant */
2080 match = '\'';
2081 goto string;
2083 case '"':
2084 /* character string */
2085 match = '"';
2087 string:
2088 Bputrune(faction, c);
2089 while(c = Bgetrune(finput)) {
2090 if(c == '\\') {
2091 Bputrune(faction, c);
2092 c = Bgetrune(finput);
2093 if(c == '\n')
2094 lineno++;
2095 } else
2096 if(c == match)
2097 goto lcopy;
2098 if(c == '\n')
2099 error("newline in string or char. const.");
2100 Bputrune(faction, c);
2102 error("EOF in string or character constant");
2104 case Beof:
2105 error("action does not terminate");
2107 case '\n':
2108 lineno++;
2109 goto lcopy;
2112 lcopy:
2113 Bputrune(faction, c);
2114 goto loop;
2117 void
2118 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2120 char buf[256];
2122 if(vflag) {
2123 sprint(buf, "%s.%s", stem, FILEU);
2124 foutput = Bopen(buf, OWRITE);
2125 if(foutput == 0)
2126 error("cannot open %s", buf);
2128 if(yydebug) {
2129 sprint(buf, "%s.%s", stem, FILEDEBUG);
2130 if((fdebug = Bopen(buf, OWRITE)) == 0)
2131 error("can't open %s", buf);
2133 if(dflag) {
2134 sprint(buf, "%s.%s", stem, FILED);
2135 fdefine = Bopen(buf, OWRITE);
2136 if(fdefine == 0)
2137 error("can't create %s", buf);
2139 if(ytab == 0)
2140 sprint(buf, "%s.%s", stem, OFILE);
2141 else
2142 strcpy(buf, ytabc);
2143 ftable = Bopen(buf, OWRITE);
2144 if(ftable == 0)
2145 error("cannot open table file %s", buf);
2149 * print the output for the states
2151 void
2152 output(void)
2154 int i, k, c;
2155 Wset *u, *v;
2157 Bprint(ftable, "short yyexca[] =\n{");
2158 if(fdebug)
2159 Bprint(fdebug, "char* yystates[] =\n{\n");
2161 /* output the stuff for state i */
2162 SLOOP(i) {
2163 nolook = tystate[i]!=MUSTLOOKAHEAD;
2164 closure(i);
2166 /* output actions */
2167 nolook = 1;
2168 aryfil(temp1, ntokens+nnonter+1, 0);
2169 WSLOOP(wsets, u) {
2170 c = *(u->pitem);
2171 if(c > 1 && c < NTBASE && temp1[c] == 0) {
2172 WSLOOP(u, v)
2173 if(c == *(v->pitem))
2174 putitem(v->pitem+1, (Lkset*)0);
2175 temp1[c] = state(c);
2176 } else
2177 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2178 temp1[c+ntokens] = amem[indgo[i]+c];
2180 if(i == 1)
2181 temp1[1] = ACCEPTCODE;
2183 /* now, we have the shifts; look at the reductions */
2184 lastred = 0;
2185 WSLOOP(wsets, u) {
2186 c = *u->pitem;
2188 /* reduction */
2189 if(c <= 0) {
2190 lastred = -c;
2191 TLOOP(k)
2192 if(BIT(u->ws.lset, k)) {
2193 if(temp1[k] == 0)
2194 temp1[k] = c;
2195 else
2196 if(temp1[k] < 0) { /* reduce/reduce conflict */
2197 if(foutput)
2198 Bprint(foutput,
2199 "\n%d: reduce/reduce conflict"
2200 " (red'ns %d and %d ) on %s",
2201 i, -temp1[k], lastred,
2202 symnam(k));
2203 if(-temp1[k] > lastred)
2204 temp1[k] = -lastred;
2205 zzrrconf++;
2206 } else
2207 /* potential shift/reduce conflict */
2208 precftn( lastred, k, i );
2212 wract(i);
2215 if(fdebug)
2216 Bprint(fdebug, "};\n");
2217 Bprint(ftable, "};\n");
2218 Bprint(ftable, "#define YYNPROD %d\n", nprod);
2219 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2220 if(yydebug)
2221 Bprint(ftable, "#define yydebug %s\n", yydebug);
2225 * pack state i from temp1 into amem
2227 int
2228 apack(int *p, int n)
2230 int *pp, *qq, *rr, off, *q, *r;
2232 /* we don't need to worry about checking because
2233 * we will only look at entries known to be there...
2234 * eliminate leading and trailing 0's
2237 q = p+n;
2238 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2240 /* no actions */
2241 if(pp > q)
2242 return 0;
2243 p = pp;
2245 /* now, find a place for the elements from p to q, inclusive */
2246 r = &amem[ACTSIZE-1];
2247 for(rr = amem; rr <= r; rr++, off++) {
2248 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2249 if(*pp != 0)
2250 if(*pp != *qq && *qq != 0)
2251 goto nextk;
2253 /* we have found an acceptable k */
2254 if(pkdebug && foutput != 0)
2255 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2256 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2257 if(*pp) {
2258 if(qq > r)
2259 error("action table overflow");
2260 if(qq > memp)
2261 memp = qq;
2262 *qq = *pp;
2264 if(pkdebug && foutput != 0)
2265 for(pp = amem; pp <= memp; pp += 10) {
2266 Bprint(foutput, "\t");
2267 for(qq = pp; qq <= pp+9; qq++)
2268 Bprint(foutput, "%d ", *qq);
2269 Bprint(foutput, "\n");
2271 return(off);
2272 nextk:;
2274 error("no space in action table");
2275 return 0;
2279 * output the gotos for the nontermninals
2281 void
2282 go2out(void)
2284 int i, j, k, best, count, cbest, times;
2286 /* mark begining of gotos */
2287 Bprint(ftemp, "$\n");
2288 for(i = 1; i <= nnonter; i++) {
2289 go2gen(i);
2291 /* find the best one to make default */
2292 best = -1;
2293 times = 0;
2295 /* is j the most frequent */
2296 for(j = 0; j <= nstate; j++) {
2297 if(tystate[j] == 0)
2298 continue;
2299 if(tystate[j] == best)
2300 continue;
2302 /* is tystate[j] the most frequent */
2303 count = 0;
2304 cbest = tystate[j];
2305 for(k = j; k <= nstate; k++)
2306 if(tystate[k] == cbest)
2307 count++;
2308 if(count > times) {
2309 best = cbest;
2310 times = count;
2314 /* best is now the default entry */
2315 zzgobest += times-1;
2316 for(j = 0; j <= nstate; j++)
2317 if(tystate[j] != 0 && tystate[j] != best) {
2318 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2319 zzgoent++;
2322 /* now, the default */
2323 if(best == -1)
2324 best = 0;
2325 zzgoent++;
2326 Bprint(ftemp, "%d\n", best);
2331 * output the gotos for nonterminal c
2333 void
2334 go2gen(int c)
2336 int i, work, cc;
2337 Item *p, *q;
2340 /* first, find nonterminals with gotos on c */
2341 aryfil(temp1, nnonter+1, 0);
2342 temp1[c] = 1;
2343 work = 1;
2344 while(work) {
2345 work = 0;
2346 PLOOP(0, i)
2348 /* cc is a nonterminal */
2349 if((cc=prdptr[i][1]-NTBASE) >= 0)
2350 /* cc has a goto on c */
2351 if(temp1[cc] != 0) {
2353 /* thus, the left side of production i does too */
2354 cc = *prdptr[i]-NTBASE;
2355 if(temp1[cc] == 0) {
2356 work = 1;
2357 temp1[cc] = 1;
2362 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2363 if(g2debug && foutput != 0) {
2364 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2365 NTLOOP(i)
2366 if(temp1[i])
2367 Bprint(foutput, "%s ", nontrst[i].name);
2368 Bprint(foutput, "\n");
2371 /* now, go through and put gotos into tystate */
2372 aryfil(tystate, nstate, 0);
2373 SLOOP(i)
2374 ITMLOOP(i, p, q)
2375 if((cc = *p->pitem) >= NTBASE)
2376 /* goto on c is possible */
2377 if(temp1[cc-NTBASE]) {
2378 tystate[i] = amem[indgo[i]+c];
2379 break;
2384 * decide a shift/reduce conflict by precedence.
2385 * r is a rule number, t a token number
2386 * the conflict is in state s
2387 * temp1[t] is changed to reflect the action
2389 void
2390 precftn(int r, int t, int s)
2392 int lp, lt, action;
2394 lp = levprd[r];
2395 lt = toklev[t];
2396 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2398 /* conflict */
2399 if(foutput != 0)
2400 Bprint(foutput,
2401 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2402 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2403 zzsrconf++;
2404 return;
2406 if(PLEVEL(lt) == PLEVEL(lp))
2407 action = ASSOC(lt);
2408 else
2409 if(PLEVEL(lt) > PLEVEL(lp))
2410 action = RASC; /* shift */
2411 else
2412 action = LASC; /* reduce */
2413 switch(action) {
2414 case BASC: /* error action */
2415 temp1[t] = ERRCODE;
2416 break;
2417 case LASC: /* reduce */
2418 temp1[t] = -r;
2419 break;
2424 * output state i
2425 * temp1 has the actions, lastred the default
2427 void
2428 wract(int i)
2430 int p, p0, p1, ntimes, tred, count, j, flag;
2432 /* find the best choice for lastred */
2433 lastred = 0;
2434 ntimes = 0;
2435 TLOOP(j) {
2436 if(temp1[j] >= 0)
2437 continue;
2438 if(temp1[j]+lastred == 0)
2439 continue;
2440 /* count the number of appearances of temp1[j] */
2441 count = 0;
2442 tred = -temp1[j];
2443 levprd[tred] |= REDFLAG;
2444 TLOOP(p)
2445 if(temp1[p]+tred == 0)
2446 count++;
2447 if(count > ntimes) {
2448 lastred = tred;
2449 ntimes = count;
2454 * for error recovery, arrange that, if there is a shift on the
2455 * error recovery token, `error', that the default be the error action
2457 if(temp1[2] > 0)
2458 lastred = 0;
2460 /* clear out entries in temp1 which equal lastred */
2461 TLOOP(p)
2462 if(temp1[p]+lastred == 0)
2463 temp1[p] = 0;
2465 wrstate(i);
2466 defact[i] = lastred;
2467 flag = 0;
2468 TLOOP(p0)
2469 if((p1=temp1[p0]) != 0) {
2470 if(p1 < 0) {
2471 p1 = -p1;
2472 goto exc;
2474 if(p1 == ACCEPTCODE) {
2475 p1 = -1;
2476 goto exc;
2478 if(p1 == ERRCODE) {
2479 p1 = 0;
2480 exc:
2481 if(flag++ == 0)
2482 Bprint(ftable, "-1, %d,\n", i);
2483 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2484 zzexcp++;
2485 continue;
2487 Bprint(ftemp, "%d,%d,", p0, p1);
2488 zzacent++;
2490 if(flag) {
2491 defact[i] = -2;
2492 Bprint(ftable, "\t-2, %d,\n", lastred);
2494 Bprint(ftemp, "\n");
2498 * writes state i
2500 void
2501 wrstate(int i)
2503 int j0, j1;
2504 Item *pp, *qq;
2505 Wset *u;
2507 if(fdebug) {
2508 if(lastred) {
2509 Bprint(fdebug, " 0, /*%d*/\n", i);
2510 } else {
2511 Bprint(fdebug, " \"");
2512 ITMLOOP(i, pp, qq)
2513 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2514 if(tystate[i] == MUSTLOOKAHEAD)
2515 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2516 if(*u->pitem < 0)
2517 Bprint(fdebug, "%s\\n", writem(u->pitem));
2518 Bprint(fdebug, "\", /*%d*/\n", i);
2521 if(foutput == 0)
2522 return;
2523 Bprint(foutput, "\nstate %d\n", i);
2524 ITMLOOP(i, pp, qq)
2525 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2526 if(tystate[i] == MUSTLOOKAHEAD)
2527 /* print out empty productions in closure */
2528 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2529 if(*u->pitem < 0)
2530 Bprint(foutput, "\t%s\n", writem(u->pitem));
2532 /* check for state equal to another */
2533 TLOOP(j0)
2534 if((j1=temp1[j0]) != 0) {
2535 Bprint(foutput, "\n\t%s ", symnam(j0));
2536 /* shift, error, or accept */
2537 if(j1 > 0) {
2538 if(j1 == ACCEPTCODE)
2539 Bprint(foutput, "accept");
2540 else
2541 if(j1 == ERRCODE)
2542 Bprint(foutput, "error");
2543 else
2544 Bprint(foutput, "shift %d", j1);
2545 } else
2546 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2549 /* output the final production */
2550 if(lastred)
2551 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
2552 lastred, rlines[lastred]);
2553 else
2554 Bprint(foutput, "\n\t. error\n\n");
2556 /* now, output nonterminal actions */
2557 j1 = ntokens;
2558 for(j0 = 1; j0 <= nnonter; j0++) {
2559 j1++;
2560 if(temp1[j1])
2561 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2565 void
2566 warray(char *s, int *v, int n)
2568 int i;
2570 Bprint(ftable, "short %s[] =\n{", s);
2571 for(i=0;;) {
2572 if(i%10 == 0)
2573 Bprint(ftable, "\n");
2574 Bprint(ftable, "%4d", v[i]);
2575 i++;
2576 if(i >= n) {
2577 Bprint(ftable, "\n};\n");
2578 break;
2580 Bprint(ftable, ",");
2585 * in order to free up the mem and amem arrays for the optimizer,
2586 * and still be able to output yyr1, etc., after the sizes of
2587 * the action array is known, we hide the nonterminals
2588 * derived by productions in levprd.
2591 void
2592 hideprod(void)
2594 int i, j;
2596 j = 0;
2597 levprd[0] = 0;
2598 PLOOP(1, i) {
2599 if(!(levprd[i] & REDFLAG)) {
2600 j++;
2601 if(foutput != 0)
2602 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
2604 levprd[i] = *prdptr[i] - NTBASE;
2606 if(j)
2607 print("%d rules never reduced\n", j);
2610 void
2611 callopt(void)
2613 int i, *p, j, k, *q;
2615 /* read the arrays from tempfile and set parameters */
2616 finput = Bopen(tempname, OREAD);
2617 if(finput == 0)
2618 error("optimizer cannot open tempfile");
2620 pgo[0] = 0;
2621 temp1[0] = 0;
2622 nstate = 0;
2623 nnonter = 0;
2624 for(;;) {
2625 switch(gtnm()) {
2626 case '\n':
2627 nstate++;
2628 pmem--;
2629 temp1[nstate] = pmem - mem0;
2630 case ',':
2631 continue;
2632 case '$':
2633 break;
2634 default:
2635 error("bad tempfile %s", tempname);
2637 break;
2640 pmem--;
2641 temp1[nstate] = yypgo[0] = pmem - mem0;
2642 for(;;) {
2643 switch(gtnm()) {
2644 case '\n':
2645 nnonter++;
2646 yypgo[nnonter] = pmem-mem0;
2647 case ',':
2648 continue;
2649 case -1:
2650 break;
2651 default:
2652 error("bad tempfile");
2654 break;
2656 pmem--;
2657 yypgo[nnonter--] = pmem - mem0;
2658 for(i = 0; i < nstate; i++) {
2659 k = 32000;
2660 j = 0;
2661 q = mem0 + temp1[i+1];
2662 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2663 if(*p > j)
2664 j = *p;
2665 if(*p < k)
2666 k = *p;
2668 /* nontrivial situation */
2669 if(k <= j) {
2670 /* j is now the range */
2671 /* j -= k; *//* call scj */
2672 if(k > maxoff)
2673 maxoff = k;
2675 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2676 if(j > maxspr)
2677 maxspr = j;
2680 /* initialize ggreed table */
2681 for(i = 1; i <= nnonter; i++) {
2682 ggreed[i] = 1;
2683 j = 0;
2685 /* minimum entry index is always 0 */
2686 q = mem0 + yypgo[i+1] - 1;
2687 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2688 ggreed[i] += 2;
2689 if(*p > j)
2690 j = *p;
2692 ggreed[i] = ggreed[i] + 2*j;
2693 if(j > maxoff)
2694 maxoff = j;
2697 /* now, prepare to put the shift actions into the amem array */
2698 for(i = 0; i < ACTSIZE; i++)
2699 amem[i] = 0;
2700 maxa = amem;
2701 for(i = 0; i < nstate; i++) {
2702 if(tystate[i] == 0 && adb > 1)
2703 Bprint(ftable, "State %d: null\n", i);
2704 indgo[i] = YYFLAG1;
2706 while((i = nxti()) != NOMORE)
2707 if(i >= 0)
2708 stin(i);
2709 else
2710 gin(-i);
2712 /* print amem array */
2713 if(adb > 2 )
2714 for(p = amem; p <= maxa; p += 10) {
2715 Bprint(ftable, "%4d ", (int)(p-amem));
2716 for(i = 0; i < 10; ++i)
2717 Bprint(ftable, "%4d ", p[i]);
2718 Bprint(ftable, "\n");
2721 /* write out the output appropriate to the language */
2722 aoutput();
2723 osummary();
2724 ZAPFILE(tempname);
2727 void
2728 gin(int i)
2730 int *p, *r, *s, *q1, *q2;
2732 /* enter gotos on nonterminal i into array amem */
2733 ggreed[i] = 0;
2735 q2 = mem0+ yypgo[i+1] - 1;
2736 q1 = mem0 + yypgo[i];
2738 /* now, find amem place for it */
2739 for(p = amem; p < &amem[ACTSIZE]; p++) {
2740 if(*p)
2741 continue;
2742 for(r = q1; r < q2; r += 2) {
2743 s = p + *r + 1;
2744 if(*s)
2745 goto nextgp;
2746 if(s > maxa)
2747 if((maxa = s) > &amem[ACTSIZE])
2748 error("a array overflow");
2750 /* we have found amem spot */
2751 *p = *q2;
2752 if(p > maxa)
2753 if((maxa = p) > &amem[ACTSIZE])
2754 error("a array overflow");
2755 for(r = q1; r < q2; r += 2) {
2756 s = p + *r + 1;
2757 *s = r[1];
2759 pgo[i] = p-amem;
2760 if(adb > 1)
2761 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2762 return;
2764 nextgp:;
2766 error("cannot place goto %d\n", i);
2769 void
2770 stin(int i)
2772 int *r, *s, n, flag, j, *q1, *q2;
2774 tystate[i] = 0;
2776 /* enter state i into the amem array */
2777 q2 = mem0+temp1[i+1];
2778 q1 = mem0+temp1[i];
2779 /* find an acceptable place */
2780 for(n = -maxoff; n < ACTSIZE; n++) {
2781 flag = 0;
2782 for(r = q1; r < q2; r += 2) {
2783 if((s = *r + n + amem) < amem)
2784 goto nextn;
2785 if(*s == 0)
2786 flag++;
2787 else
2788 if(*s != r[1])
2789 goto nextn;
2792 /* check that the position equals another only if the states are identical */
2793 for(j=0; j<nstate; j++) {
2794 if(indgo[j] == n) {
2796 /* we have some disagreement */
2797 if(flag)
2798 goto nextn;
2799 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2801 /* states are equal */
2802 indgo[i] = n;
2803 if(adb > 1)
2804 Bprint(ftable,
2805 "State %d: entry at %d equals state %d\n",
2806 i, n, j);
2807 return;
2810 /* we have some disagreement */
2811 goto nextn;
2815 for(r = q1; r < q2; r += 2) {
2816 if((s = *r+n+amem) >= &amem[ACTSIZE])
2817 error("out of space in optimizer a array");
2818 if(s > maxa)
2819 maxa = s;
2820 if(*s != 0 && *s != r[1])
2821 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2822 *s = r[1];
2824 indgo[i] = n;
2825 if(adb > 1)
2826 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2827 return;
2828 nextn:;
2830 error("Error; failure to place state %d\n", i);
2834 * finds the next i
2836 int
2837 nxti(void)
2839 int i, max, maxi;
2841 max = 0;
2842 maxi = 0;
2843 for(i = 1; i <= nnonter; i++)
2844 if(ggreed[i] >= max) {
2845 max = ggreed[i];
2846 maxi = -i;
2848 for(i = 0; i < nstate; ++i)
2849 if(tystate[i] >= max) {
2850 max = tystate[i];
2851 maxi = i;
2853 if(nxdb)
2854 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2855 if(max == 0)
2856 return NOMORE;
2857 return maxi;
2861 * write summary
2863 void
2864 osummary(void)
2867 int i, *p;
2869 if(foutput == 0)
2870 return;
2871 i = 0;
2872 for(p = maxa; p >= amem; p--)
2873 if(*p == 0)
2874 i++;
2876 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2877 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2878 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2879 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2883 * this version is for C
2884 * write out the optimized parser
2886 void
2887 aoutput(void)
2889 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2890 arout("yyact", amem, (maxa-amem)+1);
2891 arout("yypact", indgo, nstate);
2892 arout("yypgo", pgo, nnonter+1);
2895 void
2896 arout(char *s, int *v, int n)
2898 int i;
2900 Bprint(ftable, "short %s[] =\n{", s);
2901 for(i = 0; i < n;) {
2902 if(i%10 == 0)
2903 Bprint(ftable, "\n");
2904 Bprint(ftable, "%4d", v[i]);
2905 i++;
2906 if(i == n)
2907 Bprint(ftable, "\n};\n");
2908 else
2909 Bprint(ftable, ",");
2914 * read and convert an integer from the standard input
2915 * return the terminating character
2916 * blanks, tabs, and newlines are ignored
2918 int
2919 gtnm(void)
2921 int sign, val, c;
2923 sign = 0;
2924 val = 0;
2925 while((c=Bgetrune(finput)) != Beof) {
2926 if(isdigit(c)) {
2927 val = val*10 + c-'0';
2928 continue;
2930 if(c == '-') {
2931 sign = 1;
2932 continue;
2934 break;
2936 if(sign)
2937 val = -val;
2938 *pmem++ = val;
2939 if(pmem >= &mem0[MEMSIZE])
2940 error("out of space");
2941 return c;