Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
6 #define Bungetrune Bungetc /* ok for now. */
8 /*
9 * all these are 32 bit
10 */
11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n) (((n)+32)/32)
16 #define PARSER "lib/yaccpar"
17 #define PARSERS "lib/yaccpars"
18 #define TEMPNAME "y.tmp.XXXXXX"
19 #define ACTNAME "y.acts.XXXXXX"
20 #define OFILE "tab.c"
21 #define FILEU "output"
22 #define FILED "tab.h"
23 #define FILEDEBUG "debug"
25 enum
26 {
27 /*
28 * the following are adjustable
29 * according to memory size
30 */
31 ACTSIZE = 40000,
32 MEMSIZE = 40000,
33 NSTATES = 2000,
34 NTERMS = 511,
35 NPROD = 1600,
36 NNONTERM = 600,
37 TEMPSIZE = 2000,
38 CNAMSZ = 10000,
39 LSETSIZE = 2400,
40 WSETSIZE = 350,
42 NAMESIZE = 50,
43 NTYPES = 63,
44 ISIZE = 400,
46 PRIVATE = 0xE000, /* unicode private use */
48 /* relationships which must hold:
49 TBITSET ints must hold NTERMS+1 bits...
50 WSETSIZE >= NNONTERM
51 LSETSIZE >= NNONTERM
52 TEMPSIZE >= NTERMS + NNONTERM + 1
53 TEMPSIZE >= NSTATES
54 */
56 NTBASE = 010000,
57 ERRCODE = 8190,
58 ACCEPTCODE = 8191,
60 NOASC = 0, /* no assoc. */
61 LASC = 1, /* left assoc. */
62 RASC = 2, /* right assoc. */
63 BASC = 3, /* binary assoc. */
65 /* flags for state generation */
67 DONE = 0,
68 MUSTDO = 1,
69 MUSTLOOKAHEAD = 2,
71 /* flags for a rule having an action, and being reduced */
73 ACTFLAG = 04,
74 REDFLAG = 010,
76 /* output parser flags */
77 YYFLAG1 = -1000,
79 /* parse tokens */
80 IDENTIFIER = PRIVATE,
81 MARK,
82 TERM,
83 LEFT,
84 RIGHT,
85 BINARY,
86 PREC,
87 LCURLY,
88 IDENTCOLON,
89 NUMBER,
90 START,
91 TYPEDEF,
92 TYPENAME,
93 UNION,
95 ENDFILE = 0,
97 EMPTY = 1,
98 WHOKNOWS = 0,
99 OK = 1,
100 NOMORE = -1000,
101 };
103 /* macros for getting associativity and precedence levels */
105 #define ASSOC(i) ((i)&03)
106 #define PLEVEL(i) (((i)>>4)&077)
107 #define TYPE(i) (((i)>>10)&077)
109 /* macros for setting associativity and precedence levels */
111 #define SETASC(i,j) i |= j
112 #define SETPLEV(i,j) i |= (j<<4)
113 #define SETTYPE(i,j) i |= (j<<10)
115 /* looping macros */
117 #define TLOOP(i) for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i) for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i) for(i=s; i<nprod; i++)
120 #define SLOOP(i) for(i=0; i<nstate; i++)
121 #define WSBUMP(x) x++
122 #define WSLOOP(s,j) for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i) for(i=0; i<tbitset; i++)
126 /* command to clobber tempfiles after use */
128 #define ZAPFILE(x) if(x) remove(x)
130 /* I/O descriptors */
132 Biobuf* faction; /* file for saving actions */
133 Biobuf* fdefine; /* file for #defines */
134 Biobuf* fdebug; /* y.debug for strings for debugging */
135 Biobuf* ftable; /* y.tab.c file */
136 Biobuf* ftemp; /* tempfile to pass 2 */
137 Biobuf* finput; /* input file */
138 Biobuf* foutput; /* y.output file */
140 /* communication variables between various I/O routines */
142 char* infile; /* input file name */
143 int numbval; /* value of an input number */
144 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */
146 /* structure declarations */
148 typedef
149 struct
151 int lset[TBITSET];
152 } Lkset;
154 typedef
155 struct
157 int* pitem;
158 Lkset* look;
159 } Item;
161 typedef
162 struct
164 char* name;
165 int value;
166 } Symb;
168 typedef
169 struct
171 int* pitem;
172 int flag;
173 Lkset ws;
174 } Wset;
176 /* storage of names */
178 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
179 int cnamsz = CNAMSZ; /* size of cnames */
180 char* cnamp = cnames; /* place where next name is to be put in */
181 int ndefout = 4; /* number of defined symbols output */
182 char* tempname;
183 char* actname;
184 char ttempname[] = TEMPNAME;
185 char tactname[] = ACTNAME;
186 char* parser = PARSER;
187 char* yydebug;
189 /* storage of types */
190 int ntypes; /* number of types defined */
191 char* typeset[NTYPES]; /* pointers to type tags */
193 /* token information */
195 int ntokens = 0 ; /* number of tokens */
196 Symb tokset[NTERMS];
197 int toklev[NTERMS]; /* vector with the precedence of the terminals */
199 /* nonterminal information */
201 int nnonter = -1; /* the number of nonterminals */
202 Symb nontrst[NNONTERM];
203 int start; /* start symbol */
205 /* assigned token type values */
206 int extval = 0;
208 char* ytabc = OFILE; /* name of y.tab.c */
210 /* grammar rule information */
212 int mem0[MEMSIZE] ; /* production storage */
213 int* mem = mem0;
214 int nprod = 1; /* number of productions */
215 int* prdptr[NPROD]; /* pointers to descriptions of productions */
216 int levprd[NPROD]; /* precedence levels for the productions */
217 int rlines[NPROD]; /* line number for this rule */
219 /* state information */
221 int nstate = 0; /* number of states */
222 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
223 int tystate[NSTATES]; /* contains type information about the states */
224 int defact[NSTATES]; /* the default actions of states */
225 int tstates[NTERMS]; /* states generated by terminal gotos */
226 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
227 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
228 int lastred; /* the number of the last reduction of a state */
230 /* lookahead set information */
232 Lkset lkst[LSETSIZE];
233 int nolook; /* flag to turn off lookahead computations */
234 int tbitset; /* size of lookahead sets */
235 int nlset = 0; /* next lookahead set index */
236 int nolook = 0; /* flag to suppress lookahead computations */
237 Lkset clset; /* temporary storage for lookahead computations */
239 /* working set information */
241 Wset wsets[WSETSIZE];
242 Wset* cwp;
244 /* storage for action table */
246 int amem[ACTSIZE]; /* action table storage */
247 int* memp = amem; /* next free action table position */
248 int indgo[NSTATES]; /* index to the stored goto table */
250 /* temporary vector, indexable by states, terms, or ntokens */
252 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
253 int lineno = 1; /* current input line number */
254 int fatfl = 1; /* if on, error is fatal */
255 int nerrors = 0; /* number of errors */
257 /* statistics collection variables */
259 int zzgoent;
260 int zzgobest;
261 int zzacent;
262 int zzexcp;
263 int zzclose;
264 int zzrrconf;
265 int zzsrconf;
267 int* ggreed = lkst[0].lset;
268 int* pgo = wsets[0].ws.lset;
269 int* yypgo = &nontrst[0].value;
271 int maxspr = 0; /* maximum spread of any entry */
272 int maxoff = 0; /* maximum offset into a array */
273 int* pmem = mem0;
274 int* maxa;
275 int nxdb = 0;
276 int adb = 0;
279 /* storage for information about the nonterminals */
281 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
282 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
283 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
285 /* random stuff picked out from between functions */
287 int indebug = 0;
288 Wset* zzcwp = wsets;
289 int zzgoent = 0;
290 int zzgobest = 0;
291 int zzacent = 0;
292 int zzexcp = 0;
293 int zzclose = 0;
294 int zzsrconf = 0;
295 int* zzmemsz = mem0;
296 int zzrrconf = 0;
297 int pidebug = 0; /* debugging flag for putitem */
298 int gsdebug = 0;
299 int cldebug = 0; /* debugging flag for closure */
300 int pkdebug = 0;
301 int g2debug = 0;
303 struct
305 char* name;
306 long value;
307 } resrv[] =
309 "binary", BINARY,
310 "left", LEFT,
311 "nonassoc", BINARY,
312 "prec", PREC,
313 "right", RIGHT,
314 "start", START,
315 "term", TERM,
316 "token", TERM,
317 "type", TYPEDEF,
318 "union", UNION,
319 0,
320 };
322 /* define functions */
324 void main(int, char**);
325 void others(void);
326 char* chcopy(char*, char*);
327 char* writem(int*);
328 char* symnam(int);
329 void summary(void);
330 void error(char*, ...);
331 void aryfil(int*, int, int);
332 int setunion(int*, int*);
333 void prlook(Lkset*);
334 void cpres(void);
335 void cpfir(void);
336 int state(int);
337 void putitem(int*, Lkset*);
338 void cempty(void);
339 void stagen(void);
340 void closure(int);
341 Lkset* flset(Lkset*);
342 void cleantmp(void);
343 void intr(void);
344 void setup(int, char**);
345 void finact(void);
346 int defin(int, char*);
347 void defout(int);
348 char* cstash(char*);
349 long gettok(void);
350 int fdtype(int);
351 int chfind(int, char*);
352 void cpyunion(void);
353 void cpycode(void);
354 int skipcom(void);
355 void cpyact(int);
356 void openup(char*, int, int, int, char*);
357 void output(void);
358 int apack(int*, int);
359 void go2out(void);
360 void go2gen(int);
361 void precftn(int, int, int);
362 void wract(int);
363 void wrstate(int);
364 void warray(char*, int*, int);
365 void hideprod(void);
366 void callopt(void);
367 void gin(int);
368 void stin(int);
369 int nxti(void);
370 void osummary(void);
371 void aoutput(void);
372 void arout(char*, int*, int);
373 int gtnm(void);
375 void
376 main(int argc, char *argv[])
379 setup(argc, argv); /* initialize and read productions */
380 tbitset = NWORDS(ntokens);
381 cpres(); /* make table of which productions yield a given nonterminal */
382 cempty(); /* make a table of which nonterminals can match the empty string */
383 cpfir(); /* make a table of firsts of nonterminals */
384 stagen(); /* generate the states */
385 output(); /* write the states and the tables */
386 go2out();
387 hideprod();
388 summary();
389 callopt();
390 others();
391 exits(0);
394 /*
395 * put out other arrays, copy the parsers
396 */
397 void
398 others(void)
400 int c, i, j;
401 char *s, *root;
403 root = getenv("PLAN9");
404 if(root == nil)
405 root = "/usr/local/plan9";
406 s = malloc(strlen(root)+1+strlen(parser)+1);
407 strcpy(s, root);
408 strcat(s, "/");
409 strcat(s, parser);
410 finput = Bopen(s, OREAD);
411 if(finput == 0)
412 error("cannot find parser %s", s);
413 free(s);
414 warray("yyr1", levprd, nprod);
415 aryfil(temp1, nprod, 0);
416 PLOOP(1, i)
417 temp1[i] = prdptr[i+1]-prdptr[i]-2;
418 warray("yyr2", temp1, nprod);
420 aryfil(temp1, nstate, -1000);
421 TLOOP(i)
422 for(j=tstates[i]; j!=0; j=mstates[j])
423 temp1[j] = i;
424 NTLOOP(i)
425 for(j=ntstates[i]; j!=0; j=mstates[j])
426 temp1[j] = -i;
427 warray("yychk", temp1, nstate);
428 warray("yydef", defact, nstate);
430 /* put out token translation tables */
431 /* table 1 has 0-256 */
432 aryfil(temp1, 256, 0);
433 c = 0;
434 TLOOP(i) {
435 j = tokset[i].value;
436 if(j >= 0 && j < 256) {
437 if(temp1[j]) {
438 print("yacc bug -- cant have 2 different Ts with same value\n");
439 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
440 nerrors++;
442 temp1[j] = i;
443 if(j > c)
444 c = j;
447 warray("yytok1", temp1, c+1);
449 /* table 2 has PRIVATE-PRIVATE+256 */
450 aryfil(temp1, 256, 0);
451 c = 0;
452 TLOOP(i) {
453 j = tokset[i].value - PRIVATE;
454 if(j >= 0 && j < 256) {
455 if(temp1[j]) {
456 print("yacc bug -- cant have 2 different Ts with same value\n");
457 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
458 nerrors++;
460 temp1[j] = i;
461 if(j > c)
462 c = j;
465 warray("yytok2", temp1, c+1);
467 /* table 3 has everything else */
468 Bprint(ftable, "long yytok3[] =\n{\n");
469 c = 0;
470 TLOOP(i) {
471 j = tokset[i].value;
472 if(j >= 0 && j < 256)
473 continue;
474 if(j >= PRIVATE && j < 256+PRIVATE)
475 continue;
477 Bprint(ftable, "%4d,%4d,", j, i);
478 c++;
479 if(c%5 == 0)
480 Bprint(ftable, "\n");
482 Bprint(ftable, "%4d\n};\n", 0);
484 /* copy parser text */
485 while((c=Bgetrune(finput)) != Beof) {
486 if(c == '$') {
487 if((c = Bgetrune(finput)) != 'A')
488 Bputrune(ftable, '$');
489 else { /* copy actions */
490 faction = Bopen(actname, OREAD);
491 if(faction == 0)
492 error("cannot reopen action tempfile");
493 while((c=Bgetrune(faction)) != Beof)
494 Bputrune(ftable, c);
495 Bterm(faction);
496 ZAPFILE(actname);
497 c = Bgetrune(finput);
500 Bputrune(ftable, c);
502 Bterm(ftable);
505 /*
506 * copies string q into p, returning next free char ptr
507 */
508 char*
509 chcopy(char* p, char* q)
511 int c;
513 while(c = *q) {
514 if(c == '"')
515 *p++ = '\\';
516 *p++ = c;
517 q++;
519 *p = 0;
520 return p;
523 /*
524 * creates output string for item pointed to by pp
525 */
526 char*
527 writem(int *pp)
529 int i,*p;
530 static char sarr[ISIZE];
531 char* q;
533 for(p=pp; *p>0; p++)
535 p = prdptr[-*p];
536 q = chcopy(sarr, nontrst[*p-NTBASE].name);
537 q = chcopy(q, ": ");
538 for(;;) {
539 *q = ' ';
540 p++;
541 if(p == pp)
542 *q = '.';
543 q++;
544 *q = '\0';
545 i = *p;
546 if(i <= 0)
547 break;
548 q = chcopy(q, symnam(i));
549 if(q > &sarr[ISIZE-30])
550 error("item too big");
553 /* an item calling for a reduction */
554 i = *pp;
555 if(i < 0 ) {
556 q = chcopy(q, " (");
557 sprint(q, "%d)", -i);
559 return sarr;
562 /*
563 * return a pointer to the name of symbol i
564 */
565 char*
566 symnam(int i)
568 char* cp;
570 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
571 if(*cp == ' ')
572 cp++;
573 return cp;
576 /*
577 * output the summary on y.output
578 */
579 void
580 summary(void)
583 if(foutput != 0) {
584 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
585 ntokens, NTERMS, nnonter, NNONTERM);
586 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
587 nprod, NPROD, nstate, NSTATES);
588 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
589 zzsrconf, zzrrconf);
590 Bprint(foutput, "%d/%d working sets used\n",
591 (int)(zzcwp-wsets), WSETSIZE);
592 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
593 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
594 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
595 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
596 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
597 Bprint(foutput, "%d goto entries\n", zzgoent);
598 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
600 if(zzsrconf != 0 || zzrrconf != 0) {
601 print("\nconflicts: ");
602 if(zzsrconf)
603 print("%d shift/reduce", zzsrconf);
604 if(zzsrconf && zzrrconf)
605 print(", ");
606 if(zzrrconf)
607 print("%d reduce/reduce", zzrrconf);
608 print("\n");
610 if(ftemp != 0) {
611 Bterm(ftemp);
612 ftemp = 0;
614 if(fdefine != 0) {
615 Bterm(fdefine);
616 fdefine = 0;
620 /*
621 * write out error comment -- NEEDS WORK
622 */
623 void
624 error(char *s, ...)
627 nerrors++;
628 fprint(2, "\n fatal error:");
629 fprint(2, s, (&s)[1]);
630 fprint(2, ", %s:%d\n", infile, lineno);
631 if(!fatfl)
632 return;
633 summary();
634 cleantmp();
635 exits("error");
638 /*
639 * set elements 0 through n-1 to c
640 */
641 void
642 aryfil(int *v, int n, int c)
644 int i;
646 for(i=0; i<n; i++)
647 v[i] = c;
650 /*
651 * set a to the union of a and b
652 * return 1 if b is not a subset of a, 0 otherwise
653 */
654 int
655 setunion(int *a, int *b)
657 int i, x, sub;
659 sub = 0;
660 SETLOOP(i) {
661 x = *a;
662 *a |= *b;
663 if(*a != x)
664 sub = 1;
665 a++;
666 b++;
668 return sub;
671 void
672 prlook(Lkset* p)
674 int j, *pp;
676 pp = p->lset;
677 if(pp == 0)
678 Bprint(foutput, "\tNULL");
679 else {
680 Bprint(foutput, " { ");
681 TLOOP(j)
682 if(BIT(pp,j))
683 Bprint(foutput, "%s ", symnam(j));
684 Bprint(foutput, "}");
688 /*
689 * compute an array with the beginnings of productions yielding given nonterminals
690 * The array pres points to these lists
691 * the array pyield has the lists: the total size is only NPROD+1
692 */
693 void
694 cpres(void)
696 int c, j, i, **pmem;
697 static int *pyield[NPROD];
699 pmem = pyield;
700 NTLOOP(i) {
701 c = i+NTBASE;
702 pres[i] = pmem;
703 fatfl = 0; /* make undefined symbols nonfatal */
704 PLOOP(0, j)
705 if(*prdptr[j] == c)
706 *pmem++ = prdptr[j]+1;
707 if(pres[i] == pmem)
708 error("nonterminal %s not defined!", nontrst[i].name);
710 pres[i] = pmem;
711 fatfl = 1;
712 if(nerrors) {
713 summary();
714 cleantmp();
715 exits("error");
717 if(pmem != &pyield[nprod])
718 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
721 /*
722 * compute an array with the first of nonterminals
723 */
724 void
725 cpfir(void)
727 int *p, **s, i, **t, ch, changes;
729 zzcwp = &wsets[nnonter];
730 NTLOOP(i) {
731 aryfil(wsets[i].ws.lset, tbitset, 0);
732 t = pres[i+1];
733 /* initially fill the sets */
734 for(s=pres[i]; s<t; ++s)
735 for(p = *s; (ch = *p) > 0; ++p) {
736 if(ch < NTBASE) {
737 SETBIT(wsets[i].ws.lset, ch);
738 break;
740 if(!pempty[ch-NTBASE])
741 break;
745 /* now, reflect transitivity */
746 changes = 1;
747 while(changes) {
748 changes = 0;
749 NTLOOP(i) {
750 t = pres[i+1];
751 for(s = pres[i]; s < t; ++s)
752 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
753 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
754 if(!pempty[ch])
755 break;
760 NTLOOP(i)
761 pfirst[i] = flset(&wsets[i].ws);
762 if(!indebug)
763 return;
764 if(foutput != 0)
765 NTLOOP(i) {
766 Bprint(foutput, "\n%s: ", nontrst[i].name);
767 prlook(pfirst[i]);
768 Bprint(foutput, " %d\n", pempty[i]);
772 /*
773 * sorts last state,and sees if it equals earlier ones. returns state number
774 */
775 int
776 state(int c)
778 Item *p1, *p2, *k, *l, *q1, *q2;
779 int size1, size2, i;
781 p1 = pstate[nstate];
782 p2 = pstate[nstate+1];
783 if(p1 == p2)
784 return 0; /* null state */
785 /* sort the items */
786 for(k = p2-1; k > p1; k--) /* make k the biggest */
787 for(l = k-1; l >= p1; --l)
788 if(l->pitem > k->pitem) {
789 int *s;
790 Lkset *ss;
792 s = k->pitem;
793 k->pitem = l->pitem;
794 l->pitem = s;
795 ss = k->look;
796 k->look = l->look;
797 l->look = ss;
799 size1 = p2 - p1; /* size of state */
801 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
802 /* get ith state */
803 q1 = pstate[i];
804 q2 = pstate[i+1];
805 size2 = q2 - q1;
806 if(size1 != size2)
807 continue;
808 k = p1;
809 for(l = q1; l < q2; l++) {
810 if(l->pitem != k->pitem)
811 break;
812 k++;
814 if(l != q2)
815 continue;
816 /* found it */
817 pstate[nstate+1] = pstate[nstate]; /* delete last state */
818 /* fix up lookaheads */
819 if(nolook)
820 return i;
821 for(l = q1, k = p1; l < q2; ++l, ++k ) {
822 int s;
824 SETLOOP(s)
825 clset.lset[s] = l->look->lset[s];
826 if(setunion(clset.lset, k->look->lset)) {
827 tystate[i] = MUSTDO;
828 /* register the new set */
829 l->look = flset( &clset );
832 return i;
834 /* state is new */
835 if(nolook)
836 error("yacc state/nolook error");
837 pstate[nstate+2] = p2;
838 if(nstate+1 >= NSTATES)
839 error("too many states");
840 if(c >= NTBASE) {
841 mstates[nstate] = ntstates[c-NTBASE];
842 ntstates[c-NTBASE] = nstate;
843 } else {
844 mstates[nstate] = tstates[c];
845 tstates[c] = nstate;
847 tystate[nstate] = MUSTDO;
848 return nstate++;
851 void
852 putitem(int *ptr, Lkset *lptr)
854 Item *j;
856 if(pidebug && foutput != 0)
857 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
858 j = pstate[nstate+1];
859 j->pitem = ptr;
860 if(!nolook)
861 j->look = flset(lptr);
862 pstate[nstate+1] = ++j;
863 if((int*)j > zzmemsz) {
864 zzmemsz = (int*)j;
865 if(zzmemsz >= &mem0[MEMSIZE])
866 error("out of state space");
870 /*
871 * mark nonterminals which derive the empty string
872 * also, look for nonterminals which don't derive any token strings
873 */
874 void
875 cempty(void)
878 int i, *p;
880 /* first, use the array pempty to detect productions that can never be reduced */
881 /* set pempty to WHONOWS */
882 aryfil(pempty, nnonter+1, WHOKNOWS);
884 /* now, look at productions, marking nonterminals which derive something */
885 more:
886 PLOOP(0, i) {
887 if(pempty[*prdptr[i] - NTBASE])
888 continue;
889 for(p = prdptr[i]+1; *p >= 0; ++p)
890 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
891 break;
892 /* production can be derived */
893 if(*p < 0) {
894 pempty[*prdptr[i]-NTBASE] = OK;
895 goto more;
899 /* now, look at the nonterminals, to see if they are all OK */
900 NTLOOP(i) {
901 /* the added production rises or falls as the start symbol ... */
902 if(i == 0)
903 continue;
904 if(pempty[i] != OK) {
905 fatfl = 0;
906 error("nonterminal %s never derives any token string", nontrst[i].name);
910 if(nerrors) {
911 summary();
912 cleantmp();
913 exits("error");
916 /* now, compute the pempty array, to see which nonterminals derive the empty string */
917 /* set pempty to WHOKNOWS */
918 aryfil( pempty, nnonter+1, WHOKNOWS);
920 /* loop as long as we keep finding empty nonterminals */
922 again:
923 PLOOP(1, i) {
924 /* not known to be empty */
925 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
926 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
928 /* we have a nontrivially empty nonterminal */
929 if(*p < 0) {
930 pempty[*prdptr[i]-NTBASE] = EMPTY;
931 /* got one ... try for another */
932 goto again;
938 /*
939 * generate the states
940 */
941 void
942 stagen(void)
945 int c, i, j, more;
946 Wset *p, *q;
948 /* initialize */
949 nstate = 0;
951 /* THIS IS FUNNY from the standpoint of portability
952 * it represents the magic moment when the mem0 array, which has
953 * been holding the productions, starts to hold item pointers, of a
954 * different type...
955 * someday, alloc should be used to allocate all this stuff... for now, we
956 * accept that if pointers don't fit in integers, there is a problem...
957 */
959 pstate[0] = pstate[1] = (Item*)mem;
960 aryfil(clset.lset, tbitset, 0);
961 putitem(prdptr[0]+1, &clset);
962 tystate[0] = MUSTDO;
963 nstate = 1;
964 pstate[2] = pstate[1];
966 aryfil(amem, ACTSIZE, 0);
968 /* now, the main state generation loop */
969 for(more=1; more;) {
970 more = 0;
971 SLOOP(i) {
972 if(tystate[i] != MUSTDO)
973 continue;
974 tystate[i] = DONE;
975 aryfil(temp1, nnonter+1, 0);
976 /* take state i, close it, and do gotos */
977 closure(i);
978 /* generate goto's */
979 WSLOOP(wsets, p) {
980 if(p->flag)
981 continue;
982 p->flag = 1;
983 c = *(p->pitem);
984 if(c <= 1) {
985 if(pstate[i+1]-pstate[i] <= p-wsets)
986 tystate[i] = MUSTLOOKAHEAD;
987 continue;
989 /* do a goto on c */
990 WSLOOP(p, q)
991 /* this item contributes to the goto */
992 if(c == *(q->pitem)) {
993 putitem(q->pitem+1, &q->ws);
994 q->flag = 1;
996 if(c < NTBASE)
997 state(c); /* register new state */
998 else
999 temp1[c-NTBASE] = state(c);
1001 if(gsdebug && foutput != 0) {
1002 Bprint(foutput, "%d: ", i);
1003 NTLOOP(j)
1004 if(temp1[j])
1005 Bprint(foutput, "%s %d, ",
1006 nontrst[j].name, temp1[j]);
1007 Bprint(foutput, "\n");
1009 indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1010 /* do some more */
1011 more = 1;
1017 * generate the closure of state i
1019 void
1020 closure(int i)
1023 Wset *u, *v;
1024 Item *p, *q;
1025 int c, ch, work, k, *pi, **s, **t;
1027 zzclose++;
1029 /* first, copy kernel of state i to wsets */
1030 cwp = wsets;
1031 ITMLOOP(i, p, q) {
1032 cwp->pitem = p->pitem;
1033 cwp->flag = 1; /* this item must get closed */
1034 SETLOOP(k)
1035 cwp->ws.lset[k] = p->look->lset[k];
1036 WSBUMP(cwp);
1039 /* now, go through the loop, closing each item */
1040 work = 1;
1041 while(work) {
1042 work = 0;
1043 WSLOOP(wsets, u) {
1044 if(u->flag == 0)
1045 continue;
1046 /* dot is before c */
1047 c = *(u->pitem);
1048 if(c < NTBASE) {
1049 u->flag = 0;
1050 /* only interesting case is where . is before nonterminal */
1051 continue;
1054 /* compute the lookahead */
1055 aryfil(clset.lset, tbitset, 0);
1057 /* find items involving c */
1058 WSLOOP(u, v)
1059 if(v->flag == 1 && *(pi=v->pitem) == c) {
1060 v->flag = 0;
1061 if(nolook)
1062 continue;
1063 while((ch = *++pi) > 0) {
1064 /* terminal symbol */
1065 if(ch < NTBASE) {
1066 SETBIT(clset.lset, ch);
1067 break;
1069 /* nonterminal symbol */
1070 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1071 if(!pempty[ch-NTBASE])
1072 break;
1074 if(ch <= 0)
1075 setunion(clset.lset, v->ws.lset);
1079 * now loop over productions derived from c
1080 * c is now nonterminal number
1082 c -= NTBASE;
1083 t = pres[c+1];
1084 for(s = pres[c]; s < t; ++s) {
1086 * put these items into the closure
1087 * is the item there
1089 WSLOOP(wsets, v)
1090 /* yes, it is there */
1091 if(v->pitem == *s) {
1092 if(nolook)
1093 goto nexts;
1094 if(setunion(v->ws.lset, clset.lset))
1095 v->flag = work = 1;
1096 goto nexts;
1099 /* not there; make a new entry */
1100 if(cwp-wsets+1 >= WSETSIZE)
1101 error( "working set overflow");
1102 cwp->pitem = *s;
1103 cwp->flag = 1;
1104 if(!nolook) {
1105 work = 1;
1106 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1108 WSBUMP(cwp);
1110 nexts:;
1115 /* have computed closure; flags are reset; return */
1116 if(cwp > zzcwp)
1117 zzcwp = cwp;
1118 if(cldebug && foutput != 0) {
1119 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1120 WSLOOP(wsets, u) {
1121 if(u->flag)
1122 Bprint(foutput, "flag set!\n");
1123 u->flag = 0;
1124 Bprint(foutput, "\t%s", writem(u->pitem));
1125 prlook(&u->ws);
1126 Bprint(foutput, "\n");
1132 * decide if the lookahead set pointed to by p is known
1133 * return pointer to a perminent location for the set
1135 Lkset*
1136 flset(Lkset *p)
1138 Lkset *q;
1139 int *u, *v, *w, j;
1141 for(q = &lkst[nlset]; q-- > lkst;) {
1142 u = p->lset;
1143 v = q->lset;
1144 w = &v[tbitset];
1145 while(v < w)
1146 if(*u++ != *v++)
1147 goto more;
1148 /* we have matched */
1149 return q;
1150 more:;
1152 /* add a new one */
1153 q = &lkst[nlset++];
1154 if(nlset >= LSETSIZE)
1155 error("too many lookahead sets");
1156 SETLOOP(j)
1157 q->lset[j] = p->lset[j];
1158 return q;
1161 void
1162 cleantmp(void)
1164 ZAPFILE(actname);
1165 ZAPFILE(tempname);
1168 void
1169 intr(void)
1171 cleantmp();
1172 exits("interrupted");
1175 void
1176 setup(int argc, char *argv[])
1178 long c, t;
1179 int i, j, fd, lev, ty, ytab, *p;
1180 int vflag, dflag, stem;
1181 char actnm[8], *stemc, *s, dirbuf[128];
1183 ytab = 0;
1184 vflag = 0;
1185 dflag = 0;
1186 stem = 0;
1187 stemc = "y";
1188 foutput = 0;
1189 fdefine = 0;
1190 fdebug = 0;
1191 ARGBEGIN{
1192 case 'v':
1193 case 'V':
1194 vflag++;
1195 break;
1196 case 'D':
1197 yydebug = ARGF();
1198 break;
1199 case 'd':
1200 dflag++;
1201 break;
1202 case 'o':
1203 ytab++;
1204 ytabc = ARGF();
1205 break;
1206 case 's':
1207 stem++;
1208 stemc = ARGF();
1209 break;
1210 case 'S':
1211 parser = PARSERS;
1212 break;
1213 default:
1214 error("illegal option: %c", ARGC());
1215 }ARGEND
1216 openup(stemc, dflag, vflag, ytab, ytabc);
1218 if((fd = mkstemp(ttempname)) >= 0){
1219 tempname = ttempname;
1220 ftemp = Bfdopen(fd, OWRITE);
1222 if((fd = mkstemp(tactname)) >= 0){
1223 actname = tactname;
1224 faction = Bfdopen(fd, OWRITE);
1226 if(ftemp == 0 || faction == 0)
1227 error("cannot open temp file");
1228 if(argc < 1)
1229 error("no input file");
1230 infile = argv[0];
1231 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1232 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1233 s = malloc(i);
1234 if(s != nil){
1235 snprint(s, i, "%s/%s", dirbuf, infile);
1236 cleanname(s);
1237 infile = s;
1240 finput = Bopen(infile, OREAD);
1241 if(finput == 0)
1242 error("cannot open '%s'", argv[0]);
1243 cnamp = cnames;
1245 defin(0, "$end");
1246 extval = PRIVATE; /* tokens start in unicode 'private use' */
1247 defin(0, "error");
1248 defin(1, "$accept");
1249 defin(0, "$unk");
1250 mem = mem0;
1251 i = 0;
1253 for(t = gettok(); t != MARK && t != ENDFILE;)
1254 switch(t) {
1255 case ';':
1256 t = gettok();
1257 break;
1259 case START:
1260 if(gettok() != IDENTIFIER)
1261 error("bad %%start construction");
1262 start = chfind(1, tokname);
1263 t = gettok();
1264 continue;
1266 case TYPEDEF:
1267 if(gettok() != TYPENAME)
1268 error("bad syntax in %%type");
1269 ty = numbval;
1270 for(;;) {
1271 t = gettok();
1272 switch(t) {
1273 case IDENTIFIER:
1274 if((t=chfind(1, tokname)) < NTBASE) {
1275 j = TYPE(toklev[t]);
1276 if(j != 0 && j != ty)
1277 error("type redeclaration of token %s",
1278 tokset[t].name);
1279 else
1280 SETTYPE(toklev[t], ty);
1281 } else {
1282 j = nontrst[t-NTBASE].value;
1283 if(j != 0 && j != ty)
1284 error("type redeclaration of nonterminal %s",
1285 nontrst[t-NTBASE].name );
1286 else
1287 nontrst[t-NTBASE].value = ty;
1289 case ',':
1290 continue;
1291 case ';':
1292 t = gettok();
1293 default:
1294 break;
1296 break;
1298 continue;
1300 case UNION:
1301 /* copy the union declaration to the output */
1302 cpyunion();
1303 t = gettok();
1304 continue;
1306 case LEFT:
1307 case BINARY:
1308 case RIGHT:
1309 i++;
1311 case TERM:
1312 /* nonzero means new prec. and assoc. */
1313 lev = t-TERM;
1314 ty = 0;
1316 /* get identifiers so defined */
1317 t = gettok();
1319 /* there is a type defined */
1320 if(t == TYPENAME) {
1321 ty = numbval;
1322 t = gettok();
1324 for(;;) {
1325 switch(t) {
1326 case ',':
1327 t = gettok();
1328 continue;
1330 case ';':
1331 break;
1333 case IDENTIFIER:
1334 j = chfind(0, tokname);
1335 if(j >= NTBASE)
1336 error("%s defined earlier as nonterminal", tokname);
1337 if(lev) {
1338 if(ASSOC(toklev[j]))
1339 error("redeclaration of precedence of %s", tokname);
1340 SETASC(toklev[j], lev);
1341 SETPLEV(toklev[j], i);
1343 if(ty) {
1344 if(TYPE(toklev[j]))
1345 error("redeclaration of type of %s", tokname);
1346 SETTYPE(toklev[j],ty);
1348 t = gettok();
1349 if(t == NUMBER) {
1350 tokset[j].value = numbval;
1351 if(j < ndefout && j > 3)
1352 error("please define type number of %s earlier",
1353 tokset[j].name);
1354 t = gettok();
1356 continue;
1358 break;
1360 continue;
1362 case LCURLY:
1363 defout(0);
1364 cpycode();
1365 t = gettok();
1366 continue;
1368 default:
1369 error("syntax error");
1371 if(t == ENDFILE)
1372 error("unexpected EOF before %%");
1374 /* t is MARK */
1375 Bprint(ftable, "extern int yyerrflag;\n");
1376 Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1377 Bprint(ftable, "#define YYMAXDEPTH 150\n");
1378 Bprint(ftable, "#endif\n" );
1379 if(!ntypes) {
1380 Bprint(ftable, "#ifndef YYSTYPE\n");
1381 Bprint(ftable, "#define YYSTYPE int\n");
1382 Bprint(ftable, "#endif\n");
1384 Bprint(ftable, "YYSTYPE yylval;\n");
1385 Bprint(ftable, "YYSTYPE yyval;\n");
1387 prdptr[0] = mem;
1389 /* added production */
1390 *mem++ = NTBASE;
1392 /* if start is 0, we will overwrite with the lhs of the first rule */
1393 *mem++ = start;
1394 *mem++ = 1;
1395 *mem++ = 0;
1396 prdptr[1] = mem;
1397 while((t=gettok()) == LCURLY)
1398 cpycode();
1399 if(t != IDENTCOLON)
1400 error("bad syntax on first rule");
1402 if(!start)
1403 prdptr[0][1] = chfind(1, tokname);
1405 /* read rules */
1406 while(t != MARK && t != ENDFILE) {
1407 /* process a rule */
1408 rlines[nprod] = lineno;
1409 if(t == '|')
1410 *mem++ = *prdptr[nprod-1];
1411 else
1412 if(t == IDENTCOLON) {
1413 *mem = chfind(1, tokname);
1414 if(*mem < NTBASE)
1415 error("token illegal on LHS of grammar rule");
1416 mem++;
1417 } else
1418 error("illegal rule: missing semicolon or | ?");
1419 /* read rule body */
1420 t = gettok();
1422 more_rule:
1423 while(t == IDENTIFIER) {
1424 *mem = chfind(1, tokname);
1425 if(*mem < NTBASE)
1426 levprd[nprod] = toklev[*mem];
1427 mem++;
1428 t = gettok();
1430 if(t == PREC) {
1431 if(gettok() != IDENTIFIER)
1432 error("illegal %%prec syntax");
1433 j = chfind(2, tokname);
1434 if(j >= NTBASE)
1435 error("nonterminal %s illegal after %%prec",
1436 nontrst[j-NTBASE].name);
1437 levprd[nprod] = toklev[j];
1438 t = gettok();
1440 if(t == '=') {
1441 levprd[nprod] |= ACTFLAG;
1442 Bprint(faction, "\ncase %d:", nprod);
1443 cpyact(mem-prdptr[nprod]-1);
1444 Bprint(faction, " break;");
1445 if((t=gettok()) == IDENTIFIER) {
1447 /* action within rule... */
1448 sprint(actnm, "$$%d", nprod);
1450 /* make it a nonterminal */
1451 j = chfind(1, actnm);
1454 * the current rule will become rule number nprod+1
1455 * move the contents down, and make room for the null
1457 for(p = mem; p >= prdptr[nprod]; --p)
1458 p[2] = *p;
1459 mem += 2;
1461 /* enter null production for action */
1462 p = prdptr[nprod];
1463 *p++ = j;
1464 *p++ = -nprod;
1466 /* update the production information */
1467 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1468 levprd[nprod] = ACTFLAG;
1469 if(++nprod >= NPROD)
1470 error("more than %d rules", NPROD);
1471 prdptr[nprod] = p;
1473 /* make the action appear in the original rule */
1474 *mem++ = j;
1476 /* get some more of the rule */
1477 goto more_rule;
1481 while(t == ';')
1482 t = gettok();
1483 *mem++ = -nprod;
1485 /* check that default action is reasonable */
1486 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1488 /* no explicit action, LHS has value */
1489 int tempty;
1491 tempty = prdptr[nprod][1];
1492 if(tempty < 0)
1493 error("must return a value, since LHS has a type");
1494 else
1495 if(tempty >= NTBASE)
1496 tempty = nontrst[tempty-NTBASE].value;
1497 else
1498 tempty = TYPE(toklev[tempty]);
1499 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1500 error("default action causes potential type clash");
1502 nprod++;
1503 if(nprod >= NPROD)
1504 error("more than %d rules", NPROD);
1505 prdptr[nprod] = mem;
1506 levprd[nprod] = 0;
1509 /* end of all rules */
1510 defout(1);
1512 finact();
1513 if(t == MARK) {
1514 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1515 while((c=Bgetrune(finput)) != Beof)
1516 Bputrune(ftable, c);
1518 Bterm(finput);
1522 * finish action routine
1524 void
1525 finact(void)
1528 Bterm(faction);
1529 Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1530 Bprint(ftable, "#define YYERRCODE %d\n", 2);
1534 * define s to be a terminal if t=0
1535 * or a nonterminal if t=1
1537 int
1538 defin(int nt, char *s)
1540 int val;
1541 Rune rune;
1543 val = 0;
1544 if(nt) {
1545 nnonter++;
1546 if(nnonter >= NNONTERM)
1547 error("too many nonterminals, limit %d",NNONTERM);
1548 nontrst[nnonter].name = cstash(s);
1549 return NTBASE + nnonter;
1552 /* must be a token */
1553 ntokens++;
1554 if(ntokens >= NTERMS)
1555 error("too many terminals, limit %d", NTERMS);
1556 tokset[ntokens].name = cstash(s);
1558 /* establish value for token */
1559 /* single character literal */
1560 if(s[0] == ' ') {
1561 val = chartorune(&rune, &s[1]);
1562 if(s[val+1] == 0) {
1563 val = rune;
1564 goto out;
1568 /* escape sequence */
1569 if(s[0] == ' ' && s[1] == '\\') {
1570 if(s[3] == 0) {
1571 /* single character escape sequence */
1572 switch(s[2]) {
1573 case 'n': val = '\n'; break;
1574 case 'r': val = '\r'; break;
1575 case 'b': val = '\b'; break;
1576 case 't': val = '\t'; break;
1577 case 'f': val = '\f'; break;
1578 case '\'': val = '\''; break;
1579 case '"': val = '"'; break;
1580 case '\\': val = '\\'; break;
1581 default: error("invalid escape");
1583 goto out;
1586 /* \nnn sequence */
1587 if(s[2] >= '0' && s[2] <= '7') {
1588 if(s[3] < '0' ||
1589 s[3] > '7' ||
1590 s[4] < '0' ||
1591 s[4] > '7' ||
1592 s[5] != 0)
1593 error("illegal \\nnn construction");
1594 val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1595 if(val == 0)
1596 error("'\\000' is illegal");
1597 goto out;
1599 error("unknown escape");
1601 val = extval++;
1603 out:
1604 tokset[ntokens].value = val;
1605 toklev[ntokens] = 0;
1606 return ntokens;
1610 * write out the defines (at the end of the declaration section)
1612 void
1613 defout(int last)
1615 int i, c;
1616 char sar[NAMESIZE+10];
1618 for(i=ndefout; i<=ntokens; i++) {
1619 /* non-literals */
1620 c = tokset[i].name[0];
1621 if(c != ' ' && c != '$') {
1622 Bprint(ftable, "#define %s %d\n",
1623 tokset[i].name, tokset[i].value);
1624 if(fdefine)
1625 Bprint(fdefine, "#define\t%s\t%d\n",
1626 tokset[i].name, tokset[i].value);
1629 ndefout = ntokens+1;
1630 if(last && fdebug) {
1631 Bprint(fdebug, "char* yytoknames[] =\n{\n");
1632 TLOOP(i) {
1633 if(tokset[i].name) {
1634 chcopy(sar, tokset[i].name);
1635 Bprint(fdebug, "\t\"%s\",\n", sar);
1636 continue;
1638 Bprint(fdebug, "\t0,\n");
1640 Bprint(fdebug, "};\n");
1644 char*
1645 cstash(char *s)
1647 char *temp;
1649 temp = cnamp;
1650 do {
1651 if(cnamp >= &cnames[cnamsz])
1652 error("too many characters in id's and literals");
1653 else
1654 *cnamp++ = *s;
1655 } while(*s++);
1656 return temp;
1659 long
1660 gettok(void)
1662 long c;
1663 Rune rune;
1664 int i, base, match, reserve;
1665 static int peekline;
1667 begin:
1668 reserve = 0;
1669 lineno += peekline;
1670 peekline = 0;
1671 c = Bgetrune(finput);
1672 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1673 if(c == '\n')
1674 lineno++;
1675 c = Bgetrune(finput);
1678 /* skip comment */
1679 if(c == '/') {
1680 lineno += skipcom();
1681 goto begin;
1683 switch(c) {
1684 case Beof:
1685 return ENDFILE;
1687 case '{':
1688 Bungetrune(finput);
1689 return '=';
1691 case '<':
1692 /* get, and look up, a type name (union member name) */
1693 i = 0;
1694 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1695 rune = c;
1696 c = runetochar(&tokname[i], &rune);
1697 if(i < NAMESIZE)
1698 i += c;
1700 if(c != '>')
1701 error("unterminated < ... > clause");
1702 tokname[i] = 0;
1703 for(i=1; i<=ntypes; i++)
1704 if(!strcmp(typeset[i], tokname)) {
1705 numbval = i;
1706 return TYPENAME;
1708 ntypes++;
1709 numbval = ntypes;
1710 typeset[numbval] = cstash(tokname);
1711 return TYPENAME;
1713 case '"':
1714 case '\'':
1715 match = c;
1716 tokname[0] = ' ';
1717 i = 1;
1718 for(;;) {
1719 c = Bgetrune(finput);
1720 if(c == '\n' || c <= 0)
1721 error("illegal or missing ' or \"" );
1722 if(c == '\\') {
1723 tokname[i] = '\\';
1724 if(i < NAMESIZE)
1725 i++;
1726 c = Bgetrune(finput);
1727 } else
1728 if(c == match)
1729 break;
1730 rune = c;
1731 c = runetochar(&tokname[i], &rune);
1732 if(i < NAMESIZE)
1733 i += c;
1735 break;
1737 case '%':
1738 case '\\':
1739 switch(c = Bgetrune(finput)) {
1740 case '0': return TERM;
1741 case '<': return LEFT;
1742 case '2': return BINARY;
1743 case '>': return RIGHT;
1744 case '%':
1745 case '\\': return MARK;
1746 case '=': return PREC;
1747 case '{': return LCURLY;
1748 default: reserve = 1;
1751 default:
1752 /* number */
1753 if(isdigit(c)) {
1754 numbval = c-'0';
1755 base = (c=='0')? 8: 10;
1756 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1757 numbval = numbval*base + (c-'0');
1758 Bungetrune(finput);
1759 return NUMBER;
1761 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
1762 i = 0;
1763 while(islower(c) || isupper(c) || isdigit(c) ||
1764 c == '-' || c=='_' || c=='.' || c=='$') {
1765 if(reserve && isupper(c))
1766 c += 'a'-'A';
1767 rune = c;
1768 c = runetochar(&tokname[i], &rune);
1769 if(i < NAMESIZE)
1770 i += c;
1771 c = Bgetrune(finput);
1773 } else
1774 return c;
1775 Bungetrune(finput);
1777 tokname[i] = 0;
1779 /* find a reserved word */
1780 if(reserve) {
1781 for(c=0; resrv[c].name; c++)
1782 if(strcmp(tokname, resrv[c].name) == 0)
1783 return resrv[c].value;
1784 error("invalid escape, or illegal reserved word: %s", tokname);
1787 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1788 c = Bgetrune(finput);
1789 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1790 if(c == '\n')
1791 peekline++;
1792 /* look for comments */
1793 if(c == '/')
1794 peekline += skipcom();
1795 c = Bgetrune(finput);
1797 if(c == ':')
1798 return IDENTCOLON;
1799 Bungetrune(finput);
1800 return IDENTIFIER;
1804 * determine the type of a symbol
1806 int
1807 fdtype(int t)
1809 int v;
1811 if(t >= NTBASE)
1812 v = nontrst[t-NTBASE].value;
1813 else
1814 v = TYPE(toklev[t]);
1815 if(v <= 0)
1816 error("must specify type for %s", (t>=NTBASE)?
1817 nontrst[t-NTBASE].name: tokset[t].name);
1818 return v;
1821 int
1822 chfind(int t, char *s)
1824 int i;
1826 if(s[0] == ' ')
1827 t = 0;
1828 TLOOP(i)
1829 if(!strcmp(s, tokset[i].name))
1830 return i;
1831 NTLOOP(i)
1832 if(!strcmp(s, nontrst[i].name))
1833 return NTBASE+i;
1835 /* cannot find name */
1836 if(t > 1)
1837 error("%s should have been defined earlier", s);
1838 return defin(t, s);
1842 * copy the union declaration to the output, and the define file if present
1844 void
1845 cpyunion(void)
1847 long c;
1848 int level;
1850 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1851 Bprint(ftable, "typedef union ");
1852 if(fdefine != 0)
1853 Bprint(fdefine, "\ntypedef union ");
1855 level = 0;
1856 for(;;) {
1857 if((c=Bgetrune(finput)) == Beof)
1858 error("EOF encountered while processing %%union");
1859 Bputrune(ftable, c);
1860 if(fdefine != 0)
1861 Bputrune(fdefine, c);
1862 switch(c) {
1863 case '\n':
1864 lineno++;
1865 break;
1866 case '{':
1867 level++;
1868 break;
1869 case '}':
1870 level--;
1872 /* we are finished copying */
1873 if(level == 0) {
1874 Bprint(ftable, " YYSTYPE;\n");
1875 if(fdefine != 0)
1876 Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1877 return;
1884 * copies code between \{ and \}
1886 void
1887 cpycode(void)
1890 long c;
1892 c = Bgetrune(finput);
1893 if(c == '\n') {
1894 c = Bgetrune(finput);
1895 lineno++;
1897 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1898 while(c != Beof) {
1899 if(c == '\\') {
1900 if((c=Bgetrune(finput)) == '}')
1901 return;
1902 Bputc(ftable, '\\');
1904 if(c == '%') {
1905 if((c=Bgetrune(finput)) == '}')
1906 return;
1907 Bputc(ftable, '%');
1909 Bputrune(ftable, c);
1910 if(c == '\n')
1911 lineno++;
1912 c = Bgetrune(finput);
1914 error("eof before %%}");
1918 * skip over comments
1919 * skipcom is called after reading a '/'
1921 int
1922 skipcom(void)
1924 long c;
1925 int i;
1927 /* i is the number of lines skipped */
1928 i = 0;
1929 if(Bgetrune(finput) != '*')
1930 error("illegal comment");
1931 c = Bgetrune(finput);
1932 while(c != Beof) {
1933 while(c == '*')
1934 if((c=Bgetrune(finput)) == '/')
1935 return i;
1936 if(c == '\n')
1937 i++;
1938 c = Bgetrune(finput);
1940 error("EOF inside comment");
1941 return 0;
1945 * copy C action to the next ; or closing }
1947 void
1948 cpyact(int offset)
1950 long c;
1951 int brac, match, j, s, fnd, tok;
1953 Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1954 brac = 0;
1956 loop:
1957 c = Bgetrune(finput);
1958 swt:
1959 switch(c) {
1960 case ';':
1961 if(brac == 0) {
1962 Bputrune(faction, c);
1963 return;
1965 goto lcopy;
1967 case '{':
1968 brac++;
1969 goto lcopy;
1971 case '$':
1972 s = 1;
1973 tok = -1;
1974 c = Bgetrune(finput);
1976 /* type description */
1977 if(c == '<') {
1978 Bungetrune(finput);
1979 if(gettok() != TYPENAME)
1980 error("bad syntax on $<ident> clause");
1981 tok = numbval;
1982 c = Bgetrune(finput);
1984 if(c == '$') {
1985 Bprint(faction, "yyval");
1987 /* put out the proper tag... */
1988 if(ntypes) {
1989 if(tok < 0)
1990 tok = fdtype(*prdptr[nprod]);
1991 Bprint(faction, ".%s", typeset[tok]);
1993 goto loop;
1995 if(c == '-') {
1996 s = -s;
1997 c = Bgetrune(finput);
1999 if(isdigit(c)) {
2000 j = 0;
2001 while(isdigit(c)) {
2002 j = j*10 + (c-'0');
2003 c = Bgetrune(finput);
2005 Bungetrune(finput);
2006 j = j*s - offset;
2007 if(j > 0)
2008 error("Illegal use of $%d", j+offset);
2010 dollar:
2011 Bprint(faction, "yypt[-%d].yyv", -j);
2013 /* put out the proper tag */
2014 if(ntypes) {
2015 if(j+offset <= 0 && tok < 0)
2016 error("must specify type of $%d", j+offset);
2017 if(tok < 0)
2018 tok = fdtype(prdptr[nprod][j+offset]);
2019 Bprint(faction, ".%s", typeset[tok]);
2021 goto loop;
2023 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2024 int tok; /* tok used oustide for type info */
2026 /* look for $name */
2027 Bungetrune(finput);
2028 if(gettok() != IDENTIFIER)
2029 error("$ must be followed by an identifier");
2030 tok = chfind(2, tokname);
2031 if((c = Bgetrune(finput)) != '#') {
2032 Bungetrune(finput);
2033 fnd = -1;
2034 } else
2035 if(gettok() != NUMBER) {
2036 error("# must be followed by number");
2037 fnd = -1;
2038 } else
2039 fnd = numbval;
2040 for(j=1; j<=offset; ++j)
2041 if(tok == prdptr[nprod][j]) {
2042 if(--fnd <= 0) {
2043 j -= offset;
2044 goto dollar;
2047 error("$name or $name#number not found");
2049 Bputc(faction, '$');
2050 if(s < 0 )
2051 Bputc(faction, '-');
2052 goto swt;
2054 case '}':
2055 brac--;
2056 if(brac)
2057 goto lcopy;
2058 Bputrune(faction, c);
2059 return;
2061 case '/':
2062 /* look for comments */
2063 Bputrune(faction, c);
2064 c = Bgetrune(finput);
2065 if(c != '*')
2066 goto swt;
2068 /* it really is a comment */
2069 Bputrune(faction, c);
2070 c = Bgetrune(finput);
2071 while(c >= 0) {
2072 while(c == '*') {
2073 Bputrune(faction, c);
2074 if((c=Bgetrune(finput)) == '/')
2075 goto lcopy;
2077 Bputrune(faction, c);
2078 if(c == '\n')
2079 lineno++;
2080 c = Bgetrune(finput);
2082 error("EOF inside comment");
2084 case '\'':
2085 /* character constant */
2086 match = '\'';
2087 goto string;
2089 case '"':
2090 /* character string */
2091 match = '"';
2093 string:
2094 Bputrune(faction, c);
2095 while(c = Bgetrune(finput)) {
2096 if(c == '\\') {
2097 Bputrune(faction, c);
2098 c = Bgetrune(finput);
2099 if(c == '\n')
2100 lineno++;
2101 } else
2102 if(c == match)
2103 goto lcopy;
2104 if(c == '\n')
2105 error("newline in string or char. const.");
2106 Bputrune(faction, c);
2108 error("EOF in string or character constant");
2110 case Beof:
2111 error("action does not terminate");
2113 case '\n':
2114 lineno++;
2115 goto lcopy;
2118 lcopy:
2119 Bputrune(faction, c);
2120 goto loop;
2123 void
2124 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2126 char buf[256];
2128 if(vflag) {
2129 sprint(buf, "%s.%s", stem, FILEU);
2130 foutput = Bopen(buf, OWRITE);
2131 if(foutput == 0)
2132 error("cannot open %s", buf);
2134 if(yydebug) {
2135 sprint(buf, "%s.%s", stem, FILEDEBUG);
2136 if((fdebug = Bopen(buf, OWRITE)) == 0)
2137 error("can't open %s", buf);
2139 if(dflag) {
2140 sprint(buf, "%s.%s", stem, FILED);
2141 fdefine = Bopen(buf, OWRITE);
2142 if(fdefine == 0)
2143 error("can't create %s", buf);
2145 if(ytab == 0)
2146 sprint(buf, "%s.%s", stem, OFILE);
2147 else
2148 strcpy(buf, ytabc);
2149 ftable = Bopen(buf, OWRITE);
2150 if(ftable == 0)
2151 error("cannot open table file %s", buf);
2155 * print the output for the states
2157 void
2158 output(void)
2160 int i, k, c;
2161 Wset *u, *v;
2163 Bprint(ftable, "short yyexca[] =\n{");
2164 if(fdebug)
2165 Bprint(fdebug, "char* yystates[] =\n{\n");
2167 /* output the stuff for state i */
2168 SLOOP(i) {
2169 nolook = tystate[i]!=MUSTLOOKAHEAD;
2170 closure(i);
2172 /* output actions */
2173 nolook = 1;
2174 aryfil(temp1, ntokens+nnonter+1, 0);
2175 WSLOOP(wsets, u) {
2176 c = *(u->pitem);
2177 if(c > 1 && c < NTBASE && temp1[c] == 0) {
2178 WSLOOP(u, v)
2179 if(c == *(v->pitem))
2180 putitem(v->pitem+1, (Lkset*)0);
2181 temp1[c] = state(c);
2182 } else
2183 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2184 temp1[c+ntokens] = amem[indgo[i]+c];
2186 if(i == 1)
2187 temp1[1] = ACCEPTCODE;
2189 /* now, we have the shifts; look at the reductions */
2190 lastred = 0;
2191 WSLOOP(wsets, u) {
2192 c = *u->pitem;
2194 /* reduction */
2195 if(c <= 0) {
2196 lastred = -c;
2197 TLOOP(k)
2198 if(BIT(u->ws.lset, k)) {
2199 if(temp1[k] == 0)
2200 temp1[k] = c;
2201 else
2202 if(temp1[k] < 0) { /* reduce/reduce conflict */
2203 if(foutput)
2204 Bprint(foutput,
2205 "\n%d: reduce/reduce conflict"
2206 " (red'ns %d and %d ) on %s",
2207 i, -temp1[k], lastred,
2208 symnam(k));
2209 if(-temp1[k] > lastred)
2210 temp1[k] = -lastred;
2211 zzrrconf++;
2212 } else
2213 /* potential shift/reduce conflict */
2214 precftn( lastred, k, i );
2218 wract(i);
2221 if(fdebug)
2222 Bprint(fdebug, "};\n");
2223 Bprint(ftable, "};\n");
2224 Bprint(ftable, "#define YYNPROD %d\n", nprod);
2225 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2226 if(yydebug)
2227 Bprint(ftable, "#define yydebug %s\n", yydebug);
2231 * pack state i from temp1 into amem
2233 int
2234 apack(int *p, int n)
2236 int *pp, *qq, *rr, off, *q, *r;
2238 /* we don't need to worry about checking because
2239 * we will only look at entries known to be there...
2240 * eliminate leading and trailing 0's
2243 q = p+n;
2244 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2246 /* no actions */
2247 if(pp > q)
2248 return 0;
2249 p = pp;
2251 /* now, find a place for the elements from p to q, inclusive */
2252 r = &amem[ACTSIZE-1];
2253 for(rr = amem; rr <= r; rr++, off++) {
2254 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2255 if(*pp != 0)
2256 if(*pp != *qq && *qq != 0)
2257 goto nextk;
2259 /* we have found an acceptable k */
2260 if(pkdebug && foutput != 0)
2261 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2262 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2263 if(*pp) {
2264 if(qq > r)
2265 error("action table overflow");
2266 if(qq > memp)
2267 memp = qq;
2268 *qq = *pp;
2270 if(pkdebug && foutput != 0)
2271 for(pp = amem; pp <= memp; pp += 10) {
2272 Bprint(foutput, "\t");
2273 for(qq = pp; qq <= pp+9; qq++)
2274 Bprint(foutput, "%d ", *qq);
2275 Bprint(foutput, "\n");
2277 return(off);
2278 nextk:;
2280 error("no space in action table");
2281 return 0;
2285 * output the gotos for the nontermninals
2287 void
2288 go2out(void)
2290 int i, j, k, best, count, cbest, times;
2292 /* mark begining of gotos */
2293 Bprint(ftemp, "$\n");
2294 for(i = 1; i <= nnonter; i++) {
2295 go2gen(i);
2297 /* find the best one to make default */
2298 best = -1;
2299 times = 0;
2301 /* is j the most frequent */
2302 for(j = 0; j <= nstate; j++) {
2303 if(tystate[j] == 0)
2304 continue;
2305 if(tystate[j] == best)
2306 continue;
2308 /* is tystate[j] the most frequent */
2309 count = 0;
2310 cbest = tystate[j];
2311 for(k = j; k <= nstate; k++)
2312 if(tystate[k] == cbest)
2313 count++;
2314 if(count > times) {
2315 best = cbest;
2316 times = count;
2320 /* best is now the default entry */
2321 zzgobest += times-1;
2322 for(j = 0; j <= nstate; j++)
2323 if(tystate[j] != 0 && tystate[j] != best) {
2324 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2325 zzgoent++;
2328 /* now, the default */
2329 if(best == -1)
2330 best = 0;
2331 zzgoent++;
2332 Bprint(ftemp, "%d\n", best);
2337 * output the gotos for nonterminal c
2339 void
2340 go2gen(int c)
2342 int i, work, cc;
2343 Item *p, *q;
2346 /* first, find nonterminals with gotos on c */
2347 aryfil(temp1, nnonter+1, 0);
2348 temp1[c] = 1;
2349 work = 1;
2350 while(work) {
2351 work = 0;
2352 PLOOP(0, i)
2354 /* cc is a nonterminal */
2355 if((cc=prdptr[i][1]-NTBASE) >= 0)
2356 /* cc has a goto on c */
2357 if(temp1[cc] != 0) {
2359 /* thus, the left side of production i does too */
2360 cc = *prdptr[i]-NTBASE;
2361 if(temp1[cc] == 0) {
2362 work = 1;
2363 temp1[cc] = 1;
2368 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2369 if(g2debug && foutput != 0) {
2370 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2371 NTLOOP(i)
2372 if(temp1[i])
2373 Bprint(foutput, "%s ", nontrst[i].name);
2374 Bprint(foutput, "\n");
2377 /* now, go through and put gotos into tystate */
2378 aryfil(tystate, nstate, 0);
2379 SLOOP(i)
2380 ITMLOOP(i, p, q)
2381 if((cc = *p->pitem) >= NTBASE)
2382 /* goto on c is possible */
2383 if(temp1[cc-NTBASE]) {
2384 tystate[i] = amem[indgo[i]+c];
2385 break;
2390 * decide a shift/reduce conflict by precedence.
2391 * r is a rule number, t a token number
2392 * the conflict is in state s
2393 * temp1[t] is changed to reflect the action
2395 void
2396 precftn(int r, int t, int s)
2398 int lp, lt, action;
2400 lp = levprd[r];
2401 lt = toklev[t];
2402 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2404 /* conflict */
2405 if(foutput != 0)
2406 Bprint(foutput,
2407 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2408 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2409 zzsrconf++;
2410 return;
2412 if(PLEVEL(lt) == PLEVEL(lp))
2413 action = ASSOC(lt);
2414 else
2415 if(PLEVEL(lt) > PLEVEL(lp))
2416 action = RASC; /* shift */
2417 else
2418 action = LASC; /* reduce */
2419 switch(action) {
2420 case BASC: /* error action */
2421 temp1[t] = ERRCODE;
2422 break;
2423 case LASC: /* reduce */
2424 temp1[t] = -r;
2425 break;
2430 * output state i
2431 * temp1 has the actions, lastred the default
2433 void
2434 wract(int i)
2436 int p, p0, p1, ntimes, tred, count, j, flag;
2438 /* find the best choice for lastred */
2439 lastred = 0;
2440 ntimes = 0;
2441 TLOOP(j) {
2442 if(temp1[j] >= 0)
2443 continue;
2444 if(temp1[j]+lastred == 0)
2445 continue;
2446 /* count the number of appearances of temp1[j] */
2447 count = 0;
2448 tred = -temp1[j];
2449 levprd[tred] |= REDFLAG;
2450 TLOOP(p)
2451 if(temp1[p]+tred == 0)
2452 count++;
2453 if(count > ntimes) {
2454 lastred = tred;
2455 ntimes = count;
2460 * for error recovery, arrange that, if there is a shift on the
2461 * error recovery token, `error', that the default be the error action
2463 if(temp1[2] > 0)
2464 lastred = 0;
2466 /* clear out entries in temp1 which equal lastred */
2467 TLOOP(p)
2468 if(temp1[p]+lastred == 0)
2469 temp1[p] = 0;
2471 wrstate(i);
2472 defact[i] = lastred;
2473 flag = 0;
2474 TLOOP(p0)
2475 if((p1=temp1[p0]) != 0) {
2476 if(p1 < 0) {
2477 p1 = -p1;
2478 goto exc;
2480 if(p1 == ACCEPTCODE) {
2481 p1 = -1;
2482 goto exc;
2484 if(p1 == ERRCODE) {
2485 p1 = 0;
2486 exc:
2487 if(flag++ == 0)
2488 Bprint(ftable, "-1, %d,\n", i);
2489 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2490 zzexcp++;
2491 continue;
2493 Bprint(ftemp, "%d,%d,", p0, p1);
2494 zzacent++;
2496 if(flag) {
2497 defact[i] = -2;
2498 Bprint(ftable, "\t-2, %d,\n", lastred);
2500 Bprint(ftemp, "\n");
2504 * writes state i
2506 void
2507 wrstate(int i)
2509 int j0, j1;
2510 Item *pp, *qq;
2511 Wset *u;
2513 if(fdebug) {
2514 if(lastred) {
2515 Bprint(fdebug, " 0, /*%d*/\n", i);
2516 } else {
2517 Bprint(fdebug, " \"");
2518 ITMLOOP(i, pp, qq)
2519 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2520 if(tystate[i] == MUSTLOOKAHEAD)
2521 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2522 if(*u->pitem < 0)
2523 Bprint(fdebug, "%s\\n", writem(u->pitem));
2524 Bprint(fdebug, "\", /*%d*/\n", i);
2527 if(foutput == 0)
2528 return;
2529 Bprint(foutput, "\nstate %d\n", i);
2530 ITMLOOP(i, pp, qq)
2531 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2532 if(tystate[i] == MUSTLOOKAHEAD)
2533 /* print out empty productions in closure */
2534 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2535 if(*u->pitem < 0)
2536 Bprint(foutput, "\t%s\n", writem(u->pitem));
2538 /* check for state equal to another */
2539 TLOOP(j0)
2540 if((j1=temp1[j0]) != 0) {
2541 Bprint(foutput, "\n\t%s ", symnam(j0));
2542 /* shift, error, or accept */
2543 if(j1 > 0) {
2544 if(j1 == ACCEPTCODE)
2545 Bprint(foutput, "accept");
2546 else
2547 if(j1 == ERRCODE)
2548 Bprint(foutput, "error");
2549 else
2550 Bprint(foutput, "shift %d", j1);
2551 } else
2552 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2555 /* output the final production */
2556 if(lastred)
2557 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
2558 lastred, rlines[lastred]);
2559 else
2560 Bprint(foutput, "\n\t. error\n\n");
2562 /* now, output nonterminal actions */
2563 j1 = ntokens;
2564 for(j0 = 1; j0 <= nnonter; j0++) {
2565 j1++;
2566 if(temp1[j1])
2567 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2571 void
2572 warray(char *s, int *v, int n)
2574 int i;
2576 Bprint(ftable, "short %s[] =\n{", s);
2577 for(i=0;;) {
2578 if(i%10 == 0)
2579 Bprint(ftable, "\n");
2580 Bprint(ftable, "%4d", v[i]);
2581 i++;
2582 if(i >= n) {
2583 Bprint(ftable, "\n};\n");
2584 break;
2586 Bprint(ftable, ",");
2591 * in order to free up the mem and amem arrays for the optimizer,
2592 * and still be able to output yyr1, etc., after the sizes of
2593 * the action array is known, we hide the nonterminals
2594 * derived by productions in levprd.
2597 void
2598 hideprod(void)
2600 int i, j;
2602 j = 0;
2603 levprd[0] = 0;
2604 PLOOP(1, i) {
2605 if(!(levprd[i] & REDFLAG)) {
2606 j++;
2607 if(foutput != 0)
2608 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
2610 levprd[i] = *prdptr[i] - NTBASE;
2612 if(j)
2613 print("%d rules never reduced\n", j);
2616 void
2617 callopt(void)
2619 int i, *p, j, k, *q;
2621 /* read the arrays from tempfile and set parameters */
2622 finput = Bopen(tempname, OREAD);
2623 if(finput == 0)
2624 error("optimizer cannot open tempfile");
2626 pgo[0] = 0;
2627 temp1[0] = 0;
2628 nstate = 0;
2629 nnonter = 0;
2630 for(;;) {
2631 switch(gtnm()) {
2632 case '\n':
2633 nstate++;
2634 pmem--;
2635 temp1[nstate] = pmem - mem0;
2636 case ',':
2637 continue;
2638 case '$':
2639 break;
2640 default:
2641 error("bad tempfile");
2643 break;
2646 pmem--;
2647 temp1[nstate] = yypgo[0] = pmem - mem0;
2648 for(;;) {
2649 switch(gtnm()) {
2650 case '\n':
2651 nnonter++;
2652 yypgo[nnonter] = pmem-mem0;
2653 case ',':
2654 continue;
2655 case -1:
2656 break;
2657 default:
2658 error("bad tempfile");
2660 break;
2662 pmem--;
2663 yypgo[nnonter--] = pmem - mem0;
2664 for(i = 0; i < nstate; i++) {
2665 k = 32000;
2666 j = 0;
2667 q = mem0 + temp1[i+1];
2668 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2669 if(*p > j)
2670 j = *p;
2671 if(*p < k)
2672 k = *p;
2674 /* nontrivial situation */
2675 if(k <= j) {
2676 /* j is now the range */
2677 /* j -= k; *//* call scj */
2678 if(k > maxoff)
2679 maxoff = k;
2681 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2682 if(j > maxspr)
2683 maxspr = j;
2686 /* initialize ggreed table */
2687 for(i = 1; i <= nnonter; i++) {
2688 ggreed[i] = 1;
2689 j = 0;
2691 /* minimum entry index is always 0 */
2692 q = mem0 + yypgo[i+1] - 1;
2693 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2694 ggreed[i] += 2;
2695 if(*p > j)
2696 j = *p;
2698 ggreed[i] = ggreed[i] + 2*j;
2699 if(j > maxoff)
2700 maxoff = j;
2703 /* now, prepare to put the shift actions into the amem array */
2704 for(i = 0; i < ACTSIZE; i++)
2705 amem[i] = 0;
2706 maxa = amem;
2707 for(i = 0; i < nstate; i++) {
2708 if(tystate[i] == 0 && adb > 1)
2709 Bprint(ftable, "State %d: null\n", i);
2710 indgo[i] = YYFLAG1;
2712 while((i = nxti()) != NOMORE)
2713 if(i >= 0)
2714 stin(i);
2715 else
2716 gin(-i);
2718 /* print amem array */
2719 if(adb > 2 )
2720 for(p = amem; p <= maxa; p += 10) {
2721 Bprint(ftable, "%4d ", (int)(p-amem));
2722 for(i = 0; i < 10; ++i)
2723 Bprint(ftable, "%4d ", p[i]);
2724 Bprint(ftable, "\n");
2727 /* write out the output appropriate to the language */
2728 aoutput();
2729 osummary();
2730 ZAPFILE(tempname);
2733 void
2734 gin(int i)
2736 int *p, *r, *s, *q1, *q2;
2738 /* enter gotos on nonterminal i into array amem */
2739 ggreed[i] = 0;
2741 q2 = mem0+ yypgo[i+1] - 1;
2742 q1 = mem0 + yypgo[i];
2744 /* now, find amem place for it */
2745 for(p = amem; p < &amem[ACTSIZE]; p++) {
2746 if(*p)
2747 continue;
2748 for(r = q1; r < q2; r += 2) {
2749 s = p + *r + 1;
2750 if(*s)
2751 goto nextgp;
2752 if(s > maxa)
2753 if((maxa = s) > &amem[ACTSIZE])
2754 error("a array overflow");
2756 /* we have found amem spot */
2757 *p = *q2;
2758 if(p > maxa)
2759 if((maxa = p) > &amem[ACTSIZE])
2760 error("a array overflow");
2761 for(r = q1; r < q2; r += 2) {
2762 s = p + *r + 1;
2763 *s = r[1];
2765 pgo[i] = p-amem;
2766 if(adb > 1)
2767 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2768 return;
2770 nextgp:;
2772 error("cannot place goto %d\n", i);
2775 void
2776 stin(int i)
2778 int *r, *s, n, flag, j, *q1, *q2;
2780 tystate[i] = 0;
2782 /* enter state i into the amem array */
2783 q2 = mem0+temp1[i+1];
2784 q1 = mem0+temp1[i];
2785 /* find an acceptable place */
2786 for(n = -maxoff; n < ACTSIZE; n++) {
2787 flag = 0;
2788 for(r = q1; r < q2; r += 2) {
2789 if((s = *r + n + amem) < amem)
2790 goto nextn;
2791 if(*s == 0)
2792 flag++;
2793 else
2794 if(*s != r[1])
2795 goto nextn;
2798 /* check that the position equals another only if the states are identical */
2799 for(j=0; j<nstate; j++) {
2800 if(indgo[j] == n) {
2802 /* we have some disagreement */
2803 if(flag)
2804 goto nextn;
2805 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2807 /* states are equal */
2808 indgo[i] = n;
2809 if(adb > 1)
2810 Bprint(ftable,
2811 "State %d: entry at %d equals state %d\n",
2812 i, n, j);
2813 return;
2816 /* we have some disagreement */
2817 goto nextn;
2821 for(r = q1; r < q2; r += 2) {
2822 if((s = *r+n+amem) >= &amem[ACTSIZE])
2823 error("out of space in optimizer a array");
2824 if(s > maxa)
2825 maxa = s;
2826 if(*s != 0 && *s != r[1])
2827 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2828 *s = r[1];
2830 indgo[i] = n;
2831 if(adb > 1)
2832 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2833 return;
2834 nextn:;
2836 error("Error; failure to place state %d\n", i);
2840 * finds the next i
2842 int
2843 nxti(void)
2845 int i, max, maxi;
2847 max = 0;
2848 maxi = 0;
2849 for(i = 1; i <= nnonter; i++)
2850 if(ggreed[i] >= max) {
2851 max = ggreed[i];
2852 maxi = -i;
2854 for(i = 0; i < nstate; ++i)
2855 if(tystate[i] >= max) {
2856 max = tystate[i];
2857 maxi = i;
2859 if(nxdb)
2860 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2861 if(max == 0)
2862 return NOMORE;
2863 return maxi;
2867 * write summary
2869 void
2870 osummary(void)
2873 int i, *p;
2875 if(foutput == 0)
2876 return;
2877 i = 0;
2878 for(p = maxa; p >= amem; p--)
2879 if(*p == 0)
2880 i++;
2882 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2883 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2884 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2885 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2889 * this version is for C
2890 * write out the optimized parser
2892 void
2893 aoutput(void)
2895 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2896 arout("yyact", amem, (maxa-amem)+1);
2897 arout("yypact", indgo, nstate);
2898 arout("yypgo", pgo, nnonter+1);
2901 void
2902 arout(char *s, int *v, int n)
2904 int i;
2906 Bprint(ftable, "short %s[] =\n{", s);
2907 for(i = 0; i < n;) {
2908 if(i%10 == 0)
2909 Bprint(ftable, "\n");
2910 Bprint(ftable, "%4d", v[i]);
2911 i++;
2912 if(i == n)
2913 Bprint(ftable, "\n};\n");
2914 else
2915 Bprint(ftable, ",");
2920 * read and convert an integer from the standard input
2921 * return the terminating character
2922 * blanks, tabs, and newlines are ignored
2924 int
2925 gtnm(void)
2927 int sign, val, c;
2929 sign = 0;
2930 val = 0;
2931 while((c=Bgetrune(finput)) != Beof) {
2932 if(isdigit(c)) {
2933 val = val*10 + c-'0';
2934 continue;
2936 if(c == '-') {
2937 sign = 1;
2938 continue;
2940 break;
2942 if(sign)
2943 val = -val;
2944 *pmem++ = val;
2945 if(pmem >= &mem0[MEMSIZE])
2946 error("out of space");
2947 return c;