Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
6 #define Bungetrune Bungetc /* ok for now. */
8 /*
9 * all these are 32 bit
10 */
11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n) (((n)+32)/32)
16 char *PARSER = "#9/lib/yaccpar";
17 char *PARSERS = "#9/lib/yaccpars";
19 #define TEMPNAME "y.tmp.XXXXXX"
20 #define ACTNAME "y.acts.XXXXXX"
21 #define OFILE "tab.c"
22 #define FILEU "output"
23 #define FILED "tab.h"
24 #define FILEDEBUG "debug"
26 enum
27 {
28 /*
29 * the following are adjustable
30 * according to memory size
31 */
32 ACTSIZE = 40000,
33 MEMSIZE = 40000,
34 NSTATES = 2000,
35 NTERMS = 511,
36 NPROD = 1600,
37 NNONTERM = 600,
38 TEMPSIZE = 2000,
39 CNAMSZ = 10000,
40 LSETSIZE = 2400,
41 WSETSIZE = 350,
43 NAMESIZE = 50,
44 NTYPES = 63,
45 ISIZE = 400,
47 PRIVATE = 0xE000, /* unicode private use */
49 /* relationships which must hold:
50 TBITSET ints must hold NTERMS+1 bits...
51 WSETSIZE >= NNONTERM
52 LSETSIZE >= NNONTERM
53 TEMPSIZE >= NTERMS + NNONTERM + 1
54 TEMPSIZE >= NSTATES
55 */
57 NTBASE = 010000,
58 ERRCODE = 8190,
59 ACCEPTCODE = 8191,
61 NOASC = 0, /* no assoc. */
62 LASC = 1, /* left assoc. */
63 RASC = 2, /* right assoc. */
64 BASC = 3, /* binary assoc. */
66 /* flags for state generation */
68 DONE = 0,
69 MUSTDO = 1,
70 MUSTLOOKAHEAD = 2,
72 /* flags for a rule having an action, and being reduced */
74 ACTFLAG = 04,
75 REDFLAG = 010,
77 /* output parser flags */
78 YYFLAG1 = -1000,
80 /* parse tokens */
81 IDENTIFIER = PRIVATE,
82 MARK,
83 TERM,
84 LEFT,
85 RIGHT,
86 BINARY,
87 PREC,
88 LCURLY,
89 IDENTCOLON,
90 NUMBER,
91 START,
92 TYPEDEF,
93 TYPENAME,
94 UNION,
96 ENDFILE = 0,
98 EMPTY = 1,
99 WHOKNOWS = 0,
100 OK = 1,
101 NOMORE = -1000
102 };
104 /* macros for getting associativity and precedence levels */
106 #define ASSOC(i) ((i)&03)
107 #define PLEVEL(i) (((i)>>4)&077)
108 #define TYPE(i) (((i)>>10)&077)
110 /* macros for setting associativity and precedence levels */
112 #define SETASC(i,j) i |= j
113 #define SETPLEV(i,j) i |= (j<<4)
114 #define SETTYPE(i,j) i |= (j<<10)
116 /* looping macros */
118 #define TLOOP(i) for(i=1; i<=ntokens; i++)
119 #define NTLOOP(i) for(i=0; i<=nnonter; i++)
120 #define PLOOP(s,i) for(i=s; i<nprod; i++)
121 #define SLOOP(i) for(i=0; i<nstate; i++)
122 #define WSBUMP(x) x++
123 #define WSLOOP(s,j) for(j=s; j<cwp; j++)
124 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
125 #define SETLOOP(i) for(i=0; i<tbitset; i++)
127 /* command to clobber tempfiles after use */
129 #define ZAPFILE(x) if(x) remove(x)
131 /* I/O descriptors */
133 Biobuf* faction; /* file for saving actions */
134 Biobuf* fdefine; /* file for #defines */
135 Biobuf* fdebug; /* y.debug for strings for debugging */
136 Biobuf* ftable; /* y.tab.c file */
137 Biobuf* ftemp; /* tempfile to pass 2 */
138 Biobuf* finput; /* input file */
139 Biobuf* foutput; /* y.output file */
141 /* communication variables between various I/O routines */
143 char* infile; /* input file name */
144 int numbval; /* value of an input number */
145 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */
147 /* structure declarations */
149 typedef
150 struct
152 int lset[TBITSET];
153 } Lkset;
155 typedef
156 struct
158 int* pitem;
159 Lkset* look;
160 } Item;
162 typedef
163 struct
165 char* name;
166 int value;
167 } Symb;
169 typedef
170 struct
172 int* pitem;
173 int flag;
174 Lkset ws;
175 } Wset;
177 /* storage of names */
179 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
180 int cnamsz = CNAMSZ; /* size of cnames */
181 char* cnamp = cnames; /* place where next name is to be put in */
182 int ndefout = 4; /* number of defined symbols output */
183 char* tempname;
184 char* actname;
185 char ttempname[] = TEMPNAME;
186 char tactname[] = ACTNAME;
187 char* parser;
188 char* yydebug;
189 int yyarg;
190 int yyline = 1;
192 /* storage of types */
193 int ntypes; /* number of types defined */
194 char* typeset[NTYPES]; /* pointers to type tags */
196 /* token information */
198 int ntokens = 0 ; /* number of tokens */
199 Symb tokset[NTERMS];
200 int toklev[NTERMS]; /* vector with the precedence of the terminals */
202 /* nonterminal information */
204 int nnonter = -1; /* the number of nonterminals */
205 Symb nontrst[NNONTERM];
206 int start; /* start symbol */
208 /* assigned token type values */
209 int extval = 0;
211 char* ytabc = OFILE; /* name of y.tab.c */
213 /* grammar rule information */
215 int mem0[MEMSIZE] ; /* production storage */
216 int* mem = mem0;
217 int nprod = 1; /* number of productions */
218 int* prdptr[NPROD]; /* pointers to descriptions of productions */
219 int levprd[NPROD]; /* precedence levels for the productions */
220 int rlines[NPROD]; /* line number for this rule */
222 /* state information */
224 int nstate = 0; /* number of states */
225 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
226 int tystate[NSTATES]; /* contains type information about the states */
227 int defact[NSTATES]; /* the default actions of states */
228 int tstates[NTERMS]; /* states generated by terminal gotos */
229 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
230 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
231 int lastred; /* the number of the last reduction of a state */
233 /* lookahead set information */
235 Lkset lkst[LSETSIZE];
236 int nolook; /* flag to turn off lookahead computations */
237 int tbitset; /* size of lookahead sets */
238 int nlset = 0; /* next lookahead set index */
239 int nolook = 0; /* flag to suppress lookahead computations */
240 Lkset clset; /* temporary storage for lookahead computations */
242 /* working set information */
244 Wset wsets[WSETSIZE];
245 Wset* cwp;
247 /* storage for action table */
249 int amem[ACTSIZE]; /* action table storage */
250 int* memp = amem; /* next free action table position */
251 int indgo[NSTATES]; /* index to the stored goto table */
253 /* temporary vector, indexable by states, terms, or ntokens */
255 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
256 int lineno = 1; /* current input line number */
257 int fatfl = 1; /* if on, error is fatal */
258 int nerrors = 0; /* number of errors */
260 /* statistics collection variables */
262 int zzgoent;
263 int zzgobest;
264 int zzacent;
265 int zzexcp;
266 int zzclose;
267 int zzrrconf;
268 int zzsrconf;
270 int* ggreed = lkst[0].lset;
271 int* pgo = wsets[0].ws.lset;
272 int* yypgo = &nontrst[0].value;
274 int maxspr = 0; /* maximum spread of any entry */
275 int maxoff = 0; /* maximum offset into a array */
276 int* pmem = mem0;
277 int* maxa;
278 int nxdb = 0;
279 int adb = 0;
282 /* storage for information about the nonterminals */
284 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
285 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
286 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
288 /* random stuff picked out from between functions */
290 int indebug = 0;
291 Wset* zzcwp = wsets;
292 int zzgoent = 0;
293 int zzgobest = 0;
294 int zzacent = 0;
295 int zzexcp = 0;
296 int zzclose = 0;
297 int zzsrconf = 0;
298 int* zzmemsz = mem0;
299 int zzrrconf = 0;
300 int pidebug = 0; /* debugging flag for putitem */
301 int gsdebug = 0;
302 int cldebug = 0; /* debugging flag for closure */
303 int pkdebug = 0;
304 int g2debug = 0;
306 struct
308 char* name;
309 long value;
310 } resrv[] =
312 "binary", BINARY,
313 "left", LEFT,
314 "nonassoc", BINARY,
315 "prec", PREC,
316 "right", RIGHT,
317 "start", START,
318 "term", TERM,
319 "token", TERM,
320 "type", TYPEDEF,
321 "union", UNION,
322 0,
323 };
325 /* define functions */
327 void main(int, char**);
328 void others(void);
329 char* chcopy(char*, char*);
330 char* writem(int*);
331 char* symnam(int);
332 void summary(void);
333 void error(char*, ...);
334 void aryfil(int*, int, int);
335 int setunion(int*, int*);
336 void prlook(Lkset*);
337 void cpres(void);
338 void cpfir(void);
339 int state(int);
340 void putitem(int*, Lkset*);
341 void cempty(void);
342 void stagen(void);
343 void closure(int);
344 Lkset* flset(Lkset*);
345 void cleantmp(void);
346 void intr(void);
347 void setup(int, char**);
348 void finact(void);
349 int defin(int, char*);
350 void defout(int);
351 char* cstash(char*);
352 int isvalidchar(long);
353 long gettok(void);
354 int fdtype(int);
355 int chfind(int, char*);
356 void cpyunion(void);
357 void cpycode(void);
358 int skipcom(void);
359 void cpyact(int);
360 void openup(char*, int, int, int, char*);
361 void output(void);
362 int apack(int*, int);
363 void go2out(void);
364 void go2gen(int);
365 void precftn(int, int, int);
366 void wract(int);
367 void wrstate(int);
368 void warray(char*, int*, int);
369 void hideprod(void);
370 void callopt(void);
371 void gin(int);
372 void stin(int);
373 int nxti(void);
374 void osummary(void);
375 void aoutput(void);
376 void arout(char*, int*, int);
377 int gtnm(void);
379 void
380 main(int argc, char *argv[])
382 PARSER = unsharp(PARSER);
383 PARSERS = unsharp(PARSERS);
384 parser = PARSER;
386 setup(argc, argv); /* initialize and read productions */
387 tbitset = NWORDS(ntokens);
388 cpres(); /* make table of which productions yield a given nonterminal */
389 cempty(); /* make a table of which nonterminals can match the empty string */
390 cpfir(); /* make a table of firsts of nonterminals */
391 stagen(); /* generate the states */
392 output(); /* write the states and the tables */
393 go2out();
394 hideprod();
395 summary();
396 callopt();
397 others();
398 exits(0);
401 /*
402 * put out other arrays, copy the parsers
403 */
404 void
405 others(void)
407 int c, i, j;
409 finput = Bopen(parser, OREAD);
410 if(finput == 0)
411 error("cannot open parser %s: %r", parser);
412 warray("yyr1", levprd, nprod);
413 aryfil(temp1, nprod, 0);
414 PLOOP(1, i)
415 temp1[i] = prdptr[i+1]-prdptr[i]-2;
416 warray("yyr2", temp1, nprod);
418 aryfil(temp1, nstate, -1000);
419 TLOOP(i)
420 for(j=tstates[i]; j!=0; j=mstates[j])
421 temp1[j] = i;
422 NTLOOP(i)
423 for(j=ntstates[i]; j!=0; j=mstates[j])
424 temp1[j] = -i;
425 warray("yychk", temp1, nstate);
426 warray("yydef", defact, nstate);
428 /* put out token translation tables */
429 /* table 1 has 0-256 */
430 aryfil(temp1, 256, 0);
431 c = 0;
432 TLOOP(i) {
433 j = tokset[i].value;
434 if(j >= 0 && j < 256) {
435 if(temp1[j]) {
436 print("yacc bug -- cant have 2 different Ts with same value\n");
437 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
438 nerrors++;
440 temp1[j] = i;
441 if(j > c)
442 c = j;
445 warray("yytok1", temp1, c+1);
447 /* table 2 has PRIVATE-PRIVATE+256 */
448 aryfil(temp1, 256, 0);
449 c = 0;
450 TLOOP(i) {
451 j = tokset[i].value - PRIVATE;
452 if(j >= 0 && j < 256) {
453 if(temp1[j]) {
454 print("yacc bug -- cant have 2 different Ts with same value\n");
455 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
456 nerrors++;
458 temp1[j] = i;
459 if(j > c)
460 c = j;
463 warray("yytok2", temp1, c+1);
465 /* table 3 has everything else */
466 Bprint(ftable, "static\tconst\tlong yytok3[] =\n{\n");
467 c = 0;
468 TLOOP(i) {
469 j = tokset[i].value;
470 if(j >= 0 && j < 256)
471 continue;
472 if(j >= PRIVATE && j < 256+PRIVATE)
473 continue;
475 Bprint(ftable, "%4d,%4d,", j, i);
476 c++;
477 if(c%5 == 0)
478 Bprint(ftable, "\n");
480 Bprint(ftable, "%4d\n};\n", 0);
482 /* copy parser text */
483 while((c=Bgetrune(finput)) != Beof) {
484 if(c == '$') {
485 if((c = Bgetrune(finput)) != 'A')
486 Bputrune(ftable, '$');
487 else { /* copy actions */
488 faction = Bopen(actname, OREAD);
489 if(faction == 0)
490 error("cannot reopen action tempfile");
491 while((c=Bgetrune(faction)) != Beof)
492 Bputrune(ftable, c);
493 Bterm(faction);
494 ZAPFILE(actname);
495 c = Bgetrune(finput);
498 Bputrune(ftable, c);
500 Bterm(ftable);
503 /*
504 * copies string q into p, returning next free char ptr
505 */
506 char*
507 chcopy(char* p, char* q)
509 int c;
511 while(c = *q) {
512 if(c == '"')
513 *p++ = '\\';
514 *p++ = c;
515 q++;
517 *p = 0;
518 return p;
521 /*
522 * creates output string for item pointed to by pp
523 */
524 char*
525 writem(int *pp)
527 int i,*p;
528 static char sarr[ISIZE];
529 char* q;
531 for(p=pp; *p>0; p++)
533 p = prdptr[-*p];
534 q = chcopy(sarr, nontrst[*p-NTBASE].name);
535 q = chcopy(q, ": ");
536 for(;;) {
537 *q = ' ';
538 p++;
539 if(p == pp)
540 *q = '.';
541 q++;
542 *q = '\0';
543 i = *p;
544 if(i <= 0)
545 break;
546 q = chcopy(q, symnam(i));
547 if(q > &sarr[ISIZE-30])
548 error("item too big");
551 /* an item calling for a reduction */
552 i = *pp;
553 if(i < 0 ) {
554 q = chcopy(q, " (");
555 sprint(q, "%d)", -i);
557 return sarr;
560 /*
561 * return a pointer to the name of symbol i
562 */
563 char*
564 symnam(int i)
566 char* cp;
568 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
569 if(*cp == ' ')
570 cp++;
571 return cp;
574 /*
575 * output the summary on y.output
576 */
577 void
578 summary(void)
581 if(foutput != 0) {
582 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
583 ntokens, NTERMS, nnonter, NNONTERM);
584 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
585 nprod, NPROD, nstate, NSTATES);
586 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
587 zzsrconf, zzrrconf);
588 Bprint(foutput, "%d/%d working sets used\n",
589 (int)(zzcwp-wsets), WSETSIZE);
590 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
591 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
592 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
593 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
594 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
595 Bprint(foutput, "%d goto entries\n", zzgoent);
596 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
598 if(zzsrconf != 0 || zzrrconf != 0) {
599 print("\nconflicts: ");
600 if(zzsrconf)
601 print("%d shift/reduce", zzsrconf);
602 if(zzsrconf && zzrrconf)
603 print(", ");
604 if(zzrrconf)
605 print("%d reduce/reduce", zzrrconf);
606 print("\n");
608 if(ftemp != 0) {
609 Bterm(ftemp);
610 ftemp = 0;
612 if(fdefine != 0) {
613 Bterm(fdefine);
614 fdefine = 0;
618 /*
619 * write out error comment -- NEEDS WORK
620 */
621 void
622 error(char *s, ...)
624 va_list arg;
626 nerrors++;
627 fprint(2, "\n fatal error:");
628 va_start(arg, s);
629 vfprint(2, s, arg);
630 va_end(arg);
631 fprint(2, ", %s:%d\n", infile, lineno);
632 if(!fatfl)
633 return;
634 summary();
635 cleantmp();
636 exits("error");
639 /*
640 * set elements 0 through n-1 to c
641 */
642 void
643 aryfil(int *v, int n, int c)
645 int i;
647 for(i=0; i<n; i++)
648 v[i] = c;
651 /*
652 * set a to the union of a and b
653 * return 1 if b is not a subset of a, 0 otherwise
654 */
655 int
656 setunion(int *a, int *b)
658 int i, x, sub;
660 sub = 0;
661 SETLOOP(i) {
662 x = *a;
663 *a |= *b;
664 if(*a != x)
665 sub = 1;
666 a++;
667 b++;
669 return sub;
672 void
673 prlook(Lkset* p)
675 int j, *pp;
677 pp = p->lset;
678 if(pp == 0)
679 Bprint(foutput, "\tNULL");
680 else {
681 Bprint(foutput, " { ");
682 TLOOP(j)
683 if(BIT(pp,j))
684 Bprint(foutput, "%s ", symnam(j));
685 Bprint(foutput, "}");
689 /*
690 * compute an array with the beginnings of productions yielding given nonterminals
691 * The array pres points to these lists
692 * the array pyield has the lists: the total size is only NPROD+1
693 */
694 void
695 cpres(void)
697 int c, j, i, **pmem;
698 static int *pyield[NPROD];
700 pmem = pyield;
701 NTLOOP(i) {
702 c = i+NTBASE;
703 pres[i] = pmem;
704 fatfl = 0; /* make undefined symbols nonfatal */
705 PLOOP(0, j)
706 if(*prdptr[j] == c)
707 *pmem++ = prdptr[j]+1;
708 if(pres[i] == pmem)
709 error("nonterminal %s not defined!", nontrst[i].name);
711 pres[i] = pmem;
712 fatfl = 1;
713 if(nerrors) {
714 summary();
715 cleantmp();
716 exits("error");
718 if(pmem != &pyield[nprod])
719 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
722 /*
723 * compute an array with the first of nonterminals
724 */
725 void
726 cpfir(void)
728 int *p, **s, i, **t, ch, changes;
730 zzcwp = &wsets[nnonter];
731 NTLOOP(i) {
732 aryfil(wsets[i].ws.lset, tbitset, 0);
733 t = pres[i+1];
734 /* initially fill the sets */
735 for(s=pres[i]; s<t; ++s)
736 for(p = *s; (ch = *p) > 0; ++p) {
737 if(ch < NTBASE) {
738 SETBIT(wsets[i].ws.lset, ch);
739 break;
741 if(!pempty[ch-NTBASE])
742 break;
746 /* now, reflect transitivity */
747 changes = 1;
748 while(changes) {
749 changes = 0;
750 NTLOOP(i) {
751 t = pres[i+1];
752 for(s = pres[i]; s < t; ++s)
753 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
754 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
755 if(!pempty[ch])
756 break;
761 NTLOOP(i)
762 pfirst[i] = flset(&wsets[i].ws);
763 if(!indebug)
764 return;
765 if(foutput != 0)
766 NTLOOP(i) {
767 Bprint(foutput, "\n%s: ", nontrst[i].name);
768 prlook(pfirst[i]);
769 Bprint(foutput, " %d\n", pempty[i]);
773 /*
774 * sorts last state,and sees if it equals earlier ones. returns state number
775 */
776 int
777 state(int c)
779 Item *p1, *p2, *k, *l, *q1, *q2;
780 int size1, size2, i;
782 p1 = pstate[nstate];
783 p2 = pstate[nstate+1];
784 if(p1 == p2)
785 return 0; /* null state */
786 /* sort the items */
787 for(k = p2-1; k > p1; k--) /* make k the biggest */
788 for(l = k-1; l >= p1; --l)
789 if(l->pitem > k->pitem) {
790 int *s;
791 Lkset *ss;
793 s = k->pitem;
794 k->pitem = l->pitem;
795 l->pitem = s;
796 ss = k->look;
797 k->look = l->look;
798 l->look = ss;
800 size1 = p2 - p1; /* size of state */
802 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
803 /* get ith state */
804 q1 = pstate[i];
805 q2 = pstate[i+1];
806 size2 = q2 - q1;
807 if(size1 != size2)
808 continue;
809 k = p1;
810 for(l = q1; l < q2; l++) {
811 if(l->pitem != k->pitem)
812 break;
813 k++;
815 if(l != q2)
816 continue;
817 /* found it */
818 pstate[nstate+1] = pstate[nstate]; /* delete last state */
819 /* fix up lookaheads */
820 if(nolook)
821 return i;
822 for(l = q1, k = p1; l < q2; ++l, ++k ) {
823 int s;
825 SETLOOP(s)
826 clset.lset[s] = l->look->lset[s];
827 if(setunion(clset.lset, k->look->lset)) {
828 tystate[i] = MUSTDO;
829 /* register the new set */
830 l->look = flset( &clset );
833 return i;
835 /* state is new */
836 if(nolook)
837 error("yacc state/nolook error");
838 pstate[nstate+2] = p2;
839 if(nstate+1 >= NSTATES)
840 error("too many states");
841 if(c >= NTBASE) {
842 mstates[nstate] = ntstates[c-NTBASE];
843 ntstates[c-NTBASE] = nstate;
844 } else {
845 mstates[nstate] = tstates[c];
846 tstates[c] = nstate;
848 tystate[nstate] = MUSTDO;
849 return nstate++;
852 void
853 putitem(int *ptr, Lkset *lptr)
855 Item *j;
857 if(pidebug && foutput != 0)
858 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
859 j = pstate[nstate+1];
860 j->pitem = ptr;
861 if(!nolook)
862 j->look = flset(lptr);
863 pstate[nstate+1] = ++j;
864 if((int*)j > zzmemsz) {
865 zzmemsz = (int*)j;
866 if(zzmemsz >= &mem0[MEMSIZE])
867 error("out of state space");
871 /*
872 * mark nonterminals which derive the empty string
873 * also, look for nonterminals which don't derive any token strings
874 */
875 void
876 cempty(void)
879 int i, *p;
881 /* first, use the array pempty to detect productions that can never be reduced */
882 /* set pempty to WHONOWS */
883 aryfil(pempty, nnonter+1, WHOKNOWS);
885 /* now, look at productions, marking nonterminals which derive something */
886 more:
887 PLOOP(0, i) {
888 if(pempty[*prdptr[i] - NTBASE])
889 continue;
890 for(p = prdptr[i]+1; *p >= 0; ++p)
891 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
892 break;
893 /* production can be derived */
894 if(*p < 0) {
895 pempty[*prdptr[i]-NTBASE] = OK;
896 goto more;
900 /* now, look at the nonterminals, to see if they are all OK */
901 NTLOOP(i) {
902 /* the added production rises or falls as the start symbol ... */
903 if(i == 0)
904 continue;
905 if(pempty[i] != OK) {
906 fatfl = 0;
907 error("nonterminal %s never derives any token string", nontrst[i].name);
911 if(nerrors) {
912 summary();
913 cleantmp();
914 exits("error");
917 /* now, compute the pempty array, to see which nonterminals derive the empty string */
918 /* set pempty to WHOKNOWS */
919 aryfil( pempty, nnonter+1, WHOKNOWS);
921 /* loop as long as we keep finding empty nonterminals */
923 again:
924 PLOOP(1, i) {
925 /* not known to be empty */
926 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
927 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
929 /* we have a nontrivially empty nonterminal */
930 if(*p < 0) {
931 pempty[*prdptr[i]-NTBASE] = EMPTY;
932 /* got one ... try for another */
933 goto again;
939 /*
940 * generate the states
941 */
942 void
943 stagen(void)
946 int c, i, j, more;
947 Wset *p, *q;
949 /* initialize */
950 nstate = 0;
952 /* THIS IS FUNNY from the standpoint of portability
953 * it represents the magic moment when the mem0 array, which has
954 * been holding the productions, starts to hold item pointers, of a
955 * different type...
956 * someday, alloc should be used to allocate all this stuff... for now, we
957 * accept that if pointers don't fit in integers, there is a problem...
958 */
960 pstate[0] = pstate[1] = (Item*)mem;
961 aryfil(clset.lset, tbitset, 0);
962 putitem(prdptr[0]+1, &clset);
963 tystate[0] = MUSTDO;
964 nstate = 1;
965 pstate[2] = pstate[1];
967 aryfil(amem, ACTSIZE, 0);
969 /* now, the main state generation loop */
970 for(more=1; more;) {
971 more = 0;
972 SLOOP(i) {
973 if(tystate[i] != MUSTDO)
974 continue;
975 tystate[i] = DONE;
976 aryfil(temp1, nnonter+1, 0);
977 /* take state i, close it, and do gotos */
978 closure(i);
979 /* generate goto's */
980 WSLOOP(wsets, p) {
981 if(p->flag)
982 continue;
983 p->flag = 1;
984 c = *(p->pitem);
985 if(c <= 1) {
986 if(pstate[i+1]-pstate[i] <= p-wsets)
987 tystate[i] = MUSTLOOKAHEAD;
988 continue;
990 /* do a goto on c */
991 WSLOOP(p, q)
992 /* this item contributes to the goto */
993 if(c == *(q->pitem)) {
994 putitem(q->pitem+1, &q->ws);
995 q->flag = 1;
997 if(c < NTBASE)
998 state(c); /* register new state */
999 else
1000 temp1[c-NTBASE] = state(c);
1002 if(gsdebug && foutput != 0) {
1003 Bprint(foutput, "%d: ", i);
1004 NTLOOP(j)
1005 if(temp1[j])
1006 Bprint(foutput, "%s %d, ",
1007 nontrst[j].name, temp1[j]);
1008 Bprint(foutput, "\n");
1010 indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1011 /* do some more */
1012 more = 1;
1018 * generate the closure of state i
1020 void
1021 closure(int i)
1024 Wset *u, *v;
1025 Item *p, *q;
1026 int c, ch, work, k, *pi, **s, **t;
1028 zzclose++;
1030 /* first, copy kernel of state i to wsets */
1031 cwp = wsets;
1032 ITMLOOP(i, p, q) {
1033 cwp->pitem = p->pitem;
1034 cwp->flag = 1; /* this item must get closed */
1035 SETLOOP(k)
1036 cwp->ws.lset[k] = p->look->lset[k];
1037 WSBUMP(cwp);
1040 /* now, go through the loop, closing each item */
1041 work = 1;
1042 while(work) {
1043 work = 0;
1044 WSLOOP(wsets, u) {
1045 if(u->flag == 0)
1046 continue;
1047 /* dot is before c */
1048 c = *(u->pitem);
1049 if(c < NTBASE) {
1050 u->flag = 0;
1051 /* only interesting case is where . is before nonterminal */
1052 continue;
1055 /* compute the lookahead */
1056 aryfil(clset.lset, tbitset, 0);
1058 /* find items involving c */
1059 WSLOOP(u, v)
1060 if(v->flag == 1 && *(pi=v->pitem) == c) {
1061 v->flag = 0;
1062 if(nolook)
1063 continue;
1064 while((ch = *++pi) > 0) {
1065 /* terminal symbol */
1066 if(ch < NTBASE) {
1067 SETBIT(clset.lset, ch);
1068 break;
1070 /* nonterminal symbol */
1071 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1072 if(!pempty[ch-NTBASE])
1073 break;
1075 if(ch <= 0)
1076 setunion(clset.lset, v->ws.lset);
1080 * now loop over productions derived from c
1081 * c is now nonterminal number
1083 c -= NTBASE;
1084 t = pres[c+1];
1085 for(s = pres[c]; s < t; ++s) {
1087 * put these items into the closure
1088 * is the item there
1090 WSLOOP(wsets, v)
1091 /* yes, it is there */
1092 if(v->pitem == *s) {
1093 if(nolook)
1094 goto nexts;
1095 if(setunion(v->ws.lset, clset.lset))
1096 v->flag = work = 1;
1097 goto nexts;
1100 /* not there; make a new entry */
1101 if(cwp-wsets+1 >= WSETSIZE)
1102 error( "working set overflow");
1103 cwp->pitem = *s;
1104 cwp->flag = 1;
1105 if(!nolook) {
1106 work = 1;
1107 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1109 WSBUMP(cwp);
1111 nexts:;
1116 /* have computed closure; flags are reset; return */
1117 if(cwp > zzcwp)
1118 zzcwp = cwp;
1119 if(cldebug && foutput != 0) {
1120 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1121 WSLOOP(wsets, u) {
1122 if(u->flag)
1123 Bprint(foutput, "flag set!\n");
1124 u->flag = 0;
1125 Bprint(foutput, "\t%s", writem(u->pitem));
1126 prlook(&u->ws);
1127 Bprint(foutput, "\n");
1133 * decide if the lookahead set pointed to by p is known
1134 * return pointer to a perminent location for the set
1136 Lkset*
1137 flset(Lkset *p)
1139 Lkset *q;
1140 int *u, *v, *w, j;
1142 for(q = &lkst[nlset]; q-- > lkst;) {
1143 u = p->lset;
1144 v = q->lset;
1145 w = &v[tbitset];
1146 while(v < w)
1147 if(*u++ != *v++)
1148 goto more;
1149 /* we have matched */
1150 return q;
1151 more:;
1153 /* add a new one */
1154 q = &lkst[nlset++];
1155 if(nlset >= LSETSIZE)
1156 error("too many lookahead sets");
1157 SETLOOP(j)
1158 q->lset[j] = p->lset[j];
1159 return q;
1162 void
1163 cleantmp(void)
1165 ZAPFILE(actname);
1166 ZAPFILE(tempname);
1169 void
1170 intr(void)
1172 cleantmp();
1173 exits("interrupted");
1176 void
1177 setup(int argc, char *argv[])
1179 long c, t;
1180 int i, j, fd, lev, ty, ytab, *p;
1181 int vflag, dflag, stem;
1182 char actnm[8], *stemc, *s, dirbuf[128];
1183 Biobuf *fout;
1185 ytab = 0;
1186 vflag = 0;
1187 dflag = 0;
1188 stem = 0;
1189 stemc = "y";
1190 foutput = 0;
1191 fdefine = 0;
1192 fdebug = 0;
1193 ARGBEGIN{
1194 case 'v':
1195 case 'V':
1196 vflag++;
1197 break;
1198 case 'D':
1199 yydebug = ARGF();
1200 break;
1201 case 'a':
1202 yyarg = 1;
1203 break;
1204 case 'd':
1205 dflag++;
1206 break;
1207 case 'l':
1208 yyline = 0;
1209 break;
1210 case 'o':
1211 ytab++;
1212 ytabc = ARGF();
1213 break;
1214 case 's':
1215 stem++;
1216 stemc = ARGF();
1217 break;
1218 case 'S':
1219 parser = PARSERS;
1220 break;
1221 default:
1222 error("illegal option: %c", ARGC());
1223 }ARGEND
1224 openup(stemc, dflag, vflag, ytab, ytabc);
1225 fout = dflag?fdefine:ftable;
1226 if(yyarg){
1227 Bprint(ftable, "#define\tYYARG\t1\n\n");
1229 if((fd = mkstemp(ttempname)) >= 0){
1230 tempname = ttempname;
1231 ftemp = Bfdopen(fd, OWRITE);
1233 if((fd = mkstemp(tactname)) >= 0){
1234 actname = tactname;
1235 faction = Bfdopen(fd, OWRITE);
1237 if(ftemp == 0 || faction == 0)
1238 error("cannot open temp file");
1239 if(argc < 1)
1240 error("no input file");
1241 infile = argv[0];
1242 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1243 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1244 s = malloc(i);
1245 if(s != nil){
1246 snprint(s, i, "%s/%s", dirbuf, infile);
1247 cleanname(s);
1248 infile = s;
1251 finput = Bopen(infile, OREAD);
1252 if(finput == 0)
1253 error("cannot open '%s'", argv[0]);
1254 cnamp = cnames;
1256 defin(0, "$end");
1257 extval = PRIVATE; /* tokens start in unicode 'private use' */
1258 defin(0, "error");
1259 defin(1, "$accept");
1260 defin(0, "$unk");
1261 mem = mem0;
1262 i = 0;
1264 for(t = gettok(); t != MARK && t != ENDFILE;)
1265 switch(t) {
1266 case ';':
1267 t = gettok();
1268 break;
1270 case START:
1271 if(gettok() != IDENTIFIER)
1272 error("bad %%start construction");
1273 start = chfind(1, tokname);
1274 t = gettok();
1275 continue;
1277 case TYPEDEF:
1278 if(gettok() != TYPENAME)
1279 error("bad syntax in %%type");
1280 ty = numbval;
1281 for(;;) {
1282 t = gettok();
1283 switch(t) {
1284 case IDENTIFIER:
1285 if((t=chfind(1, tokname)) < NTBASE) {
1286 j = TYPE(toklev[t]);
1287 if(j != 0 && j != ty)
1288 error("type redeclaration of token %s",
1289 tokset[t].name);
1290 else
1291 SETTYPE(toklev[t], ty);
1292 } else {
1293 j = nontrst[t-NTBASE].value;
1294 if(j != 0 && j != ty)
1295 error("type redeclaration of nonterminal %s",
1296 nontrst[t-NTBASE].name );
1297 else
1298 nontrst[t-NTBASE].value = ty;
1300 case ',':
1301 continue;
1302 case ';':
1303 t = gettok();
1304 default:
1305 break;
1307 break;
1309 continue;
1311 case UNION:
1312 /* copy the union declaration to the output */
1313 cpyunion();
1314 t = gettok();
1315 continue;
1317 case LEFT:
1318 case BINARY:
1319 case RIGHT:
1320 i++;
1322 case TERM:
1323 /* nonzero means new prec. and assoc. */
1324 lev = t-TERM;
1325 ty = 0;
1327 /* get identifiers so defined */
1328 t = gettok();
1330 /* there is a type defined */
1331 if(t == TYPENAME) {
1332 ty = numbval;
1333 t = gettok();
1335 for(;;) {
1336 switch(t) {
1337 case ',':
1338 t = gettok();
1339 continue;
1341 case ';':
1342 break;
1344 case IDENTIFIER:
1345 j = chfind(0, tokname);
1346 if(j >= NTBASE)
1347 error("%s defined earlier as nonterminal", tokname);
1348 if(lev) {
1349 if(ASSOC(toklev[j]))
1350 error("redeclaration of precedence of %s", tokname);
1351 SETASC(toklev[j], lev);
1352 SETPLEV(toklev[j], i);
1354 if(ty) {
1355 if(TYPE(toklev[j]))
1356 error("redeclaration of type of %s", tokname);
1357 SETTYPE(toklev[j],ty);
1359 t = gettok();
1360 if(t == NUMBER) {
1361 tokset[j].value = numbval;
1362 if(j < ndefout && j > 3)
1363 error("please define type number of %s earlier",
1364 tokset[j].name);
1365 t = gettok();
1367 continue;
1369 break;
1371 continue;
1373 case LCURLY:
1374 defout(0);
1375 cpycode();
1376 t = gettok();
1377 continue;
1379 default:
1380 error("syntax error");
1382 if(t == ENDFILE)
1383 error("unexpected EOF before %%");
1385 /* t is MARK */
1386 if(!yyarg)
1387 Bprint(ftable, "extern int yyerrflag;\n");
1388 Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1389 Bprint(ftable, "#define YYMAXDEPTH 150\n");
1390 Bprint(ftable, "#endif\n" );
1391 if(!ntypes) {
1392 Bprint(ftable, "#ifndef YYSTYPE\n");
1393 Bprint(ftable, "#define YYSTYPE int\n");
1394 Bprint(ftable, "#endif\n");
1396 if(!yyarg){
1397 Bprint(ftable, "YYSTYPE yylval;\n");
1398 Bprint(ftable, "YYSTYPE yyval;\n");
1399 }else{
1400 if(dflag)
1401 Bprint(ftable, "#include \"%s.%s\"\n\n", stemc, FILED);
1402 Bprint(fout, "struct Yyarg {\n");
1403 Bprint(fout, "\tint\tyynerrs;\n");
1404 Bprint(fout, "\tint\tyyerrflag;\n");
1405 Bprint(fout, "\tvoid*\targ;\n");
1406 Bprint(fout, "\tYYSTYPE\tyyval;\n");
1407 Bprint(fout, "\tYYSTYPE\tyylval;\n");
1408 Bprint(fout, "};\n\n");
1410 prdptr[0] = mem;
1412 /* added production */
1413 *mem++ = NTBASE;
1415 /* if start is 0, we will overwrite with the lhs of the first rule */
1416 *mem++ = start;
1417 *mem++ = 1;
1418 *mem++ = 0;
1419 prdptr[1] = mem;
1420 while((t=gettok()) == LCURLY)
1421 cpycode();
1422 if(t != IDENTCOLON)
1423 error("bad syntax on first rule");
1425 if(!start)
1426 prdptr[0][1] = chfind(1, tokname);
1428 /* read rules */
1429 while(t != MARK && t != ENDFILE) {
1430 /* process a rule */
1431 rlines[nprod] = lineno;
1432 if(t == '|')
1433 *mem++ = *prdptr[nprod-1];
1434 else
1435 if(t == IDENTCOLON) {
1436 *mem = chfind(1, tokname);
1437 if(*mem < NTBASE)
1438 error("token illegal on LHS of grammar rule");
1439 mem++;
1440 } else
1441 error("illegal rule: missing semicolon or | ?");
1442 /* read rule body */
1443 t = gettok();
1445 more_rule:
1446 while(t == IDENTIFIER) {
1447 *mem = chfind(1, tokname);
1448 if(*mem < NTBASE)
1449 levprd[nprod] = toklev[*mem];
1450 mem++;
1451 t = gettok();
1453 if(t == PREC) {
1454 if(gettok() != IDENTIFIER)
1455 error("illegal %%prec syntax");
1456 j = chfind(2, tokname);
1457 if(j >= NTBASE)
1458 error("nonterminal %s illegal after %%prec",
1459 nontrst[j-NTBASE].name);
1460 levprd[nprod] = toklev[j];
1461 t = gettok();
1463 if(t == '=') {
1464 levprd[nprod] |= ACTFLAG;
1465 Bprint(faction, "\ncase %d:", nprod);
1466 cpyact(mem-prdptr[nprod]-1);
1467 Bprint(faction, " break;");
1468 if((t=gettok()) == IDENTIFIER) {
1470 /* action within rule... */
1471 sprint(actnm, "$$%d", nprod);
1473 /* make it a nonterminal */
1474 j = chfind(1, actnm);
1477 * the current rule will become rule number nprod+1
1478 * move the contents down, and make room for the null
1480 for(p = mem; p >= prdptr[nprod]; --p)
1481 p[2] = *p;
1482 mem += 2;
1484 /* enter null production for action */
1485 p = prdptr[nprod];
1486 *p++ = j;
1487 *p++ = -nprod;
1489 /* update the production information */
1490 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1491 levprd[nprod] = ACTFLAG;
1492 if(++nprod >= NPROD)
1493 error("more than %d rules", NPROD);
1494 prdptr[nprod] = p;
1496 /* make the action appear in the original rule */
1497 *mem++ = j;
1499 /* get some more of the rule */
1500 goto more_rule;
1504 while(t == ';')
1505 t = gettok();
1506 *mem++ = -nprod;
1508 /* check that default action is reasonable */
1509 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1511 /* no explicit action, LHS has value */
1512 int tempty;
1514 tempty = prdptr[nprod][1];
1515 if(tempty < 0)
1516 error("must return a value, since LHS has a type");
1517 else
1518 if(tempty >= NTBASE)
1519 tempty = nontrst[tempty-NTBASE].value;
1520 else
1521 tempty = TYPE(toklev[tempty]);
1522 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1523 error("default action causes potential type clash");
1525 nprod++;
1526 if(nprod >= NPROD)
1527 error("more than %d rules", NPROD);
1528 prdptr[nprod] = mem;
1529 levprd[nprod] = 0;
1532 /* end of all rules */
1533 defout(1);
1535 finact();
1536 if(t == MARK) {
1537 Bprint(ftable, "\n");
1538 if(yyline)
1539 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
1540 while((c=Bgetrune(finput)) != Beof)
1541 Bputrune(ftable, c);
1543 Bterm(finput);
1547 * finish action routine
1549 void
1550 finact(void)
1553 Bterm(faction);
1554 Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1555 Bprint(ftable, "#define YYERRCODE %d\n", 2);
1559 * define s to be a terminal if t=0
1560 * or a nonterminal if t=1
1562 int
1563 defin(int nt, char *s)
1565 int val;
1566 Rune rune;
1568 val = 0;
1569 if(nt) {
1570 nnonter++;
1571 if(nnonter >= NNONTERM)
1572 error("too many nonterminals, limit %d",NNONTERM);
1573 nontrst[nnonter].name = cstash(s);
1574 return NTBASE + nnonter;
1577 /* must be a token */
1578 ntokens++;
1579 if(ntokens >= NTERMS)
1580 error("too many terminals, limit %d", NTERMS);
1581 tokset[ntokens].name = cstash(s);
1583 /* establish value for token */
1584 /* single character literal */
1585 if(s[0] == ' ') {
1586 val = chartorune(&rune, &s[1]);
1587 if(s[val+1] == 0) {
1588 val = rune;
1589 goto out;
1593 /* escape sequence */
1594 if(s[0] == ' ' && s[1] == '\\') {
1595 if(s[3] == 0) {
1596 /* single character escape sequence */
1597 switch(s[2]) {
1598 case 'n': val = '\n'; break;
1599 case 'r': val = '\r'; break;
1600 case 'b': val = '\b'; break;
1601 case 't': val = '\t'; break;
1602 case 'f': val = '\f'; break;
1603 case '\'': val = '\''; break;
1604 case '"': val = '"'; break;
1605 case '\\': val = '\\'; break;
1606 default: error("invalid escape");
1608 goto out;
1611 /* \nnn sequence */
1612 if(s[2] >= '0' && s[2] <= '7') {
1613 if(s[3] < '0' ||
1614 s[3] > '7' ||
1615 s[4] < '0' ||
1616 s[4] > '7' ||
1617 s[5] != 0)
1618 error("illegal \\nnn construction");
1619 val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1620 if(val == 0)
1621 error("'\\000' is illegal");
1622 goto out;
1624 error("unknown escape");
1626 val = extval++;
1628 out:
1629 tokset[ntokens].value = val;
1630 toklev[ntokens] = 0;
1631 return ntokens;
1635 * write out the defines (at the end of the declaration section)
1637 void
1638 defout(int last)
1640 int i, c;
1641 char sar[NAMESIZE+10];
1643 for(i=ndefout; i<=ntokens; i++) {
1644 /* non-literals */
1645 c = tokset[i].name[0];
1646 if(c != ' ' && c != '$') {
1647 Bprint(ftable, "#define %s %d\n",
1648 tokset[i].name, tokset[i].value);
1649 if(fdefine)
1650 Bprint(fdefine, "#define\t%s\t%d\n",
1651 tokset[i].name, tokset[i].value);
1654 ndefout = ntokens+1;
1655 if(last && fdebug) {
1656 Bprint(fdebug, "static char* yytoknames[] =\n{\n");
1657 TLOOP(i) {
1658 if(tokset[i].name) {
1659 chcopy(sar, tokset[i].name);
1660 Bprint(fdebug, "\t\"%s\",\n", sar);
1661 continue;
1663 Bprint(fdebug, "\t0,\n");
1665 Bprint(fdebug, "};\n");
1669 char*
1670 cstash(char *s)
1672 char *temp;
1674 temp = cnamp;
1675 do {
1676 if(cnamp >= &cnames[cnamsz])
1677 error("too many characters in id's and literals");
1678 else
1679 *cnamp++ = *s;
1680 } while(*s++);
1681 return temp;
1684 int
1685 isvalidchar(long i)
1687 return (i & ~0xffUL) == 0;
1690 long
1691 gettok(void)
1693 long c;
1694 Rune rune;
1695 int i, base, match, reserve;
1696 static int peekline;
1698 begin:
1699 reserve = 0;
1700 lineno += peekline;
1701 peekline = 0;
1702 c = Bgetrune(finput);
1703 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1704 if(c == '\n')
1705 lineno++;
1706 c = Bgetrune(finput);
1709 /* skip comment */
1710 if(c == '/') {
1711 lineno += skipcom();
1712 goto begin;
1714 switch(c) {
1715 case Beof:
1716 return ENDFILE;
1718 case '{':
1719 Bungetrune(finput);
1720 return '=';
1722 case '<':
1723 /* get, and look up, a type name (union member name) */
1724 i = 0;
1725 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1726 rune = c;
1727 c = runetochar(&tokname[i], &rune);
1728 if(i < NAMESIZE)
1729 i += c;
1731 if(c != '>')
1732 error("unterminated < ... > clause");
1733 tokname[i] = 0;
1734 for(i=1; i<=ntypes; i++)
1735 if(!strcmp(typeset[i], tokname)) {
1736 numbval = i;
1737 return TYPENAME;
1739 ntypes++;
1740 numbval = ntypes;
1741 typeset[numbval] = cstash(tokname);
1742 return TYPENAME;
1744 case '"':
1745 case '\'':
1746 match = c;
1747 tokname[0] = ' ';
1748 i = 1;
1749 for(;;) {
1750 c = Bgetrune(finput);
1751 if(c == '\n' || c <= 0)
1752 error("illegal or missing ' or \"" );
1753 if(c == '\\') {
1754 tokname[i] = '\\';
1755 if(i < NAMESIZE)
1756 i++;
1757 c = Bgetrune(finput);
1758 } else
1759 if(c == match)
1760 break;
1761 rune = c;
1762 c = runetochar(&tokname[i], &rune);
1763 if(i < NAMESIZE)
1764 i += c;
1766 break;
1768 case '%':
1769 case '\\':
1770 switch(c = Bgetrune(finput)) {
1771 case '0': return TERM;
1772 case '<': return LEFT;
1773 case '2': return BINARY;
1774 case '>': return RIGHT;
1775 case '%':
1776 case '\\': return MARK;
1777 case '=': return PREC;
1778 case '{': return LCURLY;
1779 default: reserve = 1;
1782 default:
1783 /* number */
1784 if(!isvalidchar(c))
1785 return c;
1786 if(isdigit(c)) {
1787 numbval = c-'0';
1788 base = (c=='0')? 8: 10;
1789 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1790 numbval = numbval*base + (c-'0');
1791 Bungetrune(finput);
1792 return NUMBER;
1794 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
1795 i = 0;
1796 while(isvalidchar(c) && (islower(c) || isupper(c) || isdigit(c) ||
1797 c == '-' || c=='_' || c=='.' || c=='$')) {
1798 if(reserve && isupper(c))
1799 c += 'a'-'A';
1800 rune = c;
1801 c = runetochar(&tokname[i], &rune);
1802 if(i < NAMESIZE)
1803 i += c;
1804 c = Bgetrune(finput);
1806 } else
1807 return c;
1808 if(c == Beof)
1809 return ENDFILE;
1810 Bungetrune(finput);
1812 tokname[i] = 0;
1814 /* find a reserved word */
1815 if(reserve) {
1816 for(c=0; resrv[c].name; c++)
1817 if(strcmp(tokname, resrv[c].name) == 0)
1818 return resrv[c].value;
1819 error("invalid escape, or illegal reserved word: %s", tokname);
1822 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1823 c = Bgetrune(finput);
1824 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1825 if(c == '\n')
1826 peekline++;
1827 /* look for comments */
1828 if(c == '/')
1829 peekline += skipcom();
1830 c = Bgetrune(finput);
1832 if(c == ':')
1833 return IDENTCOLON;
1834 Bungetrune(finput);
1835 return IDENTIFIER;
1839 * determine the type of a symbol
1841 int
1842 fdtype(int t)
1844 int v;
1846 if(t >= NTBASE)
1847 v = nontrst[t-NTBASE].value;
1848 else
1849 v = TYPE(toklev[t]);
1850 if(v <= 0)
1851 error("must specify type for %s", (t>=NTBASE)?
1852 nontrst[t-NTBASE].name: tokset[t].name);
1853 return v;
1856 int
1857 chfind(int t, char *s)
1859 int i;
1861 if(s[0] == ' ')
1862 t = 0;
1863 TLOOP(i)
1864 if(!strcmp(s, tokset[i].name))
1865 return i;
1866 NTLOOP(i)
1867 if(!strcmp(s, nontrst[i].name))
1868 return NTBASE+i;
1870 /* cannot find name */
1871 if(t > 1)
1872 error("%s should have been defined earlier", s);
1873 return defin(t, s);
1877 * copy the union declaration to the output, and the define file if present
1879 void
1880 cpyunion(void)
1882 long c;
1883 int level;
1885 Bprint(ftable, "\n");
1886 if(yyline)
1887 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
1888 Bprint(ftable, "typedef union ");
1889 if(fdefine != 0)
1890 Bprint(fdefine, "\ntypedef union ");
1892 level = 0;
1893 for(;;) {
1894 if((c=Bgetrune(finput)) == Beof)
1895 error("EOF encountered while processing %%union");
1896 Bputrune(ftable, c);
1897 if(fdefine != 0)
1898 Bputrune(fdefine, c);
1899 switch(c) {
1900 case '\n':
1901 lineno++;
1902 break;
1903 case '{':
1904 level++;
1905 break;
1906 case '}':
1907 level--;
1909 /* we are finished copying */
1910 if(level == 0) {
1911 Bprint(ftable, " YYSTYPE;\n");
1912 if(fdefine != 0){
1913 Bprint(fdefine, "\tYYSTYPE;\n");
1914 if(!yyarg)
1915 Bprint(fdefine, "extern\tYYSTYPE\tyylval;\n");
1917 return;
1924 * copies code between \{ and \}
1926 void
1927 cpycode(void)
1929 long c;
1931 c = Bgetrune(finput);
1932 if(c == '\n') {
1933 c = Bgetrune(finput);
1934 lineno++;
1936 Bprint(ftable, "\n");
1937 if(yyline)
1938 Bprint(ftable, "#line\t%d\t\"%s\"\n", lineno, infile);
1939 while(c != Beof) {
1940 if(c == '\\') {
1941 if((c=Bgetrune(finput)) == '}')
1942 return;
1943 Bputc(ftable, '\\');
1945 if(c == '%') {
1946 if((c=Bgetrune(finput)) == '}')
1947 return;
1948 Bputc(ftable, '%');
1950 Bputrune(ftable, c);
1951 if(c == '\n')
1952 lineno++;
1953 c = Bgetrune(finput);
1955 error("eof before %%}");
1959 * skip over comments
1960 * skipcom is called after reading a '/'
1962 int
1963 skipcom(void)
1965 long c;
1966 int i;
1968 /* i is the number of lines skipped */
1969 i = 0;
1970 if(Bgetrune(finput) != '*')
1971 error("illegal comment");
1972 c = Bgetrune(finput);
1973 while(c != Beof) {
1974 while(c == '*')
1975 if((c=Bgetrune(finput)) == '/')
1976 return i;
1977 if(c == '\n')
1978 i++;
1979 c = Bgetrune(finput);
1981 error("EOF inside comment");
1982 return 0;
1986 * copy C action to the next ; or closing }
1988 void
1989 cpyact(int offset)
1991 long c;
1992 int brac, match, j, s, fnd, tok;
1994 Bprint(faction, "\n");
1995 if(yyline)
1996 Bprint(faction, "#line\t%d\t\"%s\"\n", lineno, infile);
1997 brac = 0;
1999 loop:
2000 c = Bgetrune(finput);
2001 swt:
2002 switch(c) {
2003 case ';':
2004 if(brac == 0) {
2005 Bputrune(faction, c);
2006 return;
2008 goto lcopy;
2010 case '{':
2011 brac++;
2012 goto lcopy;
2014 case '$':
2015 s = 1;
2016 tok = -1;
2017 c = Bgetrune(finput);
2019 /* type description */
2020 if(c == '<') {
2021 Bungetrune(finput);
2022 if(gettok() != TYPENAME)
2023 error("bad syntax on $<ident> clause");
2024 tok = numbval;
2025 c = Bgetrune(finput);
2027 if(c == '$') {
2028 Bprint(faction, "yyval");
2030 /* put out the proper tag... */
2031 if(ntypes) {
2032 if(tok < 0)
2033 tok = fdtype(*prdptr[nprod]);
2034 Bprint(faction, ".%s", typeset[tok]);
2036 goto loop;
2038 if(c == '-') {
2039 s = -s;
2040 c = Bgetrune(finput);
2042 if(isvalidchar(c) && isdigit(c)) {
2043 j = 0;
2044 while(isdigit(c)) {
2045 j = j*10 + (c-'0');
2046 c = Bgetrune(finput);
2048 Bungetrune(finput);
2049 j = j*s - offset;
2050 if(j > 0)
2051 error("Illegal use of $%d", j+offset);
2053 dollar:
2054 Bprint(faction, "yypt[-%d].yyv", -j);
2056 /* put out the proper tag */
2057 if(ntypes) {
2058 if(j+offset <= 0 && tok < 0)
2059 error("must specify type of $%d", j+offset);
2060 if(tok < 0)
2061 tok = fdtype(prdptr[nprod][j+offset]);
2062 Bprint(faction, ".%s", typeset[tok]);
2064 goto loop;
2066 if(isvalidchar(c) && (isupper(c) || islower(c) || c == '_' || c == '.')) {
2067 int tok; /* tok used oustide for type info */
2069 /* look for $name */
2070 Bungetrune(finput);
2071 if(gettok() != IDENTIFIER)
2072 error("$ must be followed by an identifier");
2073 tok = chfind(2, tokname);
2074 if((c = Bgetrune(finput)) != '#') {
2075 Bungetrune(finput);
2076 fnd = -1;
2077 } else
2078 if(gettok() != NUMBER) {
2079 error("# must be followed by number");
2080 fnd = -1;
2081 } else
2082 fnd = numbval;
2083 for(j=1; j<=offset; ++j)
2084 if(tok == prdptr[nprod][j]) {
2085 if(--fnd <= 0) {
2086 j -= offset;
2087 goto dollar;
2090 error("$name or $name#number not found");
2092 Bputc(faction, '$');
2093 if(s < 0 )
2094 Bputc(faction, '-');
2095 goto swt;
2097 case '}':
2098 brac--;
2099 if(brac)
2100 goto lcopy;
2101 Bputrune(faction, c);
2102 return;
2104 case '/':
2105 /* look for comments */
2106 Bputrune(faction, c);
2107 c = Bgetrune(finput);
2108 if(c != '*')
2109 goto swt;
2111 /* it really is a comment */
2112 Bputrune(faction, c);
2113 c = Bgetrune(finput);
2114 while(c >= 0) {
2115 while(c == '*') {
2116 Bputrune(faction, c);
2117 if((c=Bgetrune(finput)) == '/')
2118 goto lcopy;
2120 Bputrune(faction, c);
2121 if(c == '\n')
2122 lineno++;
2123 c = Bgetrune(finput);
2125 error("EOF inside comment");
2127 case '\'':
2128 /* character constant */
2129 match = '\'';
2130 goto string;
2132 case '"':
2133 /* character string */
2134 match = '"';
2136 string:
2137 Bputrune(faction, c);
2138 while((c = Bgetrune(finput)) >= 0) {
2139 if(c == '\\') {
2140 Bputrune(faction, c);
2141 c = Bgetrune(finput);
2142 if(c == '\n')
2143 lineno++;
2144 } else {
2145 if(c == match)
2146 goto lcopy;
2147 if(c == '\n')
2148 error("newline in string or char. const.");
2150 Bputrune(faction, c);
2152 error("EOF in string or character constant");
2154 case Beof:
2155 error("action does not terminate");
2157 case '\n':
2158 lineno++;
2159 goto lcopy;
2162 lcopy:
2163 Bputrune(faction, c);
2164 goto loop;
2167 void
2168 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2170 char buf[256];
2172 if(vflag) {
2173 sprint(buf, "%s.%s", stem, FILEU);
2174 foutput = Bopen(buf, OWRITE);
2175 if(foutput == 0)
2176 error("cannot open %s", buf);
2178 if(yydebug) {
2179 sprint(buf, "%s.%s", stem, FILEDEBUG);
2180 if((fdebug = Bopen(buf, OWRITE)) == 0)
2181 error("can't open %s", buf);
2183 if(dflag) {
2184 sprint(buf, "%s.%s", stem, FILED);
2185 fdefine = Bopen(buf, OWRITE);
2186 if(fdefine == 0)
2187 error("can't create %s", buf);
2189 if(ytab == 0)
2190 sprint(buf, "%s.%s", stem, OFILE);
2191 else
2192 strcpy(buf, ytabc);
2193 ftable = Bopen(buf, OWRITE);
2194 if(ftable == 0)
2195 error("cannot open table file %s", buf);
2199 * print the output for the states
2201 void
2202 output(void)
2204 int i, k, c;
2205 Wset *u, *v;
2207 Bprint(ftable, "static\tconst\tshort yyexca[] =\n{");
2208 if(fdebug)
2209 Bprint(fdebug, "static\tconst\tchar* yystates[] =\n{\n");
2211 /* output the stuff for state i */
2212 SLOOP(i) {
2213 nolook = tystate[i]!=MUSTLOOKAHEAD;
2214 closure(i);
2216 /* output actions */
2217 nolook = 1;
2218 aryfil(temp1, ntokens+nnonter+1, 0);
2219 WSLOOP(wsets, u) {
2220 c = *(u->pitem);
2221 if(c > 1 && c < NTBASE && temp1[c] == 0) {
2222 WSLOOP(u, v)
2223 if(c == *(v->pitem))
2224 putitem(v->pitem+1, (Lkset*)0);
2225 temp1[c] = state(c);
2226 } else
2227 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2228 temp1[c+ntokens] = amem[indgo[i]+c];
2230 if(i == 1)
2231 temp1[1] = ACCEPTCODE;
2233 /* now, we have the shifts; look at the reductions */
2234 lastred = 0;
2235 WSLOOP(wsets, u) {
2236 c = *u->pitem;
2238 /* reduction */
2239 if(c <= 0) {
2240 lastred = -c;
2241 TLOOP(k)
2242 if(BIT(u->ws.lset, k)) {
2243 if(temp1[k] == 0)
2244 temp1[k] = c;
2245 else
2246 if(temp1[k] < 0) { /* reduce/reduce conflict */
2247 if(foutput)
2248 Bprint(foutput,
2249 "\n%d: reduce/reduce conflict"
2250 " (red'ns %d and %d ) on %s",
2251 i, -temp1[k], lastred,
2252 symnam(k));
2253 if(-temp1[k] > lastred)
2254 temp1[k] = -lastred;
2255 zzrrconf++;
2256 } else
2257 /* potential shift/reduce conflict */
2258 precftn( lastred, k, i );
2262 wract(i);
2265 if(fdebug)
2266 Bprint(fdebug, "};\n");
2267 Bprint(ftable, "};\n");
2268 Bprint(ftable, "#define YYNPROD %d\n", nprod);
2269 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2270 if(yydebug)
2271 Bprint(ftable, "#define yydebug %s\n", yydebug);
2275 * pack state i from temp1 into amem
2277 int
2278 apack(int *p, int n)
2280 int *pp, *qq, *rr, off, *q, *r;
2282 /* we don't need to worry about checking because
2283 * we will only look at entries known to be there...
2284 * eliminate leading and trailing 0's
2287 q = p+n;
2288 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2290 /* no actions */
2291 if(pp > q)
2292 return 0;
2293 p = pp;
2295 /* now, find a place for the elements from p to q, inclusive */
2296 r = &amem[ACTSIZE-1];
2297 for(rr = amem; rr <= r; rr++, off++) {
2298 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2299 if(*pp != 0)
2300 if(*pp != *qq && *qq != 0)
2301 goto nextk;
2303 /* we have found an acceptable k */
2304 if(pkdebug && foutput != 0)
2305 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2306 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2307 if(*pp) {
2308 if(qq > r)
2309 error("action table overflow");
2310 if(qq > memp)
2311 memp = qq;
2312 *qq = *pp;
2314 if(pkdebug && foutput != 0)
2315 for(pp = amem; pp <= memp; pp += 10) {
2316 Bprint(foutput, "\t");
2317 for(qq = pp; qq <= pp+9; qq++)
2318 Bprint(foutput, "%d ", *qq);
2319 Bprint(foutput, "\n");
2321 return(off);
2322 nextk:;
2324 error("no space in action table");
2325 return 0;
2329 * output the gotos for the nontermninals
2331 void
2332 go2out(void)
2334 int i, j, k, best, count, cbest, times;
2336 /* mark begining of gotos */
2337 Bprint(ftemp, "$\n");
2338 for(i = 1; i <= nnonter; i++) {
2339 go2gen(i);
2341 /* find the best one to make default */
2342 best = -1;
2343 times = 0;
2345 /* is j the most frequent */
2346 for(j = 0; j <= nstate; j++) {
2347 if(tystate[j] == 0)
2348 continue;
2349 if(tystate[j] == best)
2350 continue;
2352 /* is tystate[j] the most frequent */
2353 count = 0;
2354 cbest = tystate[j];
2355 for(k = j; k <= nstate; k++)
2356 if(tystate[k] == cbest)
2357 count++;
2358 if(count > times) {
2359 best = cbest;
2360 times = count;
2364 /* best is now the default entry */
2365 zzgobest += times-1;
2366 for(j = 0; j <= nstate; j++)
2367 if(tystate[j] != 0 && tystate[j] != best) {
2368 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2369 zzgoent++;
2372 /* now, the default */
2373 if(best == -1)
2374 best = 0;
2375 zzgoent++;
2376 Bprint(ftemp, "%d\n", best);
2381 * output the gotos for nonterminal c
2383 void
2384 go2gen(int c)
2386 int i, work, cc;
2387 Item *p, *q;
2390 /* first, find nonterminals with gotos on c */
2391 aryfil(temp1, nnonter+1, 0);
2392 temp1[c] = 1;
2393 work = 1;
2394 while(work) {
2395 work = 0;
2396 PLOOP(0, i)
2398 /* cc is a nonterminal */
2399 if((cc=prdptr[i][1]-NTBASE) >= 0)
2400 /* cc has a goto on c */
2401 if(temp1[cc] != 0) {
2403 /* thus, the left side of production i does too */
2404 cc = *prdptr[i]-NTBASE;
2405 if(temp1[cc] == 0) {
2406 work = 1;
2407 temp1[cc] = 1;
2412 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2413 if(g2debug && foutput != 0) {
2414 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2415 NTLOOP(i)
2416 if(temp1[i])
2417 Bprint(foutput, "%s ", nontrst[i].name);
2418 Bprint(foutput, "\n");
2421 /* now, go through and put gotos into tystate */
2422 aryfil(tystate, nstate, 0);
2423 SLOOP(i)
2424 ITMLOOP(i, p, q)
2425 if((cc = *p->pitem) >= NTBASE)
2426 /* goto on c is possible */
2427 if(temp1[cc-NTBASE]) {
2428 tystate[i] = amem[indgo[i]+c];
2429 break;
2434 * decide a shift/reduce conflict by precedence.
2435 * r is a rule number, t a token number
2436 * the conflict is in state s
2437 * temp1[t] is changed to reflect the action
2439 void
2440 precftn(int r, int t, int s)
2442 int lp, lt, action;
2444 lp = levprd[r];
2445 lt = toklev[t];
2446 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2448 /* conflict */
2449 if(foutput != 0)
2450 Bprint(foutput,
2451 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2452 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2453 zzsrconf++;
2454 return;
2456 if(PLEVEL(lt) == PLEVEL(lp))
2457 action = ASSOC(lt);
2458 else
2459 if(PLEVEL(lt) > PLEVEL(lp))
2460 action = RASC; /* shift */
2461 else
2462 action = LASC; /* reduce */
2463 switch(action) {
2464 case BASC: /* error action */
2465 temp1[t] = ERRCODE;
2466 break;
2467 case LASC: /* reduce */
2468 temp1[t] = -r;
2469 break;
2474 * output state i
2475 * temp1 has the actions, lastred the default
2477 void
2478 wract(int i)
2480 int p, p0, p1, ntimes, tred, count, j, flag;
2482 /* find the best choice for lastred */
2483 lastred = 0;
2484 ntimes = 0;
2485 TLOOP(j) {
2486 if(temp1[j] >= 0)
2487 continue;
2488 if(temp1[j]+lastred == 0)
2489 continue;
2490 /* count the number of appearances of temp1[j] */
2491 count = 0;
2492 tred = -temp1[j];
2493 levprd[tred] |= REDFLAG;
2494 TLOOP(p)
2495 if(temp1[p]+tred == 0)
2496 count++;
2497 if(count > ntimes) {
2498 lastred = tred;
2499 ntimes = count;
2504 * for error recovery, arrange that, if there is a shift on the
2505 * error recovery token, `error', that the default be the error action
2507 if(temp1[2] > 0)
2508 lastred = 0;
2510 /* clear out entries in temp1 which equal lastred */
2511 TLOOP(p)
2512 if(temp1[p]+lastred == 0)
2513 temp1[p] = 0;
2515 wrstate(i);
2516 defact[i] = lastred;
2517 flag = 0;
2518 TLOOP(p0)
2519 if((p1=temp1[p0]) != 0) {
2520 if(p1 < 0) {
2521 p1 = -p1;
2522 goto exc;
2524 if(p1 == ACCEPTCODE) {
2525 p1 = -1;
2526 goto exc;
2528 if(p1 == ERRCODE) {
2529 p1 = 0;
2530 exc:
2531 if(flag++ == 0)
2532 Bprint(ftable, "-1, %d,\n", i);
2533 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2534 zzexcp++;
2535 continue;
2537 Bprint(ftemp, "%d,%d,", p0, p1);
2538 zzacent++;
2540 if(flag) {
2541 defact[i] = -2;
2542 Bprint(ftable, "\t-2, %d,\n", lastred);
2544 Bprint(ftemp, "\n");
2548 * writes state i
2550 void
2551 wrstate(int i)
2553 int j0, j1;
2554 Item *pp, *qq;
2555 Wset *u;
2557 if(fdebug) {
2558 if(lastred) {
2559 Bprint(fdebug, " 0, /*%d*/\n", i);
2560 } else {
2561 Bprint(fdebug, " \"");
2562 ITMLOOP(i, pp, qq)
2563 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2564 if(tystate[i] == MUSTLOOKAHEAD)
2565 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2566 if(*u->pitem < 0)
2567 Bprint(fdebug, "%s\\n", writem(u->pitem));
2568 Bprint(fdebug, "\", /*%d*/\n", i);
2571 if(foutput == 0)
2572 return;
2573 Bprint(foutput, "\nstate %d\n", i);
2574 ITMLOOP(i, pp, qq)
2575 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2576 if(tystate[i] == MUSTLOOKAHEAD)
2577 /* print out empty productions in closure */
2578 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2579 if(*u->pitem < 0)
2580 Bprint(foutput, "\t%s\n", writem(u->pitem));
2582 /* check for state equal to another */
2583 TLOOP(j0)
2584 if((j1=temp1[j0]) != 0) {
2585 Bprint(foutput, "\n\t%s ", symnam(j0));
2586 /* shift, error, or accept */
2587 if(j1 > 0) {
2588 if(j1 == ACCEPTCODE)
2589 Bprint(foutput, "accept");
2590 else
2591 if(j1 == ERRCODE)
2592 Bprint(foutput, "error");
2593 else
2594 Bprint(foutput, "shift %d", j1);
2595 } else
2596 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2599 /* output the final production */
2600 if(lastred)
2601 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
2602 lastred, rlines[lastred]);
2603 else
2604 Bprint(foutput, "\n\t. error\n\n");
2606 /* now, output nonterminal actions */
2607 j1 = ntokens;
2608 for(j0 = 1; j0 <= nnonter; j0++) {
2609 j1++;
2610 if(temp1[j1])
2611 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2615 void
2616 warray(char *s, int *v, int n)
2618 int i;
2620 Bprint(ftable, "static\tconst\tshort %s[] =\n{", s);
2621 for(i=0;;) {
2622 if(i%10 == 0)
2623 Bprint(ftable, "\n");
2624 Bprint(ftable, "%4d", v[i]);
2625 i++;
2626 if(i >= n) {
2627 Bprint(ftable, "\n};\n");
2628 break;
2630 Bprint(ftable, ",");
2635 * in order to free up the mem and amem arrays for the optimizer,
2636 * and still be able to output yyr1, etc., after the sizes of
2637 * the action array is known, we hide the nonterminals
2638 * derived by productions in levprd.
2641 void
2642 hideprod(void)
2644 int i, j;
2646 j = 0;
2647 levprd[0] = 0;
2648 PLOOP(1, i) {
2649 if(!(levprd[i] & REDFLAG)) {
2650 j++;
2651 if(foutput != 0)
2652 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
2654 levprd[i] = *prdptr[i] - NTBASE;
2656 if(j)
2657 print("%d rules never reduced\n", j);
2660 void
2661 callopt(void)
2663 int i, *p, j, k, *q;
2665 /* read the arrays from tempfile and set parameters */
2666 finput = Bopen(tempname, OREAD);
2667 if(finput == 0)
2668 error("optimizer cannot open tempfile");
2670 pgo[0] = 0;
2671 temp1[0] = 0;
2672 nstate = 0;
2673 nnonter = 0;
2674 for(;;) {
2675 switch(gtnm()) {
2676 case '\n':
2677 nstate++;
2678 pmem--;
2679 temp1[nstate] = pmem - mem0;
2680 case ',':
2681 continue;
2682 case '$':
2683 break;
2684 default:
2685 error("bad tempfile %s", tempname);
2687 break;
2690 pmem--;
2691 temp1[nstate] = yypgo[0] = pmem - mem0;
2692 for(;;) {
2693 switch(gtnm()) {
2694 case '\n':
2695 nnonter++;
2696 yypgo[nnonter] = pmem-mem0;
2697 case ',':
2698 continue;
2699 case -1:
2700 break;
2701 default:
2702 error("bad tempfile");
2704 break;
2706 pmem--;
2707 yypgo[nnonter--] = pmem - mem0;
2708 for(i = 0; i < nstate; i++) {
2709 k = 32000;
2710 j = 0;
2711 q = mem0 + temp1[i+1];
2712 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2713 if(*p > j)
2714 j = *p;
2715 if(*p < k)
2716 k = *p;
2718 /* nontrivial situation */
2719 if(k <= j) {
2720 /* j is now the range */
2721 /* j -= k; *//* call scj */
2722 if(k > maxoff)
2723 maxoff = k;
2725 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2726 if(j > maxspr)
2727 maxspr = j;
2730 /* initialize ggreed table */
2731 for(i = 1; i <= nnonter; i++) {
2732 ggreed[i] = 1;
2733 j = 0;
2735 /* minimum entry index is always 0 */
2736 q = mem0 + yypgo[i+1] - 1;
2737 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2738 ggreed[i] += 2;
2739 if(*p > j)
2740 j = *p;
2742 ggreed[i] = ggreed[i] + 2*j;
2743 if(j > maxoff)
2744 maxoff = j;
2747 /* now, prepare to put the shift actions into the amem array */
2748 for(i = 0; i < ACTSIZE; i++)
2749 amem[i] = 0;
2750 maxa = amem;
2751 for(i = 0; i < nstate; i++) {
2752 if(tystate[i] == 0 && adb > 1)
2753 Bprint(ftable, "State %d: null\n", i);
2754 indgo[i] = YYFLAG1;
2756 while((i = nxti()) != NOMORE)
2757 if(i >= 0)
2758 stin(i);
2759 else
2760 gin(-i);
2762 /* print amem array */
2763 if(adb > 2 )
2764 for(p = amem; p <= maxa; p += 10) {
2765 Bprint(ftable, "%4d ", (int)(p-amem));
2766 for(i = 0; i < 10; ++i)
2767 Bprint(ftable, "%4d ", p[i]);
2768 Bprint(ftable, "\n");
2771 /* write out the output appropriate to the language */
2772 aoutput();
2773 osummary();
2774 ZAPFILE(tempname);
2777 void
2778 gin(int i)
2780 int *p, *r, *s, *q1, *q2;
2782 /* enter gotos on nonterminal i into array amem */
2783 ggreed[i] = 0;
2785 q2 = mem0+ yypgo[i+1] - 1;
2786 q1 = mem0 + yypgo[i];
2788 /* now, find amem place for it */
2789 for(p = amem; p < &amem[ACTSIZE]; p++) {
2790 if(*p)
2791 continue;
2792 for(r = q1; r < q2; r += 2) {
2793 s = p + *r + 1;
2794 if(*s)
2795 goto nextgp;
2796 if(s > maxa)
2797 if((maxa = s) > &amem[ACTSIZE])
2798 error("a array overflow");
2800 /* we have found amem spot */
2801 *p = *q2;
2802 if(p > maxa)
2803 if((maxa = p) > &amem[ACTSIZE])
2804 error("a array overflow");
2805 for(r = q1; r < q2; r += 2) {
2806 s = p + *r + 1;
2807 *s = r[1];
2809 pgo[i] = p-amem;
2810 if(adb > 1)
2811 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2812 return;
2814 nextgp:;
2816 error("cannot place goto %d\n", i);
2819 void
2820 stin(int i)
2822 int *r, *s, n, flag, j, *q1, *q2;
2824 tystate[i] = 0;
2826 /* enter state i into the amem array */
2827 q2 = mem0+temp1[i+1];
2828 q1 = mem0+temp1[i];
2829 /* find an acceptable place */
2830 for(n = -maxoff; n < ACTSIZE; n++) {
2831 flag = 0;
2832 for(r = q1; r < q2; r += 2) {
2833 if(*r + n < 0)
2834 goto nextn;
2835 s = *r + n + amem;
2836 if(*s == 0)
2837 flag++;
2838 else
2839 if(*s != r[1])
2840 goto nextn;
2843 /* check that the position equals another only if the states are identical */
2844 for(j=0; j<nstate; j++) {
2845 if(indgo[j] == n) {
2847 /* we have some disagreement */
2848 if(flag)
2849 goto nextn;
2850 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2852 /* states are equal */
2853 indgo[i] = n;
2854 if(adb > 1)
2855 Bprint(ftable,
2856 "State %d: entry at %d equals state %d\n",
2857 i, n, j);
2858 return;
2861 /* we have some disagreement */
2862 goto nextn;
2866 for(r = q1; r < q2; r += 2) {
2867 if((s = *r+n+amem) >= &amem[ACTSIZE])
2868 error("out of space in optimizer a array");
2869 if(s > maxa)
2870 maxa = s;
2871 if(*s != 0 && *s != r[1])
2872 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2873 *s = r[1];
2875 indgo[i] = n;
2876 if(adb > 1)
2877 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2878 return;
2879 nextn:;
2881 error("Error; failure to place state %d\n", i);
2885 * finds the next i
2887 int
2888 nxti(void)
2890 int i, max, maxi;
2892 max = 0;
2893 maxi = 0;
2894 for(i = 1; i <= nnonter; i++)
2895 if(ggreed[i] >= max) {
2896 max = ggreed[i];
2897 maxi = -i;
2899 for(i = 0; i < nstate; ++i)
2900 if(tystate[i] >= max) {
2901 max = tystate[i];
2902 maxi = i;
2904 if(nxdb)
2905 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2906 if(max == 0)
2907 return NOMORE;
2908 return maxi;
2912 * write summary
2914 void
2915 osummary(void)
2918 int i, *p;
2920 if(foutput == 0)
2921 return;
2922 i = 0;
2923 for(p = maxa; p >= amem; p--)
2924 if(*p == 0)
2925 i++;
2927 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2928 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2929 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2930 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2934 * this version is for C
2935 * write out the optimized parser
2937 void
2938 aoutput(void)
2940 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2941 arout("yyact", amem, (maxa-amem)+1);
2942 arout("yypact", indgo, nstate);
2943 arout("yypgo", pgo, nnonter+1);
2946 void
2947 arout(char *s, int *v, int n)
2949 int i;
2951 Bprint(ftable, "static\tconst\tshort %s[] =\n{", s);
2952 for(i = 0; i < n;) {
2953 if(i%10 == 0)
2954 Bprint(ftable, "\n");
2955 Bprint(ftable, "%4d", v[i]);
2956 i++;
2957 if(i == n)
2958 Bprint(ftable, "\n};\n");
2959 else
2960 Bprint(ftable, ",");
2965 * read and convert an integer from the standard input
2966 * return the terminating character
2967 * blanks, tabs, and newlines are ignored
2969 int
2970 gtnm(void)
2972 int sign, val, c;
2974 sign = 0;
2975 val = 0;
2976 while((c=Bgetrune(finput)) != Beof) {
2977 if(isvalidchar(c) && isdigit(c)) {
2978 val = val*10 + c-'0';
2979 continue;
2981 if(c == '-') {
2982 sign = 1;
2983 continue;
2985 break;
2987 if(sign)
2988 val = -val;
2989 *pmem++ = val;
2990 if(pmem >= &mem0[MEMSIZE])
2991 error("out of space");
2992 return c;