Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include <ctype.h>
6 #define Bungetrune Bungetc /* ok for now. */
8 /*
9 * all these are 32 bit
10 */
11 #define TBITSET ((32+NTERMS)/32) /* BOTCH?? +31 */
12 #define BIT(a,i) ((a)[(i)>>5] & (1<<((i)&037)))
13 #define SETBIT(a,i) ((a)[(i)>>5] |= (1<<((i)&037)))
14 #define NWORDS(n) (((n)+32)/32)
16 char *PARSER = "#9/lib/yaccpar";
17 char *PARSERS = "#9/lib/yaccpars";
18 #define TEMPNAME "y.tmp.XXXXXX"
19 #define ACTNAME "y.acts.XXXXXX"
20 #define OFILE "tab.c"
21 #define FILEU "output"
22 #define FILED "tab.h"
23 #define FILEDEBUG "debug"
25 enum
26 {
27 /*
28 * the following are adjustable
29 * according to memory size
30 */
31 ACTSIZE = 40000,
32 MEMSIZE = 40000,
33 NSTATES = 2000,
34 NTERMS = 511,
35 NPROD = 1600,
36 NNONTERM = 600,
37 TEMPSIZE = 2000,
38 CNAMSZ = 10000,
39 LSETSIZE = 2400,
40 WSETSIZE = 350,
42 NAMESIZE = 50,
43 NTYPES = 63,
44 ISIZE = 400,
46 PRIVATE = 0xE000, /* unicode private use */
48 /* relationships which must hold:
49 TBITSET ints must hold NTERMS+1 bits...
50 WSETSIZE >= NNONTERM
51 LSETSIZE >= NNONTERM
52 TEMPSIZE >= NTERMS + NNONTERM + 1
53 TEMPSIZE >= NSTATES
54 */
56 NTBASE = 010000,
57 ERRCODE = 8190,
58 ACCEPTCODE = 8191,
60 NOASC = 0, /* no assoc. */
61 LASC = 1, /* left assoc. */
62 RASC = 2, /* right assoc. */
63 BASC = 3, /* binary assoc. */
65 /* flags for state generation */
67 DONE = 0,
68 MUSTDO = 1,
69 MUSTLOOKAHEAD = 2,
71 /* flags for a rule having an action, and being reduced */
73 ACTFLAG = 04,
74 REDFLAG = 010,
76 /* output parser flags */
77 YYFLAG1 = -1000,
79 /* parse tokens */
80 IDENTIFIER = PRIVATE,
81 MARK,
82 TERM,
83 LEFT,
84 RIGHT,
85 BINARY,
86 PREC,
87 LCURLY,
88 IDENTCOLON,
89 NUMBER,
90 START,
91 TYPEDEF,
92 TYPENAME,
93 UNION,
95 ENDFILE = 0,
97 EMPTY = 1,
98 WHOKNOWS = 0,
99 OK = 1,
100 NOMORE = -1000,
101 };
103 /* macros for getting associativity and precedence levels */
105 #define ASSOC(i) ((i)&03)
106 #define PLEVEL(i) (((i)>>4)&077)
107 #define TYPE(i) (((i)>>10)&077)
109 /* macros for setting associativity and precedence levels */
111 #define SETASC(i,j) i |= j
112 #define SETPLEV(i,j) i |= (j<<4)
113 #define SETTYPE(i,j) i |= (j<<10)
115 /* looping macros */
117 #define TLOOP(i) for(i=1; i<=ntokens; i++)
118 #define NTLOOP(i) for(i=0; i<=nnonter; i++)
119 #define PLOOP(s,i) for(i=s; i<nprod; i++)
120 #define SLOOP(i) for(i=0; i<nstate; i++)
121 #define WSBUMP(x) x++
122 #define WSLOOP(s,j) for(j=s; j<cwp; j++)
123 #define ITMLOOP(i,p,q) for(q=pstate[i+1], p=pstate[i]; p<q; p++)
124 #define SETLOOP(i) for(i=0; i<tbitset; i++)
126 /* command to clobber tempfiles after use */
128 #define ZAPFILE(x) if(x) remove(x)
130 /* I/O descriptors */
132 Biobuf* faction; /* file for saving actions */
133 Biobuf* fdefine; /* file for #defines */
134 Biobuf* fdebug; /* y.debug for strings for debugging */
135 Biobuf* ftable; /* y.tab.c file */
136 Biobuf* ftemp; /* tempfile to pass 2 */
137 Biobuf* finput; /* input file */
138 Biobuf* foutput; /* y.output file */
140 /* communication variables between various I/O routines */
142 char* infile; /* input file name */
143 int numbval; /* value of an input number */
144 char tokname[NAMESIZE+4]; /* input token name, slop for runes and 0 */
146 /* structure declarations */
148 typedef
149 struct
151 int lset[TBITSET];
152 } Lkset;
154 typedef
155 struct
157 int* pitem;
158 Lkset* look;
159 } Item;
161 typedef
162 struct
164 char* name;
165 int value;
166 } Symb;
168 typedef
169 struct
171 int* pitem;
172 int flag;
173 Lkset ws;
174 } Wset;
176 /* storage of names */
178 char cnames[CNAMSZ]; /* place where token and nonterminal names are stored */
179 int cnamsz = CNAMSZ; /* size of cnames */
180 char* cnamp = cnames; /* place where next name is to be put in */
181 int ndefout = 4; /* number of defined symbols output */
182 char* tempname;
183 char* actname;
184 char ttempname[] = TEMPNAME;
185 char tactname[] = ACTNAME;
186 char* parser;
187 char* yydebug;
189 /* storage of types */
190 int ntypes; /* number of types defined */
191 char* typeset[NTYPES]; /* pointers to type tags */
193 /* token information */
195 int ntokens = 0 ; /* number of tokens */
196 Symb tokset[NTERMS];
197 int toklev[NTERMS]; /* vector with the precedence of the terminals */
199 /* nonterminal information */
201 int nnonter = -1; /* the number of nonterminals */
202 Symb nontrst[NNONTERM];
203 int start; /* start symbol */
205 /* assigned token type values */
206 int extval = 0;
208 char* ytabc = OFILE; /* name of y.tab.c */
210 /* grammar rule information */
212 int mem0[MEMSIZE] ; /* production storage */
213 int* mem = mem0;
214 int nprod = 1; /* number of productions */
215 int* prdptr[NPROD]; /* pointers to descriptions of productions */
216 int levprd[NPROD]; /* precedence levels for the productions */
217 int rlines[NPROD]; /* line number for this rule */
219 /* state information */
221 int nstate = 0; /* number of states */
222 Item* pstate[NSTATES+2]; /* pointers to the descriptions of the states */
223 int tystate[NSTATES]; /* contains type information about the states */
224 int defact[NSTATES]; /* the default actions of states */
225 int tstates[NTERMS]; /* states generated by terminal gotos */
226 int ntstates[NNONTERM]; /* states generated by nonterminal gotos */
227 int mstates[NSTATES]; /* chain of overflows of term/nonterm generation lists */
228 int lastred; /* the number of the last reduction of a state */
230 /* lookahead set information */
232 Lkset lkst[LSETSIZE];
233 int nolook; /* flag to turn off lookahead computations */
234 int tbitset; /* size of lookahead sets */
235 int nlset = 0; /* next lookahead set index */
236 int nolook = 0; /* flag to suppress lookahead computations */
237 Lkset clset; /* temporary storage for lookahead computations */
239 /* working set information */
241 Wset wsets[WSETSIZE];
242 Wset* cwp;
244 /* storage for action table */
246 int amem[ACTSIZE]; /* action table storage */
247 int* memp = amem; /* next free action table position */
248 int indgo[NSTATES]; /* index to the stored goto table */
250 /* temporary vector, indexable by states, terms, or ntokens */
252 int temp1[TEMPSIZE]; /* temporary storage, indexed by terms + ntokens or states */
253 int lineno = 1; /* current input line number */
254 int fatfl = 1; /* if on, error is fatal */
255 int nerrors = 0; /* number of errors */
257 /* statistics collection variables */
259 int zzgoent;
260 int zzgobest;
261 int zzacent;
262 int zzexcp;
263 int zzclose;
264 int zzrrconf;
265 int zzsrconf;
267 int* ggreed = lkst[0].lset;
268 int* pgo = wsets[0].ws.lset;
269 int* yypgo = &nontrst[0].value;
271 int maxspr = 0; /* maximum spread of any entry */
272 int maxoff = 0; /* maximum offset into a array */
273 int* pmem = mem0;
274 int* maxa;
275 int nxdb = 0;
276 int adb = 0;
279 /* storage for information about the nonterminals */
281 int** pres[NNONTERM+2]; /* vector of pointers to productions yielding each nonterminal */
282 Lkset* pfirst[NNONTERM+2]; /* vector of pointers to first sets for each nonterminal */
283 int pempty[NNONTERM+1]; /* vector of nonterminals nontrivially deriving e */
285 /* random stuff picked out from between functions */
287 int indebug = 0;
288 Wset* zzcwp = wsets;
289 int zzgoent = 0;
290 int zzgobest = 0;
291 int zzacent = 0;
292 int zzexcp = 0;
293 int zzclose = 0;
294 int zzsrconf = 0;
295 int* zzmemsz = mem0;
296 int zzrrconf = 0;
297 int pidebug = 0; /* debugging flag for putitem */
298 int gsdebug = 0;
299 int cldebug = 0; /* debugging flag for closure */
300 int pkdebug = 0;
301 int g2debug = 0;
303 struct
305 char* name;
306 long value;
307 } resrv[] =
309 "binary", BINARY,
310 "left", LEFT,
311 "nonassoc", BINARY,
312 "prec", PREC,
313 "right", RIGHT,
314 "start", START,
315 "term", TERM,
316 "token", TERM,
317 "type", TYPEDEF,
318 "union", UNION,
319 0,
320 };
322 /* define functions */
324 void main(int, char**);
325 void others(void);
326 char* chcopy(char*, char*);
327 char* writem(int*);
328 char* symnam(int);
329 void summary(void);
330 void error(char*, ...);
331 void aryfil(int*, int, int);
332 int setunion(int*, int*);
333 void prlook(Lkset*);
334 void cpres(void);
335 void cpfir(void);
336 int state(int);
337 void putitem(int*, Lkset*);
338 void cempty(void);
339 void stagen(void);
340 void closure(int);
341 Lkset* flset(Lkset*);
342 void cleantmp(void);
343 void intr(void);
344 void setup(int, char**);
345 void finact(void);
346 int defin(int, char*);
347 void defout(int);
348 char* cstash(char*);
349 long gettok(void);
350 int fdtype(int);
351 int chfind(int, char*);
352 void cpyunion(void);
353 void cpycode(void);
354 int skipcom(void);
355 void cpyact(int);
356 void openup(char*, int, int, int, char*);
357 void output(void);
358 int apack(int*, int);
359 void go2out(void);
360 void go2gen(int);
361 void precftn(int, int, int);
362 void wract(int);
363 void wrstate(int);
364 void warray(char*, int*, int);
365 void hideprod(void);
366 void callopt(void);
367 void gin(int);
368 void stin(int);
369 int nxti(void);
370 void osummary(void);
371 void aoutput(void);
372 void arout(char*, int*, int);
373 int gtnm(void);
375 void
376 main(int argc, char *argv[])
378 PARSER = unsharp(PARSER);
379 PARSERS = unsharp(PARSERS);
380 parser = PARSER;
382 setup(argc, argv); /* initialize and read productions */
383 tbitset = NWORDS(ntokens);
384 cpres(); /* make table of which productions yield a given nonterminal */
385 cempty(); /* make a table of which nonterminals can match the empty string */
386 cpfir(); /* make a table of firsts of nonterminals */
387 stagen(); /* generate the states */
388 output(); /* write the states and the tables */
389 go2out();
390 hideprod();
391 summary();
392 callopt();
393 others();
394 exits(0);
397 /*
398 * put out other arrays, copy the parsers
399 */
400 void
401 others(void)
403 int c, i, j;
405 finput = Bopen(parser, OREAD);
406 if(finput == 0)
407 error("cannot open parser %s: %r", parser);
408 warray("yyr1", levprd, nprod);
409 aryfil(temp1, nprod, 0);
410 PLOOP(1, i)
411 temp1[i] = prdptr[i+1]-prdptr[i]-2;
412 warray("yyr2", temp1, nprod);
414 aryfil(temp1, nstate, -1000);
415 TLOOP(i)
416 for(j=tstates[i]; j!=0; j=mstates[j])
417 temp1[j] = i;
418 NTLOOP(i)
419 for(j=ntstates[i]; j!=0; j=mstates[j])
420 temp1[j] = -i;
421 warray("yychk", temp1, nstate);
422 warray("yydef", defact, nstate);
424 /* put out token translation tables */
425 /* table 1 has 0-256 */
426 aryfil(temp1, 256, 0);
427 c = 0;
428 TLOOP(i) {
429 j = tokset[i].value;
430 if(j >= 0 && j < 256) {
431 if(temp1[j]) {
432 print("yacc bug -- cant have 2 different Ts with same value\n");
433 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
434 nerrors++;
436 temp1[j] = i;
437 if(j > c)
438 c = j;
441 warray("yytok1", temp1, c+1);
443 /* table 2 has PRIVATE-PRIVATE+256 */
444 aryfil(temp1, 256, 0);
445 c = 0;
446 TLOOP(i) {
447 j = tokset[i].value - PRIVATE;
448 if(j >= 0 && j < 256) {
449 if(temp1[j]) {
450 print("yacc bug -- cant have 2 different Ts with same value\n");
451 print(" %s and %s\n", tokset[i].name, tokset[temp1[j]].name);
452 nerrors++;
454 temp1[j] = i;
455 if(j > c)
456 c = j;
459 warray("yytok2", temp1, c+1);
461 /* table 3 has everything else */
462 Bprint(ftable, "long yytok3[] =\n{\n");
463 c = 0;
464 TLOOP(i) {
465 j = tokset[i].value;
466 if(j >= 0 && j < 256)
467 continue;
468 if(j >= PRIVATE && j < 256+PRIVATE)
469 continue;
471 Bprint(ftable, "%4d,%4d,", j, i);
472 c++;
473 if(c%5 == 0)
474 Bprint(ftable, "\n");
476 Bprint(ftable, "%4d\n};\n", 0);
478 /* copy parser text */
479 while((c=Bgetrune(finput)) != Beof) {
480 if(c == '$') {
481 if((c = Bgetrune(finput)) != 'A')
482 Bputrune(ftable, '$');
483 else { /* copy actions */
484 faction = Bopen(actname, OREAD);
485 if(faction == 0)
486 error("cannot reopen action tempfile");
487 while((c=Bgetrune(faction)) != Beof)
488 Bputrune(ftable, c);
489 Bterm(faction);
490 ZAPFILE(actname);
491 c = Bgetrune(finput);
494 Bputrune(ftable, c);
496 Bterm(ftable);
499 /*
500 * copies string q into p, returning next free char ptr
501 */
502 char*
503 chcopy(char* p, char* q)
505 int c;
507 while(c = *q) {
508 if(c == '"')
509 *p++ = '\\';
510 *p++ = c;
511 q++;
513 *p = 0;
514 return p;
517 /*
518 * creates output string for item pointed to by pp
519 */
520 char*
521 writem(int *pp)
523 int i,*p;
524 static char sarr[ISIZE];
525 char* q;
527 for(p=pp; *p>0; p++)
529 p = prdptr[-*p];
530 q = chcopy(sarr, nontrst[*p-NTBASE].name);
531 q = chcopy(q, ": ");
532 for(;;) {
533 *q = ' ';
534 p++;
535 if(p == pp)
536 *q = '.';
537 q++;
538 *q = '\0';
539 i = *p;
540 if(i <= 0)
541 break;
542 q = chcopy(q, symnam(i));
543 if(q > &sarr[ISIZE-30])
544 error("item too big");
547 /* an item calling for a reduction */
548 i = *pp;
549 if(i < 0 ) {
550 q = chcopy(q, " (");
551 sprint(q, "%d)", -i);
553 return sarr;
556 /*
557 * return a pointer to the name of symbol i
558 */
559 char*
560 symnam(int i)
562 char* cp;
564 cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
565 if(*cp == ' ')
566 cp++;
567 return cp;
570 /*
571 * output the summary on y.output
572 */
573 void
574 summary(void)
577 if(foutput != 0) {
578 Bprint(foutput, "\n%d/%d terminals, %d/%d nonterminals\n",
579 ntokens, NTERMS, nnonter, NNONTERM);
580 Bprint(foutput, "%d/%d grammar rules, %d/%d states\n",
581 nprod, NPROD, nstate, NSTATES);
582 Bprint(foutput, "%d shift/reduce, %d reduce/reduce conflicts reported\n",
583 zzsrconf, zzrrconf);
584 Bprint(foutput, "%d/%d working sets used\n",
585 (int)(zzcwp-wsets), WSETSIZE);
586 Bprint(foutput, "memory: states,etc. %d/%d, parser %d/%d\n",
587 (int)(zzmemsz-mem0), MEMSIZE, (int)(memp-amem), ACTSIZE);
588 Bprint(foutput, "%d/%d distinct lookahead sets\n", nlset, LSETSIZE);
589 Bprint(foutput, "%d extra closures\n", zzclose - 2*nstate);
590 Bprint(foutput, "%d shift entries, %d exceptions\n", zzacent, zzexcp);
591 Bprint(foutput, "%d goto entries\n", zzgoent);
592 Bprint(foutput, "%d entries saved by goto default\n", zzgobest);
594 if(zzsrconf != 0 || zzrrconf != 0) {
595 print("\nconflicts: ");
596 if(zzsrconf)
597 print("%d shift/reduce", zzsrconf);
598 if(zzsrconf && zzrrconf)
599 print(", ");
600 if(zzrrconf)
601 print("%d reduce/reduce", zzrrconf);
602 print("\n");
604 if(ftemp != 0) {
605 Bterm(ftemp);
606 ftemp = 0;
608 if(fdefine != 0) {
609 Bterm(fdefine);
610 fdefine = 0;
614 /*
615 * write out error comment -- NEEDS WORK
616 */
617 void
618 error(char *s, ...)
620 va_list arg;
622 nerrors++;
623 fprint(2, "\n fatal error:");
624 va_start(arg, s);
625 vfprint(2, s, arg);
626 va_end(arg);
627 fprint(2, ", %s:%d\n", infile, lineno);
628 if(!fatfl)
629 return;
630 summary();
631 cleantmp();
632 exits("error");
635 /*
636 * set elements 0 through n-1 to c
637 */
638 void
639 aryfil(int *v, int n, int c)
641 int i;
643 for(i=0; i<n; i++)
644 v[i] = c;
647 /*
648 * set a to the union of a and b
649 * return 1 if b is not a subset of a, 0 otherwise
650 */
651 int
652 setunion(int *a, int *b)
654 int i, x, sub;
656 sub = 0;
657 SETLOOP(i) {
658 x = *a;
659 *a |= *b;
660 if(*a != x)
661 sub = 1;
662 a++;
663 b++;
665 return sub;
668 void
669 prlook(Lkset* p)
671 int j, *pp;
673 pp = p->lset;
674 if(pp == 0)
675 Bprint(foutput, "\tNULL");
676 else {
677 Bprint(foutput, " { ");
678 TLOOP(j)
679 if(BIT(pp,j))
680 Bprint(foutput, "%s ", symnam(j));
681 Bprint(foutput, "}");
685 /*
686 * compute an array with the beginnings of productions yielding given nonterminals
687 * The array pres points to these lists
688 * the array pyield has the lists: the total size is only NPROD+1
689 */
690 void
691 cpres(void)
693 int c, j, i, **pmem;
694 static int *pyield[NPROD];
696 pmem = pyield;
697 NTLOOP(i) {
698 c = i+NTBASE;
699 pres[i] = pmem;
700 fatfl = 0; /* make undefined symbols nonfatal */
701 PLOOP(0, j)
702 if(*prdptr[j] == c)
703 *pmem++ = prdptr[j]+1;
704 if(pres[i] == pmem)
705 error("nonterminal %s not defined!", nontrst[i].name);
707 pres[i] = pmem;
708 fatfl = 1;
709 if(nerrors) {
710 summary();
711 cleantmp();
712 exits("error");
714 if(pmem != &pyield[nprod])
715 error("internal Yacc error: pyield %d", pmem-&pyield[nprod]);
718 /*
719 * compute an array with the first of nonterminals
720 */
721 void
722 cpfir(void)
724 int *p, **s, i, **t, ch, changes;
726 zzcwp = &wsets[nnonter];
727 NTLOOP(i) {
728 aryfil(wsets[i].ws.lset, tbitset, 0);
729 t = pres[i+1];
730 /* initially fill the sets */
731 for(s=pres[i]; s<t; ++s)
732 for(p = *s; (ch = *p) > 0; ++p) {
733 if(ch < NTBASE) {
734 SETBIT(wsets[i].ws.lset, ch);
735 break;
737 if(!pempty[ch-NTBASE])
738 break;
742 /* now, reflect transitivity */
743 changes = 1;
744 while(changes) {
745 changes = 0;
746 NTLOOP(i) {
747 t = pres[i+1];
748 for(s = pres[i]; s < t; ++s)
749 for(p = *s; (ch = (*p-NTBASE)) >= 0; ++p) {
750 changes |= setunion(wsets[i].ws.lset, wsets[ch].ws.lset);
751 if(!pempty[ch])
752 break;
757 NTLOOP(i)
758 pfirst[i] = flset(&wsets[i].ws);
759 if(!indebug)
760 return;
761 if(foutput != 0)
762 NTLOOP(i) {
763 Bprint(foutput, "\n%s: ", nontrst[i].name);
764 prlook(pfirst[i]);
765 Bprint(foutput, " %d\n", pempty[i]);
769 /*
770 * sorts last state,and sees if it equals earlier ones. returns state number
771 */
772 int
773 state(int c)
775 Item *p1, *p2, *k, *l, *q1, *q2;
776 int size1, size2, i;
778 p1 = pstate[nstate];
779 p2 = pstate[nstate+1];
780 if(p1 == p2)
781 return 0; /* null state */
782 /* sort the items */
783 for(k = p2-1; k > p1; k--) /* make k the biggest */
784 for(l = k-1; l >= p1; --l)
785 if(l->pitem > k->pitem) {
786 int *s;
787 Lkset *ss;
789 s = k->pitem;
790 k->pitem = l->pitem;
791 l->pitem = s;
792 ss = k->look;
793 k->look = l->look;
794 l->look = ss;
796 size1 = p2 - p1; /* size of state */
798 for(i = (c>=NTBASE)? ntstates[c-NTBASE]: tstates[c]; i != 0; i = mstates[i]) {
799 /* get ith state */
800 q1 = pstate[i];
801 q2 = pstate[i+1];
802 size2 = q2 - q1;
803 if(size1 != size2)
804 continue;
805 k = p1;
806 for(l = q1; l < q2; l++) {
807 if(l->pitem != k->pitem)
808 break;
809 k++;
811 if(l != q2)
812 continue;
813 /* found it */
814 pstate[nstate+1] = pstate[nstate]; /* delete last state */
815 /* fix up lookaheads */
816 if(nolook)
817 return i;
818 for(l = q1, k = p1; l < q2; ++l, ++k ) {
819 int s;
821 SETLOOP(s)
822 clset.lset[s] = l->look->lset[s];
823 if(setunion(clset.lset, k->look->lset)) {
824 tystate[i] = MUSTDO;
825 /* register the new set */
826 l->look = flset( &clset );
829 return i;
831 /* state is new */
832 if(nolook)
833 error("yacc state/nolook error");
834 pstate[nstate+2] = p2;
835 if(nstate+1 >= NSTATES)
836 error("too many states");
837 if(c >= NTBASE) {
838 mstates[nstate] = ntstates[c-NTBASE];
839 ntstates[c-NTBASE] = nstate;
840 } else {
841 mstates[nstate] = tstates[c];
842 tstates[c] = nstate;
844 tystate[nstate] = MUSTDO;
845 return nstate++;
848 void
849 putitem(int *ptr, Lkset *lptr)
851 Item *j;
853 if(pidebug && foutput != 0)
854 Bprint(foutput, "putitem(%s), state %d\n", writem(ptr), nstate);
855 j = pstate[nstate+1];
856 j->pitem = ptr;
857 if(!nolook)
858 j->look = flset(lptr);
859 pstate[nstate+1] = ++j;
860 if((int*)j > zzmemsz) {
861 zzmemsz = (int*)j;
862 if(zzmemsz >= &mem0[MEMSIZE])
863 error("out of state space");
867 /*
868 * mark nonterminals which derive the empty string
869 * also, look for nonterminals which don't derive any token strings
870 */
871 void
872 cempty(void)
875 int i, *p;
877 /* first, use the array pempty to detect productions that can never be reduced */
878 /* set pempty to WHONOWS */
879 aryfil(pempty, nnonter+1, WHOKNOWS);
881 /* now, look at productions, marking nonterminals which derive something */
882 more:
883 PLOOP(0, i) {
884 if(pempty[*prdptr[i] - NTBASE])
885 continue;
886 for(p = prdptr[i]+1; *p >= 0; ++p)
887 if(*p >= NTBASE && pempty[*p-NTBASE] == WHOKNOWS)
888 break;
889 /* production can be derived */
890 if(*p < 0) {
891 pempty[*prdptr[i]-NTBASE] = OK;
892 goto more;
896 /* now, look at the nonterminals, to see if they are all OK */
897 NTLOOP(i) {
898 /* the added production rises or falls as the start symbol ... */
899 if(i == 0)
900 continue;
901 if(pempty[i] != OK) {
902 fatfl = 0;
903 error("nonterminal %s never derives any token string", nontrst[i].name);
907 if(nerrors) {
908 summary();
909 cleantmp();
910 exits("error");
913 /* now, compute the pempty array, to see which nonterminals derive the empty string */
914 /* set pempty to WHOKNOWS */
915 aryfil( pempty, nnonter+1, WHOKNOWS);
917 /* loop as long as we keep finding empty nonterminals */
919 again:
920 PLOOP(1, i) {
921 /* not known to be empty */
922 if(pempty[*prdptr[i]-NTBASE] == WHOKNOWS) {
923 for(p = prdptr[i]+1; *p >= NTBASE && pempty[*p-NTBASE] == EMPTY ; ++p)
925 /* we have a nontrivially empty nonterminal */
926 if(*p < 0) {
927 pempty[*prdptr[i]-NTBASE] = EMPTY;
928 /* got one ... try for another */
929 goto again;
935 /*
936 * generate the states
937 */
938 void
939 stagen(void)
942 int c, i, j, more;
943 Wset *p, *q;
945 /* initialize */
946 nstate = 0;
948 /* THIS IS FUNNY from the standpoint of portability
949 * it represents the magic moment when the mem0 array, which has
950 * been holding the productions, starts to hold item pointers, of a
951 * different type...
952 * someday, alloc should be used to allocate all this stuff... for now, we
953 * accept that if pointers don't fit in integers, there is a problem...
954 */
956 pstate[0] = pstate[1] = (Item*)mem;
957 aryfil(clset.lset, tbitset, 0);
958 putitem(prdptr[0]+1, &clset);
959 tystate[0] = MUSTDO;
960 nstate = 1;
961 pstate[2] = pstate[1];
963 aryfil(amem, ACTSIZE, 0);
965 /* now, the main state generation loop */
966 for(more=1; more;) {
967 more = 0;
968 SLOOP(i) {
969 if(tystate[i] != MUSTDO)
970 continue;
971 tystate[i] = DONE;
972 aryfil(temp1, nnonter+1, 0);
973 /* take state i, close it, and do gotos */
974 closure(i);
975 /* generate goto's */
976 WSLOOP(wsets, p) {
977 if(p->flag)
978 continue;
979 p->flag = 1;
980 c = *(p->pitem);
981 if(c <= 1) {
982 if(pstate[i+1]-pstate[i] <= p-wsets)
983 tystate[i] = MUSTLOOKAHEAD;
984 continue;
986 /* do a goto on c */
987 WSLOOP(p, q)
988 /* this item contributes to the goto */
989 if(c == *(q->pitem)) {
990 putitem(q->pitem+1, &q->ws);
991 q->flag = 1;
993 if(c < NTBASE)
994 state(c); /* register new state */
995 else
996 temp1[c-NTBASE] = state(c);
998 if(gsdebug && foutput != 0) {
999 Bprint(foutput, "%d: ", i);
1000 NTLOOP(j)
1001 if(temp1[j])
1002 Bprint(foutput, "%s %d, ",
1003 nontrst[j].name, temp1[j]);
1004 Bprint(foutput, "\n");
1006 indgo[i] = apack(&temp1[1], nnonter-1) - 1;
1007 /* do some more */
1008 more = 1;
1014 * generate the closure of state i
1016 void
1017 closure(int i)
1020 Wset *u, *v;
1021 Item *p, *q;
1022 int c, ch, work, k, *pi, **s, **t;
1024 zzclose++;
1026 /* first, copy kernel of state i to wsets */
1027 cwp = wsets;
1028 ITMLOOP(i, p, q) {
1029 cwp->pitem = p->pitem;
1030 cwp->flag = 1; /* this item must get closed */
1031 SETLOOP(k)
1032 cwp->ws.lset[k] = p->look->lset[k];
1033 WSBUMP(cwp);
1036 /* now, go through the loop, closing each item */
1037 work = 1;
1038 while(work) {
1039 work = 0;
1040 WSLOOP(wsets, u) {
1041 if(u->flag == 0)
1042 continue;
1043 /* dot is before c */
1044 c = *(u->pitem);
1045 if(c < NTBASE) {
1046 u->flag = 0;
1047 /* only interesting case is where . is before nonterminal */
1048 continue;
1051 /* compute the lookahead */
1052 aryfil(clset.lset, tbitset, 0);
1054 /* find items involving c */
1055 WSLOOP(u, v)
1056 if(v->flag == 1 && *(pi=v->pitem) == c) {
1057 v->flag = 0;
1058 if(nolook)
1059 continue;
1060 while((ch = *++pi) > 0) {
1061 /* terminal symbol */
1062 if(ch < NTBASE) {
1063 SETBIT(clset.lset, ch);
1064 break;
1066 /* nonterminal symbol */
1067 setunion(clset.lset, pfirst[ch-NTBASE]->lset);
1068 if(!pempty[ch-NTBASE])
1069 break;
1071 if(ch <= 0)
1072 setunion(clset.lset, v->ws.lset);
1076 * now loop over productions derived from c
1077 * c is now nonterminal number
1079 c -= NTBASE;
1080 t = pres[c+1];
1081 for(s = pres[c]; s < t; ++s) {
1083 * put these items into the closure
1084 * is the item there
1086 WSLOOP(wsets, v)
1087 /* yes, it is there */
1088 if(v->pitem == *s) {
1089 if(nolook)
1090 goto nexts;
1091 if(setunion(v->ws.lset, clset.lset))
1092 v->flag = work = 1;
1093 goto nexts;
1096 /* not there; make a new entry */
1097 if(cwp-wsets+1 >= WSETSIZE)
1098 error( "working set overflow");
1099 cwp->pitem = *s;
1100 cwp->flag = 1;
1101 if(!nolook) {
1102 work = 1;
1103 SETLOOP(k) cwp->ws.lset[k] = clset.lset[k];
1105 WSBUMP(cwp);
1107 nexts:;
1112 /* have computed closure; flags are reset; return */
1113 if(cwp > zzcwp)
1114 zzcwp = cwp;
1115 if(cldebug && foutput != 0) {
1116 Bprint(foutput, "\nState %d, nolook = %d\n", i, nolook);
1117 WSLOOP(wsets, u) {
1118 if(u->flag)
1119 Bprint(foutput, "flag set!\n");
1120 u->flag = 0;
1121 Bprint(foutput, "\t%s", writem(u->pitem));
1122 prlook(&u->ws);
1123 Bprint(foutput, "\n");
1129 * decide if the lookahead set pointed to by p is known
1130 * return pointer to a perminent location for the set
1132 Lkset*
1133 flset(Lkset *p)
1135 Lkset *q;
1136 int *u, *v, *w, j;
1138 for(q = &lkst[nlset]; q-- > lkst;) {
1139 u = p->lset;
1140 v = q->lset;
1141 w = &v[tbitset];
1142 while(v < w)
1143 if(*u++ != *v++)
1144 goto more;
1145 /* we have matched */
1146 return q;
1147 more:;
1149 /* add a new one */
1150 q = &lkst[nlset++];
1151 if(nlset >= LSETSIZE)
1152 error("too many lookahead sets");
1153 SETLOOP(j)
1154 q->lset[j] = p->lset[j];
1155 return q;
1158 void
1159 cleantmp(void)
1161 ZAPFILE(actname);
1162 ZAPFILE(tempname);
1165 void
1166 intr(void)
1168 cleantmp();
1169 exits("interrupted");
1172 void
1173 setup(int argc, char *argv[])
1175 long c, t;
1176 int i, j, fd, lev, ty, ytab, *p;
1177 int vflag, dflag, stem;
1178 char actnm[8], *stemc, *s, dirbuf[128];
1180 ytab = 0;
1181 vflag = 0;
1182 dflag = 0;
1183 stem = 0;
1184 stemc = "y";
1185 foutput = 0;
1186 fdefine = 0;
1187 fdebug = 0;
1188 ARGBEGIN{
1189 case 'v':
1190 case 'V':
1191 vflag++;
1192 break;
1193 case 'D':
1194 yydebug = ARGF();
1195 break;
1196 case 'd':
1197 dflag++;
1198 break;
1199 case 'o':
1200 ytab++;
1201 ytabc = ARGF();
1202 break;
1203 case 's':
1204 stem++;
1205 stemc = ARGF();
1206 break;
1207 case 'S':
1208 parser = PARSERS;
1209 break;
1210 default:
1211 error("illegal option: %c", ARGC());
1212 }ARGEND
1213 openup(stemc, dflag, vflag, ytab, ytabc);
1215 if((fd = mkstemp(ttempname)) >= 0){
1216 tempname = ttempname;
1217 ftemp = Bfdopen(fd, OWRITE);
1219 if((fd = mkstemp(tactname)) >= 0){
1220 actname = tactname;
1221 faction = Bfdopen(fd, OWRITE);
1223 if(ftemp == 0 || faction == 0)
1224 error("cannot open temp file");
1225 if(argc < 1)
1226 error("no input file");
1227 infile = argv[0];
1228 if(infile[0] != '/' && getwd(dirbuf, sizeof dirbuf)!=nil){
1229 i = strlen(infile)+1+strlen(dirbuf)+1+10;
1230 s = malloc(i);
1231 if(s != nil){
1232 snprint(s, i, "%s/%s", dirbuf, infile);
1233 cleanname(s);
1234 infile = s;
1237 finput = Bopen(infile, OREAD);
1238 if(finput == 0)
1239 error("cannot open '%s'", argv[0]);
1240 cnamp = cnames;
1242 defin(0, "$end");
1243 extval = PRIVATE; /* tokens start in unicode 'private use' */
1244 defin(0, "error");
1245 defin(1, "$accept");
1246 defin(0, "$unk");
1247 mem = mem0;
1248 i = 0;
1250 for(t = gettok(); t != MARK && t != ENDFILE;)
1251 switch(t) {
1252 case ';':
1253 t = gettok();
1254 break;
1256 case START:
1257 if(gettok() != IDENTIFIER)
1258 error("bad %%start construction");
1259 start = chfind(1, tokname);
1260 t = gettok();
1261 continue;
1263 case TYPEDEF:
1264 if(gettok() != TYPENAME)
1265 error("bad syntax in %%type");
1266 ty = numbval;
1267 for(;;) {
1268 t = gettok();
1269 switch(t) {
1270 case IDENTIFIER:
1271 if((t=chfind(1, tokname)) < NTBASE) {
1272 j = TYPE(toklev[t]);
1273 if(j != 0 && j != ty)
1274 error("type redeclaration of token %s",
1275 tokset[t].name);
1276 else
1277 SETTYPE(toklev[t], ty);
1278 } else {
1279 j = nontrst[t-NTBASE].value;
1280 if(j != 0 && j != ty)
1281 error("type redeclaration of nonterminal %s",
1282 nontrst[t-NTBASE].name );
1283 else
1284 nontrst[t-NTBASE].value = ty;
1286 case ',':
1287 continue;
1288 case ';':
1289 t = gettok();
1290 default:
1291 break;
1293 break;
1295 continue;
1297 case UNION:
1298 /* copy the union declaration to the output */
1299 cpyunion();
1300 t = gettok();
1301 continue;
1303 case LEFT:
1304 case BINARY:
1305 case RIGHT:
1306 i++;
1308 case TERM:
1309 /* nonzero means new prec. and assoc. */
1310 lev = t-TERM;
1311 ty = 0;
1313 /* get identifiers so defined */
1314 t = gettok();
1316 /* there is a type defined */
1317 if(t == TYPENAME) {
1318 ty = numbval;
1319 t = gettok();
1321 for(;;) {
1322 switch(t) {
1323 case ',':
1324 t = gettok();
1325 continue;
1327 case ';':
1328 break;
1330 case IDENTIFIER:
1331 j = chfind(0, tokname);
1332 if(j >= NTBASE)
1333 error("%s defined earlier as nonterminal", tokname);
1334 if(lev) {
1335 if(ASSOC(toklev[j]))
1336 error("redeclaration of precedence of %s", tokname);
1337 SETASC(toklev[j], lev);
1338 SETPLEV(toklev[j], i);
1340 if(ty) {
1341 if(TYPE(toklev[j]))
1342 error("redeclaration of type of %s", tokname);
1343 SETTYPE(toklev[j],ty);
1345 t = gettok();
1346 if(t == NUMBER) {
1347 tokset[j].value = numbval;
1348 if(j < ndefout && j > 3)
1349 error("please define type number of %s earlier",
1350 tokset[j].name);
1351 t = gettok();
1353 continue;
1355 break;
1357 continue;
1359 case LCURLY:
1360 defout(0);
1361 cpycode();
1362 t = gettok();
1363 continue;
1365 default:
1366 error("syntax error");
1368 if(t == ENDFILE)
1369 error("unexpected EOF before %%");
1371 /* t is MARK */
1372 Bprint(ftable, "extern int yyerrflag;\n");
1373 Bprint(ftable, "#ifndef YYMAXDEPTH\n");
1374 Bprint(ftable, "#define YYMAXDEPTH 150\n");
1375 Bprint(ftable, "#endif\n" );
1376 if(!ntypes) {
1377 Bprint(ftable, "#ifndef YYSTYPE\n");
1378 Bprint(ftable, "#define YYSTYPE int\n");
1379 Bprint(ftable, "#endif\n");
1381 Bprint(ftable, "YYSTYPE yylval;\n");
1382 Bprint(ftable, "YYSTYPE yyval;\n");
1384 prdptr[0] = mem;
1386 /* added production */
1387 *mem++ = NTBASE;
1389 /* if start is 0, we will overwrite with the lhs of the first rule */
1390 *mem++ = start;
1391 *mem++ = 1;
1392 *mem++ = 0;
1393 prdptr[1] = mem;
1394 while((t=gettok()) == LCURLY)
1395 cpycode();
1396 if(t != IDENTCOLON)
1397 error("bad syntax on first rule");
1399 if(!start)
1400 prdptr[0][1] = chfind(1, tokname);
1402 /* read rules */
1403 while(t != MARK && t != ENDFILE) {
1404 /* process a rule */
1405 rlines[nprod] = lineno;
1406 if(t == '|')
1407 *mem++ = *prdptr[nprod-1];
1408 else
1409 if(t == IDENTCOLON) {
1410 *mem = chfind(1, tokname);
1411 if(*mem < NTBASE)
1412 error("token illegal on LHS of grammar rule");
1413 mem++;
1414 } else
1415 error("illegal rule: missing semicolon or | ?");
1416 /* read rule body */
1417 t = gettok();
1419 more_rule:
1420 while(t == IDENTIFIER) {
1421 *mem = chfind(1, tokname);
1422 if(*mem < NTBASE)
1423 levprd[nprod] = toklev[*mem];
1424 mem++;
1425 t = gettok();
1427 if(t == PREC) {
1428 if(gettok() != IDENTIFIER)
1429 error("illegal %%prec syntax");
1430 j = chfind(2, tokname);
1431 if(j >= NTBASE)
1432 error("nonterminal %s illegal after %%prec",
1433 nontrst[j-NTBASE].name);
1434 levprd[nprod] = toklev[j];
1435 t = gettok();
1437 if(t == '=') {
1438 levprd[nprod] |= ACTFLAG;
1439 Bprint(faction, "\ncase %d:", nprod);
1440 cpyact(mem-prdptr[nprod]-1);
1441 Bprint(faction, " break;");
1442 if((t=gettok()) == IDENTIFIER) {
1444 /* action within rule... */
1445 sprint(actnm, "$$%d", nprod);
1447 /* make it a nonterminal */
1448 j = chfind(1, actnm);
1451 * the current rule will become rule number nprod+1
1452 * move the contents down, and make room for the null
1454 for(p = mem; p >= prdptr[nprod]; --p)
1455 p[2] = *p;
1456 mem += 2;
1458 /* enter null production for action */
1459 p = prdptr[nprod];
1460 *p++ = j;
1461 *p++ = -nprod;
1463 /* update the production information */
1464 levprd[nprod+1] = levprd[nprod] & ~ACTFLAG;
1465 levprd[nprod] = ACTFLAG;
1466 if(++nprod >= NPROD)
1467 error("more than %d rules", NPROD);
1468 prdptr[nprod] = p;
1470 /* make the action appear in the original rule */
1471 *mem++ = j;
1473 /* get some more of the rule */
1474 goto more_rule;
1478 while(t == ';')
1479 t = gettok();
1480 *mem++ = -nprod;
1482 /* check that default action is reasonable */
1483 if(ntypes && !(levprd[nprod]&ACTFLAG) && nontrst[*prdptr[nprod]-NTBASE].value) {
1485 /* no explicit action, LHS has value */
1486 int tempty;
1488 tempty = prdptr[nprod][1];
1489 if(tempty < 0)
1490 error("must return a value, since LHS has a type");
1491 else
1492 if(tempty >= NTBASE)
1493 tempty = nontrst[tempty-NTBASE].value;
1494 else
1495 tempty = TYPE(toklev[tempty]);
1496 if(tempty != nontrst[*prdptr[nprod]-NTBASE].value)
1497 error("default action causes potential type clash");
1499 nprod++;
1500 if(nprod >= NPROD)
1501 error("more than %d rules", NPROD);
1502 prdptr[nprod] = mem;
1503 levprd[nprod] = 0;
1506 /* end of all rules */
1507 defout(1);
1509 finact();
1510 if(t == MARK) {
1511 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1512 while((c=Bgetrune(finput)) != Beof)
1513 Bputrune(ftable, c);
1515 Bterm(finput);
1519 * finish action routine
1521 void
1522 finact(void)
1525 Bterm(faction);
1526 Bprint(ftable, "#define YYEOFCODE %d\n", 1);
1527 Bprint(ftable, "#define YYERRCODE %d\n", 2);
1531 * define s to be a terminal if t=0
1532 * or a nonterminal if t=1
1534 int
1535 defin(int nt, char *s)
1537 int val;
1538 Rune rune;
1540 val = 0;
1541 if(nt) {
1542 nnonter++;
1543 if(nnonter >= NNONTERM)
1544 error("too many nonterminals, limit %d",NNONTERM);
1545 nontrst[nnonter].name = cstash(s);
1546 return NTBASE + nnonter;
1549 /* must be a token */
1550 ntokens++;
1551 if(ntokens >= NTERMS)
1552 error("too many terminals, limit %d", NTERMS);
1553 tokset[ntokens].name = cstash(s);
1555 /* establish value for token */
1556 /* single character literal */
1557 if(s[0] == ' ') {
1558 val = chartorune(&rune, &s[1]);
1559 if(s[val+1] == 0) {
1560 val = rune;
1561 goto out;
1565 /* escape sequence */
1566 if(s[0] == ' ' && s[1] == '\\') {
1567 if(s[3] == 0) {
1568 /* single character escape sequence */
1569 switch(s[2]) {
1570 case 'n': val = '\n'; break;
1571 case 'r': val = '\r'; break;
1572 case 'b': val = '\b'; break;
1573 case 't': val = '\t'; break;
1574 case 'f': val = '\f'; break;
1575 case '\'': val = '\''; break;
1576 case '"': val = '"'; break;
1577 case '\\': val = '\\'; break;
1578 default: error("invalid escape");
1580 goto out;
1583 /* \nnn sequence */
1584 if(s[2] >= '0' && s[2] <= '7') {
1585 if(s[3] < '0' ||
1586 s[3] > '7' ||
1587 s[4] < '0' ||
1588 s[4] > '7' ||
1589 s[5] != 0)
1590 error("illegal \\nnn construction");
1591 val = 64*s[2] + 8*s[3] + s[4] - 73*'0';
1592 if(val == 0)
1593 error("'\\000' is illegal");
1594 goto out;
1596 error("unknown escape");
1598 val = extval++;
1600 out:
1601 tokset[ntokens].value = val;
1602 toklev[ntokens] = 0;
1603 return ntokens;
1607 * write out the defines (at the end of the declaration section)
1609 void
1610 defout(int last)
1612 int i, c;
1613 char sar[NAMESIZE+10];
1615 for(i=ndefout; i<=ntokens; i++) {
1616 /* non-literals */
1617 c = tokset[i].name[0];
1618 if(c != ' ' && c != '$') {
1619 Bprint(ftable, "#define %s %d\n",
1620 tokset[i].name, tokset[i].value);
1621 if(fdefine)
1622 Bprint(fdefine, "#define\t%s\t%d\n",
1623 tokset[i].name, tokset[i].value);
1626 ndefout = ntokens+1;
1627 if(last && fdebug) {
1628 Bprint(fdebug, "char* yytoknames[] =\n{\n");
1629 TLOOP(i) {
1630 if(tokset[i].name) {
1631 chcopy(sar, tokset[i].name);
1632 Bprint(fdebug, "\t\"%s\",\n", sar);
1633 continue;
1635 Bprint(fdebug, "\t0,\n");
1637 Bprint(fdebug, "};\n");
1641 char*
1642 cstash(char *s)
1644 char *temp;
1646 temp = cnamp;
1647 do {
1648 if(cnamp >= &cnames[cnamsz])
1649 error("too many characters in id's and literals");
1650 else
1651 *cnamp++ = *s;
1652 } while(*s++);
1653 return temp;
1656 long
1657 gettok(void)
1659 long c;
1660 Rune rune;
1661 int i, base, match, reserve;
1662 static int peekline;
1664 begin:
1665 reserve = 0;
1666 lineno += peekline;
1667 peekline = 0;
1668 c = Bgetrune(finput);
1669 while(c == ' ' || c == '\n' || c == '\t' || c == '\f') {
1670 if(c == '\n')
1671 lineno++;
1672 c = Bgetrune(finput);
1675 /* skip comment */
1676 if(c == '/') {
1677 lineno += skipcom();
1678 goto begin;
1680 switch(c) {
1681 case Beof:
1682 return ENDFILE;
1684 case '{':
1685 Bungetrune(finput);
1686 return '=';
1688 case '<':
1689 /* get, and look up, a type name (union member name) */
1690 i = 0;
1691 while((c=Bgetrune(finput)) != '>' && c >= 0 && c != '\n') {
1692 rune = c;
1693 c = runetochar(&tokname[i], &rune);
1694 if(i < NAMESIZE)
1695 i += c;
1697 if(c != '>')
1698 error("unterminated < ... > clause");
1699 tokname[i] = 0;
1700 for(i=1; i<=ntypes; i++)
1701 if(!strcmp(typeset[i], tokname)) {
1702 numbval = i;
1703 return TYPENAME;
1705 ntypes++;
1706 numbval = ntypes;
1707 typeset[numbval] = cstash(tokname);
1708 return TYPENAME;
1710 case '"':
1711 case '\'':
1712 match = c;
1713 tokname[0] = ' ';
1714 i = 1;
1715 for(;;) {
1716 c = Bgetrune(finput);
1717 if(c == '\n' || c <= 0)
1718 error("illegal or missing ' or \"" );
1719 if(c == '\\') {
1720 tokname[i] = '\\';
1721 if(i < NAMESIZE)
1722 i++;
1723 c = Bgetrune(finput);
1724 } else
1725 if(c == match)
1726 break;
1727 rune = c;
1728 c = runetochar(&tokname[i], &rune);
1729 if(i < NAMESIZE)
1730 i += c;
1732 break;
1734 case '%':
1735 case '\\':
1736 switch(c = Bgetrune(finput)) {
1737 case '0': return TERM;
1738 case '<': return LEFT;
1739 case '2': return BINARY;
1740 case '>': return RIGHT;
1741 case '%':
1742 case '\\': return MARK;
1743 case '=': return PREC;
1744 case '{': return LCURLY;
1745 default: reserve = 1;
1748 default:
1749 /* number */
1750 if(isdigit(c)) {
1751 numbval = c-'0';
1752 base = (c=='0')? 8: 10;
1753 for(c = Bgetrune(finput); isdigit(c); c = Bgetrune(finput))
1754 numbval = numbval*base + (c-'0');
1755 Bungetrune(finput);
1756 return NUMBER;
1758 if(islower(c) || isupper(c) || c=='_' || c=='.' || c=='$') {
1759 i = 0;
1760 while(islower(c) || isupper(c) || isdigit(c) ||
1761 c == '-' || c=='_' || c=='.' || c=='$') {
1762 if(reserve && isupper(c))
1763 c += 'a'-'A';
1764 rune = c;
1765 c = runetochar(&tokname[i], &rune);
1766 if(i < NAMESIZE)
1767 i += c;
1768 c = Bgetrune(finput);
1770 } else
1771 return c;
1772 Bungetrune(finput);
1774 tokname[i] = 0;
1776 /* find a reserved word */
1777 if(reserve) {
1778 for(c=0; resrv[c].name; c++)
1779 if(strcmp(tokname, resrv[c].name) == 0)
1780 return resrv[c].value;
1781 error("invalid escape, or illegal reserved word: %s", tokname);
1784 /* look ahead to distinguish IDENTIFIER from IDENTCOLON */
1785 c = Bgetrune(finput);
1786 while(c == ' ' || c == '\t'|| c == '\n' || c == '\f' || c == '/') {
1787 if(c == '\n')
1788 peekline++;
1789 /* look for comments */
1790 if(c == '/')
1791 peekline += skipcom();
1792 c = Bgetrune(finput);
1794 if(c == ':')
1795 return IDENTCOLON;
1796 Bungetrune(finput);
1797 return IDENTIFIER;
1801 * determine the type of a symbol
1803 int
1804 fdtype(int t)
1806 int v;
1808 if(t >= NTBASE)
1809 v = nontrst[t-NTBASE].value;
1810 else
1811 v = TYPE(toklev[t]);
1812 if(v <= 0)
1813 error("must specify type for %s", (t>=NTBASE)?
1814 nontrst[t-NTBASE].name: tokset[t].name);
1815 return v;
1818 int
1819 chfind(int t, char *s)
1821 int i;
1823 if(s[0] == ' ')
1824 t = 0;
1825 TLOOP(i)
1826 if(!strcmp(s, tokset[i].name))
1827 return i;
1828 NTLOOP(i)
1829 if(!strcmp(s, nontrst[i].name))
1830 return NTBASE+i;
1832 /* cannot find name */
1833 if(t > 1)
1834 error("%s should have been defined earlier", s);
1835 return defin(t, s);
1839 * copy the union declaration to the output, and the define file if present
1841 void
1842 cpyunion(void)
1844 long c;
1845 int level;
1847 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1848 Bprint(ftable, "typedef union ");
1849 if(fdefine != 0)
1850 Bprint(fdefine, "\ntypedef union ");
1852 level = 0;
1853 for(;;) {
1854 if((c=Bgetrune(finput)) == Beof)
1855 error("EOF encountered while processing %%union");
1856 Bputrune(ftable, c);
1857 if(fdefine != 0)
1858 Bputrune(fdefine, c);
1859 switch(c) {
1860 case '\n':
1861 lineno++;
1862 break;
1863 case '{':
1864 level++;
1865 break;
1866 case '}':
1867 level--;
1869 /* we are finished copying */
1870 if(level == 0) {
1871 Bprint(ftable, " YYSTYPE;\n");
1872 if(fdefine != 0)
1873 Bprint(fdefine, "\tYYSTYPE;\nextern\tYYSTYPE\tyylval;\n");
1874 return;
1881 * copies code between \{ and \}
1883 void
1884 cpycode(void)
1887 long c;
1889 c = Bgetrune(finput);
1890 if(c == '\n') {
1891 c = Bgetrune(finput);
1892 lineno++;
1894 Bprint(ftable, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1895 while(c != Beof) {
1896 if(c == '\\') {
1897 if((c=Bgetrune(finput)) == '}')
1898 return;
1899 Bputc(ftable, '\\');
1901 if(c == '%') {
1902 if((c=Bgetrune(finput)) == '}')
1903 return;
1904 Bputc(ftable, '%');
1906 Bputrune(ftable, c);
1907 if(c == '\n')
1908 lineno++;
1909 c = Bgetrune(finput);
1911 error("eof before %%}");
1915 * skip over comments
1916 * skipcom is called after reading a '/'
1918 int
1919 skipcom(void)
1921 long c;
1922 int i;
1924 /* i is the number of lines skipped */
1925 i = 0;
1926 if(Bgetrune(finput) != '*')
1927 error("illegal comment");
1928 c = Bgetrune(finput);
1929 while(c != Beof) {
1930 while(c == '*')
1931 if((c=Bgetrune(finput)) == '/')
1932 return i;
1933 if(c == '\n')
1934 i++;
1935 c = Bgetrune(finput);
1937 error("EOF inside comment");
1938 return 0;
1942 * copy C action to the next ; or closing }
1944 void
1945 cpyact(int offset)
1947 long c;
1948 int brac, match, j, s, fnd, tok;
1950 Bprint(faction, "\n#line\t%d\t\"%s\"\n", lineno, infile);
1951 brac = 0;
1953 loop:
1954 c = Bgetrune(finput);
1955 swt:
1956 switch(c) {
1957 case ';':
1958 if(brac == 0) {
1959 Bputrune(faction, c);
1960 return;
1962 goto lcopy;
1964 case '{':
1965 brac++;
1966 goto lcopy;
1968 case '$':
1969 s = 1;
1970 tok = -1;
1971 c = Bgetrune(finput);
1973 /* type description */
1974 if(c == '<') {
1975 Bungetrune(finput);
1976 if(gettok() != TYPENAME)
1977 error("bad syntax on $<ident> clause");
1978 tok = numbval;
1979 c = Bgetrune(finput);
1981 if(c == '$') {
1982 Bprint(faction, "yyval");
1984 /* put out the proper tag... */
1985 if(ntypes) {
1986 if(tok < 0)
1987 tok = fdtype(*prdptr[nprod]);
1988 Bprint(faction, ".%s", typeset[tok]);
1990 goto loop;
1992 if(c == '-') {
1993 s = -s;
1994 c = Bgetrune(finput);
1996 if(isdigit(c)) {
1997 j = 0;
1998 while(isdigit(c)) {
1999 j = j*10 + (c-'0');
2000 c = Bgetrune(finput);
2002 Bungetrune(finput);
2003 j = j*s - offset;
2004 if(j > 0)
2005 error("Illegal use of $%d", j+offset);
2007 dollar:
2008 Bprint(faction, "yypt[-%d].yyv", -j);
2010 /* put out the proper tag */
2011 if(ntypes) {
2012 if(j+offset <= 0 && tok < 0)
2013 error("must specify type of $%d", j+offset);
2014 if(tok < 0)
2015 tok = fdtype(prdptr[nprod][j+offset]);
2016 Bprint(faction, ".%s", typeset[tok]);
2018 goto loop;
2020 if(isupper(c) || islower(c) || c == '_' || c == '.') {
2021 int tok; /* tok used oustide for type info */
2023 /* look for $name */
2024 Bungetrune(finput);
2025 if(gettok() != IDENTIFIER)
2026 error("$ must be followed by an identifier");
2027 tok = chfind(2, tokname);
2028 if((c = Bgetrune(finput)) != '#') {
2029 Bungetrune(finput);
2030 fnd = -1;
2031 } else
2032 if(gettok() != NUMBER) {
2033 error("# must be followed by number");
2034 fnd = -1;
2035 } else
2036 fnd = numbval;
2037 for(j=1; j<=offset; ++j)
2038 if(tok == prdptr[nprod][j]) {
2039 if(--fnd <= 0) {
2040 j -= offset;
2041 goto dollar;
2044 error("$name or $name#number not found");
2046 Bputc(faction, '$');
2047 if(s < 0 )
2048 Bputc(faction, '-');
2049 goto swt;
2051 case '}':
2052 brac--;
2053 if(brac)
2054 goto lcopy;
2055 Bputrune(faction, c);
2056 return;
2058 case '/':
2059 /* look for comments */
2060 Bputrune(faction, c);
2061 c = Bgetrune(finput);
2062 if(c != '*')
2063 goto swt;
2065 /* it really is a comment */
2066 Bputrune(faction, c);
2067 c = Bgetrune(finput);
2068 while(c >= 0) {
2069 while(c == '*') {
2070 Bputrune(faction, c);
2071 if((c=Bgetrune(finput)) == '/')
2072 goto lcopy;
2074 Bputrune(faction, c);
2075 if(c == '\n')
2076 lineno++;
2077 c = Bgetrune(finput);
2079 error("EOF inside comment");
2081 case '\'':
2082 /* character constant */
2083 match = '\'';
2084 goto string;
2086 case '"':
2087 /* character string */
2088 match = '"';
2090 string:
2091 Bputrune(faction, c);
2092 while(c = Bgetrune(finput)) {
2093 if(c == '\\') {
2094 Bputrune(faction, c);
2095 c = Bgetrune(finput);
2096 if(c == '\n')
2097 lineno++;
2098 } else
2099 if(c == match)
2100 goto lcopy;
2101 if(c == '\n')
2102 error("newline in string or char. const.");
2103 Bputrune(faction, c);
2105 error("EOF in string or character constant");
2107 case Beof:
2108 error("action does not terminate");
2110 case '\n':
2111 lineno++;
2112 goto lcopy;
2115 lcopy:
2116 Bputrune(faction, c);
2117 goto loop;
2120 void
2121 openup(char *stem, int dflag, int vflag, int ytab, char *ytabc)
2123 char buf[256];
2125 if(vflag) {
2126 sprint(buf, "%s.%s", stem, FILEU);
2127 foutput = Bopen(buf, OWRITE);
2128 if(foutput == 0)
2129 error("cannot open %s", buf);
2131 if(yydebug) {
2132 sprint(buf, "%s.%s", stem, FILEDEBUG);
2133 if((fdebug = Bopen(buf, OWRITE)) == 0)
2134 error("can't open %s", buf);
2136 if(dflag) {
2137 sprint(buf, "%s.%s", stem, FILED);
2138 fdefine = Bopen(buf, OWRITE);
2139 if(fdefine == 0)
2140 error("can't create %s", buf);
2142 if(ytab == 0)
2143 sprint(buf, "%s.%s", stem, OFILE);
2144 else
2145 strcpy(buf, ytabc);
2146 ftable = Bopen(buf, OWRITE);
2147 if(ftable == 0)
2148 error("cannot open table file %s", buf);
2152 * print the output for the states
2154 void
2155 output(void)
2157 int i, k, c;
2158 Wset *u, *v;
2160 Bprint(ftable, "short yyexca[] =\n{");
2161 if(fdebug)
2162 Bprint(fdebug, "char* yystates[] =\n{\n");
2164 /* output the stuff for state i */
2165 SLOOP(i) {
2166 nolook = tystate[i]!=MUSTLOOKAHEAD;
2167 closure(i);
2169 /* output actions */
2170 nolook = 1;
2171 aryfil(temp1, ntokens+nnonter+1, 0);
2172 WSLOOP(wsets, u) {
2173 c = *(u->pitem);
2174 if(c > 1 && c < NTBASE && temp1[c] == 0) {
2175 WSLOOP(u, v)
2176 if(c == *(v->pitem))
2177 putitem(v->pitem+1, (Lkset*)0);
2178 temp1[c] = state(c);
2179 } else
2180 if(c > NTBASE && temp1[(c -= NTBASE) + ntokens] == 0)
2181 temp1[c+ntokens] = amem[indgo[i]+c];
2183 if(i == 1)
2184 temp1[1] = ACCEPTCODE;
2186 /* now, we have the shifts; look at the reductions */
2187 lastred = 0;
2188 WSLOOP(wsets, u) {
2189 c = *u->pitem;
2191 /* reduction */
2192 if(c <= 0) {
2193 lastred = -c;
2194 TLOOP(k)
2195 if(BIT(u->ws.lset, k)) {
2196 if(temp1[k] == 0)
2197 temp1[k] = c;
2198 else
2199 if(temp1[k] < 0) { /* reduce/reduce conflict */
2200 if(foutput)
2201 Bprint(foutput,
2202 "\n%d: reduce/reduce conflict"
2203 " (red'ns %d and %d ) on %s",
2204 i, -temp1[k], lastred,
2205 symnam(k));
2206 if(-temp1[k] > lastred)
2207 temp1[k] = -lastred;
2208 zzrrconf++;
2209 } else
2210 /* potential shift/reduce conflict */
2211 precftn( lastred, k, i );
2215 wract(i);
2218 if(fdebug)
2219 Bprint(fdebug, "};\n");
2220 Bprint(ftable, "};\n");
2221 Bprint(ftable, "#define YYNPROD %d\n", nprod);
2222 Bprint(ftable, "#define YYPRIVATE %d\n", PRIVATE);
2223 if(yydebug)
2224 Bprint(ftable, "#define yydebug %s\n", yydebug);
2228 * pack state i from temp1 into amem
2230 int
2231 apack(int *p, int n)
2233 int *pp, *qq, *rr, off, *q, *r;
2235 /* we don't need to worry about checking because
2236 * we will only look at entries known to be there...
2237 * eliminate leading and trailing 0's
2240 q = p+n;
2241 for(pp = p, off = 0; *pp == 0 && pp <= q; ++pp, --off)
2243 /* no actions */
2244 if(pp > q)
2245 return 0;
2246 p = pp;
2248 /* now, find a place for the elements from p to q, inclusive */
2249 r = &amem[ACTSIZE-1];
2250 for(rr = amem; rr <= r; rr++, off++) {
2251 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2252 if(*pp != 0)
2253 if(*pp != *qq && *qq != 0)
2254 goto nextk;
2256 /* we have found an acceptable k */
2257 if(pkdebug && foutput != 0)
2258 Bprint(foutput, "off = %d, k = %d\n", off, (int)(rr-amem));
2259 for(qq = rr, pp = p; pp <= q; pp++, qq++)
2260 if(*pp) {
2261 if(qq > r)
2262 error("action table overflow");
2263 if(qq > memp)
2264 memp = qq;
2265 *qq = *pp;
2267 if(pkdebug && foutput != 0)
2268 for(pp = amem; pp <= memp; pp += 10) {
2269 Bprint(foutput, "\t");
2270 for(qq = pp; qq <= pp+9; qq++)
2271 Bprint(foutput, "%d ", *qq);
2272 Bprint(foutput, "\n");
2274 return(off);
2275 nextk:;
2277 error("no space in action table");
2278 return 0;
2282 * output the gotos for the nontermninals
2284 void
2285 go2out(void)
2287 int i, j, k, best, count, cbest, times;
2289 /* mark begining of gotos */
2290 Bprint(ftemp, "$\n");
2291 for(i = 1; i <= nnonter; i++) {
2292 go2gen(i);
2294 /* find the best one to make default */
2295 best = -1;
2296 times = 0;
2298 /* is j the most frequent */
2299 for(j = 0; j <= nstate; j++) {
2300 if(tystate[j] == 0)
2301 continue;
2302 if(tystate[j] == best)
2303 continue;
2305 /* is tystate[j] the most frequent */
2306 count = 0;
2307 cbest = tystate[j];
2308 for(k = j; k <= nstate; k++)
2309 if(tystate[k] == cbest)
2310 count++;
2311 if(count > times) {
2312 best = cbest;
2313 times = count;
2317 /* best is now the default entry */
2318 zzgobest += times-1;
2319 for(j = 0; j <= nstate; j++)
2320 if(tystate[j] != 0 && tystate[j] != best) {
2321 Bprint(ftemp, "%d,%d,", j, tystate[j]);
2322 zzgoent++;
2325 /* now, the default */
2326 if(best == -1)
2327 best = 0;
2328 zzgoent++;
2329 Bprint(ftemp, "%d\n", best);
2334 * output the gotos for nonterminal c
2336 void
2337 go2gen(int c)
2339 int i, work, cc;
2340 Item *p, *q;
2343 /* first, find nonterminals with gotos on c */
2344 aryfil(temp1, nnonter+1, 0);
2345 temp1[c] = 1;
2346 work = 1;
2347 while(work) {
2348 work = 0;
2349 PLOOP(0, i)
2351 /* cc is a nonterminal */
2352 if((cc=prdptr[i][1]-NTBASE) >= 0)
2353 /* cc has a goto on c */
2354 if(temp1[cc] != 0) {
2356 /* thus, the left side of production i does too */
2357 cc = *prdptr[i]-NTBASE;
2358 if(temp1[cc] == 0) {
2359 work = 1;
2360 temp1[cc] = 1;
2365 /* now, we have temp1[c] = 1 if a goto on c in closure of cc */
2366 if(g2debug && foutput != 0) {
2367 Bprint(foutput, "%s: gotos on ", nontrst[c].name);
2368 NTLOOP(i)
2369 if(temp1[i])
2370 Bprint(foutput, "%s ", nontrst[i].name);
2371 Bprint(foutput, "\n");
2374 /* now, go through and put gotos into tystate */
2375 aryfil(tystate, nstate, 0);
2376 SLOOP(i)
2377 ITMLOOP(i, p, q)
2378 if((cc = *p->pitem) >= NTBASE)
2379 /* goto on c is possible */
2380 if(temp1[cc-NTBASE]) {
2381 tystate[i] = amem[indgo[i]+c];
2382 break;
2387 * decide a shift/reduce conflict by precedence.
2388 * r is a rule number, t a token number
2389 * the conflict is in state s
2390 * temp1[t] is changed to reflect the action
2392 void
2393 precftn(int r, int t, int s)
2395 int lp, lt, action;
2397 lp = levprd[r];
2398 lt = toklev[t];
2399 if(PLEVEL(lt) == 0 || PLEVEL(lp) == 0) {
2401 /* conflict */
2402 if(foutput != 0)
2403 Bprint(foutput,
2404 "\n%d: shift/reduce conflict (shift %d(%d), red'n %d(%d)) on %s",
2405 s, temp1[t], PLEVEL(lt), r, PLEVEL(lp), symnam(t));
2406 zzsrconf++;
2407 return;
2409 if(PLEVEL(lt) == PLEVEL(lp))
2410 action = ASSOC(lt);
2411 else
2412 if(PLEVEL(lt) > PLEVEL(lp))
2413 action = RASC; /* shift */
2414 else
2415 action = LASC; /* reduce */
2416 switch(action) {
2417 case BASC: /* error action */
2418 temp1[t] = ERRCODE;
2419 break;
2420 case LASC: /* reduce */
2421 temp1[t] = -r;
2422 break;
2427 * output state i
2428 * temp1 has the actions, lastred the default
2430 void
2431 wract(int i)
2433 int p, p0, p1, ntimes, tred, count, j, flag;
2435 /* find the best choice for lastred */
2436 lastred = 0;
2437 ntimes = 0;
2438 TLOOP(j) {
2439 if(temp1[j] >= 0)
2440 continue;
2441 if(temp1[j]+lastred == 0)
2442 continue;
2443 /* count the number of appearances of temp1[j] */
2444 count = 0;
2445 tred = -temp1[j];
2446 levprd[tred] |= REDFLAG;
2447 TLOOP(p)
2448 if(temp1[p]+tred == 0)
2449 count++;
2450 if(count > ntimes) {
2451 lastred = tred;
2452 ntimes = count;
2457 * for error recovery, arrange that, if there is a shift on the
2458 * error recovery token, `error', that the default be the error action
2460 if(temp1[2] > 0)
2461 lastred = 0;
2463 /* clear out entries in temp1 which equal lastred */
2464 TLOOP(p)
2465 if(temp1[p]+lastred == 0)
2466 temp1[p] = 0;
2468 wrstate(i);
2469 defact[i] = lastred;
2470 flag = 0;
2471 TLOOP(p0)
2472 if((p1=temp1[p0]) != 0) {
2473 if(p1 < 0) {
2474 p1 = -p1;
2475 goto exc;
2477 if(p1 == ACCEPTCODE) {
2478 p1 = -1;
2479 goto exc;
2481 if(p1 == ERRCODE) {
2482 p1 = 0;
2483 exc:
2484 if(flag++ == 0)
2485 Bprint(ftable, "-1, %d,\n", i);
2486 Bprint(ftable, "\t%d, %d,\n", p0, p1);
2487 zzexcp++;
2488 continue;
2490 Bprint(ftemp, "%d,%d,", p0, p1);
2491 zzacent++;
2493 if(flag) {
2494 defact[i] = -2;
2495 Bprint(ftable, "\t-2, %d,\n", lastred);
2497 Bprint(ftemp, "\n");
2501 * writes state i
2503 void
2504 wrstate(int i)
2506 int j0, j1;
2507 Item *pp, *qq;
2508 Wset *u;
2510 if(fdebug) {
2511 if(lastred) {
2512 Bprint(fdebug, " 0, /*%d*/\n", i);
2513 } else {
2514 Bprint(fdebug, " \"");
2515 ITMLOOP(i, pp, qq)
2516 Bprint(fdebug, "%s\\n", writem(pp->pitem));
2517 if(tystate[i] == MUSTLOOKAHEAD)
2518 WSLOOP(wsets + (pstate[i+1] - pstate[i]), u)
2519 if(*u->pitem < 0)
2520 Bprint(fdebug, "%s\\n", writem(u->pitem));
2521 Bprint(fdebug, "\", /*%d*/\n", i);
2524 if(foutput == 0)
2525 return;
2526 Bprint(foutput, "\nstate %d\n", i);
2527 ITMLOOP(i, pp, qq)
2528 Bprint(foutput, "\t%s\n", writem(pp->pitem));
2529 if(tystate[i] == MUSTLOOKAHEAD)
2530 /* print out empty productions in closure */
2531 WSLOOP(wsets+(pstate[i+1]-pstate[i]), u)
2532 if(*u->pitem < 0)
2533 Bprint(foutput, "\t%s\n", writem(u->pitem));
2535 /* check for state equal to another */
2536 TLOOP(j0)
2537 if((j1=temp1[j0]) != 0) {
2538 Bprint(foutput, "\n\t%s ", symnam(j0));
2539 /* shift, error, or accept */
2540 if(j1 > 0) {
2541 if(j1 == ACCEPTCODE)
2542 Bprint(foutput, "accept");
2543 else
2544 if(j1 == ERRCODE)
2545 Bprint(foutput, "error");
2546 else
2547 Bprint(foutput, "shift %d", j1);
2548 } else
2549 Bprint(foutput, "reduce %d (src line %d)", -j1, rlines[-j1]);
2552 /* output the final production */
2553 if(lastred)
2554 Bprint(foutput, "\n\t. reduce %d (src line %d)\n\n",
2555 lastred, rlines[lastred]);
2556 else
2557 Bprint(foutput, "\n\t. error\n\n");
2559 /* now, output nonterminal actions */
2560 j1 = ntokens;
2561 for(j0 = 1; j0 <= nnonter; j0++) {
2562 j1++;
2563 if(temp1[j1])
2564 Bprint(foutput, "\t%s goto %d\n", symnam(j0+NTBASE), temp1[j1]);
2568 void
2569 warray(char *s, int *v, int n)
2571 int i;
2573 Bprint(ftable, "short %s[] =\n{", s);
2574 for(i=0;;) {
2575 if(i%10 == 0)
2576 Bprint(ftable, "\n");
2577 Bprint(ftable, "%4d", v[i]);
2578 i++;
2579 if(i >= n) {
2580 Bprint(ftable, "\n};\n");
2581 break;
2583 Bprint(ftable, ",");
2588 * in order to free up the mem and amem arrays for the optimizer,
2589 * and still be able to output yyr1, etc., after the sizes of
2590 * the action array is known, we hide the nonterminals
2591 * derived by productions in levprd.
2594 void
2595 hideprod(void)
2597 int i, j;
2599 j = 0;
2600 levprd[0] = 0;
2601 PLOOP(1, i) {
2602 if(!(levprd[i] & REDFLAG)) {
2603 j++;
2604 if(foutput != 0)
2605 Bprint(foutput, "Rule not reduced: %s\n", writem(prdptr[i]));
2607 levprd[i] = *prdptr[i] - NTBASE;
2609 if(j)
2610 print("%d rules never reduced\n", j);
2613 void
2614 callopt(void)
2616 int i, *p, j, k, *q;
2618 /* read the arrays from tempfile and set parameters */
2619 finput = Bopen(tempname, OREAD);
2620 if(finput == 0)
2621 error("optimizer cannot open tempfile");
2623 pgo[0] = 0;
2624 temp1[0] = 0;
2625 nstate = 0;
2626 nnonter = 0;
2627 for(;;) {
2628 switch(gtnm()) {
2629 case '\n':
2630 nstate++;
2631 pmem--;
2632 temp1[nstate] = pmem - mem0;
2633 case ',':
2634 continue;
2635 case '$':
2636 break;
2637 default:
2638 error("bad tempfile %s", tempname);
2640 break;
2643 pmem--;
2644 temp1[nstate] = yypgo[0] = pmem - mem0;
2645 for(;;) {
2646 switch(gtnm()) {
2647 case '\n':
2648 nnonter++;
2649 yypgo[nnonter] = pmem-mem0;
2650 case ',':
2651 continue;
2652 case -1:
2653 break;
2654 default:
2655 error("bad tempfile");
2657 break;
2659 pmem--;
2660 yypgo[nnonter--] = pmem - mem0;
2661 for(i = 0; i < nstate; i++) {
2662 k = 32000;
2663 j = 0;
2664 q = mem0 + temp1[i+1];
2665 for(p = mem0 + temp1[i]; p < q ; p += 2) {
2666 if(*p > j)
2667 j = *p;
2668 if(*p < k)
2669 k = *p;
2671 /* nontrivial situation */
2672 if(k <= j) {
2673 /* j is now the range */
2674 /* j -= k; *//* call scj */
2675 if(k > maxoff)
2676 maxoff = k;
2678 tystate[i] = (temp1[i+1]-temp1[i]) + 2*j;
2679 if(j > maxspr)
2680 maxspr = j;
2683 /* initialize ggreed table */
2684 for(i = 1; i <= nnonter; i++) {
2685 ggreed[i] = 1;
2686 j = 0;
2688 /* minimum entry index is always 0 */
2689 q = mem0 + yypgo[i+1] - 1;
2690 for(p = mem0+yypgo[i]; p < q ; p += 2) {
2691 ggreed[i] += 2;
2692 if(*p > j)
2693 j = *p;
2695 ggreed[i] = ggreed[i] + 2*j;
2696 if(j > maxoff)
2697 maxoff = j;
2700 /* now, prepare to put the shift actions into the amem array */
2701 for(i = 0; i < ACTSIZE; i++)
2702 amem[i] = 0;
2703 maxa = amem;
2704 for(i = 0; i < nstate; i++) {
2705 if(tystate[i] == 0 && adb > 1)
2706 Bprint(ftable, "State %d: null\n", i);
2707 indgo[i] = YYFLAG1;
2709 while((i = nxti()) != NOMORE)
2710 if(i >= 0)
2711 stin(i);
2712 else
2713 gin(-i);
2715 /* print amem array */
2716 if(adb > 2 )
2717 for(p = amem; p <= maxa; p += 10) {
2718 Bprint(ftable, "%4d ", (int)(p-amem));
2719 for(i = 0; i < 10; ++i)
2720 Bprint(ftable, "%4d ", p[i]);
2721 Bprint(ftable, "\n");
2724 /* write out the output appropriate to the language */
2725 aoutput();
2726 osummary();
2727 ZAPFILE(tempname);
2730 void
2731 gin(int i)
2733 int *p, *r, *s, *q1, *q2;
2735 /* enter gotos on nonterminal i into array amem */
2736 ggreed[i] = 0;
2738 q2 = mem0+ yypgo[i+1] - 1;
2739 q1 = mem0 + yypgo[i];
2741 /* now, find amem place for it */
2742 for(p = amem; p < &amem[ACTSIZE]; p++) {
2743 if(*p)
2744 continue;
2745 for(r = q1; r < q2; r += 2) {
2746 s = p + *r + 1;
2747 if(*s)
2748 goto nextgp;
2749 if(s > maxa)
2750 if((maxa = s) > &amem[ACTSIZE])
2751 error("a array overflow");
2753 /* we have found amem spot */
2754 *p = *q2;
2755 if(p > maxa)
2756 if((maxa = p) > &amem[ACTSIZE])
2757 error("a array overflow");
2758 for(r = q1; r < q2; r += 2) {
2759 s = p + *r + 1;
2760 *s = r[1];
2762 pgo[i] = p-amem;
2763 if(adb > 1)
2764 Bprint(ftable, "Nonterminal %d, entry at %d\n", i, pgo[i]);
2765 return;
2767 nextgp:;
2769 error("cannot place goto %d\n", i);
2772 void
2773 stin(int i)
2775 int *r, *s, n, flag, j, *q1, *q2;
2777 tystate[i] = 0;
2779 /* enter state i into the amem array */
2780 q2 = mem0+temp1[i+1];
2781 q1 = mem0+temp1[i];
2782 /* find an acceptable place */
2783 for(n = -maxoff; n < ACTSIZE; n++) {
2784 flag = 0;
2785 for(r = q1; r < q2; r += 2) {
2786 if((s = *r + n + amem) < amem)
2787 goto nextn;
2788 if(*s == 0)
2789 flag++;
2790 else
2791 if(*s != r[1])
2792 goto nextn;
2795 /* check that the position equals another only if the states are identical */
2796 for(j=0; j<nstate; j++) {
2797 if(indgo[j] == n) {
2799 /* we have some disagreement */
2800 if(flag)
2801 goto nextn;
2802 if(temp1[j+1]+temp1[i] == temp1[j]+temp1[i+1]) {
2804 /* states are equal */
2805 indgo[i] = n;
2806 if(adb > 1)
2807 Bprint(ftable,
2808 "State %d: entry at %d equals state %d\n",
2809 i, n, j);
2810 return;
2813 /* we have some disagreement */
2814 goto nextn;
2818 for(r = q1; r < q2; r += 2) {
2819 if((s = *r+n+amem) >= &amem[ACTSIZE])
2820 error("out of space in optimizer a array");
2821 if(s > maxa)
2822 maxa = s;
2823 if(*s != 0 && *s != r[1])
2824 error("clobber of a array, pos'n %d, by %d", s-amem, r[1]);
2825 *s = r[1];
2827 indgo[i] = n;
2828 if(adb > 1)
2829 Bprint(ftable, "State %d: entry at %d\n", i, indgo[i]);
2830 return;
2831 nextn:;
2833 error("Error; failure to place state %d\n", i);
2837 * finds the next i
2839 int
2840 nxti(void)
2842 int i, max, maxi;
2844 max = 0;
2845 maxi = 0;
2846 for(i = 1; i <= nnonter; i++)
2847 if(ggreed[i] >= max) {
2848 max = ggreed[i];
2849 maxi = -i;
2851 for(i = 0; i < nstate; ++i)
2852 if(tystate[i] >= max) {
2853 max = tystate[i];
2854 maxi = i;
2856 if(nxdb)
2857 Bprint(ftable, "nxti = %d, max = %d\n", maxi, max);
2858 if(max == 0)
2859 return NOMORE;
2860 return maxi;
2864 * write summary
2866 void
2867 osummary(void)
2870 int i, *p;
2872 if(foutput == 0)
2873 return;
2874 i = 0;
2875 for(p = maxa; p >= amem; p--)
2876 if(*p == 0)
2877 i++;
2879 Bprint(foutput, "Optimizer space used: input %d/%d, output %d/%d\n",
2880 (int)(pmem-mem0+1), MEMSIZE, (int)(maxa-amem+1), ACTSIZE);
2881 Bprint(foutput, "%d table entries, %d zero\n", (int)(maxa-amem+1), i);
2882 Bprint(foutput, "maximum spread: %d, maximum offset: %d\n", maxspr, maxoff);
2886 * this version is for C
2887 * write out the optimized parser
2889 void
2890 aoutput(void)
2892 Bprint(ftable, "#define\tYYLAST\t%d\n", (int)(maxa-amem+1));
2893 arout("yyact", amem, (maxa-amem)+1);
2894 arout("yypact", indgo, nstate);
2895 arout("yypgo", pgo, nnonter+1);
2898 void
2899 arout(char *s, int *v, int n)
2901 int i;
2903 Bprint(ftable, "short %s[] =\n{", s);
2904 for(i = 0; i < n;) {
2905 if(i%10 == 0)
2906 Bprint(ftable, "\n");
2907 Bprint(ftable, "%4d", v[i]);
2908 i++;
2909 if(i == n)
2910 Bprint(ftable, "\n};\n");
2911 else
2912 Bprint(ftable, ",");
2917 * read and convert an integer from the standard input
2918 * return the terminating character
2919 * blanks, tabs, and newlines are ignored
2921 int
2922 gtnm(void)
2924 int sign, val, c;
2926 sign = 0;
2927 val = 0;
2928 while((c=Bgetrune(finput)) != Beof) {
2929 if(isdigit(c)) {
2930 val = val*10 + c-'0';
2931 continue;
2933 if(c == '-') {
2934 sign = 1;
2935 continue;
2937 break;
2939 if(sign)
2940 val = -val;
2941 *pmem++ = val;
2942 if(pmem >= &mem0[MEMSIZE])
2943 error("out of space");
2944 return c;