commit e37302c4b99f147f1fd85ca27a2dbd2aa7a4eb41 from: wkj date: Wed Apr 21 01:22:09 2004 UTC Plan 9 lex (to be installed as lex.9, if at all). commit - 7d3bbe16529792999d0627256748f9b780c084f3 commit + e37302c4b99f147f1fd85ca27a2dbd2aa7a4eb41 blob - /dev/null blob + 8fa63d62565bdab9396e2f36577b85ae412ed022 (mode 644) --- /dev/null +++ src/cmd/lex/header.c @@ -0,0 +1,115 @@ +# include "ldefs.h" + +extern int nine; + +void +phead1(void) +{ + Bprint(&fout,"typedef unsigned char Uchar;\n"); + if (nine) { + Bprint(&fout,"# include \n"); + Bprint(&fout,"# include \n"); + } + Bprint(&fout,"# include \n"); + Bprint(&fout, "# define U(x) x\n"); + Bprint(&fout, "# define NLSTATE yyprevious=YYNEWLINE\n"); + Bprint(&fout,"# define BEGIN yybgin = yysvec + 1 +\n"); + Bprint(&fout,"# define INITIAL 0\n"); + Bprint(&fout,"# define YYLERR yysvec\n"); + Bprint(&fout,"# define YYSTATE (yyestate-yysvec-1)\n"); + Bprint(&fout,"# define YYOPTIM 1\n"); +# ifdef DEBUG + Bprint(&fout,"# define LEXDEBUG 1\n"); +# endif + Bprint(&fout,"# define YYLMAX 200\n"); + Bprint(&fout, +"# define unput(c) {yytchar= (c);if(yytchar=='\\n')yylineno--;*yysptr++=yytchar;}\n"); + Bprint(&fout,"# define yymore() (yymorfg=1)\n"); + Bprint(&fout,"# define ECHO fprintf(yyout, \"%%s\",yytext)\n"); + Bprint(&fout,"# define REJECT { nstr = yyreject(); goto yyfussy;}\n"); + Bprint(&fout,"int yyleng; extern char yytext[];\n"); + Bprint(&fout,"int yymorfg;\n"); + Bprint(&fout,"extern Uchar *yysptr, yysbuf[];\n"); + Bprint(&fout,"int yytchar;\n"); +// Bprint(&fout,"FILE *yyin = stdin, *yyout = stdout;\n"); + Bprint(&fout,"FILE *yyin, *yyout;\n"); + Bprint(&fout,"extern int yylineno;\n"); + Bprint(&fout,"struct yysvf { \n"); + Bprint(&fout,"\tstruct yywork *yystoff;\n"); + Bprint(&fout,"\tstruct yysvf *yyother;\n"); + Bprint(&fout,"\tint *yystops;};\n"); + Bprint(&fout,"struct yysvf *yyestate;\n"); + Bprint(&fout,"extern struct yysvf yysvec[], *yybgin;\n"); + Bprint(&fout,"int yylook(void), yywrap(void), yyback(int *, int);\n"); + if(nine) { + Bprint(&fout, + "int infd, outfd;\n" + "\n" + "void\n" + "output(char c)\n" + "{\n" + " int rv;\n" + " if ((rv = write(outfd, &c, 1)) < 0)\n" + " sysfatal(\"output: %%r\");\n" + " if (rv == 0)\n" + " sysfatal(\"output: EOF?\");\n" + "}\n" + "\n" + "int\n" + "input(void)\n" + "{\n" + " if(yysptr > yysbuf)\n" + " yytchar = U(*--yysptr);\n" + " else {\n" + " int rv;\n" + " if ((rv = read(infd, &yytchar, 1)) < 0)\n" + " sysfatal(\"input: %%r\");\n" + " if (rv == 0)\n" + " return 0;\n" + " }\n" + " if (yytchar == '\\n')\n" + " yylineno++;\n" + " return yytchar;\n" + "}\n"); + } + else { + Bprint(&fout,"# define output(c) putc(c,yyout)\n"); + Bprint(&fout, "%s%d%s\n", + "# define input() (((yytchar=yysptr>yysbuf?U(*--yysptr):getc(yyin))==", + '\n', + "?(yylineno++,yytchar):yytchar)==EOF?0:yytchar)"); + } +} + +void +phead2(void) +{ + Bprint(&fout,"while((nstr = yylook()) >= 0)\n"); + Bprint(&fout,"yyfussy: switch(nstr){\n"); + Bprint(&fout,"case 0:\n"); + Bprint(&fout,"if(yywrap()) return(0); break;\n"); +} + +void +ptail(void) +{ + if(!pflag){ + Bprint(&fout,"case -1:\nbreak;\n"); /* for reject */ + Bprint(&fout,"default:\n"); + Bprint(&fout,"fprintf(yyout,\"bad switch yylook %%d\",nstr);\n"); + Bprint(&fout,"} return(0); }\n"); + Bprint(&fout,"/* end of yylex */\n"); + } + pflag = 1; +} + +void +statistics(void) +{ + fprint(errorf,"%d/%d nodes(%%e), %d/%d positions(%%p), %d/%d (%%n), %ld transitions\n", + tptr, treesize, (int)(nxtpos-positions), maxpos, stnum+1, nstates, rcount); + fprint(errorf, ", %d/%d packed char classes(%%k)", (int)(pcptr-pchar), pchlen); + fprint(errorf,", %d/%d packed transitions(%%a)",nptr, ntrans); + fprint(errorf, ", %d/%d output slots(%%o)", yytop, outsize); + fprint(errorf,"\n"); +} blob - /dev/null blob + ea95fb511b8aa942ae44da33d502c59bbf03f8c2 (mode 644) --- /dev/null +++ src/cmd/lex/ldefs.h @@ -0,0 +1,180 @@ +# include +# include +# include +# include +# define PP 1 + +#ifdef NOTDEF +# define CWIDTH 8 +# define CMASK 0377 +#endif +# define NCH 256 + + +# define TOKENSIZE 1000 +# define DEFSIZE 40 +# define DEFCHAR 1000 +# define STARTCHAR 100 +# define STARTSIZE 256 +# define CCLSIZE 1000 + +# define TREESIZE 1000 +# define NSTATES 500 +# define MAXPOS 2500 +# define NTRANS 2000 +# define NOUTPUT 5000 + +# define NACTIONS 100 +# define ALITTLEEXTRA 30 + +# define RCCL NCH+90 +# define RNCCL NCH+91 +# define RSTR NCH+92 +# define RSCON NCH+93 +# define RNEWE NCH+94 +# define FINAL NCH+95 +# define RNULLS NCH+96 +# define RCAT NCH+97 +# define STAR NCH+98 +# define PLUS NCH+99 +# define QUEST NCH+100 +# define DIV NCH+101 +# define BAR NCH+102 +# define CARAT NCH+103 +# define S1FINAL NCH+104 +# define S2FINAL NCH+105 + +# define DEFSECTION 1 +# define RULESECTION 2 +# define ENDSECTION 5 +# define TRUE 1 +# define FALSE 0 + +# define PC 1 +# define PS 1 + +# ifdef DEBUG +# define LINESIZE 110 +extern int yydebug; +extern int debug; /* 1 = on */ +extern int charc; +# endif + +# ifdef DEBUG +extern int freturn(int); +# else +# define freturn(s) s +# endif + +extern int sargc; +extern char **sargv; +extern uchar buf[520]; +extern int yyline; /* line number of file */ +extern int sect; +extern int eof; +extern int lgatflg; +extern int divflg; +extern int funcflag; +extern int pflag; +extern int casecount; +extern int chset; /* 1 = char set modified */ +extern Biobuf *fin, fout, *fother; +extern int foutopen; +extern int errorf; +extern int fptr; +extern char *cname; +extern int prev; /* previous input character */ +extern int pres; /* present input character */ +extern int peek; /* next input character */ +extern int *name; +extern int *left; +extern int *right; +extern int *parent; +extern uchar *nullstr; +extern int tptr; +extern uchar pushc[TOKENSIZE]; +extern uchar *pushptr; +extern uchar slist[STARTSIZE]; +extern uchar *slptr; +extern uchar **def, **subs, *dchar; +extern uchar **sname, *stchar; +extern uchar *ccl; +extern uchar *ccptr; +extern uchar *dp, *sp; +extern int dptr, sptr; +extern uchar *bptr; /* store input position */ +extern uchar *tmpstat; +extern int count; +extern int **foll; +extern int *nxtpos; +extern int *positions; +extern int *gotof; +extern int *nexts; +extern uchar *nchar; +extern int **state; +extern int *sfall; /* fallback state num */ +extern uchar *cpackflg; /* true if state has been character packed */ +extern int *atable, aptr; +extern int nptr; +extern uchar symbol[NCH]; +extern uchar cindex[NCH]; +extern int xstate; +extern int stnum; +extern int ccount; +extern uchar match[NCH]; +extern uchar extra[NACTIONS]; +extern uchar *pcptr, *pchar; +extern int pchlen; +extern int nstates, maxpos; +extern int yytop; +extern int report; +extern int ntrans, treesize, outsize; +extern long rcount; +extern int *verify, *advance, *stoff; +extern int scon; +extern uchar *psave; + +extern void acompute(int); +extern void add(int **, int); +extern void allprint(int); +extern void cclinter(int); +extern void cgoto(void); +extern void cfoll(int); +extern int cpyact(void); +extern int dupl(int); +extern void error(char *,...); +extern void first(int); +extern void follow(int); +extern int gch(void); +extern uchar *getl(uchar *); +extern void layout(void); +extern void lgate(void); +extern int lookup(uchar *, uchar **); +extern int member(int, uchar *); +extern void mkmatch(void); +extern int mn0(int); +extern int mn1(int, int); +extern int mn2(int, int, int); +extern void munputc(int); +extern void munputs(uchar *); +extern void *myalloc(int, int); +extern void nextstate(int, int); +extern int notin(int); +extern void packtrans(int, uchar *, int *, int, int); +extern void padd(int **, int); +extern void pccl(void); +extern void pfoll(void); +extern void phead1(void); +extern void phead2(void); +extern void pstate(int); +extern void ptail(void); +extern void sect1dump(void); +extern void sect2dump(void); +extern void statistics(void); +extern void stprt(int); +extern void strpt(uchar *); +extern void treedump(void); +extern int usescape(int); +extern void warning(char *,...); +extern int yyparse(void); +extern void yyerror(char *); blob - /dev/null blob + ec43869a2e8f4ed597dff5be54d4d3393269c0b2 (mode 644) --- /dev/null +++ src/cmd/lex/lmain.c @@ -0,0 +1,292 @@ +/* lex [-[dynvt]] [file] ... [file] */ + +/* Copyright 1976, Bell Telephone Laboratories, Inc., + written by Eric Schmidt, August 27, 1976 */ + +# include "ldefs.h" +Biobuf fout; +int foutopen; +int errorf = 1; +int sect = DEFSECTION; +int prev = '\n'; /* previous input character */ +int pres = '\n'; /* present input character */ +int peek = '\n'; /* next input character */ +uchar *pushptr = pushc; +uchar *slptr = slist; + +char *cname = SYS9 "/sys/lib/lex/ncform"; + +int nine; +int ccount = 1; +int casecount = 1; +int aptr = 1; +int nstates = NSTATES, maxpos = MAXPOS; +int treesize = TREESIZE, ntrans = NTRANS; +int yytop; +int outsize = NOUTPUT; +int sptr = 1; +int report = 2; +int debug; /* 1 = on */ +int charc; +int sargc; +char **sargv; +uchar buf[520]; +int yyline; /* line number of file */ +int eof; +int lgatflg; +int divflg; +int funcflag; +int pflag; +int chset; /* 1 = char set modified */ +Biobuf *fin, *fother; +int fptr; +int *name; +int *left; +int *right; +int *parent; +uchar *nullstr; +int tptr; +uchar pushc[TOKENSIZE]; +uchar slist[STARTSIZE]; +uchar **def, **subs, *dchar; +uchar **sname, *stchar; +uchar *ccl; +uchar *ccptr; +uchar *dp, *sp; +int dptr; +uchar *bptr; /* store input position */ +uchar *tmpstat; +int count; +int **foll; +int *nxtpos; +int *positions; +int *gotof; +int *nexts; +uchar *nchar; +int **state; +int *sfall; /* fallback state num */ +uchar *cpackflg; /* true if state has been character packed */ +int *atable; +int nptr; +uchar symbol[NCH]; +uchar cindex[NCH]; +int xstate; +int stnum; +uchar match[NCH]; +uchar extra[NACTIONS]; +uchar *pchar, *pcptr; +int pchlen = TOKENSIZE; + long rcount; +int *verify, *advance, *stoff; +int scon; +uchar *psave; + +static void free1core(void); +static void free2core(void); +#ifdef DEBUG +static void free3core(void); +#endif +static void get1core(void); +static void get2core(void); +static void get3core(void); + +void +main(int argc, char **argv) +{ + int i; + + ARGBEGIN { +# ifdef DEBUG + case 'd': debug++; break; + case 'y': yydebug = TRUE; break; +# endif + case 't': case 'T': + Binit(&fout, 1, OWRITE); + errorf= 2; + foutopen = 1; + break; + case 'v': case 'V': + report = 1; + break; + case 'n': case 'N': + report = 0; + break; + case '9': + nine = 1; + break; + default: + warning("Unknown option %c", ARGC()); + } ARGEND + sargc = argc; + sargv = argv; + if (argc > 0){ + fin = Bopen(argv[fptr++], OREAD); + if(fin == 0) + error ("Can't read input file %s",argv[0]); + sargc--; + sargv++; + } + else { + fin = myalloc(sizeof(Biobuf), 1); + if(fin == 0) + exits("core"); + Binit(fin, 0, OREAD); + } + if(Bgetc(fin) == Beof) /* no input */ + exits(0); + Bseek(fin, 0, 0); + gch(); + /* may be gotten: def, subs, sname, stchar, ccl, dchar */ + get1core(); + /* may be gotten: name, left, right, nullstr, parent */ + strcpy((char*)sp, "INITIAL"); + sname[0] = sp; + sp += strlen("INITIAL") + 1; + sname[1] = 0; + if(yyparse()) exits("error"); /* error return code */ + /* may be disposed of: def, subs, dchar */ + free1core(); + /* may be gotten: tmpstat, foll, positions, gotof, nexts, nchar, state, atable, sfall, cpackflg */ + get2core(); + ptail(); + mkmatch(); +# ifdef DEBUG + if(debug) pccl(); +# endif + sect = ENDSECTION; + if(tptr>0)cfoll(tptr-1); +# ifdef DEBUG + if(debug)pfoll(); +# endif + cgoto(); +# ifdef DEBUG + if(debug){ + print("Print %d states:\n",stnum+1); + for(i=0;i<=stnum;i++)stprt(i); + } +# endif + /* may be disposed of: positions, tmpstat, foll, state, name, left, right, parent, ccl, stchar, sname */ + /* may be gotten: verify, advance, stoff */ + free2core(); + get3core(); + layout(); + /* may be disposed of: verify, advance, stoff, nexts, nchar, + gotof, atable, ccpackflg, sfall */ +# ifdef DEBUG + free3core(); +# endif + fother = Bopen(cname,OREAD); + if(fother == 0) + error("Lex driver missing, file %s",cname); + while ( (i=Bgetc(fother)) != Beof) + Bputc(&fout, i); + + Bterm(fother); + Bterm(&fout); + if( +# ifdef DEBUG + debug || +# endif + report == 1)statistics(); + Bterm(fin); + exits(0); /* success return code */ +} + +static void +get1core(void) +{ + ccptr = ccl = myalloc(CCLSIZE,sizeof(*ccl)); + pcptr = pchar = myalloc(pchlen, sizeof(*pchar)); + def = myalloc(DEFSIZE,sizeof(*def)); + subs = myalloc(DEFSIZE,sizeof(*subs)); + dp = dchar = myalloc(DEFCHAR,sizeof(*dchar)); + sname = myalloc(STARTSIZE,sizeof(*sname)); + sp = stchar = myalloc(STARTCHAR,sizeof(*stchar)); + if(ccl == 0 || def == 0 || subs == 0 || dchar == 0 || sname == 0 || stchar == 0) + error("Too little core to begin"); +} + +static void +free1core(void) +{ + free(def); + free(subs); + free(dchar); +} + +static void +get2core(void) +{ + int i; + + gotof = myalloc(nstates,sizeof(*gotof)); + nexts = myalloc(ntrans,sizeof(*nexts)); + nchar = myalloc(ntrans,sizeof(*nchar)); + state = myalloc(nstates,sizeof(*state)); + atable = myalloc(nstates,sizeof(*atable)); + sfall = myalloc(nstates,sizeof(*sfall)); + cpackflg = myalloc(nstates,sizeof(*cpackflg)); + tmpstat = myalloc(tptr+1,sizeof(*tmpstat)); + foll = myalloc(tptr+1,sizeof(*foll)); + nxtpos = positions = myalloc(maxpos,sizeof(*positions)); + if(tmpstat == 0 || foll == 0 || positions == 0 || + gotof == 0 || nexts == 0 || nchar == 0 || state == 0 || atable == 0 || sfall == 0 || cpackflg == 0 ) + error("Too little core for state generation"); + for(i=0;i<=tptr;i++)foll[i] = 0; +} + +static void +free2core(void) +{ + free(positions); + free(tmpstat); + free(foll); + free(name); + free(left); + free(right); + free(parent); + free(nullstr); + free(state); + free(sname); + free(stchar); + free(ccl); +} + +static void +get3core(void) +{ + verify = myalloc(outsize,sizeof(*verify)); + advance = myalloc(outsize,sizeof(*advance)); + stoff = myalloc(stnum+2,sizeof(*stoff)); + if(verify == 0 || advance == 0 || stoff == 0) + error("Too little core for final packing"); +} +# ifdef DEBUG +static void +free3core(void){ + free(advance); + free(verify); + free(stoff); + free(gotof); + free(nexts); + free(nchar); + free(atable); + free(sfall); + free(cpackflg); +} +# endif +void * +myalloc(int a, int b) +{ + void *i; + i = calloc(a, b); + if(i==0) + warning("OOPS - calloc returns a 0"); + return(i); +} + +void +yyerror(char *s) +{ + fprint(2, "line %d: %s\n", yyline, s); +} blob - /dev/null blob + a748409ff42d632958e40e11a00664bf8199fda3 (mode 644) --- /dev/null +++ src/cmd/lex/mkfile @@ -0,0 +1,30 @@ +<$PLAN9/src/mkhdr + +#TARG=lex +TARG=lex.9 +OFILES=lmain.$O\ + y.tab.$O\ + sub1.$O\ + sub2.$O\ + header.$O\ + +HFILES=ldefs.h\ + +YFILES=parser.y\ + +SHORTLIB=bio 9 +UPDATE=\ + mkfile\ + $HFILES\ + lmain.c\ + sub1.c\ + sub2.c\ + header.c + +<$PLAN9/src/mkone + +installall:V: + for objtype in $CPUS ; do + mk install + done + cp ncform $SYS9/lib/lex blob - /dev/null blob + dd7a1ea6791abd248c52814ffd70475fb5f514d0 (mode 644) --- /dev/null +++ src/cmd/lex/ncform @@ -0,0 +1,188 @@ +#pragma lib "libl.a" +int yylineno =1; +# define YYU(x) x +char yytext[YYLMAX]; +struct yysvf *yylstate [YYLMAX], **yylsp, **yyolsp; +Uchar yysbuf[YYLMAX]; +Uchar *yysptr = yysbuf; +int *yyfnd; +extern struct yysvf *yyestate; +int yyprevious = YYNEWLINE; +# ifdef LEXDEBUG +extern void allprint(char); +# endif +yylook(void){ + struct yysvf *yystate, **lsp; + struct yywork *yyt; + struct yysvf *yyz; + int yych; + struct yywork *yyr; +# ifdef LEXDEBUG + int debug; +# endif + Uchar *yylastch; + /* start off machines */ +# ifdef LEXDEBUG + debug = 0; +# endif + if (!yymorfg) + yylastch = (Uchar*)yytext; + else { + yymorfg=0; + yylastch = (Uchar*)yytext+yyleng; + } + for(;;){ + lsp = yylstate; + yyestate = yystate = yybgin; + if (yyprevious==YYNEWLINE) yystate++; + for (;;){ +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"state %d\n",yystate-yysvec-1); +# endif + yyt = yystate->yystoff; + if(yyt == yycrank){ /* may not be any transitions */ + yyz = yystate->yyother; + if(yyz == 0)break; + if(yyz->yystoff == yycrank)break; + } + *yylastch++ = yych = input(); + tryagain: +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"char "); + allprint(yych); + putchar('\n'); + } +# endif + yyr = yyt; + if ( (int)yyt > (int)yycrank){ + yyt = yyr + yych; + if (yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transitions */ + {unput(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + goto contin; + } + } +# ifdef YYOPTIM + else if((int)yyt < (int)yycrank) { /* r < yycrank */ + yyt = yyr = yycrank+(yycrank-yyt); +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"compressed state\n"); +# endif + yyt = yyt + yych; + if(yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transitions */ + {unput(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + goto contin; + } + yyt = yyr + YYU(yymatch[yych]); +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"try fall back character "); + allprint(YYU(yymatch[yych])); + putchar('\n'); + } +# endif + if(yyt <= yytop && yyt->verify+yysvec == yystate){ + if(yyt->advance+yysvec == YYLERR) /* error transition */ + {unput(*--yylastch);break;} + *lsp++ = yystate = yyt->advance+yysvec; + goto contin; + } + } + if ((yystate = yystate->yyother) && (yyt= yystate->yystoff) != yycrank){ +# ifdef LEXDEBUG + if(debug)fprintf(yyout,"fall back to state %d\n",yystate-yysvec-1); +# endif + goto tryagain; + } +# endif + else + {unput(*--yylastch);break;} + contin: +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"state %d char ",yystate-yysvec-1); + allprint(yych); + putchar('\n'); + } +# endif + ; + } +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"stopped at %d with ",*(lsp-1)-yysvec-1); + allprint(yych); + putchar('\n'); + } +# endif + while (lsp-- > yylstate){ + *yylastch-- = 0; + if (*lsp != 0 && (yyfnd= (*lsp)->yystops) && *yyfnd > 0){ + yyolsp = lsp; + if(yyextra[*yyfnd]){ /* must backup */ + while(yyback((*lsp)->yystops,-*yyfnd) != 1 && lsp > yylstate){ + lsp--; + unput(*yylastch--); + } + } + yyprevious = YYU(*yylastch); + yylsp = lsp; + yyleng = yylastch-(Uchar*)yytext+1; + yytext[yyleng] = 0; +# ifdef LEXDEBUG + if(debug){ + fprintf(yyout,"\nmatch '%s'", yytext); + fprintf(yyout," action %d\n",*yyfnd); + } +# endif + return(*yyfnd++); + } + unput(*yylastch); + } + if (yytext[0] == 0 /* && feof(yyin) */) + { + yysptr=yysbuf; + return(0); + } + yyprevious = input(); + yytext[0] = yyprevious; + if (yyprevious>0) + output(yyprevious); + yylastch = (Uchar*)yytext; +# ifdef LEXDEBUG + if(debug)putchar('\n'); +# endif + } + return(0); /* shut up the compiler; i have no idea what should be returned */ + } +yyback(int *p, int m) +{ +if (p==0) return(0); +while (*p) + { + if (*p++ == m) + return(1); + } +return(0); +} + /* the following are only used in the lex library */ +yyinput(void){ + if(yyin == ((void*)0)) + yyin = stdin; + return(input()); +} +void +yyoutput(int c) +{ + if(yyout == ((void*)0)) + yyout = stdin; + output(c); +} +void +yyunput(int c) +{ + unput(c); +} blob - /dev/null blob + 5ac33a2858648a397b46ae9f26ca0b9db6af0a79 (mode 644) --- /dev/null +++ src/cmd/lex/parser.y @@ -0,0 +1,651 @@ +%token CHAR CCL NCCL STR DELIM SCON ITER NEWE NULLS +%left SCON '/' NEWE +%left '|' +%left '$' '^' +%left CHAR CCL NCCL '(' '.' STR NULLS +%left ITER +%left CAT +%left '*' '+' '?' + +%{ +# include "ldefs.h" +#define YYSTYPE union _yystype_ +union _yystype_ +{ + int i; + uchar *cp; +}; +%} +%% +%{ +int i; +int j,k; +int g; +uchar *p; +%} +acc : lexinput + ={ +# ifdef DEBUG + if(debug) sect2dump(); +# endif + } + ; +lexinput: defns delim prods end + | defns delim end + ={ + if(!funcflag)phead2(); + funcflag = TRUE; + } + | error + ={ +# ifdef DEBUG + if(debug) { + sect1dump(); + sect2dump(); + } +# endif + } + ; +end: delim | ; +defns: defns STR STR + ={ strcpy((char*)dp,(char*)$2.cp); + def[dptr] = dp; + dp += strlen((char*)$2.cp) + 1; + strcpy((char*)dp,(char*)$3.cp); + subs[dptr++] = dp; + if(dptr >= DEFSIZE) + error("Too many definitions"); + dp += strlen((char*)$3.cp) + 1; + if(dp >= dchar+DEFCHAR) + error("Definitions too long"); + subs[dptr]=def[dptr]=0; /* for lookup - require ending null */ + } + | + ; +delim: DELIM + ={ +# ifdef DEBUG + if(sect == DEFSECTION && debug) sect1dump(); +# endif + sect++; + } + ; +prods: prods pr + ={ $$.i = mn2(RNEWE,$1.i,$2.i); + } + | pr + ={ $$.i = $1.i;} + ; +pr: r NEWE + ={ + if(divflg == TRUE) + i = mn1(S1FINAL,casecount); + else i = mn1(FINAL,casecount); + $$.i = mn2(RCAT,$1.i,i); + divflg = FALSE; + casecount++; + } + | error NEWE + ={ +# ifdef DEBUG + if(debug) sect2dump(); +# endif + } +r: CHAR + ={ $$.i = mn0($1.i); } + | STR + ={ + p = $1.cp; + i = mn0(*p++); + while(*p) + i = mn2(RSTR,i,*p++); + $$.i = i; + } + | '.' + ={ symbol['\n'] = 0; + if(psave == FALSE){ + p = ccptr; + psave = ccptr; + for(i=1;i<'\n';i++){ + symbol[i] = 1; + *ccptr++ = i; + } + for(i='\n'+1;i ccl+CCLSIZE) + error("Too many large character classes"); + } + else + p = psave; + $$.i = mn1(RCCL,(int)p); + cclinter(1); + } + | CCL + ={ $$.i = mn1(RCCL,$1.i); } + | NCCL + ={ $$.i = mn1(RNCCL,$1.i); } + | r '*' + ={ $$.i = mn1(STAR,$1.i); } + | r '+' + ={ $$.i = mn1(PLUS,$1.i); } + | r '?' + ={ $$.i = mn1(QUEST,$1.i); } + | r '|' r + ={ $$.i = mn2(BAR,$1.i,$3.i); } + | r r %prec CAT + ={ $$.i = mn2(RCAT,$1.i,$2.i); } + | r '/' r + ={ if(!divflg){ + j = mn1(S2FINAL,-casecount); + i = mn2(RCAT,$1.i,j); + $$.i = mn2(DIV,i,$3.i); + } + else { + $$.i = mn2(RCAT,$1.i,$3.i); + warning("Extra slash removed"); + } + divflg = TRUE; + } + | r ITER ',' ITER '}' + ={ if($2.i > $4.i){ + i = $2.i; + $2.i = $4.i; + $4.i = i; + } + if($4.i <= 0) + warning("Iteration range must be positive"); + else { + j = $1.i; + for(k = 2; k<=$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + for(i = $2.i+1; i<=$4.i; i++){ + g = dupl($1.i); + for(k=2;k<=i;k++) + g = mn2(RCAT,g,dupl($1.i)); + j = mn2(BAR,j,g); + } + $$.i = j; + } + } + | r ITER '}' + ={ + if($2.i < 0)warning("Can't have negative iteration"); + else if($2.i == 0) $$.i = mn0(RNULLS); + else { + j = $1.i; + for(k=2;k<=$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + $$.i = j; + } + } + | r ITER ',' '}' + ={ + /* from n to infinity */ + if($2.i < 0)warning("Can't have negative iteration"); + else if($2.i == 0) $$.i = mn1(STAR,$1.i); + else if($2.i == 1)$$.i = mn1(PLUS,$1.i); + else { /* >= 2 iterations minimum */ + j = $1.i; + for(k=2;k<$2.i;k++) + j = mn2(RCAT,j,dupl($1.i)); + k = mn1(PLUS,dupl($1.i)); + $$.i = mn2(RCAT,j,k); + } + } + | SCON r + ={ $$.i = mn2(RSCON,$2.i,$1.i); } + | '^' r + ={ $$.i = mn1(CARAT,$2.i); } + | r '$' + ={ i = mn0('\n'); + if(!divflg){ + j = mn1(S2FINAL,-casecount); + k = mn2(RCAT,$1.i,j); + $$.i = mn2(DIV,k,i); + } + else $$.i = mn2(RCAT,$1.i,i); + divflg = TRUE; + } + | '(' r ')' + ={ $$.i = $2.i; } + | NULLS + ={ $$.i = mn0(RNULLS); } + ; +%% +int +yylex(void) +{ + uchar *p; + int c, i; + uchar *t, *xp; + int n, j, k, x; + static int sectbegin; + static uchar token[TOKENSIZE]; + static int iter; + +# ifdef DEBUG + yylval.i = 0; +# endif + + if(sect == DEFSECTION) { /* definitions section */ + while(!eof) { + if(prev == '\n'){ /* next char is at beginning of line */ + getl(p=buf); + switch(*p){ + case '%': + switch(*(p+1)){ + case '%': + lgate(); + Bprint(&fout,"#define YYNEWLINE %d\n",'\n'); + Bprint(&fout,"yylex(void){\nint nstr; extern int yyprevious;\n"); + sectbegin = TRUE; + i = treesize*(sizeof(*name)+sizeof(*left)+ + sizeof(*right)+sizeof(*nullstr)+sizeof(*parent))+ALITTLEEXTRA; + p = myalloc(i,1); + if(p == 0) + error("Too little core for parse tree"); + free(p); + name = myalloc(treesize,sizeof(*name)); + left = myalloc(treesize,sizeof(*left)); + right = myalloc(treesize,sizeof(*right)); + nullstr = myalloc(treesize,sizeof(*nullstr)); + parent = myalloc(treesize,sizeof(*parent)); + if(name == 0 || left == 0 || right == 0 || parent == 0 || nullstr == 0) + error("Too little core for parse tree"); + return(freturn(DELIM)); + case 'p': case 'P': /* has overridden number of positions */ + while(*p && !isdigit(*p))p++; + maxpos = atol((char*)p); +# ifdef DEBUG + if (debug) print("positions (%%p) now %d\n",maxpos); +# endif + if(report == 2)report = 1; + continue; + case 'n': case 'N': /* has overridden number of states */ + while(*p && !isdigit(*p))p++; + nstates = atol((char*)p); +# ifdef DEBUG + if(debug)print( " no. states (%%n) now %d\n",nstates); +# endif + if(report == 2)report = 1; + continue; + case 'e': case 'E': /* has overridden number of tree nodes */ + while(*p && !isdigit(*p))p++; + treesize = atol((char*)p); +# ifdef DEBUG + if (debug) print("treesize (%%e) now %d\n",treesize); +# endif + if(report == 2)report = 1; + continue; + case 'o': case 'O': + while (*p && !isdigit(*p))p++; + outsize = atol((char*)p); + if (report ==2) report=1; + continue; + case 'a': case 'A': /* has overridden number of transitions */ + while(*p && !isdigit(*p))p++; + if(report == 2)report = 1; + ntrans = atol((char*)p); +# ifdef DEBUG + if (debug)print("N. trans (%%a) now %d\n",ntrans); +# endif + continue; + case 'k': case 'K': /* overriden packed char classes */ + while (*p && !isdigit(*p))p++; + if (report==2) report=1; + free(pchar); + pchlen = atol((char*)p); +# ifdef DEBUG + if (debug) print( "Size classes (%%k) now %d\n",pchlen); +# endif + pchar=pcptr=myalloc(pchlen, sizeof(*pchar)); + continue; + case '{': + lgate(); + while(getl(p) && strcmp((char*)p,"%}") != 0) + Bprint(&fout, "%s\n",(char*)p); + if(p[0] == '%') continue; + error("Premature eof"); + case 's': case 'S': /* start conditions */ + lgate(); + while(*p && strchr(" \t,", *p) == 0) p++; + n = TRUE; + while(n){ + while(*p && strchr(" \t,", *p)) p++; + t = p; + while(*p && strchr(" \t,", *p) == 0)p++; + if(!*p) n = FALSE; + *p++ = 0; + if (*t == 0) continue; + i = sptr*2; + Bprint(&fout,"#define %s %d\n",(char*)t,i); + strcpy((char*)sp, (char*)t); + sname[sptr++] = sp; + sname[sptr] = 0; /* required by lookup */ + if(sptr >= STARTSIZE) + error("Too many start conditions"); + sp += strlen((char*)sp) + 1; + if(sp >= stchar+STARTCHAR) + error("Start conditions too long"); + } + continue; + default: + warning("Invalid request %s",p); + continue; + } /* end of switch after seeing '%' */ + case ' ': case '\t': /* must be code */ + lgate(); + Bprint(&fout, "%s\n",(char*)p); + continue; + default: /* definition */ + while(*p && !isspace(*p)) p++; + if(*p == 0) + continue; + prev = *p; + *p = 0; + bptr = p+1; + yylval.cp = buf; + if(isdigit(buf[0])) + warning("Substitution strings may not begin with digits"); + return(freturn(STR)); + } + } + /* still sect 1, but prev != '\n' */ + else { + p = bptr; + while(*p && isspace(*p)) p++; + if(*p == 0) + warning("No translation given - null string assumed"); + strcpy((char*)token, (char*)p); + yylval.cp = token; + prev = '\n'; + return(freturn(STR)); + } + } + /* end of section one processing */ + } else if(sect == RULESECTION){ /* rules and actions */ + while(!eof){ + switch(c=gch()){ + case '\0': + return(freturn(0)); + case '\n': + if(prev == '\n') continue; + x = NEWE; + break; + case ' ': + case '\t': + if(sectbegin == TRUE){ + cpyact(); + while((c=gch()) && c != '\n'); + continue; + } + if(!funcflag)phead2(); + funcflag = TRUE; + Bprint(&fout,"case %d:\n",casecount); + if(cpyact()) + Bprint(&fout,"break;\n"); + while((c=gch()) && c != '\n'); + if(peek == ' ' || peek == '\t' || sectbegin == TRUE){ + warning("Executable statements should occur right after %%"); + continue; + } + x = NEWE; + break; + case '%': + if(prev != '\n') goto character; + if(peek == '{'){ /* included code */ + getl(buf); + while(!eof && getl(buf) && strcmp("%}",(char*)buf) != 0) + Bprint(&fout,"%s\n",(char*)buf); + continue; + } + if(peek == '%'){ + gch(); + gch(); + x = DELIM; + break; + } + goto character; + case '|': + if(peek == ' ' || peek == '\t' || peek == '\n'){ + Bprint(&fout,"%d\n",30000+casecount++); + continue; + } + x = '|'; + break; + case '$': + if(peek == '\n' || peek == ' ' || peek == '\t' || peek == '|' || peek == '/'){ + x = c; + break; + } + goto character; + case '^': + if(prev != '\n' && scon != TRUE) goto character; /* valid only at line begin */ + x = c; + break; + case '?': + case '+': + case '.': + case '*': + case '(': + case ')': + case ',': + case '/': + x = c; + break; + case '}': + iter = FALSE; + x = c; + break; + case '{': /* either iteration or definition */ + if(isdigit(c=gch())){ /* iteration */ + iter = TRUE; + ieval: + i = 0; + while(isdigit(c)){ + token[i++] = c; + c = gch(); + } + token[i] = 0; + yylval.i = atol((char*)token); + munputc(c); + x = ITER; + break; + } else { /* definition */ + i = 0; + while(c && c!='}'){ + token[i++] = c; + c = gch(); + } + token[i] = 0; + i = lookup(token,def); + if(i < 0) + warning("Definition %s not found",token); + else + munputs(subs[i]); + continue; + } + case '<': /* start condition ? */ + if(prev != '\n') /* not at line begin, not start */ + goto character; + t = slptr; + do { + i = 0; + c = gch(); + while(c != ',' && c && c != '>'){ + token[i++] = c; + c = gch(); + } + token[i] = 0; + if(i == 0) + goto character; + i = lookup(token,sname); + if(i < 0) { + warning("Undefined start condition %s",token); + continue; + } + *slptr++ = i+1; + } while(c && c != '>'); + *slptr++ = 0; + /* check if previous value re-usable */ + for (xp=slist; xp slist+STARTSIZE) /* note not packed ! */ + error("Too many start conditions used"); + yylval.cp = t; + x = SCON; + break; + case '"': + i = 0; + while((c=gch()) && c != '"' && c != '\n'){ + if(c == '\\') c = usescape(gch()); + token[i++] = c; + if(i > TOKENSIZE){ + warning("String too long"); + i = TOKENSIZE-1; + break; + } + } + if(c == '\n') { + yyline--; + warning("Non-terminated string"); + yyline++; + } + token[i] = 0; + if(i == 0)x = NULLS; + else if(i == 1){ + yylval.i = token[0]; + x = CHAR; + } else { + yylval.cp = token; + x = STR; + } + break; + case '[': + for(i=1;i k) { + n = j; + j = k; + k = n; + } + if(!(('A' <= j && k <= 'Z') || + ('a' <= j && k <= 'z') || + ('0' <= j && k <= '9'))) + warning("Non-portable Character Class"); + for(n=j+1;n<=k;n++) + symbol[n] = 1; /* implementation dependent */ + c = gch(); + } + } + /* try to pack ccl's */ + i = 0; + for(j=0;j= ccl+CCLSIZE) + error("Too many large character classes"); + } + cclinter(x==CCL); + break; + case '\\': + c = usescape(gch()); + default: + character: + if(iter){ /* second part of an iteration */ + iter = FALSE; + if('0' <= c && c <= '9') + goto ieval; + } + if(isalpha(peek)){ + i = 0; + yylval.cp = token; + token[i++] = c; + while(isalpha(peek)) + token[i++] = gch(); + if(peek == '?' || peek == '*' || peek == '+') + munputc(token[--i]); + token[i] = 0; + if(i == 1){ + yylval.i = token[0]; + x = CHAR; + } + else x = STR; + } else { + yylval.i = c; + x = CHAR; + } + } + scon = FALSE; + if(x == SCON)scon = TRUE; + sectbegin = FALSE; + return(freturn(x)); + } + } + /* section three */ + ptail(); +# ifdef DEBUG + if(debug) + Bprint(&fout,"\n/*this comes from section three - debug */\n"); +# endif + while(getl(buf) && !eof) + Bprint(&fout,"%s\n",(char*)buf); + return(freturn(0)); +} +/* end of yylex */ +# ifdef DEBUG +int +freturn(int i) +{ + if(yydebug) { + print("now return "); + if(i < NCH) allprint(i); + else print("%d",i); + printf(" yylval = "); + switch(i){ + case STR: case CCL: case NCCL: + strpt(yylval.cp); + break; + case CHAR: + allprint(yylval.i); + break; + default: + print("%d",yylval.i); + break; + } + print("\n"); + } + return(i); +} +# endif blob - /dev/null blob + cb393a464fce1e70177853627312da39b157a932 (mode 644) --- /dev/null +++ src/cmd/lex/sub1.c @@ -0,0 +1,591 @@ +# include "ldefs.h" +uchar * +getl(uchar *p) /* return next line of input, throw away trailing '\n' */ + /* returns 0 if eof is had immediately */ +{ + int c; + uchar *s, *t; + + t = s = p; + while(((c = gch()) != 0) && c != '\n') + *t++ = c; + *t = 0; + if(c == 0 && s == t) return((uchar *)0); + prev = '\n'; + pres = '\n'; + return(s); +} + +void +printerr(char *type, char *fmt, va_list argl) +{ + char buf[1024]; + + if(!eof)fprint(errorf,"%d: ",yyline); + fprint(errorf,"(%s) ", type); + vseprint(buf, buf+sizeof(buf), fmt, argl); + fprint(errorf, "%s\n", buf); +} + + +void +error(char *s,...) +{ + va_list argl; + + va_start(argl, s); + printerr("Error", s, argl); + va_end(argl); +# ifdef DEBUG + if(debug && sect != ENDSECTION) { + sect1dump(); + sect2dump(); + } +# endif + if( +# ifdef DEBUG + debug || +# endif + report == 1) statistics(); + exits("error"); /* error return code */ +} + +void +warning(char *s,...) +{ + va_list argl; + + va_start(argl, s); + printerr("Warning", s, argl); + va_end(argl); + Bflush(&fout); +} + +void +lgate(void) +{ + int fd; + + if (lgatflg) return; + lgatflg=1; + if(foutopen == 0){ + fd = create("lex.yy.c", OWRITE, 0666); + if(fd < 0) + error("Can't open lex.yy.c"); + Binit(&fout, fd, OWRITE); + foutopen = 1; + } + phead1(); +} + +void +cclinter(int sw) +{ + /* sw = 1 ==> ccl */ + int i, j, k; + int m; + if(!sw){ /* is NCCL */ + for(i=1;i= NCH) return; + i = cindex[i]; + /* see if ccl is already in our table */ + j = 0; + if(i){ + for(j=1;j= NCH) return; /* already in */ + m = 0; + k = 0; + for(i=1;i pushc ? *--pushptr : Bgetc(fin); + if(peek == Beof && sargc > 1){ + Bterm(fin); + fin = Bopen(sargv[fptr++],OREAD); + if(fin == 0) + error("Cannot open file %s",sargv[fptr-1]); + peek = Bgetc(fin); + sargc--; + sargv++; + } + if(c == Beof) { + eof = TRUE; + Bterm(fin); + return(0); + } + if(c == '\n')yyline++; + return(c); +} + +int +mn2(int a, int d, int c) +{ + name[tptr] = a; + left[tptr] = d; + right[tptr] = c; + parent[tptr] = 0; + nullstr[tptr] = 0; + switch(a){ + case RSTR: + parent[d] = tptr; + break; + case BAR: + case RNEWE: + if(nullstr[d] || nullstr[c]) nullstr[tptr] = TRUE; + parent[d] = parent[c] = tptr; + break; + case RCAT: + case DIV: + if(nullstr[d] && nullstr[c])nullstr[tptr] = TRUE; + parent[d] = parent[c] = tptr; + break; + case RSCON: + parent[d] = tptr; + nullstr[tptr] = nullstr[d]; + break; +# ifdef DEBUG + default: + warning("bad switch mn2 %d %d",a,d); + break; +# endif + } + if(tptr > treesize) + error("Parse tree too big %s",(treesize == TREESIZE?"\nTry using %e num":"")); + return(tptr++); +} + +int +mn1(int a, int d) +{ + name[tptr] = a; + left[tptr] = d; + parent[tptr] = 0; + nullstr[tptr] = 0; + switch(a){ + case RCCL: + case RNCCL: + if(strlen((char *)d) == 0) nullstr[tptr] = TRUE; + break; + case STAR: + case QUEST: + nullstr[tptr] = TRUE; + parent[d] = tptr; + break; + case PLUS: + case CARAT: + nullstr[tptr] = nullstr[d]; + parent[d] = tptr; + break; + case S2FINAL: + nullstr[tptr] = TRUE; + break; +# ifdef DEBUG + case FINAL: + case S1FINAL: + break; + default: + warning("bad switch mn1 %d %d",a,d); + break; +# endif + } + if(tptr > treesize) + error("Parse tree too big %s",(treesize == TREESIZE?"\nTry using %e num":"")); + return(tptr++); +} + +int +mn0(int a) +{ + name[tptr] = a; + parent[tptr] = 0; + nullstr[tptr] = 0; + if(a >= NCH) switch(a){ + case RNULLS: nullstr[tptr] = TRUE; break; +# ifdef DEBUG + default: + warning("bad switch mn0 %d",a); + break; +# endif + } + if(tptr > treesize) + error("Parse tree too big %s",(treesize == TREESIZE?"\nTry using %e num":"")); + return(tptr++); +} + +void +munputc(int p) +{ + *pushptr++ = peek; /* watch out for this */ + peek = p; + if(pushptr >= pushc+TOKENSIZE) + error("Too many characters pushed"); +} + +void +munputs(uchar *p) +{ + int i,j; + *pushptr++ = peek; + peek = p[0]; + i = strlen((char*)p); + for(j = i-1; j>=1; j--) + *pushptr++ = p[j]; + if(pushptr >= pushc+TOKENSIZE) + error("Too many characters pushed"); +} + +int +dupl(int n) +{ + /* duplicate the subtree whose root is n, return ptr to it */ + int i; + + i = name[n]; + if(i < NCH) return(mn0(i)); + switch(i){ + case RNULLS: + return(mn0(i)); + case RCCL: case RNCCL: case FINAL: case S1FINAL: case S2FINAL: + return(mn1(i,left[n])); + case STAR: case QUEST: case PLUS: case CARAT: + return(mn1(i,dupl(left[n]))); + case RSTR: case RSCON: + return(mn2(i,dupl(left[n]),right[n])); + case BAR: case RNEWE: case RCAT: case DIV: + return(mn2(i,dupl(left[n]),dupl(right[n]))); +# ifdef DEBUG + default: + warning("bad switch dupl %d",n); +# endif + } + return(0); +} + +# ifdef DEBUG +void +allprint(int c) +{ + switch(c){ + case 014: + print("\\f"); + charc++; + break; + case '\n': + print("\\n"); + charc++; + break; + case '\t': + print("\\t"); + charc++; + break; + case '\b': + print("\\b"); + charc++; + break; + case ' ': + print("\\\bb"); + break; + default: + if(!isprint(c)){ + print("\\%-3o",c); + charc += 3; + } else + print("%c", c); + break; + } + charc++; +} + +void +strpt(uchar *s) +{ + charc = 0; + while(*s){ + allprint(*s++); + if(charc > LINESIZE){ + charc = 0; + print("\n\t"); + } + } +} + +void +sect1dump(void) +{ + int i; + + print("Sect 1:\n"); + if(def[0]){ + print("str trans\n"); + i = -1; + while(def[++i]) + print("%s\t%s\n",def[i],subs[i]); + } + if(sname[0]){ + print("start names\n"); + i = -1; + while(sname[++i]) + print("%s\n",sname[i]); + } +} + +void +sect2dump(void) +{ + print("Sect 2:\n"); + treedump(); +} + +void +treedump(void) +{ + int t; + uchar *p; + print("treedump %d nodes:\n",tptr); + for(t=0;t= pcptr)*pcptr++ = cindex[j]; + } + *pcptr++ = 0; + if(pcptr > pchar + pchlen) + error("Too many packed character classes"); + left[v] = (int)p; + name[v] = RCCL; /* RNCCL eliminated */ +# ifdef DEBUG + if(debug && *p){ + print("ccl %d: %d",v,*p++); + while(*p) + print(", %d",*p++); + print("\n"); + } +# endif + } + break; + case CARAT: + cfoll(left[v]); + break; + case STAR: case PLUS: case QUEST: case RSCON: + cfoll(left[v]); + break; + case BAR: case RCAT: case DIV: case RNEWE: + cfoll(left[v]); + cfoll(right[v]); + break; +# ifdef DEBUG + case FINAL: + case S1FINAL: + case S2FINAL: + break; + default: + warning("bad switch cfoll %d",v); +# endif + } +} + +# ifdef DEBUG +void +pfoll(void) + { + int i,k,*p; + int j; + /* print sets of chars which may follow positions */ + print("pos\tchars\n"); + for(i=0;i= 1){ + print("%d:\t%d",i,*p++); + for(k=2;k<=j;k++) + print(", %d",*p++); + print("\n"); + } + } +} +# endif + +void +add(int **array, int n) +{ + int i, *temp; + uchar *ctemp; + + temp = nxtpos; + ctemp = tmpstat; + array[n] = nxtpos; /* note no packing is done in positions */ + *temp++ = count; + for(i=0;i= positions+maxpos) + error("Too many positions %s",(maxpos== MAXPOS?"\nTry using %p num":"")); + return; +} + +void +follow(int v) +{ + int p; + if(v >= tptr-1)return; + p = parent[v]; + if(p == 0) return; + switch(name[p]){ + /* will not be CHAR RNULLS FINAL S1FINAL S2FINAL RCCL RNCCL */ + case RSTR: + if(tmpstat[p] == FALSE){ + count++; + tmpstat[p] = TRUE; + } + break; + case STAR: case PLUS: + first(v); + follow(p); + break; + case BAR: case QUEST: case RNEWE: + follow(p); + break; + case RCAT: case DIV: + if(v == left[p]){ + if(nullstr[right[p]]) + follow(p); + first(right[p]); + } + else follow(p); + break; + case RSCON: case CARAT: + follow(p); + break; +# ifdef DEBUG + default: + warning("bad switch follow %d",p); +# endif + } +} + +void +first(int v) /* calculate set of positions with v as root which can be active initially */ +{ + int i; + uchar *p; + i = name[v]; + if(i < NCH)i = 1; + switch(i){ + case 1: case RCCL: case RNCCL: case RNULLS: case FINAL: case S1FINAL: case S2FINAL: + if(tmpstat[v] == FALSE){ + count++; + tmpstat[v] = TRUE; + } + break; + case BAR: case RNEWE: + first(left[v]); + first(right[v]); + break; + case CARAT: + if(stnum % 2 == 1) + first(left[v]); + break; + case RSCON: + i = stnum/2 +1; + p = (uchar *)right[v]; + while(*p) + if(*p++ == i){ + first(left[v]); + break; + } + break; + case STAR: case QUEST: case PLUS: case RSTR: + first(left[v]); + break; + case RCAT: case DIV: + first(left[v]); + if(nullstr[left[v]]) + first(right[v]); + break; +# ifdef DEBUG + default: + warning("bad switch first %d",v); +# endif + } +} + +void +cgoto(void) +{ + int i, j, s; + int npos, curpos, n; + int tryit; + uchar tch[NCH]; + int tst[NCH]; + uchar *q; + + /* generate initial state, for each start condition */ + Bprint(&fout,"int yyvstop[] = {\n0,\n"); + while(stnum < 2 || stnum/2 < sptr){ + for(i = 0; i 0)first(tptr-1); + add(state,stnum); +# ifdef DEBUG + if(debug){ + if(stnum > 1) + print("%s:\n",sname[stnum/2]); + pstate(stnum); + } +# endif + stnum++; + } + stnum--; + /* even stnum = might not be at line begin */ + /* odd stnum = must be at line begin */ + /* even states can occur anywhere, odd states only at line begin */ + for(s = 0; s <= stnum; s++){ + tryit = FALSE; + cpackflg[s] = FALSE; + sfall[s] = -1; + acompute(s); + for(i=0;i LINESIZE){ + charc = 0; + print("\n\t"); + } + } + print("\n"); + } +# endif + /* for each char, calculate next state */ + n = 0; + for(i = 1; i= nstates) + error("Too many states %s",(nstates == NSTATES ? "\nTry using %n num":"")); + add(state,++stnum); +# ifdef DEBUG + if(debug)pstate(stnum); +# endif + tch[n] = i; + tst[n++] = stnum; + } else { /* xstate >= 0 ==> state exists */ + tch[n] = i; + tst[n++] = xstate; + } + } + } + tch[n] = 0; + tst[n] = -1; + /* pack transitions into permanent array */ + if(n > 0) packtrans(s,tch,tst,n,tryit); + else gotof[s] = -1; + } + Bprint(&fout,"0};\n"); +} + +/* Beware -- 70% of total CPU time is spent in this subroutine - + if you don't believe me - try it yourself ! */ +void +nextstate(int s, int c) +{ + int j, *newpos; + uchar *temp, *tz; + int *pos, i, *f, num, curpos, number; + /* state to goto from state s on char c */ + num = *state[s]; + temp = tmpstat; + pos = state[s] + 1; + for(i = 0; i=0;i--){ /* for each state */ + j = state[i]; + if(count == *j++){ + for(k=0;k= count) + return(i); + } + } + return(-1); +} + +void +packtrans(int st, uchar *tch, int *tst, int cnt, int tryit) +{ + /* pack transitions into nchar, nexts */ + /* nchar is terminated by '\0', nexts uses cnt, followed by elements */ + /* gotof[st] = index into nchr, nexts for state st */ + + /* sfall[st] = t implies t is fall back state for st */ + /* == -1 implies no fall back */ + + int i,j,k; + int cmin, cval, tcnt, diff, p, *ast; + uchar *ach; + int go[NCH], temp[NCH], c; + int swork[NCH]; + uchar cwork[NCH]; + int upper; + + rcount += cnt; + cmin = -1; + cval = NCH; + ast = tst; + ach = tch; + /* try to pack transitions using ccl's */ + if(tryit){ /* ccl's used */ + for(i=1;i cnt) continue; + diff = 0; + j = 0; + upper = p + tcnt; + while(ach[j] && p < upper){ + while(ach[j] < nchar[p] && ach[j]){diff++; j++; } + if(ach[j] == 0)break; + if(ach[j] > nchar[p]){diff=NCH;break;} + /* ach[j] == nchar[p] */ + if(ast[j] != nexts[++p] || ast[j] == -1 || (cpackflg[st] && ach[j] != match[ach[j]]))diff++; + j++; + } + while(ach[j]){ + diff++; + j++; + } + if(p < upper)diff = NCH; + if(diff < cval && diff < tcnt){ + cval = diff; + cmin = i; + if(cval == 0)break; + } + } + /* cmin = state "most like" state st */ +# ifdef DEBUG + if(debug)print("select st %d for st %d diff %d\n",cmin,st,cval); +# endif +# ifdef PS + if(cmin != -1){ /* if we can use st cmin */ + gotof[st] = nptr; + k = 0; + sfall[st] = cmin; + p = gotof[cmin]+1; + j = 0; + while(ach[j]){ + /* if cmin has a transition on c, then so will st */ + /* st may be "larger" than cmin, however */ + while(ach[j] < nchar[p-1] && ach[j]){ + k++; + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j]; + j++; + } + if(nchar[p-1] == 0)break; + if(ach[j] > nchar[p-1]){ + warning("bad transition %d %d",st,cmin); + goto nopack; + } + /* ach[j] == nchar[p-1] */ + if(ast[j] != nexts[p] || ast[j] == -1 || (cpackflg[st] && ach[j] != match[ach[j]])){ + k++; + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j]; + } + p++; + j++; + } + while(ach[j]){ + nchar[nptr] = ach[j]; + nexts[++nptr] = ast[j++]; + k++; + } + nexts[gotof[st]] = cnt = k; + nchar[nptr++] = 0; + } else { +# endif +nopack: + /* stick it in */ + gotof[st] = nptr; + nexts[nptr] = cnt; + for(i=0;i ntrans) + error("Too many transitions %s",(ntrans==NTRANS?"\nTry using %a num":"")); +} + +# ifdef DEBUG +void +pstate(int s) +{ + int *p,i,j; + print("State %d:\n",s); + p = state[s]; + i = *p++; + if(i == 0) return; + print("%4d",*p++); + for(j = 1; j= 0) + print("%d\t",nexts[p+1]); + else print("err\t"); + allprint(nchar[p++]); + while(nexts[p] == nexts[p+1] && nchar[p]){ + if(charc > LINESIZE){ + charc = 0; + print("\n\t"); + } + allprint(nchar[p++]); + } + print("\n"); + } + print("\n"); +} +# endif + +void +acompute(int s) /* compute action list = set of poss. actions */ +{ + int *p, i, j; + int cnt, m; + int temp[300], k, neg[300], n; + + k = 0; + n = 0; + p = state[s]; + cnt = *p++; + if(cnt > 300) + error("Too many positions for one state - acompute"); + for(i=0;i= NACTIONS) error("Too many right contexts"); + extra[left[*p]] = 1; + } else if(name[*p] == S2FINAL)neg[n++] = left[*p]; + p++; + } + atable[s] = -1; + if(k < 1 && n < 1) return; +# ifdef DEBUG + if(debug) print("final %d actions:",s); +# endif + /* sort action list */ + for(i=0; i LINESIZE){ + print("\n\t"); + charc = 0; + } + } + print("\n"); + } + charc = 0; + print("match:\n"); + for(i=0;i LINESIZE){ + print("\n"); + charc = 0; + } + } + print("\n"); + return; +} +# endif + +void +mkmatch(void) +{ + int i; + uchar tab[NCH]; + + for(i=0; i outsize - NCH) + error("output table overflow"); + for(j = bot; j<= top; j++){ + k = startup + nchar[j]; + if(verify[k])break; + } + } while (j <= top); + /* have found place */ +# ifdef DEBUG + if (debug) print(" startup going to be %d\n", startup); +# endif + for(j = bot; j<= top; j++){ + k = startup + nchar[j]; + verify[k] = i+1; /* state number + 1*/ + advance[k] = nexts[j+1]+1; /* state number + 1*/ + if(yytop < k) yytop = k; + } + stoff[i] = startup; + } + + /* stoff[i] = offset into verify, advance for trans for state i */ + /* put out yywork */ + Bprint(&fout,"# define YYTYPE %s\n",stnum+1 >= NCH ? "int" : "Uchar"); + Bprint(&fout,"struct yywork { YYTYPE verify, advance; } yycrank[] = {\n"); + for(i=0;i<=yytop;i+=4){ + for(j=0;j<4;j++){ + k = i+j; + if(verify[k]) + Bprint(&fout,"%d,%d,\t",verify[k],advance[k]); + else + Bprint(&fout,"0,0,\t"); + } + Bputc(&fout, '\n'); + } + Bprint(&fout,"0,0};\n"); + + /* put out yysvec */ + + Bprint(&fout,"struct yysvf yysvec[] = {\n"); + Bprint(&fout,"0,\t0,\t0,\n"); + for(i=0;i<=stnum;i++){ /* for each state */ + if(cpackflg[i])stoff[i] = -stoff[i]; + Bprint(&fout,"yycrank+%d,\t",stoff[i]); + if(sfall[i] != -1) + Bprint(&fout,"yysvec+%d,\t", sfall[i]+1); /* state + 1 */ + else Bprint(&fout,"0,\t\t"); + if(atable[i] != -1) + Bprint(&fout,"yyvstop+%d,",atable[i]); + else Bprint(&fout,"0,\t"); +# ifdef DEBUG + Bprint(&fout,"\t\t/* state %d */",i); +# endif + Bputc(&fout, '\n'); + } + Bprint(&fout,"0,\t0,\t0};\n"); + + /* put out yymatch */ + + Bprint(&fout,"struct yywork *yytop = yycrank+%d;\n",yytop); + Bprint(&fout,"struct yysvf *yybgin = yysvec+1;\n"); + Bprint(&fout,"Uchar yymatch[] = {\n"); + for(i=0; i=0;i--){ + j = array[i]; + if(j && *j++ == count){ + for(k=0;k= count){ + array[n] = array[i]; + return; + } + } + } + add(array,n); +} +# endif