Blob
- Date:
- Message:
- rc: add recursive descent parser The old yacc-based parser is available with the -Y flag, which will probably be removed at some point. The new -D flag dumps a parse tree of the input, without executing it. This allows comparing the output of rc -D and rc -DY on different scripts to see that the two parsers behave the same. The rc paper ends by saying: It is remarkable that in the four most recent editions of the UNIX system programmer’s manual the Bourne shell grammar described in the manual page does not admit the command who|wc. This is surely an oversight, but it suggests something darker: nobody really knows what the Bourne shell’s grammar is. Even examination of the source code is little help. The parser is implemented by recursive descent, but the routines corresponding to the syntactic categories all have a flag argument that subtly changes their operation depending on the context. Rc’s parser is implemented using yacc, so I can say precisely what the grammar is. The new recursive descent parser here has no such flags. It is a straightforward translation of the yacc. The new parser will make it easier to handle free carats in more generality as well as potentially allow the use of unquoted = as a word character. Going through this exercise has highlighted a few dark corners here as well. For example, I was surprised to find that x >f | y >f x | y are different commands (the latter redirects y's output). It is similarly surprising that a=b x | y sets a during the execution of y. It is also a bit counter-intuitive x | y | z x | if(c) y | z are not both 3-phase pipelines. These are certainly not things we should change, but they are not entirely obvious from the man page description, undercutting the quoted claim a bit. On the other hand, who | wc is clearly accepted by the grammar in the manual page, and the new parser still handles that test case.
- Actions:
- History | Blame | Raw File
1 #include "rc.h"2 #include "io.h"3 #include "exec.h"4 #include "fns.h"5 #include "getflags.h"6 #define c0 t->child[0]7 #define c1 t->child[1]8 #define c2 t->child[2]9 int codep, ncode;10 #define emitf(x) ((void)(codep!=ncode || morecode()), codebuf[codep].f = (x), codep++)11 #define emiti(x) ((void)(codep!=ncode || morecode()), codebuf[codep].i = (x), codep++)12 #define emits(x) ((void)(codep!=ncode || morecode()), codebuf[codep].s = (x), codep++)13 void stuffdot(int);14 char *fnstr(tree*);15 void outcode(tree*, int);16 void codeswitch(tree*, int);17 int iscase(tree*);18 code *codecopy(code*);19 void codefree(code*);21 int22 morecode(void)23 {24 ncode+=100;25 codebuf = (code *)realloc((char *)codebuf, ncode*sizeof codebuf[0]);26 if(codebuf==0)27 panic("Can't realloc %d bytes in morecode!",28 ncode*sizeof codebuf[0]);29 memset(codebuf+ncode-100, 0, 100*sizeof codebuf[0]);30 return 0;31 }33 void34 stuffdot(int a)35 {36 if(a<0 || codep<=a)37 panic("Bad address %d in stuffdot", a);38 codebuf[a].i = codep;39 }41 int42 compile(tree *t)43 {44 if(flag['D']) {45 struct io *s;46 s = openstr();47 pfmt(s, "compile: %u\n", t);48 write(2, s->strp, strlen(s->strp));49 closeio(s);50 if(eflagok) // made it out of rcmain - stop executing commands, just print them51 t = nil;52 }54 ncode = 100;55 codebuf = (code *)emalloc(ncode*sizeof codebuf[0]);56 codep = 0;57 emiti(0); /* reference count */58 outcode(t, flag['e']?1:0);59 if(nerror){60 efree((char *)codebuf);61 return 0;62 }63 readhere();64 emitf(Xreturn);65 emitf(0);66 return 1;67 }69 void70 cleanhere(char *f)71 {72 emitf(Xdelhere);73 emits(strdup(f));74 }76 char*77 fnstr(tree *t)78 {79 io *f = openstr();80 char *v;81 extern char nl;82 char svnl = nl;83 nl=';';84 pfmt(f, "%t", t);85 nl = svnl;86 v = f->strp;87 f->strp = 0;88 closeio(f);89 return v;90 }92 void93 outcode(tree *t, int eflag)94 {95 int p, q;96 tree *tt;97 if(t==0)98 return;99 if(t->type!=NOT && t->type!=';')100 runq->iflast = 0;101 switch(t->type){102 default:103 pfmt(err, "bad type %d in outcode\n", t->type);104 break;105 case '$':106 emitf(Xmark);107 outcode(c0, eflag);108 emitf(Xdol);109 break;110 case '"':111 emitf(Xmark);112 outcode(c0, eflag);113 emitf(Xqdol);114 break;115 case SUB:116 emitf(Xmark);117 outcode(c0, eflag);118 emitf(Xmark);119 outcode(c1, eflag);120 emitf(Xsub);121 break;122 case '&':123 emitf(Xasync);124 if(havefork){125 p = emiti(0);126 outcode(c0, eflag);127 emitf(Xexit);128 stuffdot(p);129 } else130 emits(fnstr(c0));131 break;132 case ';':133 outcode(c0, eflag);134 outcode(c1, eflag);135 break;136 case '^':137 emitf(Xmark);138 outcode(c1, eflag);139 emitf(Xmark);140 outcode(c0, eflag);141 emitf(Xconc);142 break;143 case '`':144 emitf(Xbackq);145 if(havefork){146 p = emiti(0);147 outcode(c0, 0);148 emitf(Xexit);149 stuffdot(p);150 } else151 emits(fnstr(c0));152 break;153 case ANDAND:154 outcode(c0, 0);155 emitf(Xtrue);156 p = emiti(0);157 outcode(c1, eflag);158 stuffdot(p);159 break;160 case ARGLIST:161 outcode(c1, eflag);162 outcode(c0, eflag);163 break;164 case BANG:165 outcode(c0, eflag);166 emitf(Xbang);167 break;168 case PCMD:169 case BRACE:170 outcode(c0, eflag);171 break;172 case COUNT:173 emitf(Xmark);174 outcode(c0, eflag);175 emitf(Xcount);176 break;177 case FN:178 emitf(Xmark);179 outcode(c0, eflag);180 if(c1){181 emitf(Xfn);182 p = emiti(0);183 emits(fnstr(c1));184 outcode(c1, eflag);185 emitf(Xunlocal); /* get rid of $* */186 emitf(Xreturn);187 stuffdot(p);188 }189 else190 emitf(Xdelfn);191 break;192 case IF:193 outcode(c0, 0);194 emitf(Xif);195 p = emiti(0);196 outcode(c1, eflag);197 emitf(Xwastrue);198 stuffdot(p);199 break;200 case NOT:201 if(!runq->iflast)202 yyerror("`if not' does not follow `if(...)'");203 emitf(Xifnot);204 p = emiti(0);205 outcode(c0, eflag);206 stuffdot(p);207 break;208 case OROR:209 outcode(c0, 0);210 emitf(Xfalse);211 p = emiti(0);212 outcode(c1, eflag);213 stuffdot(p);214 break;215 case PAREN:216 outcode(c0, eflag);217 break;218 case SIMPLE:219 emitf(Xmark);220 outcode(c0, eflag);221 emitf(Xsimple);222 if(eflag)223 emitf(Xeflag);224 break;225 case SUBSHELL:226 emitf(Xsubshell);227 if(havefork){228 p = emiti(0);229 outcode(c0, eflag);230 emitf(Xexit);231 stuffdot(p);232 } else233 emits(fnstr(c0));234 if(eflag)235 emitf(Xeflag);236 break;237 case SWITCH:238 codeswitch(t, eflag);239 break;240 case TWIDDLE:241 emitf(Xmark);242 outcode(c1, eflag);243 emitf(Xmark);244 outcode(c0, eflag);245 emitf(Xmatch);246 if(eflag)247 emitf(Xeflag);248 break;249 case WHILE:250 q = codep;251 outcode(c0, 0);252 if(q==codep)253 emitf(Xsettrue); /* empty condition == while(true) */254 emitf(Xtrue);255 p = emiti(0);256 outcode(c1, eflag);257 emitf(Xjump);258 emiti(q);259 stuffdot(p);260 break;261 case WORDS:262 outcode(c1, eflag);263 outcode(c0, eflag);264 break;265 case FOR:266 emitf(Xmark);267 if(c1){268 outcode(c1, eflag);269 emitf(Xglob);270 }271 else{272 emitf(Xmark);273 emitf(Xword);274 emits(strdup("*"));275 emitf(Xdol);276 }277 emitf(Xmark); /* dummy value for Xlocal */278 emitf(Xmark);279 outcode(c0, eflag);280 emitf(Xlocal);281 p = emitf(Xfor);282 q = emiti(0);283 outcode(c2, eflag);284 emitf(Xjump);285 emiti(p);286 stuffdot(q);287 emitf(Xunlocal);288 break;289 case WORD:290 emitf(Xword);291 emits(strdup(t->str));292 break;293 case DUP:294 if(t->rtype==DUPFD){295 emitf(Xdup);296 emiti(t->fd0);297 emiti(t->fd1);298 }299 else{300 emitf(Xclose);301 emiti(t->fd0);302 }303 outcode(c1, eflag);304 emitf(Xpopredir);305 break;306 case PIPEFD:307 emitf(Xpipefd);308 emiti(t->rtype);309 if(havefork){310 p = emiti(0);311 outcode(c0, eflag);312 emitf(Xexit);313 stuffdot(p);314 } else {315 emits(fnstr(c0));316 }317 break;318 case REDIR:319 emitf(Xmark);320 outcode(c0, eflag);321 emitf(Xglob);322 switch(t->rtype){323 case APPEND:324 emitf(Xappend);325 break;326 case WRITE:327 emitf(Xwrite);328 break;329 case READ:330 case HERE:331 emitf(Xread);332 break;333 case RDWR:334 emitf(Xrdwr);335 break;336 }337 emiti(t->fd0);338 outcode(c1, eflag);339 emitf(Xpopredir);340 break;341 case '=':342 tt = t;343 for(;t && t->type=='=';t = c2);344 if(t){345 for(t = tt;t->type=='=';t = c2){346 emitf(Xmark);347 outcode(c1, eflag);348 emitf(Xmark);349 outcode(c0, eflag);350 emitf(Xlocal);351 }352 outcode(t, eflag);353 for(t = tt; t->type=='='; t = c2)354 emitf(Xunlocal);355 }356 else{357 for(t = tt;t;t = c2){358 emitf(Xmark);359 outcode(c1, eflag);360 emitf(Xmark);361 outcode(c0, eflag);362 emitf(Xassign);363 }364 }365 t = tt; /* so tests below will work */366 break;367 case PIPE:368 emitf(Xpipe);369 emiti(t->fd0);370 emiti(t->fd1);371 if(havefork){372 p = emiti(0);373 q = emiti(0);374 outcode(c0, eflag);375 emitf(Xexit);376 stuffdot(p);377 } else {378 emits(fnstr(c0));379 q = emiti(0);380 }381 outcode(c1, eflag);382 emitf(Xreturn);383 stuffdot(q);384 emitf(Xpipewait);385 break;386 }387 if(t->type!=NOT && t->type!=';')388 runq->iflast = t->type==IF;389 else if(c0) runq->iflast = c0->type==IF;390 }391 /*392 * switch code looks like this:393 * Xmark394 * (get switch value)395 * Xjump 1f396 * out: Xjump leave397 * 1: Xmark398 * (get case values)399 * Xcase 1f400 * (commands)401 * Xjump out402 * 1: Xmark403 * (get case values)404 * Xcase 1f405 * (commands)406 * Xjump out407 * 1:408 * leave:409 * Xpopm410 */412 void413 codeswitch(tree *t, int eflag)414 {415 int leave; /* patch jump address to leave switch */416 int out; /* jump here to leave switch */417 int nextcase; /* patch jump address to next case */418 tree *tt;419 if(c1->child[0]==nil420 || c1->child[0]->type!=';'421 || !iscase(c1->child[0]->child[0])){422 yyerror("case missing in switch");423 return;424 }425 emitf(Xmark);426 outcode(c0, eflag);427 emitf(Xjump);428 nextcase = emiti(0);429 out = emitf(Xjump);430 leave = emiti(0);431 stuffdot(nextcase);432 t = c1->child[0];433 while(t->type==';'){434 tt = c1;435 emitf(Xmark);436 for(t = c0->child[0];t->type==ARGLIST;t = c0) outcode(c1, eflag);437 emitf(Xcase);438 nextcase = emiti(0);439 t = tt;440 for(;;){441 if(t->type==';'){442 if(iscase(c0)) break;443 outcode(c0, eflag);444 t = c1;445 }446 else{447 if(!iscase(t)) outcode(t, eflag);448 break;449 }450 }451 emitf(Xjump);452 emiti(out);453 stuffdot(nextcase);454 }455 stuffdot(leave);456 emitf(Xpopm);457 }459 int460 iscase(tree *t)461 {462 if(t->type!=SIMPLE)463 return 0;464 do t = c0; while(t->type==ARGLIST);465 return t->type==WORD && !t->quoted && strcmp(t->str, "case")==0;466 }468 code*469 codecopy(code *cp)470 {471 cp[0].i++;472 return cp;473 }475 void476 codefree(code *cp)477 {478 code *p;479 if(--cp[0].i!=0)480 return;481 for(p = cp+1;p->f;p++){482 if(p->f==Xappend || p->f==Xclose || p->f==Xread || p->f==Xwrite483 || p->f==Xrdwr484 || p->f==Xasync || p->f==Xbackq || p->f==Xcase || p->f==Xfalse485 || p->f==Xfor || p->f==Xjump486 || p->f==Xsubshell || p->f==Xtrue) p++;487 else if(p->f==Xdup || p->f==Xpipefd) p+=2;488 else if(p->f==Xpipe) p+=4;489 else if(p->f==Xword || p->f==Xdelhere) efree((++p)->s);490 else if(p->f==Xfn){491 efree(p[2].s);492 p+=2;493 }494 }495 efree((char *)cp);496 }