commit 7d6a248f2c68d70f58387afc69e73e695c3d940c from: Russ Cox date: Tue May 05 03:41:15 2020 UTC rc: move free carat handling into parser This fixes at least one shell script (printfont) that expected 'x'`{y}'z' to mean 'x'^`{y}^'z' as it now does. Before it meant: 'x'^`{y} 'z' One surprise is that adjacent lists get a free carat: (x y z)(1 2 3) is (x1 y2 z3) This doesn't affect any rc script in Plan 9 or plan9port. commit - 3caf5c238a886d06b438ec6d42f2609b8625463f commit + 7d6a248f2c68d70f58387afc69e73e695c3d940c blob - 7ea8998a11a0c0956e8f6a6c6afd2311d5c151b8 blob + df7af05b5940e344509fa56745f7beb65a483f78 --- man/man1/rc.1 +++ man/man1/rc.1 @@ -290,28 +290,10 @@ then one operand must have one component, and the othe and concatenation is distributive. .PD .SS Free Carets -In most circumstances, -.I rc +.I Rc will insert the .B ^ operator automatically between words that are not separated by white space. -Whenever one of -.B $ -.B ' -.B ` -follows a quoted or unquoted word or an unquoted word follows a quoted word -with no intervening blanks or tabs, -a -.B ^ -is inserted between the two. -If an unquoted word immediately follows a -.BR $ -and contains a character other than an alphanumeric, underscore, -or -.BR * , -a -.B ^ -is inserted before the first such character. Thus .IP .B cc -$flags $stem.c @@ -367,7 +349,7 @@ or .I Fd1 is a previously opened file descriptor and .I fd0 -becomes a new copy (in the sense of +becomes a new copy (in the sense of .IR dup (3)) of it. A file descriptor may be closed by writing @@ -477,7 +459,7 @@ is executed. The .I command is executed once for each -.IR argument +.IR argument with that argument assigned to .IR name . If the argument list is omitted, @@ -981,8 +963,6 @@ changes .BR $status . .PP Functions that use here documents don't work. -.PP -Free carets don't get inserted next to keywords. .PP The .BI <{ command } blob - 5833847988c2b5fa0eab8ac64f3f380575b8a0e1 blob + 48bd70deddbec479756021e1f21ec3040a8944dd --- src/cmd/rc/lex.c +++ src/cmd/rc/lex.c @@ -202,7 +202,7 @@ yylex(void) * if the next character is the first character of a simple or compound word, * we insert a `^' before it. */ - if(lastword){ + if(lastword && flag['Y']){ lastword = 0; if(d=='('){ advance(); @@ -215,8 +215,8 @@ yylex(void) } } inquote = 0; - if(skipwhite() && flag['Z']) - return SP; + if(skipwhite() && !flag['Y']) + return ' '; switch(c = advance()){ case EOF: lastdol = 0; blob - 466b7da28e02ac00c7dcf005f3d1c04db2d52806 blob + 11be951ba7789970e87ae7a248061f1bba882200 --- src/cmd/rc/parse.c +++ src/cmd/rc/parse.c @@ -23,11 +23,19 @@ static jmp_buf yyjmp; static int dropnl(int tok) { - while(tok == '\n') + while(tok == ' ' || tok == '\n') tok = yylex(); return tok; } +static int +dropsp(int tok) +{ + while(tok == ' ') + tok = yylex(); + return tok; +} + static void syntax(int tok) { @@ -49,7 +57,7 @@ parse(void) // rc: { return 1;} // | line '\n' {return !compile($1);} - tok = yylex(); + tok = dropsp(yylex()); if(tok == EOF) return 1; t = line(tok, &tok); @@ -117,6 +125,7 @@ brace(int tok) // brace: '{' body '}' {$$=tree1(BRACE, $2);} + tok = dropsp(tok); if(tok != '{') syntax(tok); t = body(yylex(), &tok); @@ -132,6 +141,7 @@ paren(int tok) // paren: '(' body ')' {$$=tree1(PCMD, $2);} + tok = dropsp(tok); if(tok != '(') syntax(tok); t = body(yylex(), &tok); @@ -172,11 +182,12 @@ yyredir(int tok, int *ptok) syntax(tok); case DUP: r = yylval.tree; - *ptok = yylex(); + *ptok = dropsp(yylex()); break; case REDIR: r = yylval.tree; - w = yyword(yylex(), ptok); + w = yyword(yylex(), &tok); + *ptok = dropsp(tok); r = mung1(r, r->rtype==HERE?heredoc(w):w); break; } @@ -186,17 +197,67 @@ yyredir(int tok, int *ptok) static tree* cmd(int tok, int *ptok) { - tree *t1, *t2, *t3, *t4; - + tok = dropsp(tok); switch(tok) { default: return cmd2(tok, ptok); + } +} + +static tree* +cmd2(int tok, int *ptok) +{ + int op; + tree *t1, *t2; + + // | cmd ANDAND cmd {$$=tree2(ANDAND, $1, $3);} + // | cmd OROR cmd {$$=tree2(OROR, $1, $3);} + + t1 = cmd3(tok, &tok); + while(tok == ANDAND || tok == OROR) { + op = tok; + t2 = cmd3(dropnl(yylex()), &tok); + t1 = tree2(op, t1, t2); + } + *ptok = tok; + return t1; +} + +static tree* +cmd3(int tok, int *ptok) +{ + tree *t1, *t2, *t3; + + // | cmd PIPE cmd {$$=mung2($2, $1, $3);} + t1 = cmd4(tok, &tok); + while(tok == PIPE) { + t2 = yylval.tree; + t3 = cmd4(dropnl(yylex()), &tok); + t1 = mung2(t2, t1, t3); + } + *ptok = tok; + return t1; +} + +static tree* +cmd4(int tok, int *ptok) +{ + tree *t1, *t2, *t3, *t4; + + tok = dropsp(tok); + switch(tok) { + case ';': + case '&': + case '\n': + *ptok = tok; + return nil; + case IF: // | IF paren {skipnl();} cmd {$$=mung2($1, $2, $4);} // | IF NOT {skipnl();} cmd {$$=mung1($2, $4);} t1 = yylval.tree; - tok = yylex(); + tok = dropsp(yylex()); if(tok == NOT) { t1 = yylval.tree; t2 = cmd(dropnl(yylex()), ptok); @@ -212,7 +273,7 @@ cmd(int tok, int *ptok) // | FOR '(' word ')' {skipnl();} cmd // {$$=mung3($1, $3, (tree *)0, $6);} t1 = yylval.tree; - tok = yylex(); + tok = dropsp(yylex()); if(tok != '(') syntax(tok); t2 = yyword(yylex(), &tok); @@ -247,62 +308,8 @@ cmd(int tok, int *ptok) t1 = yyword(yylex(), &tok); tok = dropnl(tok); // doesn't work in yacc grammar but works here! t2 = brace(tok); - *ptok = yylex(); + *ptok = dropsp(yylex()); return tree2(SWITCH, t1, t2); - } -} - -static tree* -cmd2(int tok, int *ptok) -{ - int op; - tree *t1, *t2; - - // | cmd ANDAND cmd {$$=tree2(ANDAND, $1, $3);} - // | cmd OROR cmd {$$=tree2(OROR, $1, $3);} - - t1 = cmd3(tok, &tok); - while(tok == ANDAND || tok == OROR) { - op = tok; - t2 = cmd3(dropnl(yylex()), &tok); - t1 = tree2(op, t1, t2); - } - *ptok = tok; - return t1; -} - -static tree* -cmd3(int tok, int *ptok) -{ - tree *t1, *t2, *t3; - - // | cmd PIPE cmd {$$=mung2($2, $1, $3);} - t1 = cmd4(tok, &tok); - while(tok == PIPE) { - t2 = yylval.tree; - t3 = cmd4(dropnl(yylex()), &tok); - t1 = mung2(t2, t1, t3); - } - *ptok = tok; - return t1; -} - -static tree* -cmd4(int tok, int *ptok) -{ - tree *t1, *t2, *t3; - - switch(tok) { - case ';': - case '&': - case '\n': - *ptok = tok; - return nil; - - case IF: - case FOR: - case SWITCH: - case WHILE: // Note: cmd: a && for(x) y && b is a && {for (x) {y && b}}. return cmd(tok, ptok); @@ -315,7 +322,7 @@ cmd4(int tok, int *ptok) return tree1(FN, t1); } t2 = brace(tok); - *ptok = yylex(); + *ptok = dropsp(yylex()); return tree2(FN, t1, t2); case TWIDDLE: @@ -344,7 +351,7 @@ cmd4(int tok, int *ptok) case '{': // | brace epilog {$$=epimung($1, $2);} t1 = brace(tok); - tok = yylex(); + tok = dropsp(yylex()); t2 = epilog(tok, ptok); return epimung(t1, t2); } @@ -396,6 +403,7 @@ words(int tok, int *ptok) // | words word {$$=tree2(WORDS, $1, $2);} t = nil; + tok = dropsp(tok); while(iswordtok(tok)) t = tree2(WORDS, t, yyword(tok, &tok)); *ptok = tok; @@ -428,9 +436,19 @@ yyword(int tok, int *ptok) // word1: keyword | comword t = word1(tok, &tok); - while(tok == '^') - t = tree2('^', t, word1(yylex(), &tok)); - *ptok = tok; + for(;;) { + if(iswordtok(tok)) { + t = tree2('^', t, word1(tok, &tok)); + continue; + } + tok = dropsp(tok); + if(tok == '^') { + t = tree2('^', t, word1(yylex(), &tok)); + continue; + } + break; + } + *ptok = dropsp(tok); return t; } @@ -439,6 +457,7 @@ word1(int tok, int *ptok) { tree *w, *sub, *t; + tok = dropsp(tok); switch(tok) { default: syntax(tok); @@ -458,7 +477,6 @@ word1(int tok, int *ptok) // keyword: FOR|IN|WHILE|IF|NOT|TWIDDLE|BANG|SUBSHELL|SWITCH|FN t = yylval.tree; t->type = WORD; - lastword = 1; *ptok = yylex(); return t; @@ -466,7 +484,7 @@ word1(int tok, int *ptok) // comword: '$' word1 {$$=tree1('$', $2);} // | '$' word1 SUB words ')' {$$=tree2(SUB, $2, $4);} w = word1(yylex(), &tok); - if(tok == SUB) { + if(tok == '(') { sub = words(yylex(), &tok); if(tok != ')') syntax(tok); blob - e3decd414616d65922bbf5c7025677a6b35a1568 blob + 5c98ef80c8b00edbc894857b552b5e300552f3a0 --- src/cmd/rc/syn.y +++ src/cmd/rc/syn.y @@ -1,4 +1,4 @@ -%term FOR IN WHILE IF NOT TWIDDLE BANG SUBSHELL SWITCH FN SP +%term FOR IN WHILE IF NOT TWIDDLE BANG SUBSHELL SWITCH FN %term WORD REDIR REDIRW DUP PIPE SUB %term SIMPLE ARGLIST WORDS BRACE PAREN PCMD PIPEFD /* not used in syntax */ /* operator priorities -- lowest first */ blob - 5c6581327e689c6efca1708614892dec4b0f2203 blob + 7a83ad17c20a673000735b7407e41ffae3848035 --- src/cmd/rc/test.rc +++ src/cmd/rc/test.rc @@ -1,5 +1,9 @@ # test for parser +a +a b +a|b +a | b {a; b; c} x=y a && b || c x=y a | b | c @@ -63,3 +67,10 @@ x || y x | y +switch x {y} && z +switch x {} | y + +OPTIONS=$OPTIONS' /axescount '^`{echo $1 | sed s/-a//}^' def' + +# bug in old printfont script - expected more free carats +# OPTIONS=$OPTIONS' /axescount '`{echo $1 | sed s/-a//}' def'