1 63a68686 2008-11-03 jas /****************************************************************
2 63a68686 2008-11-03 jas Copyright (C) Lucent Technologies 1997
3 63a68686 2008-11-03 jas All Rights Reserved
5 63a68686 2008-11-03 jas Permission to use, copy, modify, and distribute this software and
6 63a68686 2008-11-03 jas its documentation for any purpose and without fee is hereby
7 63a68686 2008-11-03 jas granted, provided that the above copyright notice appear in all
8 63a68686 2008-11-03 jas copies and that both that the copyright notice and this
9 63a68686 2008-11-03 jas permission notice and warranty disclaimer appear in supporting
10 63a68686 2008-11-03 jas documentation, and that the name Lucent Technologies or any of
11 63a68686 2008-11-03 jas its entities not be used in advertising or publicity pertaining
12 63a68686 2008-11-03 jas to distribution of the software without specific, written prior
15 63a68686 2008-11-03 jas LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16 63a68686 2008-11-03 jas INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17 63a68686 2008-11-03 jas IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18 63a68686 2008-11-03 jas SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19 63a68686 2008-11-03 jas WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20 63a68686 2008-11-03 jas IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21 63a68686 2008-11-03 jas ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22 63a68686 2008-11-03 jas THIS SOFTWARE.
23 63a68686 2008-11-03 jas ****************************************************************/
25 63a68686 2008-11-03 jas #include <stdio.h>
26 63a68686 2008-11-03 jas #include <stdlib.h>
27 63a68686 2008-11-03 jas #include <string.h>
28 63a68686 2008-11-03 jas #include <ctype.h>
29 63a68686 2008-11-03 jas #include "awk.h"
30 63a68686 2008-11-03 jas #include "y.tab.h"
32 63a68686 2008-11-03 jas extern YYSTYPE yylval;
33 63a68686 2008-11-03 jas extern int infunc;
35 63a68686 2008-11-03 jas int lineno = 1;
36 63a68686 2008-11-03 jas int bracecnt = 0;
37 63a68686 2008-11-03 jas int brackcnt = 0;
38 63a68686 2008-11-03 jas int parencnt = 0;
40 63a68686 2008-11-03 jas typedef struct Keyword {
46 63a68686 2008-11-03 jas Keyword keywords[] ={ /* keep sorted: binary searched */
47 63a68686 2008-11-03 jas { "BEGIN", XBEGIN, XBEGIN },
48 63a68686 2008-11-03 jas { "END", XEND, XEND },
49 63a68686 2008-11-03 jas { "NF", VARNF, VARNF },
50 63a68686 2008-11-03 jas { "atan2", FATAN, BLTIN },
51 63a68686 2008-11-03 jas { "break", BREAK, BREAK },
52 63a68686 2008-11-03 jas { "close", CLOSE, CLOSE },
53 63a68686 2008-11-03 jas { "continue", CONTINUE, CONTINUE },
54 63a68686 2008-11-03 jas { "cos", FCOS, BLTIN },
55 63a68686 2008-11-03 jas { "delete", DELETE, DELETE },
56 63a68686 2008-11-03 jas { "do", DO, DO },
57 63a68686 2008-11-03 jas { "else", ELSE, ELSE },
58 63a68686 2008-11-03 jas { "exit", EXIT, EXIT },
59 63a68686 2008-11-03 jas { "exp", FEXP, BLTIN },
60 63a68686 2008-11-03 jas { "fflush", FFLUSH, BLTIN },
61 63a68686 2008-11-03 jas { "for", FOR, FOR },
62 63a68686 2008-11-03 jas { "func", FUNC, FUNC },
63 63a68686 2008-11-03 jas { "function", FUNC, FUNC },
64 63a68686 2008-11-03 jas { "getline", GETLINE, GETLINE },
65 63a68686 2008-11-03 jas { "gsub", GSUB, GSUB },
66 63a68686 2008-11-03 jas { "if", IF, IF },
67 63a68686 2008-11-03 jas { "in", IN, IN },
68 63a68686 2008-11-03 jas { "index", INDEX, INDEX },
69 63a68686 2008-11-03 jas { "int", FINT, BLTIN },
70 63a68686 2008-11-03 jas { "length", FLENGTH, BLTIN },
71 63a68686 2008-11-03 jas { "log", FLOG, BLTIN },
72 63a68686 2008-11-03 jas { "match", MATCHFCN, MATCHFCN },
73 63a68686 2008-11-03 jas { "next", NEXT, NEXT },
74 63a68686 2008-11-03 jas { "nextfile", NEXTFILE, NEXTFILE },
75 63a68686 2008-11-03 jas { "print", PRINT, PRINT },
76 63a68686 2008-11-03 jas { "printf", PRINTF, PRINTF },
77 63a68686 2008-11-03 jas { "rand", FRAND, BLTIN },
78 63a68686 2008-11-03 jas { "return", RETURN, RETURN },
79 63a68686 2008-11-03 jas { "sin", FSIN, BLTIN },
80 63a68686 2008-11-03 jas { "split", SPLIT, SPLIT },
81 63a68686 2008-11-03 jas { "sprintf", SPRINTF, SPRINTF },
82 63a68686 2008-11-03 jas { "sqrt", FSQRT, BLTIN },
83 63a68686 2008-11-03 jas { "srand", FSRAND, BLTIN },
84 63a68686 2008-11-03 jas { "sub", SUB, SUB },
85 63a68686 2008-11-03 jas { "substr", SUBSTR, SUBSTR },
86 63a68686 2008-11-03 jas { "system", FSYSTEM, BLTIN },
87 63a68686 2008-11-03 jas { "tolower", FTOLOWER, BLTIN },
88 63a68686 2008-11-03 jas { "toupper", FTOUPPER, BLTIN },
89 63a68686 2008-11-03 jas { "utf", FUTF, BLTIN },
90 63a68686 2008-11-03 jas { "while", WHILE, WHILE },
93 63a68686 2008-11-03 jas #define DEBUG
95 63a68686 2008-11-03 jas #define RET(x) { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
97 63a68686 2008-11-03 jas #define RET(x) return(x)
100 63a68686 2008-11-03 jas int peek(void)
102 63a68686 2008-11-03 jas int c = input();
107 63a68686 2008-11-03 jas int gettok(char **pbuf, int *psz) /* get next input token */
110 63a68686 2008-11-03 jas char *buf = *pbuf;
111 63a68686 2008-11-03 jas int sz = *psz;
112 63a68686 2008-11-03 jas char *bp = buf;
114 63a68686 2008-11-03 jas c = input();
119 63a68686 2008-11-03 jas if (!isalnum(c) && c != '.' && c != '_')
123 63a68686 2008-11-03 jas if (isalpha(c) || c == '_') { /* it's a varname */
124 63a68686 2008-11-03 jas for ( ; (c = input()) != 0; ) {
125 63a68686 2008-11-03 jas if (bp-buf >= sz)
126 63a68686 2008-11-03 jas if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
127 63a68686 2008-11-03 jas FATAL( "out of space for name %.10s...", buf );
128 63a68686 2008-11-03 jas if (isalnum(c) || c == '_')
136 63a68686 2008-11-03 jas } else { /* it's a number */
138 63a68686 2008-11-03 jas /* read input until can't be a number */
139 63a68686 2008-11-03 jas for ( ; (c = input()) != 0; ) {
140 63a68686 2008-11-03 jas if (bp-buf >= sz)
141 63a68686 2008-11-03 jas if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, 0))
142 63a68686 2008-11-03 jas FATAL( "out of space for number %.10s...", buf );
143 63a68686 2008-11-03 jas if (isdigit(c) || c == 'e' || c == 'E'
144 63a68686 2008-11-03 jas || c == '.' || c == '+' || c == '-')
152 63a68686 2008-11-03 jas strtod(buf, &rem); /* parse the number */
153 63a68686 2008-11-03 jas unputstr(rem); /* put rest back for later */
156 63a68686 2008-11-03 jas *pbuf = buf;
158 63a68686 2008-11-03 jas return buf[0];
161 63a68686 2008-11-03 jas int word(char *);
162 63a68686 2008-11-03 jas int string(void);
163 63a68686 2008-11-03 jas int regexpr(void);
164 63a68686 2008-11-03 jas int sc = 0; /* 1 => return a } right now */
165 63a68686 2008-11-03 jas int reg = 0; /* 1 => return a REGEXPR now */
167 63a68686 2008-11-03 jas int yylex(void)
170 63a68686 2008-11-03 jas static char *buf = 0;
171 63a68686 2008-11-03 jas static int bufsize = 500;
173 63a68686 2008-11-03 jas if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
174 63a68686 2008-11-03 jas FATAL( "out of space in yylex" );
181 63a68686 2008-11-03 jas return regexpr();
184 63a68686 2008-11-03 jas c = gettok(&buf, &bufsize);
187 63a68686 2008-11-03 jas if (isalpha(c) || c == '_')
188 63a68686 2008-11-03 jas return word(buf);
189 63a68686 2008-11-03 jas if (isdigit(c) || c == '.') {
190 63a68686 2008-11-03 jas yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
191 63a68686 2008-11-03 jas /* should this also have STR set? */
192 63a68686 2008-11-03 jas RET(NUMBER);
195 63a68686 2008-11-03 jas yylval.i = c;
196 63a68686 2008-11-03 jas switch (c) {
197 63a68686 2008-11-03 jas case '\n': /* {EOL} */
199 63a68686 2008-11-03 jas case '\r': /* assume \n is coming */
200 63a68686 2008-11-03 jas case ' ': /* {WS}+ */
203 63a68686 2008-11-03 jas case '#': /* #.* strip comments */
204 63a68686 2008-11-03 jas while ((c = input()) != '\n' && c != 0)
211 63a68686 2008-11-03 jas if (peek() == '\n') {
213 63a68686 2008-11-03 jas } else if (peek() == '\r') {
214 63a68686 2008-11-03 jas input(); input(); /* \n */
221 63a68686 2008-11-03 jas if (peek() == '&') {
222 63a68686 2008-11-03 jas input(); RET(AND);
226 63a68686 2008-11-03 jas if (peek() == '|') {
227 63a68686 2008-11-03 jas input(); RET(BOR);
231 63a68686 2008-11-03 jas if (peek() == '=') {
232 63a68686 2008-11-03 jas input(); yylval.i = NE; RET(NE);
233 63a68686 2008-11-03 jas } else if (peek() == '~') {
234 63a68686 2008-11-03 jas input(); yylval.i = NOTMATCH; RET(MATCHOP);
238 63a68686 2008-11-03 jas yylval.i = MATCH;
239 63a68686 2008-11-03 jas RET(MATCHOP);
241 63a68686 2008-11-03 jas if (peek() == '=') {
242 63a68686 2008-11-03 jas input(); yylval.i = LE; RET(LE);
244 63a68686 2008-11-03 jas yylval.i = LT; RET(LT);
247 63a68686 2008-11-03 jas if (peek() == '=') {
248 63a68686 2008-11-03 jas input(); yylval.i = EQ; RET(EQ);
250 63a68686 2008-11-03 jas yylval.i = ASSIGN; RET(ASGNOP);
253 63a68686 2008-11-03 jas if (peek() == '=') {
254 63a68686 2008-11-03 jas input(); yylval.i = GE; RET(GE);
255 63a68686 2008-11-03 jas } else if (peek() == '>') {
256 63a68686 2008-11-03 jas input(); yylval.i = APPEND; RET(APPEND);
258 63a68686 2008-11-03 jas yylval.i = GT; RET(GT);
261 63a68686 2008-11-03 jas if (peek() == '+') {
262 63a68686 2008-11-03 jas input(); yylval.i = INCR; RET(INCR);
263 63a68686 2008-11-03 jas } else if (peek() == '=') {
264 63a68686 2008-11-03 jas input(); yylval.i = ADDEQ; RET(ASGNOP);
268 63a68686 2008-11-03 jas if (peek() == '-') {
269 63a68686 2008-11-03 jas input(); yylval.i = DECR; RET(DECR);
270 63a68686 2008-11-03 jas } else if (peek() == '=') {
271 63a68686 2008-11-03 jas input(); yylval.i = SUBEQ; RET(ASGNOP);
275 63a68686 2008-11-03 jas if (peek() == '=') { /* *= */
276 63a68686 2008-11-03 jas input(); yylval.i = MULTEQ; RET(ASGNOP);
277 63a68686 2008-11-03 jas } else if (peek() == '*') { /* ** or **= */
278 63a68686 2008-11-03 jas input(); /* eat 2nd * */
279 63a68686 2008-11-03 jas if (peek() == '=') {
280 63a68686 2008-11-03 jas input(); yylval.i = POWEQ; RET(ASGNOP);
289 63a68686 2008-11-03 jas if (peek() == '=') {
290 63a68686 2008-11-03 jas input(); yylval.i = MODEQ; RET(ASGNOP);
294 63a68686 2008-11-03 jas if (peek() == '=') {
295 63a68686 2008-11-03 jas input(); yylval.i = POWEQ; RET(ASGNOP);
300 63a68686 2008-11-03 jas /* BUG: awkward, if not wrong */
301 63a68686 2008-11-03 jas c = gettok(&buf, &bufsize);
302 63a68686 2008-11-03 jas if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
303 63a68686 2008-11-03 jas unputstr(buf);
304 63a68686 2008-11-03 jas RET(INDIRECT);
305 63a68686 2008-11-03 jas } else if (isalpha(c)) {
306 63a68686 2008-11-03 jas if (strcmp(buf, "NF") == 0) { /* very special */
307 63a68686 2008-11-03 jas unputstr("(NF)");
308 63a68686 2008-11-03 jas RET(INDIRECT);
310 63a68686 2008-11-03 jas yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
313 63a68686 2008-11-03 jas unputstr(buf);
314 63a68686 2008-11-03 jas RET(INDIRECT);
318 63a68686 2008-11-03 jas if (--bracecnt < 0)
319 63a68686 2008-11-03 jas SYNTAX( "extra }" );
323 63a68686 2008-11-03 jas if (--brackcnt < 0)
324 63a68686 2008-11-03 jas SYNTAX( "extra ]" );
327 63a68686 2008-11-03 jas if (--parencnt < 0)
328 63a68686 2008-11-03 jas SYNTAX( "extra )" );
341 63a68686 2008-11-03 jas return string(); /* BUG: should be like tran.c ? */
349 63a68686 2008-11-03 jas int string(void)
352 63a68686 2008-11-03 jas char *s, *bp;
353 63a68686 2008-11-03 jas static char *buf = 0;
354 63a68686 2008-11-03 jas static int bufsz = 500;
356 63a68686 2008-11-03 jas if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
357 63a68686 2008-11-03 jas FATAL("out of space for strings");
358 63a68686 2008-11-03 jas for (bp = buf; (c = input()) != '"'; ) {
359 63a68686 2008-11-03 jas if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, 0))
360 63a68686 2008-11-03 jas FATAL("out of space for string %.10s...", buf);
361 63a68686 2008-11-03 jas switch (c) {
365 63a68686 2008-11-03 jas SYNTAX( "non-terminated string %.10s...", buf );
369 63a68686 2008-11-03 jas c = input();
370 63a68686 2008-11-03 jas switch (c) {
371 63a68686 2008-11-03 jas case '"': *bp++ = '"'; break;
372 63a68686 2008-11-03 jas case 'n': *bp++ = '\n'; break;
373 63a68686 2008-11-03 jas case 't': *bp++ = '\t'; break;
374 63a68686 2008-11-03 jas case 'f': *bp++ = '\f'; break;
375 63a68686 2008-11-03 jas case 'r': *bp++ = '\r'; break;
376 63a68686 2008-11-03 jas case 'b': *bp++ = '\b'; break;
377 63a68686 2008-11-03 jas case 'v': *bp++ = '\v'; break;
378 63a68686 2008-11-03 jas case 'a': *bp++ = '\007'; break;
379 63a68686 2008-11-03 jas case '\\': *bp++ = '\\'; break;
381 63a68686 2008-11-03 jas case '0': case '1': case '2': /* octal: \d \dd \ddd */
382 63a68686 2008-11-03 jas case '3': case '4': case '5': case '6': case '7':
383 63a68686 2008-11-03 jas n = c - '0';
384 63a68686 2008-11-03 jas if ((c = peek()) >= '0' && c < '8') {
385 63a68686 2008-11-03 jas n = 8 * n + input() - '0';
386 63a68686 2008-11-03 jas if ((c = peek()) >= '0' && c < '8')
387 63a68686 2008-11-03 jas n = 8 * n + input() - '0';
392 63a68686 2008-11-03 jas case 'x': /* hex \x0-9a-fA-F + */
393 63a68686 2008-11-03 jas { char xbuf[100], *px;
394 63a68686 2008-11-03 jas for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
395 63a68686 2008-11-03 jas if (isdigit(c)
396 63a68686 2008-11-03 jas || (c >= 'a' && c <= 'f')
397 63a68686 2008-11-03 jas || (c >= 'A' && c <= 'F'))
404 63a68686 2008-11-03 jas sscanf(xbuf, "%x", &n);
420 63a68686 2008-11-03 jas s = tostring(buf);
421 63a68686 2008-11-03 jas *bp++ = ' '; *bp++ = 0;
422 63a68686 2008-11-03 jas yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
423 63a68686 2008-11-03 jas RET(STRING);
427 63a68686 2008-11-03 jas int binsearch(char *w, Keyword *kp, int n)
429 63a68686 2008-11-03 jas int cond, low, mid, high;
432 63a68686 2008-11-03 jas high = n - 1;
433 63a68686 2008-11-03 jas while (low <= high) {
434 63a68686 2008-11-03 jas mid = (low + high) / 2;
435 63a68686 2008-11-03 jas if ((cond = strcmp(w, kp[mid].word)) < 0)
436 63a68686 2008-11-03 jas high = mid - 1;
437 63a68686 2008-11-03 jas else if (cond > 0)
438 63a68686 2008-11-03 jas low = mid + 1;
445 63a68686 2008-11-03 jas int word(char *w)
447 63a68686 2008-11-03 jas Keyword *kp;
450 63a68686 2008-11-03 jas n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
451 63a68686 2008-11-03 jas kp = keywords + n;
452 63a68686 2008-11-03 jas if (n != -1) { /* found in table */
453 63a68686 2008-11-03 jas yylval.i = kp->sub;
454 63a68686 2008-11-03 jas switch (kp->type) { /* special handling */
455 63a68686 2008-11-03 jas case FSYSTEM:
457 63a68686 2008-11-03 jas SYNTAX( "system is unsafe" );
458 63a68686 2008-11-03 jas RET(kp->type);
461 63a68686 2008-11-03 jas SYNTAX( "illegal nested function" );
462 63a68686 2008-11-03 jas RET(kp->type);
463 63a68686 2008-11-03 jas case RETURN:
464 63a68686 2008-11-03 jas if (!infunc)
465 63a68686 2008-11-03 jas SYNTAX( "return not in function" );
466 63a68686 2008-11-03 jas RET(kp->type);
468 63a68686 2008-11-03 jas yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
471 63a68686 2008-11-03 jas RET(kp->type);
474 63a68686 2008-11-03 jas c = peek(); /* look for '(' */
475 63a68686 2008-11-03 jas if (c != '(' && infunc && (n=isarg(w)) >= 0) {
476 63a68686 2008-11-03 jas yylval.i = n;
479 63a68686 2008-11-03 jas yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
480 63a68686 2008-11-03 jas if (c == '(') {
488 63a68686 2008-11-03 jas void startreg(void) /* next call to yylex will return a regular expression */
493 63a68686 2008-11-03 jas int regexpr(void)
496 63a68686 2008-11-03 jas static char *buf = 0;
497 63a68686 2008-11-03 jas static int bufsz = 500;
500 63a68686 2008-11-03 jas if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
501 63a68686 2008-11-03 jas FATAL("out of space for rex expr");
503 63a68686 2008-11-03 jas for ( ; (c = input()) != '/' && c != 0; ) {
504 63a68686 2008-11-03 jas if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, 0))
505 63a68686 2008-11-03 jas FATAL("out of space for reg expr %.10s...", buf);
506 63a68686 2008-11-03 jas if (c == '\n') {
507 63a68686 2008-11-03 jas SYNTAX( "newline in regular expression %.10s...", buf );
508 63a68686 2008-11-03 jas unput('\n');
510 63a68686 2008-11-03 jas } else if (c == '\\') {
511 63a68686 2008-11-03 jas *bp++ = '\\';
512 63a68686 2008-11-03 jas *bp++ = input();
518 63a68686 2008-11-03 jas yylval.s = tostring(buf);
520 63a68686 2008-11-03 jas RET(REGEXPR);
523 63a68686 2008-11-03 jas /* low-level lexical stuff, sort of inherited from lex */
525 63a68686 2008-11-03 jas char ebuf[300];
526 63a68686 2008-11-03 jas char *ep = ebuf;
527 63a68686 2008-11-03 jas char yysbuf[100]; /* pushback buffer */
528 63a68686 2008-11-03 jas char *yysptr = yysbuf;
529 63a68686 2008-11-03 jas FILE *yyin = 0;
531 63a68686 2008-11-03 jas int input(void) /* get next lexical input character */
534 63a68686 2008-11-03 jas extern char *lexprog;
536 63a68686 2008-11-03 jas if (yysptr > yysbuf)
537 63a68686 2008-11-03 jas c = *--yysptr;
538 63a68686 2008-11-03 jas else if (lexprog != NULL) { /* awk '...' */
539 63a68686 2008-11-03 jas if ((c = *lexprog) != 0)
541 63a68686 2008-11-03 jas } else /* awk -f ... */
542 63a68686 2008-11-03 jas c = pgetc();
543 63a68686 2008-11-03 jas if (c == '\n')
545 63a68686 2008-11-03 jas else if (c == EOF)
547 63a68686 2008-11-03 jas if (ep >= ebuf + sizeof ebuf)
549 63a68686 2008-11-03 jas return *ep++ = c;
552 63a68686 2008-11-03 jas void unput(int c) /* put lexical character back on input */
554 63a68686 2008-11-03 jas if (c == '\n')
556 63a68686 2008-11-03 jas if (yysptr >= yysbuf + sizeof(yysbuf))
557 63a68686 2008-11-03 jas FATAL("pushed back too much: %.20s...", yysbuf);
558 63a68686 2008-11-03 jas *yysptr++ = c;
559 63a68686 2008-11-03 jas if (--ep < ebuf)
560 63a68686 2008-11-03 jas ep = ebuf + sizeof(ebuf) - 1;
563 63a68686 2008-11-03 jas void unputstr(char *s) /* put a string back on input */
567 63a68686 2008-11-03 jas for (i = strlen(s)-1; i >= 0; i--)
568 63a68686 2008-11-03 jas unput(s[i]);