commit 74f0778b9ae93a700d8b0f759b05f24e69f54921 from: Omar Polo date: Wed Jun 16 14:43:16 2021 UTC drop the dependency on lex by implementing yylex by ourselves The actual implementation is based off doas' parse.y. This gave us various benefits, like cleaner code, \ to break long lines, better handling of quotes etc... commit - 984c46a82e002089b3a4035ba34873ad9c75d973 commit + 74f0778b9ae93a700d8b0f759b05f24e69f54921 blob - d4b2817de405f6e7ad844320f087918583e7c904 blob + 76e321ff9f6d3d0eaa93e0a8df112c9c9c840c1d --- ChangeLog +++ ChangeLog @@ -1,3 +1,10 @@ +2021-06-16 Omar Polo + + * parse.y (yylex): drop the dependency on lex by implementing + yylex by ourselves (the actual implementation is based off doas' + parse.y). This gave us various benefits, like cleaner code, \ to + break long lines, better handling of quotes etc... + 2021-06-11 Omar Polo * parse.y (servopt): add `param' keyword blob - 5f91bd7d1cc61800f8d3f469a0d004975abecb3e blob + be01f679a168d7cd0248aac1ed6ae7f61aa0ec0f --- Makefile +++ Makefile @@ -7,15 +7,12 @@ Makefile.local: configure include Makefile.local -lex.yy.c: lex.l y.tab.c - ${LEX} lex.l - y.tab.c: parse.y - ${YACC} -b y -d parse.y + ${YACC} -b y parse.y SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c puny.c \ utils.c log.c dirs.c fcgi.c -OBJS = ${SRCS:.c=.o} lex.yy.o y.tab.o ${COMPAT} +OBJS = ${SRCS:.c=.o} y.tab.o ${COMPAT} gmid: ${OBJS} ${CC} ${OBJS} -o gmid ${LDFLAGS} blob - 8b2ebeb13d216b48786ec51aa90c85f3706e0eb3 blob + 67cd2752778671547b11006c8b91ee083e36a639 --- README.md +++ README.md @@ -92,8 +92,8 @@ server "example.com" { ## Building gmid depends on a POSIX libc, libevent2, OpenSSL/LibreSSL and libtls -(provided either by LibreSSL or libretls). At build time, flex and -yacc (or GNU bison) are also needed. +(provided either by LibreSSL or libretls). At build time, yacc (or +GNU bison) is also needed. The build is as simple as blob - 84ed361b3cb5b392dfbacb61548b82e405948782 blob + d5f40358bb8edc70f0dfb81e48f37fbbc18a8a99 --- configure +++ configure @@ -40,7 +40,6 @@ CFLAGS="${CFLAGS} -g -W -Wall -Wextra -Wmissing-protot CFLAGS="${CFLAGS} -Wwrite-strings -Wno-unused-parameter" LDFLAGS="-ltls -levent" LD_IMSG= -LEX=lex STATIC= YACC=yacc @@ -73,17 +72,6 @@ if which pkg-config 2>/dev/null 1>&2; then ;; esac fi - -# auto detect lex/flex -which ${LEX} 2>/dev/null 1>&2 || { - echo "${LEX} not found: trying flex" 1>&2 - echo "${LEX} not found: trying flex" 1>&3 - LEX=flex - which ${LEX} 2>/dev/null 1>&2 || { - echo "${LEX} not found: giving up" 1>&2 - echo "${LEX} not found: giving up" 1>&3 - } -} # auto detect yacc/bison which ${YACC} 2>/dev/null 1>&2 || { @@ -112,7 +100,6 @@ for keyvals in "$@"; do CFLAGS) CFLAGS="$val" ;; DESTDIR) DESTDIR="$val" ;; LDFLAGS) LDFLAGS="$val" ;; - LEX) LEX="$lex" ;; PREFIX) PREFIX="$val" ;; YACC) YACC="$val" ;; *) @@ -398,7 +385,6 @@ CC = ${CC} CFLAGS = ${CFLAGS} LDFLAGS = ${LDFLAGS} ${LD_IMSG} YACC = ${YACC} -LEX = ${LEX} STATIC = ${STATIC} PREFIX = ${PREFIX} BINDIR = ${BINDIR} blob - a013af503ead875842509f3a46fd4b5c20e07ea1 blob + 731a4fd130ecb00aadb03dcd3a394ca40801ef27 --- gmid.h +++ gmid.h @@ -309,12 +309,6 @@ void setup_tls(void); void init_config(void); void free_config(void); void drop_priv(void); - -/* provided by lex/yacc */ -extern FILE *yyin; -extern int yylineno; -extern int yyparse(void); -extern int yylex(void); void yyerror(const char*, ...); int parse_portno(const char*); blob - 661e8e8d488d78b891d79af2f58cb8265cd18d43 blob + 9182fd417e1e392c09e1aceb233e0806fcf9486e --- parse.y +++ parse.y @@ -1,4 +1,3 @@ -/* -*- mode: fundamental; indent-tabs-mode: t; -*- */ %{ /* @@ -17,13 +16,17 @@ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ +#include #include #include #include +#include #include #include "gmid.h" +FILE *yyfp; + /* * #define YYDEBUG 1 * int yydebug = 1; @@ -32,12 +35,14 @@ struct vhost *host; struct location *loc; -int goterror = 0; +static int goterror; +static int lineno, colno; static struct vhost *new_vhost(void); static struct location *new_location(void); void yyerror(const char*, ...); +static int yylex(void); int parse_portno(const char*); void parse_conf(const char*); char *ensure_absolute_path(char*); @@ -109,7 +114,7 @@ vhost : TSERVER TSTRING { if (strstr($2, "xn--") != NULL) { warnx("%s:%d \"%s\" looks like punycode: " "you should use the decoded hostname.", - config_path, yylineno, $2); + config_path, lineno, $2); } } '{' servopts locations '}' { @@ -278,10 +283,191 @@ yyerror(const char *msg, ...) goterror = 1; va_start(ap, msg); - fprintf(stderr, "%s:%d: ", config_path, yylineno); + fprintf(stderr, "%s:%d: ", config_path, lineno); vfprintf(stderr, msg, ap); fprintf(stderr, "\n"); va_end(ap); +} + +static struct keyword { + const char *word; + int token; +} keywords[] = { + {"alias", TALIAS}, + {"auto", TAUTO}, + {"block", TBLOCK}, + {"ca", TCA}, + {"cert", TCERT}, + {"cgi", TCGI}, + {"chroot", TCHROOT}, + {"client", TCLIENT}, + {"default", TDEFAULT}, + {"entrypoint", TENTRYPOINT}, + {"env", TENV}, + {"fastcgi", TFASTCGI}, + {"index", TINDEX}, + {"ipv6", TIPV6}, + {"key", TKEY}, + {"lang", TLANG}, + {"location", TLOCATION}, + {"log", TLOG}, + {"mime", TMIME}, + {"param", TPARAM}, + {"port", TPORT}, + {"prefork", TPREFORK}, + {"protocols", TPROTOCOLS}, + {"require", TREQUIRE}, + {"return", TRETURN}, + {"root", TROOT}, + {"server", TSERVER}, + {"spawn", TSPAWN}, + {"strip", TSTRIP}, + {"tcp", TTCP}, + {"type", TTYPE}, + {"user", TUSER}, +}; + +/* + * Taken an adapted from doas' parse.y + */ +static int +yylex(void) +{ + char buf[1024], *ebuf, *p, *str; + int c, quotes = 0, escape = 0, qpos = -1, nonkw = 0; + size_t i; + + p = buf; + ebuf = buf + sizeof(buf); + +repeat: + /* skip whitespace first */ + for (c = getc(yyfp); isspace(c); c = getc(yyfp)) { + colno++; + if (c == '\n') { + lineno++; + colno = 0; + } + } + + /* check for special one-character constructions */ + switch (c) { + case '{': + case '}': + return c; + case '#': + /* skip comments; NUL is allowed; no continuation */ + while ((c = getc(yyfp)) != '\n') + if (c == EOF) + goto eof; + colno = 0; + lineno++; + goto repeat; + case EOF: + goto eof; + } + + /* parsing next word */ + for (;; c = getc(yyfp), colno++) { + switch (c) { + case '\0': + yyerror("unallowed character NULL in column %d", + colno+1); + escape = 0; + continue; + case '\\': + escape = !escape; + if (escape) + continue; + break; + case '\n': + if (quotes) + yyerror("unterminated quotes in column %d", + colno+1); + if (escape) { + nonkw = 1; + escape = 0; + colno = 0; + lineno++; + } + goto eow; + case EOF: + if (escape) + yyerror("unterminated escape in column %d", + colno); + if (quotes) + yyerror("unterminated quotes in column %d", + qpos+1); + goto eow; + case '{': + case '}': + case '#': + case ' ': + case '\t': + if (!escape && !quotes) + goto eow; + break; + case '"': + if (!escape) { + quotes = !quotes; + if (quotes) { + nonkw = 1; + qpos = colno; + } + continue; + } + } + *p++ = c; + if (p == ebuf) { + yyerror("line too long"); + p = buf; + } + escape = 0; + } + +eow: + *p = 0; + if (c != EOF) + ungetc(c, yyfp); + if (p == buf) { + /* + * There could be a number of reason for empty buffer, + * and we handle all of them here, to avoid cluttering + * the main loop. + */ + if (c == EOF) + goto eof; + else if (qpos == -1) /* accept, e.g., empty args: cmd foo args "" */ + goto repeat; + } + if (!nonkw) { + for (i = 0; i < sizeof(keywords) / sizeof(keywords[0]); ++i) { + if (!strcmp(buf, keywords[i].word)) + return keywords[i].token; + } + } + c = *buf; + if (!nonkw && (c == '-' || isdigit(c))) { + yylval.num = parse_portno(buf); + return TNUM; + } + if (!nonkw && !strcmp(buf, "on")) { + yylval.num = 1; + return TBOOL; + } + if (!nonkw && !strcmp(buf, "off")) { + yylval.num = 0; + return TBOOL; + } + if ((str = strdup(buf)) == NULL) + err(1, "%s", __func__); + yylval.str = str; + return TSTRING; + +eof: + if (ferror(yyfp)) + yyerror("input error reading config"); + return 0; } int @@ -300,10 +486,10 @@ void parse_conf(const char *path) { config_path = path; - if ((yyin = fopen(path, "r")) == NULL) + if ((yyfp = fopen(path, "r")) == NULL) err(1, "cannot open config: %s", path); yyparse(); - fclose(yyin); + fclose(yyfp); if (goterror) exit(1);