Commit Diff


commit - 984c46a82e002089b3a4035ba34873ad9c75d973
commit + 74f0778b9ae93a700d8b0f759b05f24e69f54921
blob - d4b2817de405f6e7ad844320f087918583e7c904
blob + 76e321ff9f6d3d0eaa93e0a8df112c9c9c840c1d
--- ChangeLog
+++ ChangeLog
@@ -1,3 +1,10 @@
+2021-06-16  Omar Polo  <op@omarpolo.com>
+
+	* parse.y (yylex): drop the dependency on lex by implementing
+	yylex ourselves (the implementation is based on doas' parse.y).
+	This brings various benefits, like cleaner code, `\' to break
+	long lines, better handling of quotes, etc.
+
 2021-06-11  Omar Polo  <op@omarpolo.com>
 
 	* parse.y (servopt): add `param' keyword
blob - 5f91bd7d1cc61800f8d3f469a0d004975abecb3e
blob + be01f679a168d7cd0248aac1ed6ae7f61aa0ec0f
--- Makefile
+++ Makefile
@@ -7,15 +7,12 @@ Makefile.local: configure
 
 include Makefile.local
 
-lex.yy.c: lex.l y.tab.c
-	${LEX} lex.l
-
 y.tab.c: parse.y
-	${YACC} -b y -d parse.y
+	${YACC} -b y parse.y
 
 SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c puny.c \
 	utils.c log.c dirs.c fcgi.c
-OBJS = ${SRCS:.c=.o} lex.yy.o y.tab.o ${COMPAT}
+OBJS = ${SRCS:.c=.o} y.tab.o ${COMPAT}
 
 gmid: ${OBJS}
 	${CC} ${OBJS} -o gmid ${LDFLAGS}
blob - 8b2ebeb13d216b48786ec51aa90c85f3706e0eb3
blob + 67cd2752778671547b11006c8b91ee083e36a639
--- README.md
+++ README.md
@@ -92,8 +92,8 @@ server "example.com" {
 ## Building
 
 gmid depends on a POSIX libc, libevent2, OpenSSL/LibreSSL and libtls
-(provided either by LibreSSL or libretls).  At build time, flex and
-yacc (or GNU bison) are also needed.
+(provided either by LibreSSL or libretls).  At build time, yacc (or
+GNU bison) is also needed.
 
 The build is as simple as
 
blob - 84ed361b3cb5b392dfbacb61548b82e405948782
blob + d5f40358bb8edc70f0dfb81e48f37fbbc18a8a99
--- configure
+++ configure
@@ -40,7 +40,6 @@ CFLAGS="${CFLAGS} -g -W -Wall -Wextra -Wmissing-protot
 CFLAGS="${CFLAGS} -Wwrite-strings -Wno-unused-parameter"
 LDFLAGS="-ltls -levent"
 LD_IMSG=
-LEX=lex
 STATIC=
 YACC=yacc
 
@@ -73,17 +72,6 @@ if which pkg-config 2>/dev/null 1>&2; then
 			;;
 	esac
 fi
-
-# auto detect lex/flex
-which ${LEX} 2>/dev/null 1>&2 || {
-	echo "${LEX} not found: trying flex" 1>&2
-	echo "${LEX} not found: trying flex" 1>&3
-	LEX=flex
-	which ${LEX} 2>/dev/null 1>&2 || {
-		echo "${LEX} not found: giving up" 1>&2
-		echo "${LEX} not found: giving up" 1>&3
-	}
-}
 
 # auto detect yacc/bison
 which ${YACC} 2>/dev/null 1>&2 || {
@@ -112,7 +100,6 @@ for keyvals in "$@"; do
 		CFLAGS)  CFLAGS="$val" ;;
 		DESTDIR) DESTDIR="$val" ;;
 		LDFLAGS) LDFLAGS="$val" ;;
-		LEX)     LEX="$lex" ;;
 		PREFIX)  PREFIX="$val" ;;
 		YACC)    YACC="$val" ;;
 		*)
@@ -398,7 +385,6 @@ CC		= ${CC}
 CFLAGS		= ${CFLAGS}
 LDFLAGS		= ${LDFLAGS} ${LD_IMSG}
 YACC		= ${YACC}
-LEX		= ${LEX}
 STATIC		= ${STATIC}
 PREFIX		= ${PREFIX}
 BINDIR		= ${BINDIR}
blob - a013af503ead875842509f3a46fd4b5c20e07ea1
blob + 731a4fd130ecb00aadb03dcd3a394ca40801ef27
--- gmid.h
+++ gmid.h
@@ -309,12 +309,6 @@ void		 setup_tls(void);
 void		 init_config(void);
 void		 free_config(void);
 void		 drop_priv(void);
-
-/* provided by lex/yacc */
-extern FILE *yyin;
-extern int yylineno;
-extern int yyparse(void);
-extern int yylex(void);
 
 void		 yyerror(const char*, ...);
 int		 parse_portno(const char*);
blob - 661e8e8d488d78b891d79af2f58cb8265cd18d43
blob + 9182fd417e1e392c09e1aceb233e0806fcf9486e
--- parse.y
+++ parse.y
@@ -1,4 +1,3 @@
-/* -*- mode: fundamental; indent-tabs-mode: t; -*- */
 %{
 
 /*
@@ -17,13 +16,17 @@
  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
  */
 
+#include <ctype.h>
 #include <errno.h>
 #include <stdarg.h>
 #include <stdio.h>
+#include <stdlib.h>
 #include <string.h>
 
 #include "gmid.h"
 
+FILE *yyfp;
+
 /*
  * #define YYDEBUG 1
  * int yydebug = 1;
@@ -32,12 +35,14 @@
 struct vhost *host;
 struct location *loc;
 
-int goterror = 0;
+static int goterror;
+static int lineno = 1, colno;	/* lexer position; lines count from 1 */
 
 static struct vhost	*new_vhost(void);
 static struct location	*new_location(void);
 
 void		 yyerror(const char*, ...);
+static int	 yylex(void);
 int		 parse_portno(const char*);
 void		 parse_conf(const char*);
 char		*ensure_absolute_path(char*);
@@ -109,7 +114,7 @@ vhost		: TSERVER TSTRING {
 			if (strstr($2, "xn--") != NULL) {
 				warnx("%s:%d \"%s\" looks like punycode: "
 				    "you should use the decoded hostname.",
-				    config_path, yylineno, $2);
+				    config_path, lineno, $2);
 			}
 		} '{' servopts locations '}' {
 
@@ -278,10 +283,191 @@ yyerror(const char *msg, ...)
 	goterror = 1;
 
 	va_start(ap, msg);
-	fprintf(stderr, "%s:%d: ", config_path, yylineno);
+	fprintf(stderr, "%s:%d: ", config_path, lineno);
 	vfprintf(stderr, msg, ap);
 	fprintf(stderr, "\n");
 	va_end(ap);
+}
+
+static struct keyword {		/* reserved word -> yacc token mapping */
+	const char *word;	/* spelling as it appears in the config */
+	int token;		/* token yylex returns for that word */
+} keywords[] = {		/* alphabetical; yylex scans it linearly */
+	{"alias", TALIAS},
+	{"auto", TAUTO},
+	{"block", TBLOCK},
+	{"ca", TCA},
+	{"cert", TCERT},
+	{"cgi", TCGI},
+	{"chroot", TCHROOT},
+	{"client", TCLIENT},
+	{"default", TDEFAULT},
+	{"entrypoint", TENTRYPOINT},
+	{"env", TENV},
+	{"fastcgi", TFASTCGI},
+	{"index", TINDEX},
+	{"ipv6", TIPV6},
+	{"key", TKEY},
+	{"lang", TLANG},
+	{"location", TLOCATION},
+	{"log", TLOG},
+	{"mime", TMIME},
+	{"param", TPARAM},
+	{"port", TPORT},
+	{"prefork", TPREFORK},
+	{"protocols", TPROTOCOLS},
+	{"require", TREQUIRE},
+	{"return", TRETURN},
+	{"root", TROOT},
+	{"server", TSERVER},
+	{"spawn", TSPAWN},
+	{"strip", TSTRIP},
+	{"tcp", TTCP},
+	{"type", TTYPE},
+	{"user", TUSER},
+};
+
+/*
+ * Return the next token read from yyfp, or 0 at EOF or on a read error.
+ * `{' and `}' are returned as themselves; bare words are matched against
+ * keywords[], then numbers and on/off.  Everything else, including any
+ * quoted word, is a strdup'd TSTRING.  Taken and adapted from doas' parse.y.
+ */
+static int
+yylex(void)
+{
+	char buf[1024], *ebuf, *p;
+	int c, quotes = 0, escape = 0, qpos = -1, nonkw = 0;
+	size_t i;
+
+	p = buf;
+	ebuf = buf + sizeof(buf);
+
+repeat:
+	/* skip whitespace first */
+	for (c = getc(yyfp); isspace(c); c = getc(yyfp)) {
+		colno++;
+		if (c == '\n') {
+			lineno++;
+			colno = 0;
+		}
+	}
+
+	/* check for special one-character constructions */
+	switch (c) {
+	case '{':
+	case '}':
+		return c;
+	case '#':
+		/* skip comments; NUL is allowed; no continuation */
+		while ((c = getc(yyfp)) != '\n')
+			if (c == EOF)
+				goto eof;
+		colno = 0;
+		lineno++;
+		goto repeat;
+	case EOF:
+		goto eof;
+	}
+
+	/* parsing next word */
+	for (;; c = getc(yyfp), colno++) {
+		switch (c) {
+		case '\0':
+			yyerror("unallowed character NULL in column %d",
+			    colno+1);
+			escape = 0;
+			continue;
+		case '\\':
+			escape = !escape;
+			if (escape)
+				continue;
+			break;
+		case '\n':
+			if (quotes)
+				yyerror("unterminated quotes in column %d",
+				    colno+1);
+			if (escape) {
+				/* the newline is pushed back at eow and */
+				/* counted by the whitespace loop, not here */
+				nonkw = 1;
+				escape = 0;
+			}
+			goto eow;
+		case EOF:
+			if (escape)
+				yyerror("unterminated escape in column %d",
+				    colno);
+			if (quotes)
+				yyerror("unterminated quotes in column %d",
+				    qpos+1);
+			goto eow;
+		case '{':
+		case '}':
+		case '#':
+		case ' ':
+		case '\t':
+			if (!escape && !quotes)
+				goto eow;
+			break;
+		case '"':
+			if (!escape) {
+				quotes = !quotes;
+				if (quotes) {
+					nonkw = 1;
+					qpos = colno;
+				}
+				continue;
+			}
+		}
+		*p++ = c;
+		if (p == ebuf) {
+			yyerror("line too long");
+			p = buf;
+		}
+		escape = 0;
+	}
+
+eow:
+	*p = 0;
+	if (c != EOF)
+		ungetc(c, yyfp);
+	if (p == buf) {
+		/* an empty buffer has several causes: handle them here */
+		if (c == EOF)
+			goto eof;
+		if (qpos == -1) {	/* no quotes seen: not a "" token */
+			nonkw = 0;	/* next word may be a keyword */
+			goto repeat;
+		}
+	}
+	if (!nonkw) {
+		for (i = 0; i < sizeof(keywords) / sizeof(keywords[0]); ++i) {
+			if (!strcmp(buf, keywords[i].word))
+				return keywords[i].token;
+		}
+	}
+	c = (unsigned char)*buf;	/* isdigit() wants unsigned char */
+	if (!nonkw && (c == '-' || isdigit(c))) {
+		yylval.num = parse_portno(buf);
+		return TNUM;
+	}
+	if (!nonkw && !strcmp(buf, "on")) {
+		yylval.num = 1;
+		return TBOOL;
+	}
+	if (!nonkw && !strcmp(buf, "off")) {
+		yylval.num = 0;
+		return TBOOL;
+	}
+	if ((yylval.str = strdup(buf)) == NULL)
+		err(1, "%s", __func__);
+	return TSTRING;
+
+eof:
+	if (ferror(yyfp))
+		yyerror("input error reading config");
+	return 0;
 }
 
 int
@@ -300,10 +486,10 @@ void
 parse_conf(const char *path)
 {
 	config_path = path;
-	if ((yyin = fopen(path, "r")) == NULL)
+	if ((yyfp = fopen(path, "r")) == NULL)
 		err(1, "cannot open config: %s", path);
 	yyparse();
-	fclose(yyin);
+	fclose(yyfp);
 
 	if (goterror)
 		exit(1);