Commit Diff


commit - f890c8c54d88b529dc568eaf187b29a5acf6d22d
commit + 5c2e310edececfc9ef67946c1bf7df6bcdbe8931
blob - c77ea7cb406ea5434104a64db624cbfba0663e18
blob + ab5dd84e2405008d509a713cd04c70b415ba22c4
--- .gitignore
+++ .gitignore
@@ -13,3 +13,6 @@ config.h.old
 config.log
 config.log.old
 configure.local
+regress/testdata
+regress/*.pem
+regress/reg.conf
blob - 509eaace5d3915f0fbe990074fdd2516d1e758d1
blob + e04c1502119be3c88b3e03c80358647e4a966d3a
--- Makefile
+++ Makefile
@@ -1,4 +1,4 @@
-.PHONY: all static clean test install
+.PHONY: all static clean regress install
 
 all: Makefile.local gmid TAGS
 
@@ -29,12 +29,17 @@ TAGS: ${SRCS}
 	-etags ${SRCS} || true
 
 clean:
-	rm -f *.o lex.yy.c y.tab.c y.tab.h y.output gmid iri_test
-	rm -f Makefile.local
+	rm -f *.o lex.yy.c y.tab.c y.tab.h y.output gmid
+	${MAKE} -C regress clean
 
 iri_test: iri_test.o iri.o utf8.o
 	${CC} iri_test.o iri.o utf8.o -o iri_test ${LDFLAGS}
 
+# Build gmid, then run the regression suite that lives in regress/.
+# ${MAKE} (not a literal "make") re-invokes the make program in use.
+regress: gmid
+	${MAKE} -C regress all
+
 test: gmid iri_test
 	@echo "IRI tests"
 	@echo "=============================="
blob - /dev/null
blob + 67948a4f872e0c745d6eefcc4447fe9b18837035 (mode 644)
--- /dev/null
+++ regress/Makefile
@@ -0,0 +1,44 @@
+include ../Makefile.local
+
+.PHONY: all clean runtime
+
+# Build iri_test and run the runtime suite (both prerequisites), then
+# run the IRI parser tests.
+all: iri_test runtime
+	./iri_test
+
+iri_test: iri_test.o ../iri.o ../utf8.o
+	${CC} iri_test.o ../iri.o ../utf8.o -o iri_test ${LDFLAGS}
+
+# key.pem is written by the cert.pem recipe (-keyout).
+key.pem: cert.pem
+
+# XXX: key size is NOT GOOD.  This is only for testing.  Smaller keys
+# are quicker to generate.  DON'T DO THIS AT HOME.
+cert.pem:
+	printf ".\n.\n.\n.\n.\nlocalhost\n.\n" |	\
+		openssl req -x509 -newkey rsa:1024	\
+			-keyout key.pem			\
+			-out cert.pem			\
+			-days 365 -nodes
+	@echo
+
+# reg.conf is the configuration file generated by ./runtime.
+clean:
+	rm -f *.o iri_test cert.pem key.pem reg.conf
+	rm -rf testdata
+
+# Files served during the runtime tests, with reference checksums.
+testdata:
+	mkdir testdata
+	./genbigfile testdata/bigfile
+	./sha testdata/bigfile testdata/bigfile.sha
+	printf "# hello world\n" > testdata/index.gmi
+	./sha testdata/index.gmi testdata/index.gmi.sha
+	cp hello slow err testdata/
+	mkdir testdata/dir
+	cp testdata/index.gmi testdata/dir/foo.gmi
+
+# Needs the fixture tree and the self-signed certificate.
+runtime: testdata cert.pem
+	./runtime
blob - /dev/null
blob + 2bb8d868bd0c2cbbed7fef880e0e58bdabb94a79 (mode 755)
--- /dev/null
+++ regress/err
@@ -0,0 +1,3 @@
+#!/bin/sh
+# Exit with a failure status without printing anything.
+exit 1
blob - /dev/null
blob + a4183f042872c0872885f867df5ac4551ceeb2a8 (mode 755)
--- /dev/null
+++ regress/genbigfile
@@ -0,0 +1,40 @@
+#!/bin/sh
+
+# USAGE: ./genbigfile <filename>
+# Writes 1024 * 1024 lines, each holding a single "a", to <filename>.
+
+set -e
+
+file="$1"
+
+if [ -z "$file" ]; then
+	echo "USAGE: $0 <filename>" >&2
+	exit 1
+fi
+
+# Pick the counting tool once, up front.  Doing this inside dotimes
+# would not work: dotimes runs in a command substitution, so an exit
+# there only leaves the subshell and the error text printed on stdout
+# would be used as the loop's word list.
+if command -v jot >/dev/null 2>&1; then
+	counter=jot
+elif command -v seq >/dev/null 2>&1; then
+	counter=seq
+else
+	echo "no jot/seq binary found" >&2
+	exit 1
+fi
+
+# usage: dotimes <n>
+# print the integers from 1 to n, one per line
+dotimes() {
+	"$counter" "$@"
+}
+
+# A single redirection truncates the file and avoids reopening it for
+# each of the lines written.
+for i in `dotimes 1024`; do
+	for j in `dotimes 1024`; do
+		echo "a"
+	done
+done > "$file"
blob - /dev/null
blob + e421dcd4784863a46a96fa49725a430cc67c2122 (mode 755)
--- /dev/null
+++ regress/gg.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+# GeminiGet, aka gg
+# USAGE: ./gg path [port]
+#
+# Requests gemini://localhost:<port><path> (port defaults to 1965) and
+# prints the response, header line first, on stdout.  The server
+# certificate is not verified.
+
+import os
+import socket
+import ssl
+import urllib.parse
+import sys
+
+hostname = 'localhost'
+path = sys.argv[1]
+
+port = 1965
+if len(sys.argv) > 2:
+    port = int(sys.argv[2])
+
+# Calling ssl.SSLContext() without a protocol is deprecated, so ask for
+# a client context.  check_hostname must be turned off before
+# verify_mode can be set to CERT_NONE.
+context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
+context.check_hostname = False
+context.verify_mode = ssl.CERT_NONE
+
+# The with-blocks close the connection on every path.
+with socket.create_connection((hostname, port)) as sock:
+    with context.wrap_socket(sock, server_hostname = hostname) as s:
+        s.sendall(("gemini://" + hostname + ":" + str(port) + path + "\r\n").encode('UTF-8'))
+
+        try:
+            fp = s.makefile("rb")
+            for line in fp.read().splitlines():
+                print(line.decode('UTF-8'))
+        except (OSError, UnicodeDecodeError):
+            # Best effort: stop quietly on I/O or TLS errors (including
+            # a closed stdout pipe) and on undecodable data.
+            pass
blob - /dev/null
blob + d70b64ff54d9d1e12a05be4234c08773b15bf913 (mode 755)
--- /dev/null
+++ regress/hello
@@ -0,0 +1,4 @@
+#!/bin/sh
+# Print a "20 text/gemini" header line followed by a one-line body.
+printf "20 text/gemini\r\n"
+echo "# hello world"
blob - /dev/null
blob + 47105308723788f30ac53717906551be5e96de73 (mode 644)
--- /dev/null
+++ regress/iri_test.c
@@ -0,0 +1,251 @@
+/*
+ * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "../gmid.h"
+/* Run one case; on failure print descr with its file:line and exit(1). */
+#define TEST(iri, fail, exp, descr)				\
+	if (!run_test(iri, fail, exp)) {			\
+		fprintf(stderr, "%s:%d: error: %s\n",		\
+		    __FILE__, __LINE__, descr);			\
+		exit(1);					\
+	}
+/* Compound literal struct iri from six strings; the 4th member is set to 0. */
+#define IRI(schema, host, port, path, query, frag)		\
+	((struct iri){(char*)schema, (char*)host, (char*)port,	\
+		 0, (char*)path, (char*)query,			\
+		 (char*)frag})
+/* Compare one member; a missing (NULL) or different string fails the caller. */
+#define DIFF(wanted, got, field)					\
+	if (wanted->field == NULL || got->field == NULL ||		\
+	    strcmp(wanted->field, got->field)) {			\
+		fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n",	\
+		    got->field != NULL ? got->field : "(null)",		\
+		    wanted->field != NULL ? wanted->field : "(null)");	\
+		return 0;						\
+	}
+
+enum { PASS = 0, FAIL = 1 };	/* values for run_test's should_fail */
+
+int	diff_iri(struct iri*, struct iri*);
+int	run_test(const char*, int, struct iri);
+/* Return 1 when all compared members match, 0 after reporting a mismatch. */
+int
+diff_iri(struct iri *wanted, struct iri *got)
+{
+	DIFF(wanted, got, schema);
+	DIFF(wanted, got, host);
+	DIFF(wanted, got, port);
+	DIFF(wanted, got, path);
+	DIFF(wanted, got, query);
+	DIFF(wanted, got, fragment);
+	return 1;
+}
+/*
+ * Parse iri; return 1 if it failed as required by should_fail or
+ * parsed into expected, 0 otherwise.
+ */
+int
+run_test(const char *iri, int should_fail, struct iri expected)
+{
+	int failed, ok = 1;
+	char *iri_copy;
+	struct iri parsed;
+	const char *error = NULL;	/* in case parse_iri leaves it unset */
+
+	if ((iri_copy = strdup(iri)) == NULL)
+		err(1, "strdup");
+
+	fprintf(stderr, "=> %s\n", iri);
+	failed = !parse_iri(iri_copy, &parsed, &error);
+	if (failed && should_fail)
+		goto done;
+	if (error != NULL)
+		fprintf(stderr, "> %s\n", error);
+
+	ok = !failed && !should_fail;
+	if (ok)
+		ok = diff_iri(&expected, &parsed);
+done:
+	free(iri_copy);
+	return ok;
+}
+/* Run every parse_iri case; the first failing TEST exits with status 1. */
+int
+main(void)
+{
+	struct iri empty = IRI("", "", "", "", "", "");	/* for FAIL cases */
+
+	TEST("http://omarpolo.com",
+	    PASS,
+	    IRI("http", "omarpolo.com", "", "", "", ""),
+	    "can parse iri with empty path");
+
+	/* schema */
+	TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
+	TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
+	TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
+	TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
+	TEST("GEMINI://omarpolo.com",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "Schemas are case insensitive.");
+
+	/* authority */
+	TEST("gemini://omarpolo.com",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "can parse authority with empty path");
+	TEST("gemini://omarpolo.com/",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "can parse authority with empty path (alt)");
+	TEST("gemini://omarpolo.com:1965",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "1965", "", "", ""),
+	    "can parse with port and empty path");
+	TEST("gemini://omarpolo.com:1965/",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "1965", "", "", ""),
+	    "can parse with port and empty path");
+	TEST("gemini://omarpolo.com:196s",
+	    FAIL,
+	    empty,
+	    "FAIL with invalid port number");
+	TEST("gemini://OmArPoLo.CoM",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "host is case-insensitive");
+
+	/* path */
+	TEST("gemini://omarpolo.com/foo/bar/baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse simple paths");
+	TEST("gemini://omarpolo.com/foo//bar///baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with multiple slashes");
+	TEST("gemini://omarpolo.com/foo/./bar/./././baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with . elements");
+	TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with .. elements");
+	TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with multiple .. elements");
+	TEST("gemini://omarpolo.com/foo/..",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse paths with a trailing ..");
+	TEST("gemini://omarpolo.com/foo/../",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse paths with a trailing ..");
+	TEST("gemini://omarpolo.com/foo/../..",
+	    FAIL,
+	    empty,
+	    "reject paths that would escape the root");
+	TEST("gemini://omarpolo.com/foo/../../",
+	    FAIL,
+	    empty,
+	    "reject paths that would escape the root");
+	TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse path with lots of cleaning available");
+	TEST("gemini://omarpolo.com//foo",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo", "", ""),
+	    "Trim initial slashes");
+	TEST("gemini://omarpolo.com/////foo",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo", "", ""),
+	    "Trim initial slashes (pt. 2)");
+
+	/* query */
+	TEST("foo://example.com/foo/?gne",
+	    PASS,
+	    IRI("foo", "example.com", "", "foo/", "gne", ""),
+	    "parse query strings");
+	TEST("foo://example.com/foo/?gne&foo",
+	    PASS,
+	    IRI("foo", "example.com", "", "foo/", "gne&foo", ""),
+	    "parse query strings");
+	TEST("foo://example.com/foo/?gne%2F",
+	    PASS,
+	    IRI("foo", "example.com", "", "foo/", "gne/", ""),
+	    "parse query strings");
+
+	/* fragment */
+	TEST("foo://bar.co/#foo",
+	    PASS,
+	    IRI("foo", "bar.co", "", "", "", "foo"),
+	    "can recognize fragments");
+
+	/* percent encoding */
+	TEST("foo://bar.com/caf%C3%A8.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
+	    "can decode");
+	TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
+	    "can decode");
+	TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
+	    "can decode");
+	TEST("foo://bar.com/foo%2F..%2F..",
+	    FAIL,
+	    empty,
+	    "conversion and checking are done in the correct order");
+	TEST("foo://bar.com/foo%00?baz",
+	    FAIL,
+	    empty,
+	    "rejects %00");
+
+	/* IRI */
+	TEST("foo://bar.com/cafè.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
+	    "decode IRI (with a 2-byte utf8 seq)");
+	TEST("foo://bar.com/世界.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "世界.gmi", "", ""),
+	    "decode IRI (with 3-byte utf8 seqs)");
+	TEST("foo://bar.com/😼.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "😼.gmi", "", ""),
+	    "decode IRI (with a 4-byte utf8 seq)");
+	TEST("foo://bar.com/😼/𤭢.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "😼/𤭢.gmi", "", ""),
+	    "decode IRI (with two 4-byte utf8 seqs)");
+	TEST("foo://bar.com/世界/\xC0\x80",
+	    FAIL,
+	    empty,
+	    "reject invalid sequence (overlong NUL)");
+
+	return 0;
+}
blob - /dev/null
blob + e9f193cc3a9f16d324e09900609c47dc57b74e3d (mode 755)
--- /dev/null
+++ regress/runtime
@@ -0,0 +1,164 @@
+#!/bin/sh
+
+set -e
+
+# usage: config <global config> <stuff for localhost>
+# generates a configuration file reg.conf
+config() {
+	cat <<EOF > reg.conf
+daemon off
+ipv6 off
+port 10965
+$1
+server "localhost" {
+	cert "cert.pem"
+	key  "key.pem"
+	root "testdata"
+	$2
+}
+EOF
+}
+
+# check reg.conf with "gmid -n"; under set -e a failure ends the script
+checkconf() {
+	./../gmid -n -c reg.conf
+}
+
+# usage: get <path>
+# return the body of the request on stdout
+get() {
+	(./gg.py "$1" 10965 | sed 1d) || true
+}
+
+# usage: head <path>
+# return the meta response line on stdout
+head() {
+	(./gg.py "$1" 10965 | sed 1q) || true
+}
+
+# start gmid on reg.conf in the background; $pid is set to the pid
+# of the background job
+run() {
+	# filter out logs for GET requests
+	(./../gmid -c reg.conf 2>&1 | grep -v GET) >&2 &
+	pid=$!
+}
+
+# usage: check [exit-message]
+# check if gmid is still running
+check() {
+	if ! ps "$pid" >/dev/null; then
+		echo "${1:-gmid crashed?}"
+		exit 1
+	fi
+}
+
+# quit gmid
+quit() {
+	pkill gmid || true
+	wait || true
+}
+
+# usage: eq a b errmsg
+# if a and b aren't equal strings, exit with errmsg
+eq() {
+	if ! [ "$1" = "$2" ]; then
+		echo "$3: \"$1\" not equal \"$2\""
+		exit 1
+	fi
+}
+
+# remove the files left by the bigfile test and stop gmid
+onexit() {
+	rm -f bigfile bigfile.sha
+	quit
+}
+
+# tests
+
+trap 'onexit' INT TERM EXIT
+
+config "" ""
+checkconf
+run
+
+eq "$(head /)"		"20 text/gemini"	"Unexpected head for /"
+eq "$(get /)"		"# hello world"		"Unexpected body for /"
+echo OK GET /
+
+eq "$(head /foo)"	"51 not found"		"Unexpected head /foo"
+eq "$(get /foo)"	""			"Unexpected body /foo"
+echo OK GET /foo
+
+# should redirect if asked for a directory but without the trailing /
+eq "$(head /dir)"	"30 /dir/"		"Unexpected redirect for /dir"
+eq "$(get  /dir)"	""			"Unexpected body for redirect"
+echo OK GET /dir
+
+# 51 for a directory without index.gmi
+eq "$(head /dir/)"	"51 not found"		"Unexpected head for /dir/"
+eq "$(get  /dir/)"	""			"Unexpected body for error"
+echo OK GET /dir/
+
+eq "$(head /dir/foo.gmi)" "20 text/gemini"	"Unexpected head for /dir/foo.gmi"
+eq "$(get  /dir/foo.gmi)" "# hello world"	"Unexpected body for /dir/foo.gmi"
+echo OK GET /dir/foo.gmi
+
+# try a big file
+eq "$(head /bigfile)"	"20 application/octet-stream" "Unexpected head for /bigfile"
+get /bigfile > bigfile
+./sha bigfile bigfile.sha
+eq "$(cat bigfile.sha)"	"$(cat testdata/bigfile.sha)" "Unexpected sha for /bigfile"
+echo OK GET /bigfile
+
+# shouldn't be executing cgi scripts
+eq "$(head /hello)"	"20 application/octet-stream" "Unexpected head for /hello"
+echo OK GET /hello
+
+check "should be running"
+quit
+
+# try with custom mime
+config 'mime "text/x-funny-text" "gmi"' 'default type "application/x-trash"'
+checkconf
+run
+
+eq "$(head /)"		"20 text/x-funny-text"		"Unexpected head for /"
+echo OK GET / with custom mime
+
+eq "$(head /hello)"	"20 application/x-trash"	"Unexpected head for /hello"
+echo OK GET /hello with custom mime
+
+check "should be running"
+quit
+
+# try with custom lang
+config '' 'lang "it"'
+checkconf
+run
+
+eq "$(head /)"		"20 text/gemini; lang=it"	"Unexpected head for /"
+echo OK GET / with custom lang
+
+check "should be running"
+quit
+
+# finally try with CGI scripts
+config '' 'cgi ""'
+checkconf
+run
+
+eq "$(head /hello)"	"20 text/gemini"		"Unexpected head for /hello"
+eq "$(get  /hello)"	"# hello world"			"Unexpected body for /hello"
+echo OK GET /hello with cgi
+
+eq "$(head /slow)"	"20 text/gemini"		"Unexpected head for /slow"
+eq "$(get  /slow)"	"# hello world"			"Unexpected body for /slow"
+echo OK GET /slow with cgi
+
+eq "$(head /err)"	""		"Unexpected head for /err"
+eq "$(get  /err)"	""		"Unexpected body for /err"
+echo OK GET /err with cgi
+
+check "should be running"
+quit
blob - /dev/null
blob + 12cf5823bc33d1fcb95058db0bf884dae1483dbd (mode 755)
--- /dev/null
+++ regress/sha
@@ -0,0 +1,20 @@
+#!/bin/sh
+
+# USAGE: ./sha in out
+# writes the sha256 of in to file out
+
+if command -v sha256 >/dev/null 2>&1; then
+	exec sha256 < "$1" > "$2"
+fi
+
+if command -v sha256sum >/dev/null 2>&1; then
+	# No exec in a pipeline here: it would only replace a subshell and
+	# the script would fall through to the error below.  Capture the
+	# output instead, so a sha256sum failure is reported too.
+	sum=$(sha256sum < "$1") || exit 1
+	printf '%s\n' "${sum%% *}" > "$2"
+	exit $?
+fi
+
+echo "No sha binary found" >&2
+exit 1
blob - /dev/null
blob + 2ceb52cae18f8fee55352e24ccdd95454cd1f87e (mode 755)
--- /dev/null
+++ regress/slow
@@ -0,0 +1,6 @@
+#!/bin/sh
+# Print the header in two pieces with a one second pause, then the body.
+printf "20 "
+sleep 1
+printf "text/gemini\r\n"
+echo "# hello world"