Commit Diff


commit - 390a61189309451462c0a1dc56c68f71e334ad4b
commit + 3300cbe06a9567c66ee63f3866bcbcf3430e0205
blob - c7d3334c6fe6f05dfcdfa0484b5e093c2ce6b3bc
blob + ba2033027cec9d56439a93c36cfeb8883fcff9a7
--- .gitignore
+++ .gitignore
@@ -17,4 +17,5 @@ regress/*.pem
 regress/reg.conf
 regress/fill-file
 regress/iri_test
+regress/puny-test
 regress/*.o
blob - 40826852356ef81e32836b079170df4ad7d32731
blob + b0116b3dd35fb84848405da724ccf53abf703266
--- ChangeLog
+++ ChangeLog
@@ -1,3 +1,7 @@
+2021-01-26  Omar Polo  <op@omarpolo.com>
+
+	* puny.c (puny_decode): initial punycode support!
+
 2021-01-25  Omar Polo  <op@omarpolo.com>
 
 	* gmid.1: manpage improvements (example and usage)
blob - c600dd30c4e2002d4c2f2a982c20e010836095bd
blob + 576b49c0d54cc41c997702fdf5090ae538176162
--- Makefile
+++ Makefile
@@ -13,7 +13,7 @@ lex.yy.c: lex.l y.tab.c
 y.tab.c: parse.y
 	${YACC} -b y -d parse.y
 
-SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c
+SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c puny.c
 OBJS = ${SRCS:.c=.o} lex.yy.o y.tab.o ${COMPAT}
 
 gmid: ${OBJS}
blob - 7a973f546635637000b403818b62d48975ad295b
blob + 09a1d6fcfe20dc7f4b1feacd5e0027944c7b0e1d
--- gmid.c
+++ gmid.c
@@ -131,7 +131,14 @@ log_request(struct client *c, char *meta, size_t l)
 		/* serialize the IRI */
 		strlcpy(b, c->iri.schema, sizeof(b));
 		strlcat(b, "://", sizeof(b));
-		strlcat(b, c->iri.host, sizeof(b));
+
+		/* log the decoded host name, but if it was invalid
+		 * use the raw one. */
+		if (*c->domain != '\0')
+			strlcat(b, c->domain, sizeof(b));
+		else
+			strlcat(b, c->iri.host, sizeof(b));
+
 		strlcat(b, "/", sizeof(b));
 		strlcat(b, c->iri.path, sizeof(b)); /* TODO: sanitize UTF8 */
 		if (*c->iri.query != '\0') {	    /* TODO: sanitize UTF8 */
blob - 9774257709cf5f822ae47e8efa38494b2811c09c
blob + b4e6b8448d5c945cf71a3c0f3df01ee8b505e4a3
--- gmid.h
+++ gmid.h
@@ -54,6 +54,9 @@
 #define HOSTSLEN	64
 #define LOCLEN		32
 
+/* RFC 1034: a label is <= 63 octets, a whole name <= 253 chars; +1 for NUL */
+#define DOMAIN_NAME_LEN (253+1)
+
 #define LOGE(c, fmt, ...) logs(LOG_ERR,     c, fmt, __VA_ARGS__)
 #define LOGW(c, fmt, ...) logs(LOG_WARNING, c, fmt, __VA_ARGS__)
 #define LOGN(c, fmt, ...) logs(LOG_NOTICE,  c, fmt, __VA_ARGS__)
@@ -134,6 +137,7 @@ struct client {
 	struct tls	*ctx;
 	char		 req[GEMINI_URL_LEN];
 	struct iri	 iri;
+	char		 domain[DOMAIN_NAME_LEN];
 	int		 state, next;
 	int		 code;
 	const char	*meta;
@@ -236,9 +240,13 @@ void		 sandbox(void);
 
 /* utf8.c */
 int		 valid_multibyte_utf8(struct parser*);
+char		*utf8_nth(char*, size_t);
 
 /* iri.c */
 int		 parse_iri(char*, struct iri*, const char**);
 int		 trim_req_iri(char*);
 
+/* puny.c */
+int		 puny_decode(char*, char*, size_t);
+
 #endif
blob - 3c9c572fb8c337ede611f0ea7a7818e5e8b73d03
blob + 50001658fc10b01a3cedb200171a5c162b1fe18a
--- regress/Makefile
+++ regress/Makefile
@@ -2,11 +2,16 @@ include ../Makefile.local
 
 .PHONY: all clean runtime
 
-all: iri_test runtime
+all: puny-test testdata iri_test cert.pem
+	./puny-test
+	./runtime
 	./iri_test
 
+puny-test: puny-test.o ../puny.o ../utf8.o
+	${CC} puny-test.o ../puny.o ../utf8.o -o puny-test
+
 iri_test: iri_test.o ../iri.o ../utf8.o
-	${CC} iri_test.o ../iri.o ../utf8.o -o iri_test ${LDFLAGS}
+	${CC} iri_test.o ../iri.o ../utf8.o -o iri_test
 
 fill-file: fill-file.o
 	${CC} fill-file.o -o fill-file
@@ -38,5 +43,5 @@ testdata: fill-file
 	cp hello testdata/dir
 	cp testdata/index.gmi testdata/dir/foo.gmi
 
-runtime: testdata cert.pem
+runtime: testdata
 	./runtime
blob - /dev/null
blob + b25f82a9f9f6e410156922ee3f9e588bc1a6fa9e (mode 644)
--- /dev/null
+++ regress/puny-test.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "../gmid.h"
+
+struct suite {
+	const char	*src;	/* input host name; a copy is fed to puny_decode() */
+	const char	*res;	/* text the decoded buffer is compared against */
+} t[] = {
+	{"foo",			"foo"},
+	{"xn-invalid",		"xn-invalid"},
+	{"naïve",		"naïve"},
+	{"xn--8ca",		"è"},
+	{"xn--caff-8oa",	"caffè"},
+	{"xn--nave-6pa",	"naïve"},
+	{"xn--e-0mbbc",		"τeστ"},
+	{"xn--8ca67lbac",	"τèστ"},
+	{"xn--28j2a3ar1p",	"こんにちは"},
+	{"xn--hello--ur7iy09x",	"hello-世界"},
+	{"xn--hi--hi-rr7iy09x",	"hi-世界-hi"},
+	{"xn--caf-8la.foo.org",	"cafè.foo.org"},
+	/* expected output is a 3-byte UTF-8 sequence */
+	{"xn--j6h",		"♨"},
+	/* expected output is made of 4-byte UTF-8 sequences */
+	{"xn--x73l",		"𩸽"},
+	{"xn--x73laaa",		"𩸽𩸽𩸽𩸽"},
+	{NULL, NULL}		/* sentinel: main() stops at src == NULL */
+};
+
+/* Run every case in t through puny_decode(); return 0 only if all pass. */
+int
+main(int argc, char **argv)
+{
+	struct suite *i;
+	int failed = 0;
+	char *hostname;
+	char buf[64];		/* name len */
+
+	for (i = t; i->src != NULL; ++i) {
+		/* puny_decode() takes a non-const char *, so hand it a copy */
+		if ((hostname = strdup(i->src)) == NULL) {
+			perror("strdup");
+			return 1;	/* suite could not run: not a pass */
+		}
+
+		memset(buf, 0, sizeof(buf));
+		if (!puny_decode(hostname, buf, sizeof(buf))) {
+			printf("decode: failure with %s\n", i->src);
+			failed = 1;
+		} else if (strcmp(buf, i->res) != 0) {
+			printf("ERR: expected \"%s\", got \"%s\"\n",
+			    i->res, buf);
+			failed = 1;
+		} else
+			printf("OK:  %s => %s\n", i->src, buf);
+
+		/* reached on every path, so failing cases no longer leak */
+		free(hostname);
+	}
+
+	return failed;
+}
blob - a305da35b43b915582c63802783777bdd3ab94e9
blob + 1383974840e1981cd7812d1bcb55f61586468d89
--- server.c
+++ server.c
@@ -262,12 +262,10 @@ handle_handshake(struct pollfd *fds, struct client *c)
 	}
 
 	servname = tls_conn_servername(c->ctx);
+	puny_decode(servname, c->domain, sizeof(c->domain));
 
 	for (h = hosts; h->domain != NULL; ++h) {
-		if (!strcmp(h->domain, "*"))
-			break;
-
-		if (servname != NULL && !fnmatch(h->domain, servname, 0))
+		if (!fnmatch(h->domain, c->domain, 0))
 			break;
 	}
 
@@ -290,6 +288,7 @@ void
 handle_open_conn(struct pollfd *fds, struct client *c)
 {
 	const char *parse_err = "invalid request";
+	char decoded[DOMAIN_NAME_LEN];
 
 	bzero(c->req, sizeof(c->req));
 	bzero(&c->iri, sizeof(c->iri));
@@ -314,8 +313,11 @@ handle_open_conn(struct pollfd *fds, struct client *c)
 		return;
 	}
 
-	/* XXX: we should check that the SNI matches the requested host */
-	if (strcmp(c->iri.schema, "gemini") || c->iri.port_no != conf.port) {
+	puny_decode(c->iri.host, decoded, sizeof(decoded));
+
+	if (c->iri.port_no != conf.port
+	    || strcmp(c->iri.schema, "gemini")
+	    || strcmp(decoded, c->domain)) {
 		start_reply(fds, c, PROXY_REFUSED, "won't proxy request");
 		return;
 	}
blob - 8f530b0203310cdb0c880adf60ad6a610ae7823b
blob + 20985b4237e53d58b551f6eaaf283e20859f3813
--- utf8.c
+++ utf8.c
@@ -77,3 +77,20 @@ valid_multibyte_utf8(struct parser *p)
 	}
 	return 1;
 }
+
+/*
+ * Advance s until utf8_decode() has returned zero n times (presumably
+ * once per complete code point).  Returns that position, or NULL if s
+ * ends first or the decoder is not in UTF8_ACCEPT when the scan stops.
+ */
+char *
+utf8_nth(char *s, size_t n)
+{
+	uint32_t state = 0, cp = 0;
+	size_t seen;
+
+	for (seen = 0; seen < n && *s != '\0'; s++)
+		seen += !utf8_decode(&state, &cp, *s);
+	if (state != UTF8_ACCEPT || seen != n)
+		return NULL;
+	return s;
+}