Commit Diff
Diff:
390a61189309451462c0a1dc56c68f71e334ad4b
3300cbe06a9567c66ee63f3866bcbcf3430e0205
Commit:
3300cbe06a9567c66ee63f3866bcbcf3430e0205
Tree:
a7686f8e774573b55ebbe18373a27eb60f44baa4
Author:
Omar Polo <op@omarpolo.com>
Date:
Wed Jan 27 10:47:49 2021 UTC
Message:
initial punycode support
commit - 390a61189309451462c0a1dc56c68f71e334ad4b
commit + 3300cbe06a9567c66ee63f3866bcbcf3430e0205
blob - c7d3334c6fe6f05dfcdfa0484b5e093c2ce6b3bc
blob + ba2033027cec9d56439a93c36cfeb8883fcff9a7
--- .gitignore
+++ .gitignore
@@ -17,4 +17,5 @@ regress/*.o
regress/reg.conf
regress/fill-file
regress/iri_test
+regress/puny-test
regress/*.o
blob - 40826852356ef81e32836b079170df4ad7d32731
blob + b0116b3dd35fb84848405da724ccf53abf703266
--- ChangeLog
+++ ChangeLog
@@ -1,3 +1,7 @@
+2021-01-26 Omar Polo <op@omarpolo.com>
+
+ * puny.c (puny_decode): initial punycode support!
+
2021-01-25 Omar Polo <op@omarpolo.com>
* gmid.1: manpage improvements (example and usage)
blob - c600dd30c4e2002d4c2f2a982c20e010836095bd
blob + 576b49c0d54cc41c997702fdf5090ae538176162
--- Makefile
+++ Makefile
@@ -13,7 +13,7 @@ SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mim
y.tab.c: parse.y
${YACC} -b y -d parse.y
-SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c
+SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c puny.c
OBJS = ${SRCS:.c=.o} lex.yy.o y.tab.o ${COMPAT}
gmid: ${OBJS}
blob - 7a973f546635637000b403818b62d48975ad295b
blob + 09a1d6fcfe20dc7f4b1feacd5e0027944c7b0e1d
--- gmid.c
+++ gmid.c
@@ -131,7 +131,14 @@ log_request(struct client *c, char *meta, size_t l)
/* serialize the IRI */
strlcpy(b, c->iri.schema, sizeof(b));
strlcat(b, "://", sizeof(b));
- strlcat(b, c->iri.host, sizeof(b));
+
+ /* log the decoded host name, but if it was invalid
+ * use the raw one. */
+ if (*c->domain != '\0')
+ strlcat(b, c->domain, sizeof(b));
+ else
+ strlcat(b, c->iri.host, sizeof(b));
+
strlcat(b, "/", sizeof(b));
strlcat(b, c->iri.path, sizeof(b)); /* TODO: sanitize UTF8 */
if (*c->iri.query != '\0') { /* TODO: sanitize UTF8 */
blob - 9774257709cf5f822ae47e8efa38494b2811c09c
blob + b4e6b8448d5c945cf71a3c0f3df01ee8b505e4a3
--- gmid.h
+++ gmid.h
@@ -54,6 +54,9 @@
#define HOSTSLEN 64
#define LOCLEN 32
+/* RFC1034 limits each label to 63 octets (a full name may be up to 255). 63+1 for the NUL-terminator */
+#define DOMAIN_NAME_LEN (63+1)
+
#define LOGE(c, fmt, ...) logs(LOG_ERR, c, fmt, __VA_ARGS__)
#define LOGW(c, fmt, ...) logs(LOG_WARNING, c, fmt, __VA_ARGS__)
#define LOGN(c, fmt, ...) logs(LOG_NOTICE, c, fmt, __VA_ARGS__)
@@ -134,6 +137,7 @@ struct client {
struct tls *ctx;
char req[GEMINI_URL_LEN];
struct iri iri;
+ char domain[DOMAIN_NAME_LEN];
int state, next;
int code;
const char *meta;
@@ -236,9 +240,13 @@ int valid_multibyte_utf8(struct parser*);
/* utf8.c */
int valid_multibyte_utf8(struct parser*);
+char *utf8_nth(char*, size_t);
/* iri.c */
int parse_iri(char*, struct iri*, const char**);
int trim_req_iri(char*);
+/* puny.c */
+int puny_decode(char*, char*, size_t);
+
#endif
blob - 3c9c572fb8c337ede611f0ea7a7818e5e8b73d03
blob + 50001658fc10b01a3cedb200171a5c162b1fe18a
--- regress/Makefile
+++ regress/Makefile
@@ -2,11 +2,16 @@ all: iri_test runtime
.PHONY: all clean runtime
-all: iri_test runtime
+all: puny-test testdata iri_test cert.pem
+ ./puny-test
+ ./runtime
./iri_test
+puny-test: puny-test.o ../puny.o ../utf8.o
+ ${CC} puny-test.o ../puny.o ../utf8.o -o puny-test
+
iri_test: iri_test.o ../iri.o ../utf8.o
- ${CC} iri_test.o ../iri.o ../utf8.o -o iri_test ${LDFLAGS}
+ ${CC} iri_test.o ../iri.o ../utf8.o -o iri_test
fill-file: fill-file.o
${CC} fill-file.o -o fill-file
@@ -38,5 +43,5 @@ runtime: testdata cert.pem
cp hello testdata/dir
cp testdata/index.gmi testdata/dir/foo.gmi
-runtime: testdata cert.pem
+runtime: testdata
./runtime
blob - /dev/null
blob + b25f82a9f9f6e410156922ee3f9e588bc1a6fa9e (mode 644)
--- /dev/null
+++ regress/puny-test.c
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "../gmid.h"
+
+struct suite { /* one test vector for puny_decode() */
+ const char *src; /* host name copied and handed to puny_decode() */
+ const char *res; /* string the decoded output is compared against */
+} t[] = { /* main() walks this list until it reaches the entry whose src is NULL */
+ {"foo", "foo"},
+ {"xn-invalid", "xn-invalid"},
+ {"naïve", "naïve"},
+ {"xn--8ca", "è"},
+ {"xn--caff-8oa", "caffè"},
+ {"xn--nave-6pa", "naïve"},
+ {"xn--e-0mbbc", "τeστ"},
+ {"xn--8ca67lbac", "τèστ"},
+ {"xn--28j2a3ar1p", "こんにちは"},
+ {"xn--hello--ur7iy09x", "hello-世界"},
+ {"xn--hi--hi-rr7iy09x", "hi-世界-hi"},
+ {"xn--caf-8la.foo.org", "cafè.foo.org"},
+ /* expected output is a single 3-byte UTF-8 sequence */
+ {"xn--j6h", "♨"},
+ /* expected output is made of 4-byte UTF-8 sequences */
+ {"xn--x73l", "𩸽"},
+ {"xn--x73laaa", "𩸽𩸽𩸽𩸽"},
+ {NULL, NULL} /* end-of-list sentinel */
+};
+
+/*
+ * Run every test vector in t[] through puny_decode() and compare the
+ * output with the expected string, printing one line per vector.
+ *
+ * Returns 0 when all vectors pass, 1 when any of them fails or when
+ * the test could not be run because strdup() failed.
+ */
+int
+main(int argc, char **argv)
+{
+	struct suite *i;
+	int failed;
+	char *hostname;
+	char buf[64]; /* name len */
+
+	failed = 0;
+	for (i = t; i->src != NULL; ++i) {
+		/*
+		 * puny_decode() takes a non-const char *, so hand it a
+		 * private copy instead of the string literal.
+		 */
+		if ((hostname = strdup(i->src)) == NULL) {
+			/* an unrun test must not be reported as a success */
+			perror("strdup");
+			return 1;
+		}
+
+		memset(buf, 0, sizeof(buf));
+		if (!puny_decode(hostname, buf, sizeof(buf))) {
+			printf("decode: failure with %s\n", i->src);
+			failed = 1;
+		} else if (strcmp(buf, i->res) != 0) {
+			printf("ERR: expected \"%s\", got \"%s\"\n",
+			    i->res, buf);
+			failed = 1;
+		} else
+			printf("OK: %s => %s\n", i->src, buf);
+
+		/* released on every path, the failing ones included */
+		free(hostname);
+	}
+
+	return failed;
+}
blob - a305da35b43b915582c63802783777bdd3ab94e9
blob + 1383974840e1981cd7812d1bcb55f61586468d89
--- server.c
+++ server.c
@@ -262,12 +262,10 @@ handle_handshake(struct pollfd *fds, struct client *c)
}
servname = tls_conn_servername(c->ctx);
+ puny_decode(servname, c->domain, sizeof(c->domain));
for (h = hosts; h->domain != NULL; ++h) {
- if (!strcmp(h->domain, "*"))
- break;
-
- if (servname != NULL && !fnmatch(h->domain, servname, 0))
+ if (!fnmatch(h->domain, c->domain, 0))
break;
}
@@ -290,6 +288,7 @@ handle_open_conn(struct pollfd *fds, struct client *c)
handle_open_conn(struct pollfd *fds, struct client *c)
{
const char *parse_err = "invalid request";
+ char decoded[DOMAIN_NAME_LEN];
bzero(c->req, sizeof(c->req));
bzero(&c->iri, sizeof(c->iri));
@@ -314,8 +313,11 @@ handle_open_conn(struct pollfd *fds, struct client *c)
return;
}
- /* XXX: we should check that the SNI matches the requested host */
- if (strcmp(c->iri.schema, "gemini") || c->iri.port_no != conf.port) {
+ puny_decode(c->iri.host, decoded, sizeof(decoded));
+
+ if (c->iri.port_no != conf.port
+ || strcmp(c->iri.schema, "gemini")
+ || strcmp(decoded, c->domain)) {
start_reply(fds, c, PROXY_REFUSED, "won't proxy request");
return;
}
blob - 8f530b0203310cdb0c880adf60ad6a610ae7823b
blob + 20985b4237e53d58b551f6eaaf283e20859f3813
--- utf8.c
+++ utf8.c
@@ -77,3 +77,20 @@ valid_multibyte_utf8(struct parser *p)
}
return 1;
}
+
+/*
+ * Feed s one byte at a time to utf8_decode() until n code points have
+ * been counted, then return a pointer to the byte that follows them.
+ *
+ * Returns NULL when the string ends before n code points were counted
+ * or when the decoder is not left in the UTF8_ACCEPT state.
+ */
+char *
+utf8_nth(char *s, size_t n)
+{
+	uint32_t codepoint = 0, st = 0;
+	size_t seen = 0;
+
+	while (*s != '\0' && seen < n) {
+		/* a zero return is counted as one finished code point */
+		if (utf8_decode(&st, &codepoint, *s) == 0)
+			seen++;
+		s++;
+	}
+
+	if (st != UTF8_ACCEPT || seen != n)
+		return NULL;
+	return s;
+}
Omar Polo