commit 3300cbe06a9567c66ee63f3866bcbcf3430e0205 from: Omar Polo date: Wed Jan 27 10:47:49 2021 UTC initial punycode support commit - 390a61189309451462c0a1dc56c68f71e334ad4b commit + 3300cbe06a9567c66ee63f3866bcbcf3430e0205 blob - c7d3334c6fe6f05dfcdfa0484b5e093c2ce6b3bc blob + ba2033027cec9d56439a93c36cfeb8883fcff9a7 --- .gitignore +++ .gitignore @@ -17,4 +17,5 @@ regress/*.pem regress/reg.conf regress/fill-file regress/iri_test +regress/puny-test regress/*.o blob - 40826852356ef81e32836b079170df4ad7d32731 blob + b0116b3dd35fb84848405da724ccf53abf703266 --- ChangeLog +++ ChangeLog @@ -1,3 +1,7 @@ +2021-01-26 Omar Polo + + * puny.c (puny_decode): initial punycode support! + 2021-01-25 Omar Polo * gmid.1: manpage improvements (example and usage) blob - c600dd30c4e2002d4c2f2a982c20e010836095bd blob + 576b49c0d54cc41c997702fdf5090ae538176162 --- Makefile +++ Makefile @@ -13,7 +13,7 @@ lex.yy.c: lex.l y.tab.c y.tab.c: parse.y ${YACC} -b y -d parse.y -SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c +SRCS = gmid.c iri.c utf8.c ex.c server.c sandbox.c mime.c puny.c OBJS = ${SRCS:.c=.o} lex.yy.o y.tab.o ${COMPAT} gmid: ${OBJS} blob - 7a973f546635637000b403818b62d48975ad295b blob + 09a1d6fcfe20dc7f4b1feacd5e0027944c7b0e1d --- gmid.c +++ gmid.c @@ -131,7 +131,14 @@ log_request(struct client *c, char *meta, size_t l) /* serialize the IRI */ strlcpy(b, c->iri.schema, sizeof(b)); strlcat(b, "://", sizeof(b)); - strlcat(b, c->iri.host, sizeof(b)); + + /* log the decoded host name, but if it was invalid + * use the raw one. 
*/ + if (*c->domain != '\0') + strlcat(b, c->domain, sizeof(b)); + else + strlcat(b, c->iri.host, sizeof(b)); + strlcat(b, "/", sizeof(b)); strlcat(b, c->iri.path, sizeof(b)); /* TODO: sanitize UTF8 */ if (*c->iri.query != '\0') { /* TODO: sanitize UTF8 */ blob - 9774257709cf5f822ae47e8efa38494b2811c09c blob + b4e6b8448d5c945cf71a3c0f3df01ee8b505e4a3 --- gmid.h +++ gmid.h @@ -54,6 +54,9 @@ #define HOSTSLEN 64 #define LOCLEN 32 +/* RFC1034 limits each label to 63 octets (a full name may be up to 255). 63+1 for the NUL-terminator */ +#define DOMAIN_NAME_LEN (63+1) + #define LOGE(c, fmt, ...) logs(LOG_ERR, c, fmt, __VA_ARGS__) #define LOGW(c, fmt, ...) logs(LOG_WARNING, c, fmt, __VA_ARGS__) #define LOGN(c, fmt, ...) logs(LOG_NOTICE, c, fmt, __VA_ARGS__) @@ -134,6 +137,7 @@ struct client { struct tls *ctx; char req[GEMINI_URL_LEN]; struct iri iri; + char domain[DOMAIN_NAME_LEN]; int state, next; int code; const char *meta; @@ -236,9 +240,13 @@ void sandbox(void); /* utf8.c */ int valid_multibyte_utf8(struct parser*); +char *utf8_nth(char*, size_t); /* iri.c */ int parse_iri(char*, struct iri*, const char**); int trim_req_iri(char*); +/* puny.c */ +int puny_decode(char*, char*, size_t); + #endif blob - 3c9c572fb8c337ede611f0ea7a7818e5e8b73d03 blob + 50001658fc10b01a3cedb200171a5c162b1fe18a --- regress/Makefile +++ regress/Makefile @@ -2,11 +2,16 @@ include ../Makefile.local .PHONY: all clean runtime -all: iri_test runtime +all: puny-test testdata iri_test cert.pem + ./puny-test + ./runtime ./iri_test +puny-test: puny-test.o ../puny.o ../utf8.o + ${CC} puny-test.o ../puny.o ../utf8.o -o puny-test + iri_test: iri_test.o ../iri.o ../utf8.o - ${CC} iri_test.o ../iri.o ../utf8.o -o iri_test ${LDFLAGS} + ${CC} iri_test.o ../iri.o ../utf8.o -o iri_test fill-file: fill-file.o ${CC} fill-file.o -o fill-file @@ -38,5 +43,5 @@ testdata: fill-file cp hello testdata/dir cp testdata/index.gmi testdata/dir/foo.gmi -runtime: testdata cert.pem +runtime: testdata ./runtime blob - /dev/null blob + 
b25f82a9f9f6e410156922ee3f9e588bc1a6fa9e (mode 644) --- /dev/null +++ regress/puny-test.c @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <stdio.h> +#include <string.h> + +#include "../gmid.h" + +struct suite { + const char *src; + const char *res; +} t[] = { + {"foo", "foo"}, + {"xn-invalid", "xn-invalid"}, + {"naïve", "naïve"}, + {"xn--8ca", "è"}, + {"xn--caff-8oa", "caffè"}, + {"xn--nave-6pa", "naïve"}, + {"xn--e-0mbbc", "τeστ"}, + {"xn--8ca67lbac", "τèστ"}, + {"xn--28j2a3ar1p", "こんにちは"}, + {"xn--hello--ur7iy09x", "hello-世界"}, + {"xn--hi--hi-rr7iy09x", "hi-世界-hi"}, + {"xn--caf-8la.foo.org", "cafè.foo.org"}, + /* 3 bytes */ + {"xn--j6h", "♨"}, + /* 4 bytes */ + {"xn--x73l", "𩸽"}, + {"xn--x73laaa", "𩸽𩸽𩸽𩸽"}, + {NULL, NULL} +}; + +int +main(int argc, char **argv) +{ + struct suite *i; + int failed; + char *hostname; + char buf[64]; /* name len */ + + failed = 0; + for (i = t; i->src != NULL; ++i) { + if ((hostname = strdup(i->src)) == NULL) + return 0; + + memset(buf, 0, sizeof(buf)); + if (!puny_decode(hostname, buf, sizeof(buf))) { + printf("decode: failure with %s\n", i->src); + failed = 1; + continue; + } + + if (strcmp(buf, i->res)) { + printf("ERR: expected \"%s\", got \"%s\"\n", + i->res, buf); + failed = 1; + continue; + } else + 
printf("OK: %s => %s\n", i->src, buf); + + free(hostname); + } + + return failed; +} blob - a305da35b43b915582c63802783777bdd3ab94e9 blob + 1383974840e1981cd7812d1bcb55f61586468d89 --- server.c +++ server.c @@ -262,12 +262,10 @@ handle_handshake(struct pollfd *fds, struct client *c) } servname = tls_conn_servername(c->ctx); + puny_decode(servname, c->domain, sizeof(c->domain)); for (h = hosts; h->domain != NULL; ++h) { - if (!strcmp(h->domain, "*")) - break; - - if (servname != NULL && !fnmatch(h->domain, servname, 0)) + if (!fnmatch(h->domain, c->domain, 0)) break; } @@ -290,6 +288,7 @@ void handle_open_conn(struct pollfd *fds, struct client *c) { const char *parse_err = "invalid request"; + char decoded[DOMAIN_NAME_LEN]; bzero(c->req, sizeof(c->req)); bzero(&c->iri, sizeof(c->iri)); @@ -314,8 +313,11 @@ handle_open_conn(struct pollfd *fds, struct client *c) return; } - /* XXX: we should check that the SNI matches the requested host */ - if (strcmp(c->iri.schema, "gemini") || c->iri.port_no != conf.port) { + puny_decode(c->iri.host, decoded, sizeof(decoded)); + + if (c->iri.port_no != conf.port + || strcmp(c->iri.schema, "gemini") + || strcmp(decoded, c->domain)) { start_reply(fds, c, PROXY_REFUSED, "won't proxy request"); return; } blob - 8f530b0203310cdb0c880adf60ad6a610ae7823b blob + 20985b4237e53d58b551f6eaaf283e20859f3813 --- utf8.c +++ utf8.c @@ -77,3 +77,20 @@ valid_multibyte_utf8(struct parser *p) } return 1; } + +char * +utf8_nth(char *s, size_t n) +{ + size_t i; + uint32_t cp = 0, state = 0; + + for (i = 0; *s && i < n; ++s) + if (!utf8_decode(&state, &cp, *s)) + ++i; + + if (state != UTF8_ACCEPT) + return NULL; + if (i == n) + return s; + return NULL; +}