op public repos

Commit Diff

Commit:: 3c1cf9d07cb679ba444566159538b510902f2de9
From:: Omar Polo <op@omarpolo.com>
Date:: Mon Jan 11 13:08:00 2021 UTC
Message:: s/uri/iri since we accept IRIs
Actions:: Patch | Tree
commit - 28778244d67be7024868a5095e5eedda22a3ed98
commit + 3c1cf9d07cb679ba444566159538b510902f2de9
blob - a7794ec77895de2aa815675b27a8a09f22d1f956
blob + 5660e44f37b2ae2d269500e367612556ee6b2e80
--- .gitignore
+++ .gitignore
@@ -2,6 +2,6 @@ cert.pem
 key.pem
 TAGS
 gmid
-uri_test
+iri_test
 *.o
 docs
blob - 76f05bd5b384c8a23073a5d6f29ba5a0312c16a1
blob + 3e0b72e31bb3c1c16b34e6062dbc4b93db793e29
--- Makefile
+++ Makefile
@@ -6,17 +6,17 @@ LDFLAGS =	-ltls
 
 all: gmid TAGS README.md
 
-gmid: gmid.o uri.o utf8.o
-	${CC} gmid.o uri.o utf8.o -o gmid ${LDFLAGS}
+gmid: gmid.o iri.o utf8.o
+	${CC} gmid.o iri.o utf8.o -o gmid ${LDFLAGS}
 
-TAGS: gmid.c uri.c utf8.c
-	-etags gmid.c uri.c utf8.c || true
+TAGS: gmid.c iri.c utf8.c
+	-etags gmid.c iri.c utf8.c || true
 
 clean:
-	rm -f *.o gmid
+	rm -f *.o gmid iri_test
 
-uri_test: uri_test.o uri.o utf8.o
-	${CC} uri_test.o uri.o utf8.o -o uri_test ${LDFLAGS}
+iri_test: iri_test.o iri.o utf8.o
+	${CC} iri_test.o iri.o utf8.o -o iri_test ${LDFLAGS}
 
-test: uri_test
-	./uri_test
+test: iri_test
+	./iri_test
blob - ef12066eac747e32dae1c6136b4167366076242a
blob + 0c1bed23a05e30fef4ebcf534d1a0cc2e4bf455d
--- gmid.c
+++ gmid.c
@@ -572,7 +572,7 @@ handle(struct pollfd *fds, struct client *client)
 {
 	char buf[GEMINI_URL_LEN];
 	const char *parse_err;
-	struct uri uri;
+	struct iri iri;
 
 	switch (client->state) {
 	case S_OPEN:
@@ -593,7 +593,7 @@ handle(struct pollfd *fds, struct client *client)
 		}
 
 		parse_err = "invalid request";
-		if (!trim_req_uri(buf) || !parse_uri(buf, &uri, &parse_err)) {
+		if (!trim_req_iri(buf) || !parse_iri(buf, &iri, &parse_err)) {
 			if (!start_reply(fds, client, BAD_REQUEST, parse_err))
 				return;
 			goodbye(fds, client);
@@ -601,11 +601,11 @@ handle(struct pollfd *fds, struct client *client)
 		}
 
 		LOGI(client, "GET %s%s%s",
-		    *uri.path ? uri.path : "/",
-		    *uri.query ? "?" : "",
-		    *uri.query ? uri.query : "");
+		    *iri.path ? iri.path : "/",
+		    *iri.query ? "?" : "",
+		    *iri.query ? iri.query : "");
 
-		send_file(uri.path, uri.query, fds, client);
+		send_file(iri.path, iri.query, fds, client);
 		break;
 
 	case S_INITIALIZING:
blob - 64effdeb7170cd4219199d787810443f618e1c52
blob + ecca57ffd095eb684a593403e0f0f25964f70438
--- gmid.h
+++ gmid.h
@@ -70,7 +70,7 @@ struct client {
 	struct sockaddr_storage	 addr;
 };
 
-struct uri {
+struct iri {
 	char		*schema;
 	char		*host;
 	char		*port;
@@ -81,8 +81,8 @@ struct uri {
 };
 
 struct parser {
-	char		*uri;
-	struct uri	*parsed;
+	char		*iri;
+	struct iri	*parsed;
 	const char	*err;
 };
 
@@ -123,8 +123,8 @@ void		 usage(const char*);
 /* utf8.c */
 int		 valid_multibyte_utf8(struct parser*);
 
-/* uri.c */
-int		 parse_uri(char*, struct uri*, const char**);
-int		 trim_req_uri(char*);
+/* iri.c */
+int		 parse_iri(char*, struct iri*, const char**);
+int		 trim_req_iri(char*);
 
 #endif
blob - 1258abbf91161ba655231f964d5e501b4c7c4e67 (mode 644)
blob + /dev/null
--- uri.c
+++ /dev/null
@@ -1,426 +0,0 @@
-/*
- * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <ctype.h>
-#include <string.h>
-
-#include "gmid.h"
-
-/*
- * Notes from RFC3986
- *
- * => gemini://tanso.net/rfc/rfc3986.txt
- *
- *
- * ABNF
- * ====
- *
- * pct-encoded	"%" HEXDIG HEXDIG
- *
- * reserved	= gen-delims / sub-delimis
- * gen-delims	= ":" / "/" / "?" / "#" / "[" / "]" / "@"
- * sub-delims	= "!" / "$" / "&" / "'" / "(" / ")"
- * 		/ "*" / "+" / "," / ";" / "="
- *
- * unreserved	= ALPHA / DIGIT / "-" / "." / "_" / "~"
- *
- * URI		= scheme ":" hier-part [ "?" query ] [ "#" fragment ]
- *
- * hier-part	= "//" authority path-abempty
- * 		/ path-absolute
- * 		/ path-rootless
- * 		/ path-empty
- *
- * scheme	= ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
- *
- * authority	= [ userinfo "@" ] host [ ":" port ]
- *
- * (note that userinfo isn't used for Gemini URL)
- *
- * host		= IP-literal / IPv4address / reg-name
- * reg-name	= *( unreserved / pct-encoded / sub-delims )
- *
- * port		= *DIGIT
- *
- * path		= path-abemty	; begins with "/" or is empty
- * 		/ path-absolute	; begins with "/" but not "//"
- * 		/ path-noscheme	; begins with a non-colon segment
- * 		/ path-rootless ; begins with a segment
- * 		/ path-empty	; zero characters
- *
- * path-abemty		= *( "/" segment )
- * path-absolute	= "/" [ segment-nz *( "/" segment ) ]
- * path-noscheme	= ; not used
- * path-rootless	= ; not used
- * path-empty		= ; not used
- *
- * segment		= *pchar
- * segment-nz	= 1*pchar
- * segment-nz-nc	= 1*( unreserved / pct-encoded / sub-delims / "@" )
- * pchar		= unreserved / pct-encoded / sub-delims / ":" / "@"
- *
- * query		= *( pchar / "/" / "?" )
- *
- * fragment		= *( pchar / "/" / "?" )
- *
- *
- * EXAMPLE
- * =======
- *
- *    foo://example.com:8042/over/there?name=ferret#nose
- *    \_/   \______________/\_________/ \_________/ \__/
- *     |           |            |            |        |
- *  scheme     authority       path        query   fragment
- *
- */
-
-static inline int
-unreserved(int p)
-{
-	return isalnum(p)
-		|| p == '-'
-		|| p == '.'
-		|| p == '_'
-		|| p == '~';
-}
-
-static inline int
-sub_delimiters(int p)
-{
-	return p == '!'
-		|| p == '$'
-		|| p == '&'
-		|| p == '\''
-		|| p == '('
-		|| p == ')'
-		|| p == '*'
-		|| p == '+'
-		|| p == ','
-		|| p == ';'
-		|| p == '=';
-}
-
-static int
-parse_pct_encoded(struct parser *p)
-{
-	if (*p->uri != '%')
-		return 0;
-
-	if (!isxdigit(*(p->uri+1)) || !isxdigit(*(p->uri+2))) {
-		p->err = "illegal percent-encoding";
-		return 0;
-	}
-
-	sscanf(p->uri+1, "%2hhx", p->uri);
-	memmove(p->uri+1, p->uri+3, strlen(p->uri+3)+1);
-	if (*p->uri == '\0') {
-		p->err = "illegal percent-encoding";
-		return 0;
-	}
-
-	return 1;
-}
-
-/* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) "://" */
-static int
-parse_scheme(struct parser *p)
-{
-	p->parsed->schema = p->uri;
-
-	if (!isalpha(*p->uri)) {
-		p->err = "illegal character in scheme";
-		return 0;
-	}
-
-	p->uri++;
-	while (isalnum(*p->uri)
-	    || *p->uri == '+'
-	    || *p->uri == '-'
-	    || *p->uri == '.')
-		p->uri++;
-
-	if (*p->uri != ':') {
-		p->err = "illegal character in scheme";
-		return 0;
-	}
-
-	*p->uri = '\0';
-	if (*(++p->uri) != '/' || *(++p->uri) != '/') {
-		p->err = "invalid marker after scheme";
-		return 0;
-	}
-
-	p->uri++;
-	return 1;
-}
-
-/* *DIGIT */
-static int
-parse_port(struct parser *p)
-{
-	uint32_t i = 0;
-
-	p->parsed->port = p->uri;
-
-	for (; isdigit(*p->uri); p->uri++) {
-		i = i * 10 + *p->uri - '0';
-		if (i > UINT16_MAX) {
-			p->err = "port number too large";
-			return 0;
-		}
-	}
-
-	if (*p->uri != '/' && *p->uri != '\0') {
-		p->err = "illegal character in port number";
-		return 0;
-	}
-
-	p->parsed->port_no = i;
-
-	if (*p->uri != '\0') {
-		*p->uri = '\0';
-		p->uri++;
-	}
-
-	return 1;
-}
-
-/* TODO: add support for ip-literal and ipv4addr ? */
-/* *( unreserved / sub-delims / pct-encoded ) */
-static int
-parse_authority(struct parser *p)
-{
-	p->parsed->host = p->uri;
-
-	while (unreserved(*p->uri)
-	    || sub_delimiters(*p->uri)
-	    || parse_pct_encoded(p))
-		p->uri++;
-
-	if (p->err != NULL)
-		return 0;
-
-	if (*p->uri == ':') {
-		*p->uri = '\0';
-		p->uri++;
-		return parse_port(p);
-	}
-
-	if (*p->uri == '/') {
-		*p->uri = '\0';
-		p->uri++;
-		return 1;
-	}
-
-	if (*p->uri == '\0')
-		return 1;
-
-	p->err = "illegal character in authority section";
-	return 0;
-}
-
-/* Routine for path_clean.  Elide the pointed .. with the preceding
- * element.  Return 0 if it's not possible.  incr is the length of
- * the increment, 3 for ../ and 2 for .. */
-static int
-path_elide_dotdot(char *path, char *i, int incr)
-{
-	char *j;
-
-	if (i == path)
-		return 0;
-	for (j = i-2; j != path && *j != '/'; j--)
-                /* noop */ ;
-	if (*j == '/')
-		j++;
-	i += incr;
-	memmove(j, i, strlen(i)+1);
-	return 1;
-}
-
-/*
- * Use an algorithm similar to the one implemented in go' path.Clean:
- *
- * 1. Replace multiple slashes with a single slash
- * 2. Eliminate each . path name element
- * 3. Eliminate each inner .. along with the non-.. element that precedes it
- * 4. Eliminate trailing .. if possible or error (go would only discard)
- *
- * Unlike path.Clean, this function return the empty string if the
- * original path is equivalent to "/".
- */
-static int
-path_clean(char *path)
-{
-	char *i;
-
-	/* 1. replace multiple slashes with a single one */
-	for (i = path; *i; ++i) {
-		if (*i == '/' && *(i+1) == '/') {
-			memmove(i, i+1, strlen(i)); /* move also the \0 */
-			i--;
-		}
-	}
-
-	/* 2. eliminate each . path name element */
-	for (i = path; *i; ++i) {
-		if ((i == path || *i == '/') && *(i+1) == '.' &&
-		    *(i+2) == '/') {
-			/* move also the \0 */
-			memmove(i, i+2, strlen(i)-1);
-			i--;
-		}
-	}
-	if (!strcmp(path, ".") || !strcmp(path, "/.")) {
-		*path = '\0';
-		return 1;
-	}
-
-	/* 3. eliminate each inner .. along with the preceding non-.. */
-	for (i = strstr(path, "../"); i != NULL; i = strstr(path, ".."))
-		if (!path_elide_dotdot(path, i, 3))
-			return 0;
-
-	/* 4. eliminate trailing ..*/
-	if ((i = strstr(path, "..")) != NULL)
-		if (!path_elide_dotdot(path, i, 2))
-			return 0;
-
-	return 1;
-}
-
-static int
-parse_query(struct parser *p)
-{
-	p->parsed->query = p->uri;
-	if (*p->uri == '\0')
-		return 1;
-
-	while (unreserved(*p->uri)
-	    || sub_delimiters(*p->uri)
-	    || *p->uri == '/'
-	    || *p->uri == '?'
-	    || parse_pct_encoded(p)
-	    || valid_multibyte_utf8(p))
-		p->uri++;
-
-	if (p->err != NULL)
-		return 0;
-
-	if (*p->uri != '\0' && *p->uri != '#') {
-		p->err = "illegal character in query";
-		return 0;
-	}
-
-	if (*p->uri != '\0') {
-		*p->uri = '\0';
-		p->uri++;
-	}
-
-	return 1;
-}
-
-/* don't even bother */
-static int
-parse_fragment(struct parser *p)
-{
-	p->parsed->fragment = p->uri;
-	return 1;
-}
-
-/* XXX: is it too broad? */
-/* *(pchar / "/") */
-static int
-parse_path(struct parser *p)
-{
-	char c;
-
-	p->parsed->path = p->uri;
-	if (*p->uri == '\0') {
-		p->parsed->query = p->parsed->fragment = p->uri;
-		return 1;
-	}
-
-	while (unreserved(*p->uri)
-	    || sub_delimiters(*p->uri)
-	    || *p->uri == '/'
-	    || parse_pct_encoded(p)
-	    || valid_multibyte_utf8(p))
-		p->uri++;
-
-	if (p->err != NULL)
-		return 0;
-
-	if (*p->uri != '\0' && *p->uri != '?' && *p->uri != '#') {
-		p->err = "illegal character in path";
-		return 0;
-	}
-
-	if (*p->uri != '\0') {
-		c = *p->uri;
-		*p->uri = '\0';
-		p->uri++;
-
-		if (c == '#') {
-			if (!parse_fragment(p))
-				return 0;
-		} else
-			if (!parse_query(p) || !parse_fragment(p))
-				return 0;
-	}
-
-	if (!path_clean(p->parsed->path)) {
-		p->err = "illegal path";
-		return 0;
-	}
-
-	return 1;
-}
-
-int
-parse_uri(char *uri, struct uri *ret, const char **err_ret)
-{
-	char *end;
-	struct parser p = {uri, ret, NULL};
-
-	bzero(ret, sizeof(*ret));
-
-	/* initialize optional stuff to the empty string */
-	end = uri + strlen(uri);
-	p.parsed->port = end;
-	p.parsed->path = end;
-	p.parsed->query = end;
-	p.parsed->fragment = end;
-
-	if (!parse_scheme(&p) || !parse_authority(&p) || !parse_path(&p)) {
-		*err_ret = p.err;
-		return 0;
-	}
-
-	*err_ret = NULL;
-	return 1;
-}
-
-int
-trim_req_uri(char *uri)
-{
-	char *i;
-
-	if ((i = strstr(uri, "\r\n")) == NULL)
-		return 0;
-	*i = '\0';
-	return 1;
-}
blob - /dev/null
blob + 8c020392806387e3f84ca44d08d68c89abba42e9 (mode 644)
--- /dev/null
+++ iri.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <string.h>
+
+#include "gmid.h"
+
+static inline int
+unreserved(int p)
+{
+	return isalnum(p)
+		|| p == '-'
+		|| p == '.'
+		|| p == '_'
+		|| p == '~';
+}
+
+static inline int
+sub_delimiters(int p)
+{
+	return p == '!'
+		|| p == '$'
+		|| p == '&'
+		|| p == '\''
+		|| p == '('
+		|| p == ')'
+		|| p == '*'
+		|| p == '+'
+		|| p == ','
+		|| p == ';'
+		|| p == '=';
+}
+
+static int
+parse_pct_encoded(struct parser *p)
+{
+	if (*p->iri != '%')
+		return 0;
+
+	if (!isxdigit(*(p->iri+1)) || !isxdigit(*(p->iri+2))) {
+		p->err = "illegal percent-encoding";
+		return 0;
+	}
+
+	sscanf(p->iri+1, "%2hhx", p->iri);
+	memmove(p->iri+1, p->iri+3, strlen(p->iri+3)+1);
+	if (*p->iri == '\0') {
+		p->err = "illegal percent-encoding";
+		return 0;
+	}
+
+	return 1;
+}
+
+/* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) "://" */
+static int
+parse_scheme(struct parser *p)
+{
+	p->parsed->schema = p->iri;
+
+	if (!isalpha(*p->iri)) {
+		p->err = "illegal character in scheme";
+		return 0;
+	}
+
+	p->iri++;
+	while (isalnum(*p->iri)
+	    || *p->iri == '+'
+	    || *p->iri == '-'
+	    || *p->iri == '.')
+		p->iri++;
+
+	if (*p->iri != ':') {
+		p->err = "illegal character in scheme";
+		return 0;
+	}
+
+	*p->iri = '\0';
+	if (*(++p->iri) != '/' || *(++p->iri) != '/') {
+		p->err = "invalid marker after scheme";
+		return 0;
+	}
+
+	p->iri++;
+	return 1;
+}
+
+/* *DIGIT */
+static int
+parse_port(struct parser *p)
+{
+	uint32_t i = 0;
+
+	p->parsed->port = p->iri;
+
+	for (; isdigit(*p->iri); p->iri++) {
+		i = i * 10 + *p->iri - '0';
+		if (i > UINT16_MAX) {
+			p->err = "port number too large";
+			return 0;
+		}
+	}
+
+	if (*p->iri != '/' && *p->iri != '\0') {
+		p->err = "illegal character in port number";
+		return 0;
+	}
+
+	p->parsed->port_no = i;
+
+	if (*p->iri != '\0') {
+		*p->iri = '\0';
+		p->iri++;
+	}
+
+	return 1;
+}
+
+/* TODO: add support for ip-literal and ipv4addr ? */
+/* *( unreserved / sub-delims / pct-encoded ) */
+static int
+parse_authority(struct parser *p)
+{
+	p->parsed->host = p->iri;
+
+	while (unreserved(*p->iri)
+	    || sub_delimiters(*p->iri)
+	    || parse_pct_encoded(p))
+		p->iri++;
+
+	if (p->err != NULL)
+		return 0;
+
+	if (*p->iri == ':') {
+		*p->iri = '\0';
+		p->iri++;
+		return parse_port(p);
+	}
+
+	if (*p->iri == '/') {
+		*p->iri = '\0';
+		p->iri++;
+		return 1;
+	}
+
+	if (*p->iri == '\0')
+		return 1;
+
+	p->err = "illegal character in authority section";
+	return 0;
+}
+
+/* Helper for path_clean.  Elide the ".." that i points to, together
+ * with the element preceding it.  Return 0 if that is not possible.
+ * incr is the length of the text to skip: 3 for "../" and 2 for "..". */
+static int
+path_elide_dotdot(char *path, char *i, int incr)
+{
+	char *j;
+
+	if (i == path)
+		return 0;
+	for (j = i-2; j != path && *j != '/'; j--)
+                /* noop */ ;
+	if (*j == '/')
+		j++;
+	i += incr;
+	memmove(j, i, strlen(i)+1);
+	return 1;
+}
+
+/*
+ * Use an algorithm similar to the one implemented in Go's path.Clean:
+ *
+ * 1. Replace multiple slashes with a single slash
+ * 2. Eliminate each . path name element
+ * 3. Eliminate each inner .. along with the non-.. element that precedes it
+ * 4. Eliminate trailing .. if possible or error (Go would only discard)
+ *
+ * Unlike path.Clean, this function returns the empty string if the
+ * original path is equivalent to "/".
+ */
+static int
+path_clean(char *path)
+{
+	char *i;
+
+	/* 1. replace multiple slashes with a single one */
+	for (i = path; *i; ++i) {
+		if (*i == '/' && *(i+1) == '/') {
+			memmove(i, i+1, strlen(i)); /* move also the \0 */
+			i--;
+		}
+	}
+
+	/* 2. eliminate each . path name element */
+	for (i = path; *i; ++i) {
+		if ((i == path || *i == '/') && *(i+1) == '.' &&
+		    *(i+2) == '/') {
+			/* move also the \0 */
+			memmove(i, i+2, strlen(i)-1);
+			i--;
+		}
+	}
+	if (!strcmp(path, ".") || !strcmp(path, "/.")) {
+		*path = '\0';
+		return 1;
+	}
+
+	/* 3. eliminate each inner .. along with the preceding non-.. */
+	for (i = strstr(path, "../"); i != NULL; i = strstr(path, ".."))
+		if (!path_elide_dotdot(path, i, 3))
+			return 0;
+
+	/* 4. eliminate trailing ..*/
+	if ((i = strstr(path, "..")) != NULL)
+		if (!path_elide_dotdot(path, i, 2))
+			return 0;
+
+	return 1;
+}
+
+static int
+parse_query(struct parser *p)
+{
+	p->parsed->query = p->iri;
+	if (*p->iri == '\0')
+		return 1;
+
+	while (unreserved(*p->iri)
+	    || sub_delimiters(*p->iri)
+	    || *p->iri == '/'
+	    || *p->iri == '?'
+	    || parse_pct_encoded(p)
+	    || valid_multibyte_utf8(p))
+		p->iri++;
+
+	if (p->err != NULL)
+		return 0;
+
+	if (*p->iri != '\0' && *p->iri != '#') {
+		p->err = "illegal character in query";
+		return 0;
+	}
+
+	if (*p->iri != '\0') {
+		*p->iri = '\0';
+		p->iri++;
+	}
+
+	return 1;
+}
+
+/* don't even bother */
+static int
+parse_fragment(struct parser *p)
+{
+	p->parsed->fragment = p->iri;
+	return 1;
+}
+
+/* XXX: is it too broad? */
+/* *(pchar / "/") */
+static int
+parse_path(struct parser *p)
+{
+	char c;
+
+	p->parsed->path = p->iri;
+	if (*p->iri == '\0') {
+		p->parsed->query = p->parsed->fragment = p->iri;
+		return 1;
+	}
+
+	while (unreserved(*p->iri)
+	    || sub_delimiters(*p->iri)
+	    || *p->iri == '/'
+	    || parse_pct_encoded(p)
+	    || valid_multibyte_utf8(p))
+		p->iri++;
+
+	if (p->err != NULL)
+		return 0;
+
+	if (*p->iri != '\0' && *p->iri != '?' && *p->iri != '#') {
+		p->err = "illegal character in path";
+		return 0;
+	}
+
+	if (*p->iri != '\0') {
+		c = *p->iri;
+		*p->iri = '\0';
+		p->iri++;
+
+		if (c == '#') {
+			if (!parse_fragment(p))
+				return 0;
+		} else
+			if (!parse_query(p) || !parse_fragment(p))
+				return 0;
+	}
+
+	if (!path_clean(p->parsed->path)) {
+		p->err = "illegal path";
+		return 0;
+	}
+
+	return 1;
+}
+
+int
+parse_iri(char *iri, struct iri *ret, const char **err_ret)
+{
+	char *end;
+	struct parser p = {iri, ret, NULL};
+
+	bzero(ret, sizeof(*ret));
+
+	/* initialize optional stuff to the empty string */
+	end = iri + strlen(iri);
+	p.parsed->port = end;
+	p.parsed->path = end;
+	p.parsed->query = end;
+	p.parsed->fragment = end;
+
+	if (!parse_scheme(&p) || !parse_authority(&p) || !parse_path(&p)) {
+		*err_ret = p.err;
+		return 0;
+	}
+
+	*err_ret = NULL;
+	return 1;
+}
+
+int
+trim_req_iri(char *iri)
+{
+	char *i;
+
+	if ((i = strstr(iri, "\r\n")) == NULL)
+		return 0;
+	*i = '\0';
+	return 1;
+}
blob - 3e4d823343de421e402c437962283292e8742b29 (mode 644)
blob + /dev/null
--- uri_test.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <err.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "gmid.h"
-
-#define TEST(uri, fail, exp, descr)				\
-	if (!run_test(uri, fail, exp)) {			\
-		fprintf(stderr, "%s:%d: error: %s\n",		\
-		    __FILE__, __LINE__, descr);			\
-		exit(1);					\
-	}
-
-#define URI(schema, host, port, path, query, frag)		\
-	((struct uri){schema, host, port, 0, path, query, frag})
-
-#define DIFF(wanted, got, field)					\
-	if (wanted->field == NULL || got->field == NULL ||		\
-	    strcmp(wanted->field, got->field)) {			\
-		fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n",	\
-		    got->field, wanted->field);				\
-		return 0;						\
-	}
-
-#define PASS 0
-#define FAIL 1
-
-int
-diff_uri(struct uri *p, struct uri *exp)
-{
-        DIFF(p, exp, schema);
-        DIFF(p, exp, host);
-        DIFF(p, exp, port);
-        DIFF(p, exp, path);
-        DIFF(p, exp, query);
-        DIFF(p, exp, fragment);
-	return 1;
-}
-
-int
-run_test(const char *uri, int should_fail, struct uri expected)
-{
-	int failed, ok = 1;
-	char *uri_copy;
-	struct uri parsed;
-	const char *error;
-
-	if ((uri_copy = strdup(uri)) == NULL)
-		err(1, "strdup");
-
-	fprintf(stderr, "=> %s\n", uri);
-	failed = !parse_uri(uri_copy, &parsed, &error);
-
-	if (failed && should_fail)
-		goto done;
-
-	if (error != NULL)
-		fprintf(stderr, "> %s\n", error);
-
-	ok = !failed && !should_fail;
-	if (ok)
-		ok = diff_uri(&expected, &parsed);
-
-done:
-	free(uri_copy);
-	return ok;
-}
-
-int
-main(void)
-{
-	struct uri empty = {"", "", "", PASS, "", "", ""};
-
-	TEST("http://omarpolo.com",
-	    PASS,
-	    URI("http", "omarpolo.com", "", "", "", ""),
-	    "can parse uri with empty path");
-
-	/* schema */
-	TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
-	TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
-	TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
-	TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
-
-	/* authority */
-	TEST("gemini://omarpolo.com",
-	    PASS,
-	    URI("gemini", "omarpolo.com", "", "", "", ""),
-	    "can parse authority with empty path");
-	TEST("gemini://omarpolo.com/",
-	    PASS,
-	    URI("gemini", "omarpolo.com", "", "", "", ""),
-	    "can parse authority with empty path (alt)")
-	TEST("gemini://omarpolo.com:1965",
-	    PASS,
-	    URI("gemini", "omarpolo.com", "1965", "", "", ""),
-	    "can parse with port and empty path");
-	TEST("gemini://omarpolo.com:1965/",
-	    PASS,
-	    URI("gemini", "omarpolo.com", "1965", "", "", ""),
-	    "can parse with port and empty path")
-	TEST("gemini://omarpolo.com:196s",
-	    FAIL,
-	    empty,
-	    "FAIL with invalid port number");
-
-	/* path */
-	TEST("gemini://omarpolo.com/foo/bar/baz",
-	    PASS,
-	    URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
-	    "parse simple paths");
-	TEST("gemini://omarpolo.com/foo//bar///baz",
-	    PASS,
-	    URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
-	    "parse paths with multiple slashes");
-	TEST("gemini://omarpolo.com/foo/./bar/./././baz",
-	    PASS,
-	    URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
-	    "parse paths with . elements");
-	TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
-	    PASS,
-	    URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
-	    "parse paths with .. elements");
-	TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
-	    PASS,
-	    URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
-	    "parse paths with multiple .. elements");
-	TEST("gemini://omarpolo.com/foo/..",
-	    PASS,
-            URI("gemini", "omarpolo.com", "", "", "", ""),
-	    "parse paths with a trailing ..");
-	TEST("gemini://omarpolo.com/foo/../",
-	    PASS,
-            URI("gemini", "omarpolo.com", "", "", "", ""),
-	    "parse paths with a trailing ..");
-	TEST("gemini://omarpolo.com/foo/../..",
-	    FAIL,
-            empty,
-	    "reject paths that would escape the root");
-	TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
-	    PASS,
-            URI("gemini", "omarpolo.com", "", "", "", ""),
-	    "parse path with lots of cleaning available");
-
-	/* query */
-	TEST("foo://example.com/foo/?gne",
-	    PASS,
-	    URI("foo", "example.com", "", "foo/", "gne", ""),
-	    "parse query strings");
-	TEST("foo://example.com/foo/?gne&foo",
-	    PASS,
-	    URI("foo", "example.com", "", "foo/", "gne&foo", ""),
-	    "parse query strings");
-	TEST("foo://example.com/foo/?gne%2F",
-	    PASS,
-	    URI("foo", "example.com", "", "foo/", "gne/", ""),
-	    "parse query strings");
-
-	/* fragment */
-	TEST("foo://bar.co/#foo",
-	    PASS,
-	    URI("foo", "bar.co", "", "", "", "foo"),
-	    "can recognize fragments");
-
-	/* percent encoding */
-	TEST("foo://bar.com/caf%C3%A8.gmi",
-	    PASS,
-	    URI("foo", "bar.com", "", "cafè.gmi", "", ""),
-	    "can decode");
-	TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
-	    PASS,
-	    URI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
-	    "can decode");
-	TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
-	    PASS,
-	    URI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
-	    "can decode");
-	TEST("foo://bar.com/foo%2F..%2F..",
-	    FAIL,
-	    empty,
-	    "conversion and checking are done in the correct order");
-	TEST("foo://bar.com/foo%00?baz",
-	    FAIL,
-	    empty,
-	    "rejects %00");
-
-	/* IRI */
-        TEST("foo://bar.com/cafè.gmi",
-	    PASS,
-	    URI("foo", "bar.com", "", "cafè.gmi", "" , ""),
-	    "decode IRI (with a 2-byte utf8 seq)");
-	TEST("foo://bar.com/世界.gmi",
-	    PASS,
-	    URI("foo", "bar.com", "", "世界.gmi", "" , ""),
-	    "decode IRI");
-	TEST("foo://bar.com/😼.gmi",
-	    PASS,
-	    URI("foo", "bar.com", "", "😼.gmi", "" , ""),
-	    "decode IRI (with a 3-byte utf8 seq)");
-	TEST("foo://bar.com/😼/𤭢.gmi",
-	    PASS,
-	    URI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
-	    "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
-	TEST("foo://bar.com/世界/\xC0\x80",
-	    FAIL,
-	    empty,
-	    "reject invalid sequence (overlong NUL)");
-
-	return 0;
-}
blob - /dev/null
blob + 6200cb7422181a3280a9e6b9e1ab5cd96ebd0f93 (mode 644)
--- /dev/null
+++ iri_test.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gmid.h"
+
+#define TEST(iri, fail, exp, descr)				\
+	if (!run_test(iri, fail, exp)) {			\
+		fprintf(stderr, "%s:%d: error: %s\n",		\
+		    __FILE__, __LINE__, descr);			\
+		exit(1);					\
+	}
+
+#define IRI(schema, host, port, path, query, frag)		\
+	((struct iri){schema, host, port, 0, path, query, frag})
+
+#define DIFF(wanted, got, field)					\
+	if (wanted->field == NULL || got->field == NULL ||		\
+	    strcmp(wanted->field, got->field)) {			\
+		fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n",	\
+		    got->field, wanted->field);				\
+		return 0;						\
+	}
+
+#define PASS 0
+#define FAIL 1
+
+int
+diff_iri(struct iri *p, struct iri *exp)
+{
+        DIFF(p, exp, schema);
+        DIFF(p, exp, host);
+        DIFF(p, exp, port);
+        DIFF(p, exp, path);
+        DIFF(p, exp, query);
+        DIFF(p, exp, fragment);
+	return 1;
+}
+
+int
+run_test(const char *iri, int should_fail, struct iri expected)
+{
+	int failed, ok = 1;
+	char *iri_copy;
+	struct iri parsed;
+	const char *error;
+
+	if ((iri_copy = strdup(iri)) == NULL)
+		err(1, "strdup");
+
+	fprintf(stderr, "=> %s\n", iri);
+	failed = !parse_iri(iri_copy, &parsed, &error);
+
+	if (failed && should_fail)
+		goto done;
+
+	if (error != NULL)
+		fprintf(stderr, "> %s\n", error);
+
+	ok = !failed && !should_fail;
+	if (ok)
+		ok = diff_iri(&expected, &parsed);
+
+done:
+	free(iri_copy);
+	return ok;
+}
+
+int
+main(void)
+{
+	struct iri empty = {"", "", "", PASS, "", "", ""};
+
+	TEST("http://omarpolo.com",
+	    PASS,
+	    IRI("http", "omarpolo.com", "", "", "", ""),
+	    "can parse iri with empty path");
+
+	/* schema */
+	TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
+	TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
+	TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
+	TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
+
+	/* authority */
+	TEST("gemini://omarpolo.com",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "can parse authority with empty path");
+	TEST("gemini://omarpolo.com/",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "can parse authority with empty path (alt)")
+	TEST("gemini://omarpolo.com:1965",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "1965", "", "", ""),
+	    "can parse with port and empty path");
+	TEST("gemini://omarpolo.com:1965/",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "1965", "", "", ""),
+	    "can parse with port and empty path")
+	TEST("gemini://omarpolo.com:196s",
+	    FAIL,
+	    empty,
+	    "FAIL with invalid port number");
+
+	/* path */
+	TEST("gemini://omarpolo.com/foo/bar/baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse simple paths");
+	TEST("gemini://omarpolo.com/foo//bar///baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with multiple slashes");
+	TEST("gemini://omarpolo.com/foo/./bar/./././baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with . elements");
+	TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with .. elements");
+	TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with multiple .. elements");
+	TEST("gemini://omarpolo.com/foo/..",
+	    PASS,
+            IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse paths with a trailing ..");
+	TEST("gemini://omarpolo.com/foo/../",
+	    PASS,
+            IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse paths with a trailing ..");
+	TEST("gemini://omarpolo.com/foo/../..",
+	    FAIL,
+            empty,
+	    "reject paths that would escape the root");
+	TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
+	    PASS,
+            IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse path with lots of cleaning available");
+
+	/* query */
+	TEST("foo://example.com/foo/?gne",
+	    PASS,
+	    IRI("foo", "example.com", "", "foo/", "gne", ""),
+	    "parse query strings");
+	TEST("foo://example.com/foo/?gne&foo",
+	    PASS,
+	    IRI("foo", "example.com", "", "foo/", "gne&foo", ""),
+	    "parse query strings");
+	TEST("foo://example.com/foo/?gne%2F",
+	    PASS,
+	    IRI("foo", "example.com", "", "foo/", "gne/", ""),
+	    "parse query strings");
+
+	/* fragment */
+	TEST("foo://bar.co/#foo",
+	    PASS,
+	    IRI("foo", "bar.co", "", "", "", "foo"),
+	    "can recognize fragments");
+
+	/* percent encoding */
+	TEST("foo://bar.com/caf%C3%A8.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
+	    "can decode");
+	TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
+	    "can decode");
+	TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
+	    "can decode");
+	TEST("foo://bar.com/foo%2F..%2F..",
+	    FAIL,
+	    empty,
+	    "conversion and checking are done in the correct order");
+	TEST("foo://bar.com/foo%00?baz",
+	    FAIL,
+	    empty,
+	    "rejects %00");
+
+	/* IRI */
+        TEST("foo://bar.com/cafè.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "cafè.gmi", "" , ""),
+	    "decode IRI (with a 2-byte utf8 seq)");
+	TEST("foo://bar.com/世界.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "世界.gmi", "" , ""),
+	    "decode IRI");
+	TEST("foo://bar.com/😼.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "😼.gmi", "" , ""),
+	    "decode IRI (with a 3-byte utf8 seq)");
+	TEST("foo://bar.com/😼/𤭢.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
+	    "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
+	TEST("foo://bar.com/世界/\xC0\x80",
+	    FAIL,
+	    empty,
+	    "reject invalid sequence (overlong NUL)");
+
+	return 0;
+}
blob - 09aad1cd4b4aed0af095e949d47fb4056764eded
blob + 8f530b0203310cdb0c880adf60ad6a610ae7823b
--- utf8.c
+++ utf8.c
@@ -64,8 +64,8 @@ valid_multibyte_utf8(struct parser *p)
 {
 	uint32_t cp = 0, state = 0;
 
-        for (; *p->uri; p->uri++)
-		if (!utf8_decode(&state, &cp, *p->uri))
+        for (; *p->iri; p->iri++)
+		if (!utf8_decode(&state, &cp, *p->iri))
 			break;
 
 	/* reject the ASCII range */