commit 33d32d1fd66a577f22f3f33f238e8dac44ec9995 from: Omar Polo date: Fri Dec 25 12:13:12 2020 UTC implement a valid RFC3986 (URI) parser Up until now I used a "poor man" approach: the URI parser is barely a parser; it tries to extract the path from the request, with some minor checking, and that's all. This obviously is not RFC3986-compliant. The new RFC3986 (URI) parser should be fully compliant. It may accept some invalid URIs, but shouldn't reject or mis-parse valid URIs (in particular, the rule for the path is way more relaxed in this parser than it is in the RFC text). A difference with RFC3986 is that we don't even try to parse the (optional) userinfo part of a URI: following the Gemini spec we treat it as an error. A further caveat is that %2F in the path part of the URI is indistinguishable from a literal '/': this is NOT conforming, but due to the scope and use of gmid, I don't see how to treat a %2F sequence in the path (reject the URI?). commit - d5aba4c791266e35cf79cec02dcd15267fb62f62 commit + 33d32d1fd66a577f22f3f33f238e8dac44ec9995 blob - 9b1c514b6ac3057e90a07914b91c47799f6468fb blob + a7794ec77895de2aa815675b27a8a09f22d1f956 --- .gitignore +++ .gitignore @@ -2,5 +2,6 @@ cert.pem key.pem TAGS gmid +uri_test *.o docs blob - 6c495733d86ddd8cccbd9ed6f696b3b7c51bc178 blob + 1d355e3ba2d5b1d5fd244ed77062bc9b6952b0c1 --- Makefile +++ Makefile @@ -2,18 +2,24 @@ CC = cc CFLAGS = -Wall -Wextra -g LDFLAGS = -ltls -.PHONY: all clean +.PHONY: all clean test all: gmid TAGS README.md -gmid: gmid.o - ${CC} gmid.o -o gmid ${LDFLAGS} +gmid: gmid.o uri.o + ${CC} gmid.o uri.o -o gmid ${LDFLAGS} -TAGS: gmid.c - -etags gmid.c || true +TAGS: gmid.c uri.c + -etags gmid.c uri.c || true README.md: gmid.1 mandoc -Tmarkdown gmid.1 | sed -e '1d' -e '$$d' > README.md clean: - rm -f gmid.o gmid + rm -f *.o gmid + +uri_test: uri_test.o uri.o + ${CC} uri_test.o uri.o -o uri_test ${LDFLAGS} + +test: uri_test + ./uri_test blob - 7376a34bee2b269d3ebd441edeffba98c21a5391 blob + 
fb98cfc6b2c623668487e8e4529b576406056fab --- gmid.c +++ gmid.c @@ -34,6 +34,28 @@ int port; int foreground; int connected_clients; +struct etm { /* file extension to mime */ + const char *mime; + const char *ext; +} filetypes[] = { + {"application/pdf", "pdf"}, + + {"image/gif", "gif"}, + {"image/jpeg", "jpg"}, + {"image/jpeg", "jpeg"}, + {"image/png", "png"}, + {"image/svg+xml", "svg"}, + + {"text/gemini", "gemini"}, + {"text/gemini", "gmi"}, + {"text/markdown", "markdown"}, + {"text/markdown", "md"}, + {"text/plain", "txt"}, + {"text/xml", "xml"}, + + {NULL, NULL} +}; + void siginfo_handler(int sig) { @@ -51,103 +73,7 @@ starts_with(const char *str, const char *prefix) return 1; } -char * -url_after_proto(char *url) -{ - char *s; - const char *proto = "gemini:"; - const char *marker = "//"; - - /* a relative URL */ - if ((s = strstr(url, marker)) == NULL) - return url; - - /* - * if a protocol is not specified, gemini should be implied: - * this handles the case of //example.com - */ - if (s == url) - return s + strlen(marker); - - if (s - strlen(proto) != url) - return NULL; - - if (!starts_with(url, proto)) - return NULL; - - return s + strlen(marker); -} - -char * -url_start_of_request(char *url) -{ - char *s, *t; - - if ((s = url_after_proto(url)) == NULL) - return NULL; - - /* non-absolute URL */ - if (s == url) - return s; - - if ((t = strstr(s, "/")) == NULL) - return s + strlen(s); - return t; -} - int -url_trim(struct client *c, char *url) -{ - const char *e = "\r\n"; - char *s; - - if ((s = strstr(url, e)) == NULL) - return 0; - s[0] = '\0'; - s[1] = '\0'; - - if (s[2] != '\0') { - LOGE(c, "%s", "request longer than 1024 bytes"); - return 0; - } - - return 1; -} - -char * -adjust_path(char *path) -{ - char *s, *query; - size_t len; - - if ((query = strchr(path, '?')) != NULL) { - *query = '\0'; - query++; - } - - /* /.. 
-> / */ - len = strlen(path); - if (len >= 3) { - if (!strcmp(&path[len-3], "/..")) { - path[len-2] = '\0'; - } - } - - /* if the path is only `..` trim out and exit */ - if (!strcmp(path, "..")) { - path[0] = '\0'; - return query; - } - - /* remove every ../ in the path */ - while (1) { - if ((s = strstr(path, "../")) == NULL) - return query; - memmove(s, s+3, strlen(s)+1); /* copy also the \0 */ - } -} - -int start_reply(struct pollfd *pfd, struct client *client, int code, const char *reason) { char buf[1030] = {0}; /* status + ' ' + max reply len + \r\n\0 */ @@ -224,7 +150,7 @@ check_path(struct client *c, const char *path, int *fd struct stat sb; assert(path != NULL); - if ((*fd = openat(dirfd, path, + if ((*fd = openat(dirfd, *path ? path : ".", O_RDONLY | O_NOFOLLOW | O_CLOEXEC)) == -1) { return FILE_MISSING; } @@ -288,16 +214,8 @@ err: int -open_file(char *path, char *query, struct pollfd *fds, struct client *c) +open_file(char *fpath, char *query, struct pollfd *fds, struct client *c) { - char fpath[PATHBUF]; - - bzero(fpath, sizeof(fpath)); - - if (*path != '.') - fpath[0] = '.'; - strlcat(fpath, path, PATHBUF); - switch (check_path(c, fpath, &c->fd)) { case FILE_EXECUTABLE: /* +2 to skip the ./ */ @@ -578,8 +496,8 @@ void handle(struct pollfd *fds, struct client *client) { char buf[GEMINI_URL_LEN]; - char *path; - char *query; + const char *parse_err; + struct uri uri; switch (client->state) { case S_OPEN: @@ -599,26 +517,19 @@ handle(struct pollfd *fds, struct client *client) return; } - if (!url_trim(client, buf)) { - if (!start_reply(fds, client, BAD_REQUEST, "bad request")) - return; - goodbye(fds, client); - return; - } - - if ((path = url_start_of_request(buf)) == NULL) { - if (!start_reply(fds, client, BAD_REQUEST, "bad request")) + if (!trim_req_uri(buf) || !parse_uri(buf, &uri, &parse_err)) { + if (!start_reply(fds, client, BAD_REQUEST, parse_err)) return; goodbye(fds, client); return; } - query = adjust_path(path); - LOGI(client, "GET %s%s%s", 
path, - query ? "?" : "", - query ? query : ""); + LOGI(client, "GET %s%s%s", + *uri.path ? uri.path : "/", + *uri.query ? "?" : "", + *uri.query ? uri.query : ""); - send_file(path, query, fds, client); + send_file(uri.path, uri.query, fds, client); break; case S_INITIALIZING: blob - 62288a82b02a286dffab2acfebfe8c62013db7e6 blob + d8a050b64fa04e57aed62cb8dd1008b90ce19f16 --- gmid.h +++ gmid.h @@ -107,6 +107,17 @@ struct client { struct in_addr addr; }; + +struct uri { + char *schema; + char *host; + char *port; + uint16_t port_no; + char *path; + char *query; + char *fragment; +}; + enum { FILE_EXISTS, FILE_EXECUTABLE, @@ -114,35 +125,10 @@ enum { FILE_MISSING, }; -struct etm { /* file extension to mime */ - const char *mime; - const char *ext; -} filetypes[] = { - {"application/pdf", "pdf"}, - - {"image/gif", "gif"}, - {"image/jpeg", "jpg"}, - {"image/jpeg", "jpeg"}, - {"image/png", "png"}, - {"image/svg+xml", "svg"}, - - {"text/gemini", "gemini"}, - {"text/gemini", "gmi"}, - {"text/markdown", "markdown"}, - {"text/markdown", "md"}, - {"text/plain", "txt"}, - {"text/xml", "xml"}, - - {NULL, NULL} -}; - +/* gmid.c */ void siginfo_handler(int); int starts_with(const char*, const char*); -char *url_after_proto(char*); -char *url_start_of_request(char*); -int url_trim(struct client*, char*); -char *adjust_path(char*); ssize_t filesize(int); int start_reply(struct pollfd*, struct client*, int, const char*); @@ -167,4 +153,8 @@ void loop(struct tls*, int); void usage(const char*); +/* uri.c */ +int parse_uri(char*, struct uri*, const char**); +int trim_req_uri(char*); + #endif blob - /dev/null blob + 245928ac2a36f5594f0eda298dc56e4c1c8b7900 (mode 644) --- /dev/null +++ uri.c @@ -0,0 +1,413 @@ +/* + * Copyright (c) 2020 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include + +#include "gmid.h" + +/* + * Notes from RFC3986 + * + * => gemini://tanso.net/rfc/rfc3986.txt + * + * + * ABNF + * ==== + * + * pct-encoded "%" HEXDIG HEXDIG + * + * reserved = gen-delims / sub-delimis + * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + * / "*" / "+" / "," / ";" / "=" + * + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + * + * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ] + * + * hier-part = "//" authority path-abempty + * / path-absolute + * / path-rootless + * / path-empty + * + * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) + * + * authority = [ userinfo "@" ] host [ ":" port ] + * + * (note that userinfo isn't used for Gemini URL) + * + * host = IP-literal / IPv4address / reg-name + * reg-name = *( unreserved / pct-encoded / sub-delims ) + * + * port = *DIGIT + * + * path = path-abemty ; begins with "/" or is empty + * / path-absolute ; begins with "/" but not "//" + * / path-noscheme ; begins with a non-colon segment + * / path-rootless ; begins with a segment + * / path-empty ; zero characters + * + * path-abemty = *( "/" segment ) + * path-absolute = "/" [ segment-nz *( "/" segment ) ] + * path-noscheme = ; not used + * path-rootless = ; not used + * path-empty = ; not used + * + * segment = *pchar + * segment-nz = 1*pchar + * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + * + * query = *( pchar / "/" / "?" ) + * + * fragment = *( pchar / "/" / "?" ) + * + * + * EXAMPLE + * ======= + * + * foo://example.com:8042/over/there?name=ferret#nose + * \_/ \______________/\_________/ \_________/ \__/ + * | | | | | + * scheme authority path query fragment + * + */ + +struct parser { + char *uri; + struct uri *parsed; + const char *err; +}; + +/* XXX: these macros will expand multiple times their argument */ + +#define UNRESERVED(p) \ + (isalnum(p) \ + || p == '-' \ + || p == '.' \ + || p == '_' \ + || p == '~') + +#define SUB_DELIMITERS(p) \ + (p == '!' \ + || p == '$' \ + || p == '&' \ + || p == '\'' \ + || p == '(' \ + || p == ')' \ + || p == '*' \ + || p == '+' \ + || p == ',' \ + || p == ';' \ + || p == '=') + +static int +parse_pct_encoded(struct parser *p) +{ + if (*p->uri != '%') + return 0; + + if (!isxdigit(*(p->uri+1)) || !isxdigit(*(p->uri+2))) { + p->err = "illegal percent-encoding"; + return 0; + } + + sscanf(p->uri+1, "%2hhx", p->uri); + memmove(p->uri+1, p->uri+3, strlen(p->uri+3)+1); + + return 1; +} + +/* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." 
) "://" */ +static int +parse_scheme(struct parser *p) +{ + p->parsed->schema = p->uri; + + if (!isalpha(*p->uri)) { + p->err = "illegal character in scheme"; + return 0; + } + + p->uri++; + while (isalnum(*p->uri) + || *p->uri == '+' + || *p->uri == '-' + || *p->uri == '.') + p->uri++; + + if (*p->uri != ':') { + p->err = "illegal character in scheme"; + return 0; + } + + *p->uri = '\0'; + if (*(++p->uri) != '/' || *(++p->uri) != '/') { + p->err = "invalid marker after scheme"; + return 0; + } + + p->uri++; + return 1; +} + +/* *DIGIT */ +static int +parse_port(struct parser *p) +{ + uint32_t i = 0; + + p->parsed->port = p->uri; + + for (; isdigit(*p->uri); p->uri++) { + i = i * 10 + *p->uri - '0'; + if (i > UINT16_MAX) { + p->err = "port number too large"; + return 0; + } + } + + if (*p->uri != '/' && *p->uri != '\0') { + p->err = "illegal character in port number"; + return 0; + } + + p->parsed->port_no = i; + + if (*p->uri != '\0') { + *p->uri = '\0'; + p->uri++; + } + + return 1; +} + +/* TODO: add support for ip-literal and ipv4addr ? */ +/* *( unreserved / sub-delims / pct-encoded ) */ +static int +parse_authority(struct parser *p) +{ + p->parsed->host = p->uri; + + while (UNRESERVED(*p->uri) + || SUB_DELIMITERS(*p->uri) + || parse_pct_encoded(p)) + p->uri++; + + if (*p->uri == ':') { + *p->uri = '\0'; + p->uri++; + return parse_port(p); + } + + if (*p->uri == '/') { + *p->uri = '\0'; + p->uri++; + return 1; + } + + if (*p->uri == '\0') + return 1; + + p->err = "illegal character in authority section"; + return 0; +} + +/* Routine for path_clean. Elide the pointed .. with the preceding + * element. Return 0 if it's not possible. incr is the length of + * the increment, 3 for ../ and 2 for .. 
*/ +static int +path_elide_dotdot(char *path, char *i, int incr) +{ + char *j; + + if (i == path) + return 0; + for (j = i-2; j != path && *j != '/'; j--) + /* noop */ ; + if (*j == '/') + j++; + i += incr; + memmove(j, i, strlen(i)+1); + return 1; +} + +/* + * Use an algorithm similar to the one implemented in go' path.Clean: + * + * 1. Replace multiple slashes with a single slash + * 2. Eliminate each . path name element + * 3. Eliminate each inner .. along with the non-.. element that precedes it + * 4. Eliminate trailing .. if possible or error (go would only discard) + * + * Unlike path.Clean, this function return the empty string if the + * original path is equivalent to "/". + */ +static int +path_clean(char *path) +{ + char *i; + + /* 1. replace multiple slashes with a single one */ + for (i = path; *i; ++i) { + if (*i == '/' && *(i+1) == '/') { + memmove(i, i+1, strlen(i)); /* move also the \0 */ + i--; + } + } + + /* 2. eliminate each . path name element */ + for (i = path; *i; ++i) { + if ((i == path || *i == '/') && *(i+1) == '.' && + *(i+2) == '/') { + /* move also the \0 */ + memmove(i, i+2, strlen(i)-1); + i--; + } + } + if (!strcmp(path, ".") || !strcmp(path, "/.")) { + *path = '\0'; + return 1; + } + + /* 3. eliminate each inner .. along with the preceding non-.. */ + for (i = strstr(path, "../"); i != NULL; i = strstr(path, "..")) + if (!path_elide_dotdot(path, i, 3)) + return 0; + + /* 4. eliminate trailing ..*/ + if ((i = strstr(path, "..")) != NULL) + if (!path_elide_dotdot(path, i, 2)) + return 0; + + return 1; +} + +static int +parse_query(struct parser *p) +{ + p->parsed->query = p->uri; + if (*p->uri == '\0') + return 1; + + while (UNRESERVED(*p->uri) + || SUB_DELIMITERS(*p->uri) + || *p->uri == '/' + || *p->uri == '?' 
+ || parse_pct_encoded(p)) + p->uri++; + + if (*p->uri != '\0' && *p->uri != '#') { + p->err = "illegal character in query"; + return 0; + } + + if (*p->uri != '\0') { + *p->uri = '\0'; + p->uri++; + } + + return 1; +} + +/* don't even bother */ +static int +parse_fragment(struct parser *p) +{ + p->parsed->fragment = p->uri; + return 1; +} + +/* XXX: is it too broad? */ +/* *(pchar / "/") */ +static int +parse_path(struct parser *p) +{ + char c; + + p->parsed->path = p->uri; + if (*p->uri == '\0') { + p->parsed->query = p->parsed->fragment = p->uri; + return 1; + } + + while (UNRESERVED(*p->uri) + || SUB_DELIMITERS(*p->uri) + || *p->uri == '/' + || parse_pct_encoded(p)) + p->uri++; + + if (*p->uri != '\0' && *p->uri != '?' && *p->uri != '#') { + p->err = "illegal character in path"; + return 0; + } + + if (*p->uri != '\0') { + c = *p->uri; + *p->uri = '\0'; + p->uri++; + + if (c == '#') { + if (!parse_fragment(p)) + return 0; + } else + if (!parse_query(p) || !parse_fragment(p)) + return 0; + } + + if (!path_clean(p->parsed->path)) { + p->err = "illegal path"; + return 0; + } + + return 1; +} + +int +parse_uri(char *uri, struct uri *ret, const char **err_ret) +{ + char *end; + struct parser p = {uri, ret, NULL}; + + bzero(ret, sizeof(*ret)); + + /* initialize optional stuff to the empty string */ + end = uri + strlen(uri); + p.parsed->port = end; + p.parsed->path = end; + p.parsed->query = end; + p.parsed->fragment = end; + + if (!parse_scheme(&p) || !parse_authority(&p) || !parse_path(&p)) { + *err_ret = p.err; + return 0; + } + + *err_ret = NULL; + return 1; +} + +int +trim_req_uri(char *uri) +{ + char *i; + + if ((i = strstr(uri, "\r\n")) == NULL) + return 0; + *i = '\0'; + return 1; +} blob - /dev/null blob + c6521f668c8263cd6b9162966dd9b212dc529948 (mode 644) --- /dev/null +++ uri_test.c @@ -0,0 +1,184 @@ +/* + * Copyright (c) 2020 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby 
granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include + +#include "gmid.h" + +#define TEST(uri, fail, exp, descr) \ + if (!run_test(uri, fail, exp)) { \ + fprintf(stderr, "%s:%d: error: %s\n", \ + __FILE__, __LINE__, descr); \ + exit(1); \ + } + +#define URI(schema, host, port, path, query, frag) \ + ((struct uri){schema, host, port, 0, path, query, frag}) + +#define DIFF(wanted, got, field) \ + if (wanted->field == NULL || got->field == NULL || \ + strcmp(wanted->field, got->field)) { \ + fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n", \ + got->field, wanted->field); \ + return 0; \ + } + +#define PASS 0 +#define FAIL 1 + +int +diff_uri(struct uri *p, struct uri *exp) +{ + DIFF(p, exp, schema); + DIFF(p, exp, host); + DIFF(p, exp, port); + DIFF(p, exp, path); + DIFF(p, exp, query); + DIFF(p, exp, fragment); + return 1; +} + +int +run_test(const char *uri, int should_fail, struct uri expected) +{ + int failed, ok = 1; + char *uri_copy; + struct uri parsed; + const char *error; + + if ((uri_copy = strdup(uri)) == NULL) + err(1, "strdup"); + + fprintf(stderr, "=> %s\n", uri); + failed = !parse_uri(uri_copy, &parsed, &error); + + if (failed && should_fail) + goto done; + + if (error != NULL) + fprintf(stderr, "> %s\n", error); + + ok = !failed && !should_fail; + if (ok) + ok = diff_uri(&expected, &parsed); + +done: + free(uri_copy); + return ok; +} + +int +main(void) 
+{ + struct uri empty = {"", "", "", PASS, "", "", ""}; + + TEST("http://omarpolo.com", + PASS, + URI("http", "omarpolo.com", "", "", "", ""), + "can parse uri with empty path"); + + /* schema */ + TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing"); + TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker"); + TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker"); + TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema"); + + /* authority */ + TEST("gemini://omarpolo.com", + PASS, + URI("gemini", "omarpolo.com", "", "", "", ""), + "can parse authority with empty path"); + TEST("gemini://omarpolo.com/", + PASS, + URI("gemini", "omarpolo.com", "", "", "", ""), + "can parse authority with empty path (alt)") + TEST("gemini://omarpolo.com:1965", + PASS, + URI("gemini", "omarpolo.com", "1965", "", "", ""), + "can parse with port and empty path"); + TEST("gemini://omarpolo.com:1965/", + PASS, + URI("gemini", "omarpolo.com", "1965", "", "", ""), + "can parse with port and empty path") + TEST("gemini://omarpolo.com:196s", + FAIL, + empty, + "FAIL with invalid port number"); + + /* path */ + TEST("gemini://omarpolo.com/foo/bar/baz", + PASS, + URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), + "parse simple paths"); + TEST("gemini://omarpolo.com/foo//bar///baz", + PASS, + URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), + "parse paths with multiple slashes"); + TEST("gemini://omarpolo.com/foo/./bar/./././baz", + PASS, + URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), + "parse paths with . elements"); + TEST("gemini://omarpolo.com/foo/bar/../bar/baz", + PASS, + URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), + "parse paths with .. elements"); + TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz", + PASS, + URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""), + "parse paths with multiple .. 
elements"); + TEST("gemini://omarpolo.com/foo/..", + PASS, + URI("gemini", "omarpolo.com", "", "", "", ""), + "parse paths with a trailing .."); + TEST("gemini://omarpolo.com/foo/../", + PASS, + URI("gemini", "omarpolo.com", "", "", "", ""), + "parse paths with a trailing .."); + TEST("gemini://omarpolo.com/foo/../..", + FAIL, + empty, + "reject paths that would escape the root"); + + /* query */ + TEST("foo://example.com/foo/?gne", + PASS, + URI("foo", "example.com", "", "foo/", "gne", ""), + "parse query strings"); + TEST("foo://example.com/foo/?gne&foo", + PASS, + URI("foo", "example.com", "", "foo/", "gne&foo", ""), + "parse query strings"); + TEST("foo://example.com/foo/?gne%2F", + PASS, + URI("foo", "example.com", "", "foo/", "gne/", ""), + "parse query strings"); + + /* fragment */ + TEST("foo://bar.co/#foo", + PASS, + URI("foo", "bar.co", "", "", "", "foo"), + "can recognize fragments"); + + /* percent encoding */ + TEST("foo://bar.com/caf%C3%A8.gmi", + PASS, + URI("foo", "bar.com", "", "cafè.gmi", "", ""), + "can decode"); + + return 0; +}