commit - 28778244d67be7024868a5095e5eedda22a3ed98
commit + 3c1cf9d07cb679ba444566159538b510902f2de9
blob - a7794ec77895de2aa815675b27a8a09f22d1f956
blob + 5660e44f37b2ae2d269500e367612556ee6b2e80
--- .gitignore
+++ .gitignore
key.pem
TAGS
gmid
-uri_test
+iri_test
*.o
docs
blob - 76f05bd5b384c8a23073a5d6f29ba5a0312c16a1
blob + 3e0b72e31bb3c1c16b34e6062dbc4b93db793e29
--- Makefile
+++ Makefile
all: gmid TAGS README.md
-gmid: gmid.o uri.o utf8.o
- ${CC} gmid.o uri.o utf8.o -o gmid ${LDFLAGS}
+gmid: gmid.o iri.o utf8.o
+ ${CC} gmid.o iri.o utf8.o -o gmid ${LDFLAGS}
-TAGS: gmid.c uri.c utf8.c
- -etags gmid.c uri.c utf8.c || true
+TAGS: gmid.c iri.c utf8.c
+ -etags gmid.c iri.c utf8.c || true
clean:
- rm -f *.o gmid
+ rm -f *.o gmid iri_test
-uri_test: uri_test.o uri.o utf8.o
- ${CC} uri_test.o uri.o utf8.o -o uri_test ${LDFLAGS}
+iri_test: iri_test.o iri.o utf8.o
+ ${CC} iri_test.o iri.o utf8.o -o iri_test ${LDFLAGS}
-test: uri_test
- ./uri_test
+test: iri_test
+ ./iri_test
blob - ef12066eac747e32dae1c6136b4167366076242a
blob + 0c1bed23a05e30fef4ebcf534d1a0cc2e4bf455d
--- gmid.c
+++ gmid.c
{
char buf[GEMINI_URL_LEN];
const char *parse_err;
- struct uri uri;
+ struct iri iri;
switch (client->state) {
case S_OPEN:
}
parse_err = "invalid request";
- if (!trim_req_uri(buf) || !parse_uri(buf, &uri, &parse_err)) {
+ if (!trim_req_iri(buf) || !parse_iri(buf, &iri, &parse_err)) {
if (!start_reply(fds, client, BAD_REQUEST, parse_err))
return;
goodbye(fds, client);
}
LOGI(client, "GET %s%s%s",
- *uri.path ? uri.path : "/",
- *uri.query ? "?" : "",
- *uri.query ? uri.query : "");
+ *iri.path ? iri.path : "/",
+ *iri.query ? "?" : "",
+ *iri.query ? iri.query : "");
- send_file(uri.path, uri.query, fds, client);
+ send_file(iri.path, iri.query, fds, client);
break;
case S_INITIALIZING:
blob - 64effdeb7170cd4219199d787810443f618e1c52
blob + ecca57ffd095eb684a593403e0f0f25964f70438
--- gmid.h
+++ gmid.h
struct sockaddr_storage addr;
};
-struct uri {
+struct iri {
char *schema;
char *host;
char *port;
};
struct parser {
- char *uri;
- struct uri *parsed;
+ char *iri;
+ struct iri *parsed;
const char *err;
};
/* utf8.c */
int valid_multibyte_utf8(struct parser*);
-/* uri.c */
-int parse_uri(char*, struct uri*, const char**);
-int trim_req_uri(char*);
+/* iri.c */
+int parse_iri(char*, struct iri*, const char**);
+int trim_req_iri(char*);
#endif
blob - 1258abbf91161ba655231f964d5e501b4c7c4e67 (mode 644)
blob + /dev/null
--- uri.c
+++ /dev/null
-/*
- * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <ctype.h>
-#include <string.h>
-
-#include "gmid.h"
-
-/*
- * Notes from RFC3986
- *
- * => gemini://tanso.net/rfc/rfc3986.txt
- *
- *
- * ABNF
- * ====
- *
- * pct-encoded "%" HEXDIG HEXDIG
- *
- * reserved = gen-delims / sub-delimis
- * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
- * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
- * / "*" / "+" / "," / ";" / "="
- *
- * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
- *
- * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
- *
- * hier-part = "//" authority path-abempty
- * / path-absolute
- * / path-rootless
- * / path-empty
- *
- * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
- *
- * authority = [ userinfo "@" ] host [ ":" port ]
- *
- * (note that userinfo isn't used for Gemini URL)
- *
- * host = IP-literal / IPv4address / reg-name
- * reg-name = *( unreserved / pct-encoded / sub-delims )
- *
- * port = *DIGIT
- *
- * path = path-abemty ; begins with "/" or is empty
- * / path-absolute ; begins with "/" but not "//"
- * / path-noscheme ; begins with a non-colon segment
- * / path-rootless ; begins with a segment
- * / path-empty ; zero characters
- *
- * path-abemty = *( "/" segment )
- * path-absolute = "/" [ segment-nz *( "/" segment ) ]
- * path-noscheme = ; not used
- * path-rootless = ; not used
- * path-empty = ; not used
- *
- * segment = *pchar
- * segment-nz = 1*pchar
- * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
- * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
- *
- * query = *( pchar / "/" / "?" )
- *
- * fragment = *( pchar / "/" / "?" )
- *
- *
- * EXAMPLE
- * =======
- *
- * foo://example.com:8042/over/there?name=ferret#nose
- * \_/ \______________/\_________/ \_________/ \__/
- * | | | | |
- * scheme authority path query fragment
- *
- */
-
-static inline int
-unreserved(int p)
-{
- return isalnum(p)
- || p == '-'
- || p == '.'
- || p == '_'
- || p == '~';
-}
-
-static inline int
-sub_delimiters(int p)
-{
- return p == '!'
- || p == '$'
- || p == '&'
- || p == '\''
- || p == '('
- || p == ')'
- || p == '*'
- || p == '+'
- || p == ','
- || p == ';'
- || p == '=';
-}
-
-static int
-parse_pct_encoded(struct parser *p)
-{
- if (*p->uri != '%')
- return 0;
-
- if (!isxdigit(*(p->uri+1)) || !isxdigit(*(p->uri+2))) {
- p->err = "illegal percent-encoding";
- return 0;
- }
-
- sscanf(p->uri+1, "%2hhx", p->uri);
- memmove(p->uri+1, p->uri+3, strlen(p->uri+3)+1);
- if (*p->uri == '\0') {
- p->err = "illegal percent-encoding";
- return 0;
- }
-
- return 1;
-}
-
-/* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) "://" */
-static int
-parse_scheme(struct parser *p)
-{
- p->parsed->schema = p->uri;
-
- if (!isalpha(*p->uri)) {
- p->err = "illegal character in scheme";
- return 0;
- }
-
- p->uri++;
- while (isalnum(*p->uri)
- || *p->uri == '+'
- || *p->uri == '-'
- || *p->uri == '.')
- p->uri++;
-
- if (*p->uri != ':') {
- p->err = "illegal character in scheme";
- return 0;
- }
-
- *p->uri = '\0';
- if (*(++p->uri) != '/' || *(++p->uri) != '/') {
- p->err = "invalid marker after scheme";
- return 0;
- }
-
- p->uri++;
- return 1;
-}
-
-/* *DIGIT */
-static int
-parse_port(struct parser *p)
-{
- uint32_t i = 0;
-
- p->parsed->port = p->uri;
-
- for (; isdigit(*p->uri); p->uri++) {
- i = i * 10 + *p->uri - '0';
- if (i > UINT16_MAX) {
- p->err = "port number too large";
- return 0;
- }
- }
-
- if (*p->uri != '/' && *p->uri != '\0') {
- p->err = "illegal character in port number";
- return 0;
- }
-
- p->parsed->port_no = i;
-
- if (*p->uri != '\0') {
- *p->uri = '\0';
- p->uri++;
- }
-
- return 1;
-}
-
-/* TODO: add support for ip-literal and ipv4addr ? */
-/* *( unreserved / sub-delims / pct-encoded ) */
-static int
-parse_authority(struct parser *p)
-{
- p->parsed->host = p->uri;
-
- while (unreserved(*p->uri)
- || sub_delimiters(*p->uri)
- || parse_pct_encoded(p))
- p->uri++;
-
- if (p->err != NULL)
- return 0;
-
- if (*p->uri == ':') {
- *p->uri = '\0';
- p->uri++;
- return parse_port(p);
- }
-
- if (*p->uri == '/') {
- *p->uri = '\0';
- p->uri++;
- return 1;
- }
-
- if (*p->uri == '\0')
- return 1;
-
- p->err = "illegal character in authority section";
- return 0;
-}
-
-/* Routine for path_clean. Elide the pointed .. with the preceding
- * element. Return 0 if it's not possible. incr is the length of
- * the increment, 3 for ../ and 2 for .. */
-static int
-path_elide_dotdot(char *path, char *i, int incr)
-{
- char *j;
-
- if (i == path)
- return 0;
- for (j = i-2; j != path && *j != '/'; j--)
- /* noop */ ;
- if (*j == '/')
- j++;
- i += incr;
- memmove(j, i, strlen(i)+1);
- return 1;
-}
-
-/*
- * Use an algorithm similar to the one implemented in go' path.Clean:
- *
- * 1. Replace multiple slashes with a single slash
- * 2. Eliminate each . path name element
- * 3. Eliminate each inner .. along with the non-.. element that precedes it
- * 4. Eliminate trailing .. if possible or error (go would only discard)
- *
- * Unlike path.Clean, this function return the empty string if the
- * original path is equivalent to "/".
- */
-static int
-path_clean(char *path)
-{
- char *i;
-
- /* 1. replace multiple slashes with a single one */
- for (i = path; *i; ++i) {
- if (*i == '/' && *(i+1) == '/') {
- memmove(i, i+1, strlen(i)); /* move also the \0 */
- i--;
- }
- }
-
- /* 2. eliminate each . path name element */
- for (i = path; *i; ++i) {
- if ((i == path || *i == '/') && *(i+1) == '.' &&
- *(i+2) == '/') {
- /* move also the \0 */
- memmove(i, i+2, strlen(i)-1);
- i--;
- }
- }
- if (!strcmp(path, ".") || !strcmp(path, "/.")) {
- *path = '\0';
- return 1;
- }
-
- /* 3. eliminate each inner .. along with the preceding non-.. */
- for (i = strstr(path, "../"); i != NULL; i = strstr(path, ".."))
- if (!path_elide_dotdot(path, i, 3))
- return 0;
-
- /* 4. eliminate trailing ..*/
- if ((i = strstr(path, "..")) != NULL)
- if (!path_elide_dotdot(path, i, 2))
- return 0;
-
- return 1;
-}
-
-static int
-parse_query(struct parser *p)
-{
- p->parsed->query = p->uri;
- if (*p->uri == '\0')
- return 1;
-
- while (unreserved(*p->uri)
- || sub_delimiters(*p->uri)
- || *p->uri == '/'
- || *p->uri == '?'
- || parse_pct_encoded(p)
- || valid_multibyte_utf8(p))
- p->uri++;
-
- if (p->err != NULL)
- return 0;
-
- if (*p->uri != '\0' && *p->uri != '#') {
- p->err = "illegal character in query";
- return 0;
- }
-
- if (*p->uri != '\0') {
- *p->uri = '\0';
- p->uri++;
- }
-
- return 1;
-}
-
-/* don't even bother */
-static int
-parse_fragment(struct parser *p)
-{
- p->parsed->fragment = p->uri;
- return 1;
-}
-
-/* XXX: is it too broad? */
-/* *(pchar / "/") */
-static int
-parse_path(struct parser *p)
-{
- char c;
-
- p->parsed->path = p->uri;
- if (*p->uri == '\0') {
- p->parsed->query = p->parsed->fragment = p->uri;
- return 1;
- }
-
- while (unreserved(*p->uri)
- || sub_delimiters(*p->uri)
- || *p->uri == '/'
- || parse_pct_encoded(p)
- || valid_multibyte_utf8(p))
- p->uri++;
-
- if (p->err != NULL)
- return 0;
-
- if (*p->uri != '\0' && *p->uri != '?' && *p->uri != '#') {
- p->err = "illegal character in path";
- return 0;
- }
-
- if (*p->uri != '\0') {
- c = *p->uri;
- *p->uri = '\0';
- p->uri++;
-
- if (c == '#') {
- if (!parse_fragment(p))
- return 0;
- } else
- if (!parse_query(p) || !parse_fragment(p))
- return 0;
- }
-
- if (!path_clean(p->parsed->path)) {
- p->err = "illegal path";
- return 0;
- }
-
- return 1;
-}
-
-int
-parse_uri(char *uri, struct uri *ret, const char **err_ret)
-{
- char *end;
- struct parser p = {uri, ret, NULL};
-
- bzero(ret, sizeof(*ret));
-
- /* initialize optional stuff to the empty string */
- end = uri + strlen(uri);
- p.parsed->port = end;
- p.parsed->path = end;
- p.parsed->query = end;
- p.parsed->fragment = end;
-
- if (!parse_scheme(&p) || !parse_authority(&p) || !parse_path(&p)) {
- *err_ret = p.err;
- return 0;
- }
-
- *err_ret = NULL;
- return 1;
-}
-
-int
-trim_req_uri(char *uri)
-{
- char *i;
-
- if ((i = strstr(uri, "\r\n")) == NULL)
- return 0;
- *i = '\0';
- return 1;
-}
blob - /dev/null
blob + 8c020392806387e3f84ca44d08d68c89abba42e9 (mode 644)
--- /dev/null
+++ iri.c
+/*
+ * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <ctype.h>
+#include <string.h>
+
+#include "gmid.h"
+
+/*
+ * RFC 3986 "unreserved" set: ALPHA / DIGIT / "-" / "." / "_" / "~".
+ *
+ * Callers hand in plain char values, which are negative for the bytes
+ * of a multibyte UTF-8 sequence wherever char is signed.  isalnum() is
+ * undefined for such arguments, so everything outside the ASCII range
+ * is rejected before <ctype.h> is consulted.
+ *
+ * Returns non-zero if p belongs to the set, 0 otherwise.
+ */
+static inline int
+unreserved(int p)
+{
+	if (p < 0 || p > 0x7f)
+		return 0;
+
+	return isalnum(p)
+	    || p == '-'
+	    || p == '.'
+	    || p == '_'
+	    || p == '~';
+}
+
+/*
+ * RFC 3986 "sub-delims" set:
+ * "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
+ *
+ * Returns 1 if p is one of them, 0 otherwise.
+ */
+static inline int
+sub_delimiters(int p)
+{
+	switch (p) {
+	case '!':
+	case '$':
+	case '&':
+	case '\'':
+	case '(':
+	case ')':
+	case '*':
+	case '+':
+	case ',':
+	case ';':
+	case '=':
+		return 1;
+	default:
+		return 0;
+	}
+}
+
+/*
+ * Decode one percent-encoded octet ("%" HEXDIG HEXDIG) in place.
+ *
+ * Returns 0 and leaves p->err alone when the cursor is not on a '%'.
+ * Otherwise the three input bytes are replaced by the decoded byte, the
+ * rest of the string is shifted left by two and 1 is returned; the
+ * cursor stays on the decoded byte so the caller's p->iri++ steps over
+ * it.  A malformed escape or an encoded NUL sets p->err and returns 0
+ * without modifying the buffer.
+ */
+static int
+parse_pct_encoded(struct parser *p)
+{
+	int	i, c, byte = 0;
+
+	if (*p->iri != '%')
+		return 0;
+
+	/*
+	 * The casts keep <ctype.h> well defined for bytes >= 0x80 where
+	 * char is signed.  The || stops at a NUL right after the '%', so
+	 * nothing past the terminator is ever read.
+	 */
+	if (!isxdigit((unsigned char)p->iri[1]) ||
+	    !isxdigit((unsigned char)p->iri[2])) {
+		p->err = "illegal percent-encoding";
+		return 0;
+	}
+
+	for (i = 1; i <= 2; i++) {
+		c = (unsigned char)p->iri[i];
+		if (isdigit(c))
+			c -= '0';
+		else
+			c = tolower(c) - 'a' + 10;
+		byte = byte * 16 + c;
+	}
+
+	/* a decoded NUL would silently truncate the string */
+	if (byte == 0) {
+		p->err = "illegal percent-encoding";
+		return 0;
+	}
+
+	*(unsigned char *)p->iri = (unsigned char)byte;
+	memmove(p->iri + 1, p->iri + 3, strlen(p->iri + 3) + 1);
+	return 1;
+}
+
+/*
+ * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) "://"
+ *
+ * Records the start of the scheme in p->parsed->schema, overwrites the
+ * ':' with a NUL so the scheme becomes its own string and leaves the
+ * cursor on the first byte after "//".  Returns 1 on success; on
+ * failure sets p->err and returns 0.
+ */
+static int
+parse_scheme(struct parser *p)
+{
+	p->parsed->schema = p->iri;
+
+	/*
+	 * <ctype.h> needs an unsigned char value: a plain char can be
+	 * negative and that is undefined behaviour for isalpha & co.
+	 */
+	if (!isalpha((unsigned char)*p->iri)) {
+		p->err = "illegal character in scheme";
+		return 0;
+	}
+
+	p->iri++;
+	while (isalnum((unsigned char)*p->iri)
+	    || *p->iri == '+'
+	    || *p->iri == '-'
+	    || *p->iri == '.')
+		p->iri++;
+
+	if (*p->iri != ':') {
+		p->err = "illegal character in scheme";
+		return 0;
+	}
+
+	*p->iri = '\0';
+	if (*(++p->iri) != '/' || *(++p->iri) != '/') {
+		p->err = "invalid marker after scheme";
+		return 0;
+	}
+
+	p->iri++;
+	return 1;
+}
+
+/*
+ * *DIGIT
+ *
+ * Records the start of the port in p->parsed->port and its numeric
+ * value in p->parsed->port_no (0 when no digit is present).  The port
+ * must be followed by '/' or the end of the string; a '/' is replaced
+ * by a NUL and skipped.  Returns 1 on success; on failure sets p->err
+ * and returns 0.
+ */
+static int
+parse_port(struct parser *p)
+{
+	uint32_t i = 0;
+
+	p->parsed->port = p->iri;
+
+	/* cast: isdigit() is undefined for negative plain-char values */
+	for (; isdigit((unsigned char)*p->iri); p->iri++) {
+		i = i * 10 + *p->iri - '0';
+		/* checked on every digit, so i itself can never overflow */
+		if (i > UINT16_MAX) {
+			p->err = "port number too large";
+			return 0;
+		}
+	}
+
+	if (*p->iri != '/' && *p->iri != '\0') {
+		p->err = "illegal character in port number";
+		return 0;
+	}
+
+	p->parsed->port_no = i;
+
+	if (*p->iri != '\0') {
+		*p->iri = '\0';
+		p->iri++;
+	}
+
+	return 1;
+}
+
+/*
+ * *( unreserved / sub-delims / pct-encoded )
+ *
+ * Records the start of the host in p->parsed->host and consumes it.
+ * The host may be followed by ":" (the port is then parsed), by "/"
+ * or by the end of the string; the delimiter is replaced by a NUL.
+ * Returns 1 on success; on failure sets p->err and returns 0.
+ *
+ * TODO: add support for ip-literal and ipv4addr ?
+ */
+static int
+parse_authority(struct parser *p)
+{
+	p->parsed->host = p->iri;
+
+	for (;;) {
+		if (!unreserved(*p->iri)
+		    && !sub_delimiters(*p->iri)
+		    && !parse_pct_encoded(p))
+			break;
+		p->iri++;
+	}
+
+	/* a broken percent-escape ends the loop with the error set */
+	if (p->err != NULL)
+		return 0;
+
+	switch (*p->iri) {
+	case ':':
+		*p->iri++ = '\0';
+		return parse_port(p);
+	case '/':
+		*p->iri++ = '\0';
+		return 1;
+	case '\0':
+		return 1;
+	default:
+		p->err = "illegal character in authority section";
+		return 0;
+	}
+}
+
+/*
+ * Clean the path in place, in the spirit of Go's path.Clean, working on
+ * whole "/"-separated elements:
+ *
+ * 1. Empty elements (leading or repeated slashes) are dropped
+ * 2. Each . element is dropped
+ * 3. Each .. element is dropped together with the element before it;
+ *    if there is none the path would escape the root and 0 is returned
+ * 4. A trailing slash survives, and a trailing . or .. leaves one
+ *    behind, unless the result is empty
+ *
+ * Unlike path.Clean, this function returns the empty string if the
+ * original path is equivalent to "/", and the result never starts with
+ * a slash.  Names that merely contain dots ("foo..bar", "...") are
+ * ordinary elements.
+ *
+ * Returns 1 on success, 0 if the path tries to climb above its root.
+ */
+static int
+path_clean(char *path)
+{
+	char	*rd = path;	/* next unread input byte */
+	char	*wr = path;	/* one past the cleaned output */
+	char	*seg;
+	size_t	 len;
+	int	 slash;		/* current element is followed by '/' */
+	int	 dir = 0;	/* result names a directory */
+
+	/*
+	 * The output never grows past the input already consumed, so
+	 * writing through wr cannot clobber bytes rd still has to read.
+	 */
+	while (*rd != '\0') {
+		seg = rd;
+		len = strcspn(seg, "/");
+		slash = seg[len] == '/';
+		rd = seg + len + slash;
+
+		if (len == 0)
+			continue;
+
+		if (len == 1 && seg[0] == '.') {
+			dir = 1;
+			continue;
+		}
+
+		if (len == 2 && seg[0] == '.' && seg[1] == '.') {
+			if (wr == path)
+				return 0;
+			/* rewind to the start of the last element... */
+			while (wr > path && wr[-1] != '/')
+				wr--;
+			/* ...and drop the separator in front of it */
+			if (wr > path)
+				wr--;
+			dir = 1;
+			continue;
+		}
+
+		if (wr != path)
+			*wr++ = '/';
+		memmove(wr, seg, len);
+		wr += len;
+		dir = slash;
+	}
+
+	if (dir && wr != path)
+		*wr++ = '/';
+	*wr = '\0';
+	return 1;
+}
+
+/*
+ * *( pchar / "/" / "?" ), with raw multibyte UTF-8 accepted as well.
+ *
+ * Records the start of the query in p->parsed->query and consumes it
+ * up to the end of the string or a '#', which is replaced by a NUL and
+ * skipped.  Returns 1 on success; on failure sets p->err and returns 0.
+ */
+static int
+parse_query(struct parser *p)
+{
+	p->parsed->query = p->iri;
+	if (*p->iri == '\0')
+		return 1;
+
+	for (;;) {
+		if (!(unreserved(*p->iri)
+		    || sub_delimiters(*p->iri)
+		    || *p->iri == '/'
+		    || *p->iri == '?'
+		    || parse_pct_encoded(p)
+		    || valid_multibyte_utf8(p)))
+			break;
+		p->iri++;
+	}
+
+	/* one of the helpers may have ended the loop by raising an error */
+	if (p->err != NULL)
+		return 0;
+
+	switch (*p->iri) {
+	case '\0':
+		return 1;
+	case '#':
+		*p->iri++ = '\0';
+		return 1;
+	default:
+		p->err = "illegal character in query";
+		return 0;
+	}
+}
+
+/*
+ * The fragment is not validated: whatever follows the cursor is taken
+ * as is.  Always returns 1.
+ */
+static int
+parse_fragment(struct parser *p)
+{
+	struct iri *out = p->parsed;
+
+	out->fragment = p->iri;
+	return 1;
+}
+
+/*
+ * *( pchar / "/" ), with raw multibyte UTF-8 accepted as well.
+ * XXX: is it too broad?
+ *
+ * Records the start of the path in p->parsed->path, consumes it and
+ * hands a following "?query" and/or "#fragment" to the matching
+ * parser; the delimiter is replaced by a NUL.  The path is cleaned
+ * last, once percent-escapes have been decoded.  Returns 1 on success;
+ * on failure sets p->err and returns 0.
+ */
+static int
+parse_path(struct parser *p)
+{
+	p->parsed->path = p->iri;
+	if (*p->iri == '\0') {
+		p->parsed->query = p->iri;
+		p->parsed->fragment = p->iri;
+		return 1;
+	}
+
+	for (;;) {
+		if (!(unreserved(*p->iri)
+		    || sub_delimiters(*p->iri)
+		    || *p->iri == '/'
+		    || parse_pct_encoded(p)
+		    || valid_multibyte_utf8(p)))
+			break;
+		p->iri++;
+	}
+
+	if (p->err != NULL)
+		return 0;
+
+	switch (*p->iri) {
+	case '\0':
+		break;
+	case '?':
+		*p->iri++ = '\0';
+		if (!parse_query(p) || !parse_fragment(p))
+			return 0;
+		break;
+	case '#':
+		*p->iri++ = '\0';
+		if (!parse_fragment(p))
+			return 0;
+		break;
+	default:
+		p->err = "illegal character in path";
+		return 0;
+	}
+
+	if (!path_clean(p->parsed->path)) {
+		p->err = "illegal path";
+		return 0;
+	}
+
+	return 1;
+}
+
+/*
+ * Parse the NUL-terminated string iri into *ret.
+ *
+ * The string is modified in place: delimiters are overwritten with NUL
+ * bytes and percent-escapes are decoded, and the members of *ret point
+ * into it, so iri has to outlive ret.  Components missing from the
+ * input are left pointing at an empty string.
+ *
+ * Returns 1 and sets *err_ret to NULL on success; returns 0 and stores
+ * the parser's error message in *err_ret on failure.
+ */
+int
+parse_iri(char *iri, struct iri *ret, const char **err_ret)
+{
+	char		*end;
+	struct parser	 p = { .iri = iri, .parsed = ret, .err = NULL };
+
+	/* memset comes from <string.h>, which is included; bzero is legacy */
+	memset(ret, 0, sizeof(*ret));
+
+	/* initialize optional stuff to the empty string */
+	end = iri + strlen(iri);
+	ret->port = end;
+	ret->path = end;
+	ret->query = end;
+	ret->fragment = end;
+
+	if (!parse_scheme(&p) || !parse_authority(&p) || !parse_path(&p)) {
+		*err_ret = p.err;
+		return 0;
+	}
+
+	*err_ret = NULL;
+	return 1;
+}
+
+/*
+ * Cut the request at its first "\r\n" by overwriting the '\r' with a
+ * NUL.  Returns 1 if the terminator was found, 0 (string untouched)
+ * otherwise.
+ */
+int
+trim_req_iri(char *iri)
+{
+	char *eol = strstr(iri, "\r\n");
+
+	if (eol == NULL)
+		return 0;
+
+	*eol = '\0';
+	return 1;
+}
blob - 3e4d823343de421e402c437962283292e8742b29 (mode 644)
blob + /dev/null
--- uri_test.c
+++ /dev/null
-/*
- * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
- *
- * Permission to use, copy, modify, and distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
- * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
- * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
- * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
- */
-
-#include <err.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "gmid.h"
-
-#define TEST(uri, fail, exp, descr) \
- if (!run_test(uri, fail, exp)) { \
- fprintf(stderr, "%s:%d: error: %s\n", \
- __FILE__, __LINE__, descr); \
- exit(1); \
- }
-
-#define URI(schema, host, port, path, query, frag) \
- ((struct uri){schema, host, port, 0, path, query, frag})
-
-#define DIFF(wanted, got, field) \
- if (wanted->field == NULL || got->field == NULL || \
- strcmp(wanted->field, got->field)) { \
- fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n", \
- got->field, wanted->field); \
- return 0; \
- }
-
-#define PASS 0
-#define FAIL 1
-
-int
-diff_uri(struct uri *p, struct uri *exp)
-{
- DIFF(p, exp, schema);
- DIFF(p, exp, host);
- DIFF(p, exp, port);
- DIFF(p, exp, path);
- DIFF(p, exp, query);
- DIFF(p, exp, fragment);
- return 1;
-}
-
-int
-run_test(const char *uri, int should_fail, struct uri expected)
-{
- int failed, ok = 1;
- char *uri_copy;
- struct uri parsed;
- const char *error;
-
- if ((uri_copy = strdup(uri)) == NULL)
- err(1, "strdup");
-
- fprintf(stderr, "=> %s\n", uri);
- failed = !parse_uri(uri_copy, &parsed, &error);
-
- if (failed && should_fail)
- goto done;
-
- if (error != NULL)
- fprintf(stderr, "> %s\n", error);
-
- ok = !failed && !should_fail;
- if (ok)
- ok = diff_uri(&expected, &parsed);
-
-done:
- free(uri_copy);
- return ok;
-}
-
-int
-main(void)
-{
- struct uri empty = {"", "", "", PASS, "", "", ""};
-
- TEST("http://omarpolo.com",
- PASS,
- URI("http", "omarpolo.com", "", "", "", ""),
- "can parse uri with empty path");
-
- /* schema */
- TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
- TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
- TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
- TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
-
- /* authority */
- TEST("gemini://omarpolo.com",
- PASS,
- URI("gemini", "omarpolo.com", "", "", "", ""),
- "can parse authority with empty path");
- TEST("gemini://omarpolo.com/",
- PASS,
- URI("gemini", "omarpolo.com", "", "", "", ""),
- "can parse authority with empty path (alt)")
- TEST("gemini://omarpolo.com:1965",
- PASS,
- URI("gemini", "omarpolo.com", "1965", "", "", ""),
- "can parse with port and empty path");
- TEST("gemini://omarpolo.com:1965/",
- PASS,
- URI("gemini", "omarpolo.com", "1965", "", "", ""),
- "can parse with port and empty path")
- TEST("gemini://omarpolo.com:196s",
- FAIL,
- empty,
- "FAIL with invalid port number");
-
- /* path */
- TEST("gemini://omarpolo.com/foo/bar/baz",
- PASS,
- URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
- "parse simple paths");
- TEST("gemini://omarpolo.com/foo//bar///baz",
- PASS,
- URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
- "parse paths with multiple slashes");
- TEST("gemini://omarpolo.com/foo/./bar/./././baz",
- PASS,
- URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
- "parse paths with . elements");
- TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
- PASS,
- URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
- "parse paths with .. elements");
- TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
- PASS,
- URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
- "parse paths with multiple .. elements");
- TEST("gemini://omarpolo.com/foo/..",
- PASS,
- URI("gemini", "omarpolo.com", "", "", "", ""),
- "parse paths with a trailing ..");
- TEST("gemini://omarpolo.com/foo/../",
- PASS,
- URI("gemini", "omarpolo.com", "", "", "", ""),
- "parse paths with a trailing ..");
- TEST("gemini://omarpolo.com/foo/../..",
- FAIL,
- empty,
- "reject paths that would escape the root");
- TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
- PASS,
- URI("gemini", "omarpolo.com", "", "", "", ""),
- "parse path with lots of cleaning available");
-
- /* query */
- TEST("foo://example.com/foo/?gne",
- PASS,
- URI("foo", "example.com", "", "foo/", "gne", ""),
- "parse query strings");
- TEST("foo://example.com/foo/?gne&foo",
- PASS,
- URI("foo", "example.com", "", "foo/", "gne&foo", ""),
- "parse query strings");
- TEST("foo://example.com/foo/?gne%2F",
- PASS,
- URI("foo", "example.com", "", "foo/", "gne/", ""),
- "parse query strings");
-
- /* fragment */
- TEST("foo://bar.co/#foo",
- PASS,
- URI("foo", "bar.co", "", "", "", "foo"),
- "can recognize fragments");
-
- /* percent encoding */
- TEST("foo://bar.com/caf%C3%A8.gmi",
- PASS,
- URI("foo", "bar.com", "", "cafè.gmi", "", ""),
- "can decode");
- TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
- PASS,
- URI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
- "can decode");
- TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
- PASS,
- URI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
- "can decode");
- TEST("foo://bar.com/foo%2F..%2F..",
- FAIL,
- empty,
- "conversion and checking are done in the correct order");
- TEST("foo://bar.com/foo%00?baz",
- FAIL,
- empty,
- "rejects %00");
-
- /* IRI */
- TEST("foo://bar.com/cafè.gmi",
- PASS,
- URI("foo", "bar.com", "", "cafè.gmi", "" , ""),
- "decode IRI (with a 2-byte utf8 seq)");
- TEST("foo://bar.com/世界.gmi",
- PASS,
- URI("foo", "bar.com", "", "世界.gmi", "" , ""),
- "decode IRI");
- TEST("foo://bar.com/😼.gmi",
- PASS,
- URI("foo", "bar.com", "", "😼.gmi", "" , ""),
- "decode IRI (with a 3-byte utf8 seq)");
- TEST("foo://bar.com/😼/𤭢.gmi",
- PASS,
- URI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
- "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
- TEST("foo://bar.com/世界/\xC0\x80",
- FAIL,
- empty,
- "reject invalid sequence (overlong NUL)");
-
- return 0;
-}
blob - /dev/null
blob + 6200cb7422181a3280a9e6b9e1ab5cd96ebd0f93 (mode 644)
--- /dev/null
+++ iri_test.c
+/*
+ * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "gmid.h"
+
+/*
+ * Run one parser test case through run_test(); if it reports a failure,
+ * print the call site (file and line) together with descr on stderr and
+ * exit with status 1.
+ *
+ * NOTE(review): this expands to a bare `if (...) { ... }` block, not to
+ * a do { } while (0) statement, so it must not be used as the unbraced
+ * body of an if/else.
+ */
+#define TEST(iri, fail, exp, descr) \
+ if (!run_test(iri, fail, exp)) { \
+ fprintf(stderr, "%s:%d: error: %s\n", \
+ __FILE__, __LINE__, descr); \
+ exit(1); \
+ }
+
+/*
+ * Build a struct iri compound literal holding the expected result of a
+ * test.  The initializer is positional; the fourth member is always 0
+ * (presumably port_no, which diff_iri() never compares -- confirm
+ * against the struct definition in gmid.h).
+ */
+#define IRI(schema, host, port, path, query, frag) \
+ ((struct iri){schema, host, port, 0, path, query, frag})
+
+/*
+ * Compare the string member `field` of two struct iri pointers.  If
+ * either is NULL or the strings differ, print both on stderr and make
+ * the enclosing function return 0.
+ *
+ * The arguments are parenthesised so that expressions such as &x can be
+ * passed, the body is a single statement (do/while) and NULL members
+ * are printed as "(null)" rather than handed to %s.
+ */
+#define DIFF(wanted, got, field)					\
+	do {								\
+		const char *w_ = (wanted)->field;			\
+		const char *g_ = (got)->field;				\
+		if (w_ == NULL || g_ == NULL || strcmp(w_, g_)) {	\
+			fprintf(stderr,					\
+			    #field ":\n\tgot: %s\n\twanted: %s\n",	\
+			    g_ != NULL ? g_ : "(null)",			\
+			    w_ != NULL ? w_ : "(null)");		\
+			return 0;					\
+		}							\
+	} while (0)
+
+/*
+ * Values for the `fail` argument of TEST() / `should_fail` of
+ * run_test(): PASS means the IRI must parse, FAIL that it must be
+ * rejected.
+ */
+#define PASS 0
+#define FAIL 1
+
+/*
+ * Compare every string component of the expected and the parsed IRI.
+ * Returns 1 if they all match; otherwise DIFF() reports the first
+ * mismatch on stderr and makes this function return 0.
+ *
+ * The parameters are named after their role at the call site, which
+ * passes the expected value first and the parser output second.
+ */
+int
+diff_iri(struct iri *wanted, struct iri *got)
+{
+	DIFF(wanted, got, schema);
+	DIFF(wanted, got, host);
+	DIFF(wanted, got, port);
+	DIFF(wanted, got, path);
+	DIFF(wanted, got, query);
+	DIFF(wanted, got, fragment);
+	return 1;
+}
+
+/*
+ * Parse a private copy of iri (the parser modifies its input) and check
+ * the outcome.  Returns 1 when the parser rejects an IRI that should
+ * fail, or accepts one that should pass and every component matches
+ * `expected`; returns 0 otherwise.  The parser's error message, if any,
+ * is printed unless the failure was the expected one.
+ */
+int
+run_test(const char *iri, int should_fail, struct iri expected)
+{
+	const char	*error;
+	struct iri	 parsed;
+	char		*iri_copy;
+	int		 failed, ok;
+
+	iri_copy = strdup(iri);
+	if (iri_copy == NULL)
+		err(1, "strdup");
+
+	fprintf(stderr, "=> %s\n", iri);
+	failed = !parse_iri(iri_copy, &parsed, &error);
+
+	if (failed && should_fail) {
+		ok = 1;
+	} else {
+		if (error != NULL)
+			fprintf(stderr, "> %s\n", error);
+
+		if (!failed && !should_fail)
+			ok = diff_iri(&expected, &parsed);
+		else
+			ok = 0;
+	}
+
+	free(iri_copy);
+	return ok;
+}
+
+/*
+ * Test driver: every TEST() parses one IRI and exits with status 1 on
+ * the first unexpected result; returns 0 if all cases pass.
+ */
+int
+main(void)
+{
+	/* placeholder expectation for cases that must be rejected */
+	struct iri empty = IRI("", "", "", "", "", "");
+
+	TEST("http://omarpolo.com",
+	    PASS,
+	    IRI("http", "omarpolo.com", "", "", "", ""),
+	    "can parse iri with empty path");
+
+	/* schema */
+	TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
+	TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
+	TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
+	TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
+
+	/* authority */
+	TEST("gemini://omarpolo.com",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "can parse authority with empty path");
+	TEST("gemini://omarpolo.com/",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "can parse authority with empty path (alt)");
+	TEST("gemini://omarpolo.com:1965",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "1965", "", "", ""),
+	    "can parse with port and empty path");
+	TEST("gemini://omarpolo.com:1965/",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "1965", "", "", ""),
+	    "can parse with port and empty path");
+	TEST("gemini://omarpolo.com:196s",
+	    FAIL,
+	    empty,
+	    "FAIL with invalid port number");
+
+	/* path */
+	TEST("gemini://omarpolo.com/foo/bar/baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse simple paths");
+	TEST("gemini://omarpolo.com/foo//bar///baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with multiple slashes");
+	TEST("gemini://omarpolo.com/foo/./bar/./././baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with . elements");
+	TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with .. elements");
+	TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
+	    "parse paths with multiple .. elements");
+	TEST("gemini://omarpolo.com/foo/..",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse paths with a trailing ..");
+	TEST("gemini://omarpolo.com/foo/../",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse paths with a trailing ..");
+	TEST("gemini://omarpolo.com/foo/../..",
+	    FAIL,
+	    empty,
+	    "reject paths that would escape the root");
+	TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
+	    PASS,
+	    IRI("gemini", "omarpolo.com", "", "", "", ""),
+	    "parse path with lots of cleaning available");
+
+	/* query */
+	TEST("foo://example.com/foo/?gne",
+	    PASS,
+	    IRI("foo", "example.com", "", "foo/", "gne", ""),
+	    "parse query strings");
+	TEST("foo://example.com/foo/?gne&foo",
+	    PASS,
+	    IRI("foo", "example.com", "", "foo/", "gne&foo", ""),
+	    "parse query strings");
+	TEST("foo://example.com/foo/?gne%2F",
+	    PASS,
+	    IRI("foo", "example.com", "", "foo/", "gne/", ""),
+	    "parse query strings");
+
+	/* fragment */
+	TEST("foo://bar.co/#foo",
+	    PASS,
+	    IRI("foo", "bar.co", "", "", "", "foo"),
+	    "can recognize fragments");
+
+	/* percent encoding */
+	TEST("foo://bar.com/caf%C3%A8.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
+	    "can decode");
+	TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
+	    "can decode");
+	TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
+	    "can decode");
+	TEST("foo://bar.com/foo%2F..%2F..",
+	    FAIL,
+	    empty,
+	    "conversion and checking are done in the correct order");
+	TEST("foo://bar.com/foo%00?baz",
+	    FAIL,
+	    empty,
+	    "rejects %00");
+
+	/* IRI */
+	TEST("foo://bar.com/cafè.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
+	    "decode IRI (with a 2-byte utf8 seq)");
+	TEST("foo://bar.com/世界.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "世界.gmi", "", ""),
+	    "decode IRI");
+	TEST("foo://bar.com/😼.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "😼.gmi", "", ""),
+	    "decode IRI (with a 3-byte utf8 seq)");
+	TEST("foo://bar.com/😼/𤭢.gmi",
+	    PASS,
+	    IRI("foo", "bar.com", "", "😼/𤭢.gmi", "", ""),
+	    "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
+	TEST("foo://bar.com/世界/\xC0\x80",
+	    FAIL,
+	    empty,
+	    "reject invalid sequence (overlong NUL)");
+
+	return 0;
+}
blob - 09aad1cd4b4aed0af095e949d47fb4056764eded
blob + 8f530b0203310cdb0c880adf60ad6a610ae7823b
--- utf8.c
+++ utf8.c
{
uint32_t cp = 0, state = 0;
- for (; *p->uri; p->uri++)
- if (!utf8_decode(&state, &cp, *p->uri))
+ for (; *p->iri; p->iri++)
+ if (!utf8_decode(&state, &cp, *p->iri))
break;
/* reject the ASCII range */