commit bfa33dbefe2bc41a9961e881cfea874e6bc78cfa from: Omar Polo date: Thu Apr 22 10:37:56 2021 UTC import some bits from libphos commit - 7c698b4fcfd08469f011082be7a318a7d885d2bf commit + bfa33dbefe2bc41a9961e881cfea874e6bc78cfa blob - /dev/null blob + e95e9b39e98ae9ffde91c067ba5c65f6fffcbdaa (mode 644) --- /dev/null +++ phos/README.md @@ -0,0 +1 @@ +Some files imported from libphos blob - /dev/null blob + 8994b31b60a1decd709b17b7d6a215ae5cd22b47 (mode 644) --- /dev/null +++ phos/phos.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */
+
+/* for the time being, drop everything but the URL stuff */
+
+#ifndef PHOS_H
+#define PHOS_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * NOTE(review): the header name on the original #include line was lost
+ * when this diff was extracted.  <stdint.h> is what uint16_t below
+ * needs and <stddef.h> provides the size_t used by phos_serialize_uri;
+ * confirm against upstream.
+ */
+#include <stddef.h>
+#include <stdint.h>
+
+#define PHOS_URL_MAX_LEN 1024
+
+/*
+ * A URI split into its components.  Every char array holds a
+ * NUL-terminated string; an empty string means the component is
+ * missing (or empty: the two cases are not distinguished).  The
+ * delimiters ("://", ":", "?", "#") are not stored.
+ */
+struct phos_uri {
+	char		scheme[32];
+	char		host[1024];
+	char		port[6];	/* decimal digits, as found in the URI */
+	uint16_t	dec_port;	/* numeric value of port */
+	char		path[1024];
+	char		query[1024];
+	char		fragment[32];
+};
+
+/*
+ * phos_uri.c
+ *
+ * All the functions returning int yield 1 on success and 0 on failure.
+ */
+
+/* parse a URI or a relative reference into uri and normalize its path */
+int	 phos_parse_uri_reference(const char *s, struct phos_uri *uri);
+
+/* like phos_parse_uri_reference, but a scheme is required and a fragment is rejected */
+int	 phos_parse_absolute_uri(const char *s, struct phos_uri *uri);
+
+/* parse refstr and resolve it against base, storing the outcome in ret */
+int	 phos_resolve_uri_from_str(const struct phos_uri *base, const char *refstr, struct phos_uri *ret);
+
+/* collapse every run of slashes in the path into a single slash, in place */
+void	 phos_uri_drop_empty_segments(struct phos_uri *uri);
+
+/* write the textual form of uri in buf, which is len bytes wide */
+int	 phos_serialize_uri(const struct phos_uri *uri, char *buf, size_t len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PHOS_H */
blob - /dev/null
blob + 6e25302ebedfa13aaf01feea9e83d4fa0bb28a2e (mode 644)
--- /dev/null
+++ phos/phos_uri.c
@@ -0,0 +1,880 @@
+/*
+ * Copyright (c) 2021 Omar Polo
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * TODOs:
+ * - distinguish between an empty component and an undefined one
+ * - ...
+ */
+
+/*
+ * NOTE(review): the <...> header names on the #include lines below were
+ * lost when this diff was extracted.  They are restored from what the
+ * code visibly uses (assert, the is*() classifiers, the mem*() and
+ * str*() routines); strtonum, strlcpy and strlcat presumably come from
+ * <stdlib.h>/<string.h> or from compat.h -- confirm against upstream.
+ */
+#include <assert.h>
+
+#include "compat.h"
+
+#include "phos.h"
+
+#include <ctype.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+static const char	*sub_ip_literal(const char*);
+static const char	*sub_host_dummy(const char*);
+static const char	*sub_pchar(const char*);
+static const char	*sub_segment(const char*);
+static const char	*sub_segment_nz(const char*);
+static const char	*sub_segment_nz_nc(const char*);
+static const char	*sub_path_common(const char*);
+
+static const char	*parse_scheme(const char*, struct phos_uri*);
+static const char	*parse_host(const char*, struct phos_uri*);
+static const char	*parse_port(const char*, struct phos_uri*);
+static const char	*parse_authority(const char*, struct phos_uri*);
+static const char	*parse_path_abempty(const char*, struct phos_uri*);
+static const char	*parse_path_absolute(const char*, struct phos_uri*);
+static const char	*parse_path_noscheme(const char*, struct phos_uri*);
+static const char	*parse_path_rootless(const char*, struct phos_uri*);
+static const char	*parse_path_empty(const char*, struct phos_uri*);
+static const char	*parse_hier_part(const char*, struct phos_uri*);
+static const char	*parse_query(const char*, struct phos_uri*);
+static const char	*parse_fragment(const char*, struct phos_uri*);
+static const char	*parse_uri(const char*, struct phos_uri*);
+static const char	*parse_relative_part(const char*, struct phos_uri*);
+static const char	*parse_relative_ref(const char*, struct phos_uri*);
+static const char	*parse_uri_reference(const char*, struct phos_uri*);
+
+static int		 hasprefix(const char*, const char*);
+static char		*dotdot(char*, char*);
+static void		 path_clean(struct phos_uri*);
+static int		 merge_path(struct phos_uri*, const struct phos_uri*, const struct phos_uri*);
+
+static int		 phos_resolve_uri_from(const struct phos_uri*, const struct phos_uri*, struct phos_uri*);
+
+
+/* common defs */
+
+/*
+ * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ *
+ * Returns non-zero if c is one of them.
+ */
+static inline int
+gen_delims(int c)
+{
+	return c == ':'
+	    || c == '/'
+	    || c == '?'
+	    || c == '#'
+	    || c == '['
+	    || c == ']'
+	    || c == '@';
+}
+
+/*
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ *            / "*" / "+" / "," / ";" / "="
+ *
+ * Returns non-zero if c is one of them.
+ */
+static inline int
+sub_delims(int c)
+{
+	return c == '!'
+	    || c == '$'
+	    || c == '&'
+	    || c == '\''
+	    || c == '('
+	    || c == ')'
+	    || c == '*'
+	    || c == '+'
+	    || c == ','
+	    || c == ';'
+	    || c == '=';
+}
+
+/*
+ * reserved = gen-delims / sub-delims
+ */
+static inline int
+reserved(int c)
+{
+	return gen_delims(c) || sub_delims(c);
+}
+
+/*
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ */
+static inline int
+unreserved(int c)
+{
+	/*
+	 * Callers hand us a plain char, which may be negative; the
+	 * <ctype.h> classifiers are only defined for values
+	 * representable as unsigned char (and EOF).
+	 */
+	unsigned char uc = (unsigned char)c;
+
+	return isalpha(uc)
+	    || isdigit(uc)
+	    || c == '-'
+	    || c == '.'
+	    || c == '_'
+	    || c == '~';
+}
+
+
+/* subs */
+
+/*
+ * The sub_* routines only recognize a piece of grammar: they return a
+ * pointer just past the text they matched, or NULL when there is no
+ * match, and never store anything.
+ */
+
+/*
+ * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+ *
+ * in reality, we parse [.*]
+ */
+static const char *
+sub_ip_literal(const char *s)
+{
+	if (*s != '[')
+		return NULL;
+
+	while (*s != '\0' && *s != ']')
+		s++;
+
+	/* unterminated literal */
+	if (*s == '\0')
+		return NULL;
+	return ++s;
+}
+
+/*
+ * parse everything until the end of the authority, that is up to the
+ * first ":" (port), "/" (path), "?" (query), "#" (fragment) or \0.
+ * NB: empty hosts are technically valid, so this never fails.
+ */
+static const char *
+sub_host_dummy(const char *s)
+{
+	while (*s != '\0' && *s != ':' && *s != '/' &&
+	    *s != '?' && *s != '#')
+		s++;
+	return s;
+}
+
+/*
+ * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+ *
+ * Matches exactly one pchar; a pct-encoded triplet counts as one.
+ */
+static const char *
+sub_pchar(const char *s)
+{
+	if (*s == '\0')
+		return NULL;
+
+	if (unreserved(*s))
+		return ++s;
+
+	if (*s == '%') {
+		/* s[2] is read only if s[1] is not the terminator */
+		if (isxdigit((unsigned char)s[1]) &&
+		    isxdigit((unsigned char)s[2]))
+			return s + 3;
+	}
+
+	if (sub_delims(*s))
+		return ++s;
+
+	if (*s == ':' || *s == '@')
+		return ++s;
+
+	return NULL;
+}
+
+/*
+ * segment = *pchar
+ *
+ * Never fails: an empty segment is fine.
+ */
+static const char *
+sub_segment(const char *s)
+{
+	const char *t;
+
+	while ((t = sub_pchar(s)) != NULL)
+		s = t;
+	return s;
+}
+
+/* segment-nz = 1*pchar */
+static const char *
+sub_segment_nz(const char *s)
+{
+	if ((s = sub_pchar(s)) == NULL)
+		return NULL;
+	return sub_segment(s);
+}
+
+/*
+ * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ *
+ * so, 1*pchar excluding ":".  Matching stops in front of the first
+ * ":"; at least one character is needed, otherwise it's a failure.
+ */
+static const char *
+sub_segment_nz_nc(const char *s)
+{
+	const char *t, *start = s;
+
+	while (*s != ':' && (t = sub_pchar(s)) != NULL)
+		s = t;
+
+	if (s == start)
+		return NULL;
+	return s;
+}
+
+/* *( "/" segment ) */
+static const char *
+sub_path_common(const char *s)
+{
+	for (;;) {
+		if (*s != '/')
+			return s;
+		s++;
+		s = sub_segment(s);
+	}
+}
+
+
+/* parse fns */
+
+/*
+ * The parse_* routines recognize a piece of grammar starting at s and
+ * store what they matched into the matching field of parsed.  They
+ * return a pointer just past the matched text, or NULL on failure
+ * (no match, or the text doesn't fit in the field).
+ */
+
+/*
+ * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ *
+ * Fails also when the scheme runs up to the end of the string, since
+ * every caller needs a ":" after it.
+ */
+static const char *
+parse_scheme(const char *s, struct phos_uri *parsed)
+{
+	const char *start = s;
+	size_t len;
+
+	if (!isalpha((unsigned char)*s))
+		return NULL;
+
+	while (*s != '\0') {
+		if (isalpha((unsigned char)*s) ||
+		    isdigit((unsigned char)*s) ||
+		    *s == '+' ||
+		    *s == '-' ||
+		    *s == '.')
+			s++;
+		else
+			break;
+	}
+
+	if (*s == '\0')
+		return NULL;
+
+	len = s - start;
+	if (len >= sizeof(parsed->scheme))
+		return NULL;
+
+	memcpy(parsed->scheme, start, len);
+	parsed->scheme[len] = '\0';
+	return s;
+}
+
+/*
+ * host = IP-literal / IPv4address / reg-name
+ *
+ * rules IPv4address and reg-name are relaxed into sub_host_dummy.
+ */
+static const char *
+parse_host(const char *s, struct phos_uri *parsed)
+{
+	const char *t;
+	size_t len;
+
+	if ((t = sub_ip_literal(s)) != NULL ||
+	    (t = sub_host_dummy(s)) != NULL) {
+		len = t - s;
+		/* the bound is the size of the field we're filling */
+		if (len >= sizeof(parsed->host))
+			return NULL;
+		memcpy(parsed->host, s, len);
+		parsed->host[len] = '\0';
+		return t;
+	}
+
+	return NULL;
+}
+
+/*
+ * port = *digit
+ *
+ * NB: unlike the grammar, at least one digit is required here.  The
+ * digits are stored in parsed->port and their value, which has to be
+ * in the 0-65535 range, in parsed->dec_port.
+ */
+static const char *
+parse_port(const char *s, struct phos_uri *parsed)
+{
+	const char *errstr, *start = s;
+	size_t len;
+
+	while (isdigit((unsigned char)*s))
+		s++;
+
+	if (s == start)
+		return NULL;
+
+	len = s - start;
+	if (len >= sizeof(parsed->port))
+		return NULL;
+
+	memcpy(parsed->port, start, len);
+	parsed->port[len] = '\0';
+
+	parsed->dec_port = strtonum(parsed->port, 0, 65535, &errstr);
+	if (errstr != NULL)
+		return NULL;
+
+	return s;
+}
+
+/*
+ * authority = host [ ":" port ]
+ * (yep, blatantly ignore the userinfo stuff -- not relevant for Gemini)
+ */
+static const char *
+parse_authority(const char *s, struct phos_uri *parsed)
+{
+	if ((s = parse_host(s, parsed)) == NULL)
+		return NULL;
+
+	if (*s == ':') {
+		s++;
+		return parse_port(s, parsed);
+	}
+
+	return s;
+}
+
+/*
+ * Store the text in [start, end) as the path of parsed.  A NULL end
+ * (i.e. a failed match) is passed through, so that the callers can
+ * feed the outcome of a sub_* routine straight into us.  Returns end,
+ * or NULL if the text doesn't fit.
+ */
+static inline const char *
+set_path(const char *start, const char *end, struct phos_uri *parsed)
+{
+	size_t len;
+
+	if (end == NULL)
+		return NULL;
+
+	len = end - start;
+	if (len >= sizeof(parsed->path))
+		return NULL;
+	memcpy(parsed->path, start, len);
+	parsed->path[len] = '\0';
+	return end;
+}
+
+/*
+ * path-abempty = *( "/" segment )
+ */
+static const char *
+parse_path_abempty(const char *s, struct phos_uri *parsed)
+{
+	const char *t;
+
+	t = sub_path_common(s);
+	return set_path(s, t, parsed);
+}
+
+/*
+ * path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ */
+static const char *
+parse_path_absolute(const char *s, struct phos_uri *parsed)
+{
+	const char *t, *start = s;
+
+	if (*s != '/')
+		return NULL;
+
+	s++;
+	/* no segment after the slash: the path is just "/" */
+	if ((t = sub_segment_nz(s)) == NULL)
+		return set_path(start, s, parsed);
+
+	s = sub_path_common(t);
+	return set_path(start, s, parsed);
+}
+
+/*
+ * path-noscheme = segment-nz-nc *( "/" segment )
+ */
+static const char *
+parse_path_noscheme(const char *s, struct phos_uri *parsed)
+{
+	const char *start = s;
+
+	if ((s = sub_segment_nz_nc(s)) == NULL)
+		return NULL;
+	s = sub_path_common(s);
+	return set_path(start, s, parsed);
+}
+
+/*
+ * path-rootless = segment-nz *( "/" segment )
+ */
+static const char *
+parse_path_rootless(const char *s, struct phos_uri *parsed)
+{
+	const char *start = s;
+
+	if ((s = sub_segment_nz(s)) == NULL)
+		return NULL;
+	s = sub_path_common(s);
+	return set_path(start, s, parsed);
+}
+
+/*
+ * path-empty = 0<pchar>
+ *
+ * Matches nothing and stores nothing, hence it always succeeds.
+ */
+static const char *
+parse_path_empty(const char *s, struct phos_uri *parsed)
+{
+	return s;
+}
+
+/*
+ * hier-part = "//" authority path-abempty
+ *           / path-absolute
+ *           / path-rootless
+ *           / path-empty
+ */
+static const char *
+parse_hier_part(const char *s, struct phos_uri *parsed)
+{
+	const char *t;
+
+	if (s[0] == '/' && s[1] == '/') {
+		s += 2;
+		if ((s = parse_authority(s, parsed)) == NULL)
+			return NULL;
+		return parse_path_abempty(s, parsed);
+	}
+
+	if ((t = parse_path_absolute(s, parsed)) != NULL)
+		return t;
+
+	if ((t = parse_path_rootless(s, parsed)) != NULL)
+		return t;
+
+	return parse_path_empty(s, parsed);
+}
+
+/*
+ * query = *( pchar / "/" / "?" )
+ *
+ * s points past the leading "?".  Matching stops at the first
+ * character that can't be part of a query.
+ */
+static const char *
+parse_query(const char *s, struct phos_uri *parsed)
+{
+	const char *t, *start = s;
+	size_t len;
+
+	while (*s != '\0') {
+		if (*s == '/' || *s == '?') {
+			s++;
+			continue;
+		}
+
+		if ((t = sub_pchar(s)) == NULL)
+			break;
+		s = t;
+	}
+
+	len = s - start;
+	if (len >= sizeof(parsed->query))
+		return NULL;
+
+	memcpy(parsed->query, start, len);
+	parsed->query[len] = '\0';
+	return s;
+}
+
+/*
+ * fragment = *( pchar / "/" / "?" )
+ *
+ * s points past the leading "#".  The fragment is the last component,
+ * so, unlike parse_query, a character that can't be part of it is an
+ * error.
+ */
+static const char *
+parse_fragment(const char *s, struct phos_uri *parsed)
+{
+	const char *start = s;
+	size_t len;
+
+	for (;;) {
+		if (*s == '\0')
+			break;
+
+		if (*s == '/' || *s == '?') {
+			s++;
+			continue;
+		}
+
+		if ((s = sub_pchar(s)) == NULL)
+			return NULL;
+	}
+
+	len = s - start;
+	if (len >= sizeof(parsed->fragment))
+		return NULL;
+
+	memcpy(parsed->fragment, start, len);
+	parsed->fragment[len] = '\0';
+	return s;
+}
+
+/*
+ * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ */
+static const char *
+parse_uri(const char *s, struct phos_uri *parsed)
+{
+	if ((s = parse_scheme(s, parsed)) == NULL)
+		return NULL;
+
+	if (*s != ':')
+		return NULL;
+
+	s++;
+	if ((s = parse_hier_part(s, parsed)) == NULL)
+		return NULL;
+
+	if (*s == '?') {
+		s++;
+		if ((s = parse_query(s, parsed)) == NULL)
+			return NULL;
+	}
+
+	if (*s == '#') {
+		s++;
+		if ((s = parse_fragment(s, parsed)) == NULL)
+			return NULL;
+	}
+
+	return s;
+}
+
+/*
+ * relative-part = "//" authority path-abempty
+ *               / path-absolute
+ *               / path-noscheme
+ *               / path-empty
+ */
+static const char *
+parse_relative_part(const char *s, struct phos_uri *parsed)
+{
+	const char *t;
+
+	if (s[0] == '/' && s[1] == '/') {
+		s += 2;
+		if ((s = parse_authority(s, parsed)) == NULL)
+			return NULL;
+		return parse_path_abempty(s, parsed);
+	}
+
+	if ((t = parse_path_absolute(s, parsed)) != NULL)
+		return t;
+
+	if ((t = parse_path_noscheme(s, parsed)) != NULL)
+		return t;
+
+	return parse_path_empty(s, parsed);
+}
+
+/*
+ * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+ */
+static const char *
+parse_relative_ref(const char *s, struct phos_uri *parsed)
+{
+	if ((s = parse_relative_part(s, parsed)) == NULL)
+		return NULL;
+
+	if (*s == '?') {
+		s++;
+		if ((s = parse_query(s, parsed)) == NULL)
+			return NULL;
+	}
+
+	if (*s == '#') {
+		s++;
+		if ((s = parse_fragment(s, parsed)) == NULL)
+			return NULL;
+	}
+
+	return s;
+}
+
+/*
+ * URI-reference = URI / relative-ref
+ */
+static const char *
+parse_uri_reference(const char *s, struct phos_uri *parsed)
+{
+	const char *t;
+
+	if ((t = parse_uri(s, parsed)) != NULL)
+		return t;
+
+	/* forget whatever the failed attempt may have stored */
+	memset(parsed, 0, sizeof(*parsed));
+	return parse_relative_ref(s, parsed);
+}
+
+
+/*
+ * absolute-URI = scheme ":" hier-part [ "?" query ]
+ */
+static const char *
+parse_absolute_uri(const char *s, struct phos_uri *parsed)
+{
+	if ((s = parse_scheme(s, parsed)) == NULL)
+		return NULL;
+
+	if (*s != ':')
+		return NULL;
+
+	s++;
+	if ((s = parse_hier_part(s, parsed)) == NULL)
+		return NULL;
+
+	if (*s == '?') {
+		s++;
+		if ((s = parse_query(s, parsed)) == NULL)
+			return NULL;
+	}
+
+	return s;
+}
+
+
+/* normalizing fns */
+
+/*
+ * Returns non-zero if str starts with prfx.  The empty prefix matches
+ * everything.
+ */
+static int
+hasprefix(const char *str, const char *prfx)
+{
+	for (; *str == *prfx && *prfx != '\0'; str++, prfx++)
+		;
+
+	return *prfx == '\0';
+}
+
+/*
+ * Drop the segment that comes right before point: look backwards for
+ * the previous slash, without going past start, and slide the text at
+ * point (terminator included) over it.  Returns where the text that
+ * was at point now begins.
+ */
+static char *
+dotdot(char *point, char *start)
+{
+	char *t;
+
+	/*
+	 * Step back only while there's room: if point is already at
+	 * start there's nothing to drop and t stays put.  Stopping at
+	 * start is unconditional, whether there is a slash or not.
+	 */
+	for (t = point; t > start;) {
+		--t;
+		if (*t == '/')
+			break;
+	}
+
+	memmove(t, point, strlen(point)+1);
+	return t;
+}
+
+/*
+ * This is the "Remove Dot Segments" straight outta RFC3986, section
+ * 5.2.4
+ *
+ * It works in place on uri->path; `in' plays the role of the input
+ * buffer and whatever comes before it is the output built so far.
+ */
+static void
+path_clean(struct phos_uri *uri)
+{
+	char *in = uri->path;
+
+	/* in becomes NULL once step E finds no more slashes */
+	while (in != NULL && *in != '\0') {
+		assert(in >= uri->path);
+
+		/* A) drop leading ../ or ./ */
+		if (hasprefix(in, "../"))
+			memmove(in, &in[3], strlen(&in[3])+1);
+		else if (hasprefix(in, "./"))
+			memmove(in, &in[2], strlen(&in[2])+1);
+
+		/* B) replace /./ or /. with / */
+		else if (hasprefix(in, "/./"))
+			memmove(&in[1], &in[3], strlen(&in[3])+1);
+		else if (!strcmp(in, "/."))
+			in[1] = '\0';
+
+		/* C) resolve dot-dot */
+		else if (hasprefix(in, "/../")) {
+			in = dotdot(in, uri->path);
+			memmove(&in[1], &in[4], strlen(&in[4])+1);
+		} else if (!strcmp(in, "/..")) {
+			in = dotdot(in, uri->path);
+			in[1] = '\0';
+			break;
+		}
+
+		/* D) a lone . or .. goes away */
+		else if (!strcmp(in, "."))
+			*in = '\0';
+		else if (!strcmp(in, ".."))
+			*in = '\0';
+
+		/* E) keep this segment and move to the next one */
+		else
+			in = strchr(in+1, '/');
+	}
+}
+
+/*
+ * see RFC3986 5.2.3 "Merge Paths".
+ *
+ * Build in ret->path the path of ref appended to the path of base
+ * stripped of its last segment.  When base has a host and an empty
+ * path, or when its path has no slash at all, the path of ref is
+ * appended to "/" instead.  Returns non-zero if the outcome fits in
+ * ret->path.
+ */
+static int
+merge_path(struct phos_uri *ret, const struct phos_uri *base,
+    const struct phos_uri *ref)
+{
+	const char *s;
+	size_t len, n;
+
+	len = sizeof(ret->path);
+
+	s = strrchr(base->path, '/');
+	if ((*base->host != '\0' && *base->path == '\0') || s == NULL) {
+		strlcpy(ret->path, "/", len);
+	} else {
+		/* copy the / too */
+		n = s - base->path + 1;
+		memcpy(ret->path, base->path, n);
+		/* don't count on ret being zeroed for strlcat's sake */
+		ret->path[n] = '\0';
+	}
+
+	return strlcat(ret->path, ref->path, len) < len;
+}
+
+
+/* public interface */
+
+/*
+ * Parse s as an absolute URI (a scheme is mandatory, a fragment is not
+ * allowed) into uri and normalize its path.  The whole string must be
+ * consumed.  Returns 1 on success, 0 otherwise; on failure uri may be
+ * partially filled.
+ */
+int
+phos_parse_absolute_uri(const char *s, struct phos_uri *uri)
+{
+	memset(uri, 0, sizeof(*uri));
+
+	if ((s = parse_absolute_uri(s, uri)) == NULL)
+		return 0;
+	/* trailing garbage */
+	if (*s != '\0')
+		return 0;
+	path_clean(uri);
+	return 1;
+}
+
+/*
+ * Parse s as a URI reference (either a URI or a relative reference)
+ * into uri and normalize its path.  The whole string must be consumed.
+ * Returns 1 on success, 0 otherwise; on failure uri may be partially
+ * filled.
+ */
+int
+phos_parse_uri_reference(const char *s, struct phos_uri *uri)
+{
+	memset(uri, 0, sizeof(*uri));
+
+	if ((s = parse_uri_reference(s, uri)) == NULL)
+		return 0;
+	/* trailing garbage */
+	if (*s != '\0')
+		return 0;
+	path_clean(uri);
+	return 1;
+}
+
+/*
+ * Implementation of the "transform references" algorithm from
+ * RFC3986, see 5.2.2.
+ *
+ * We expect base and ref to be URIs constructed by this library
+ * (because we emit only normalized URIs).
+ *
+ * ATM this is marked as private because:
+ * - let's say the URI is "."
+ * - one calls phos_parse_uri_reference
+ * - it exits with success, but the path becomes ""
+ * - this routine does the right thing, but the outcome is not what expected.
+ *
+ * so users for now have to use resolve_uri_from_str, which parses
+ * the URI but does not normalize it, and then calls into us.
+ *
+ * ret is zeroed and then filled with ref resolved against base.
+ * Returns 1 on success, 0 if the merged path doesn't fit in ret->path.
+ */
+static int
+phos_resolve_uri_from(const struct phos_uri *base, const struct phos_uri *ref,
+    struct phos_uri *ret)
+{
+	memset(ret, 0, sizeof(*ret));
+
+	if (*ref->scheme != '\0') {
+		/* ref is a full URI: base is not involved at all */
+		strlcpy(ret->scheme, ref->scheme, sizeof(ret->scheme));
+		strlcpy(ret->host, ref->host, sizeof(ret->host));
+		strlcpy(ret->port, ref->port, sizeof(ret->port));
+		ret->dec_port = ref->dec_port;
+		strlcpy(ret->path, ref->path, sizeof(ret->path));
+		/* 5.2.2 wants the dot segments removed here too */
+		path_clean(ret);
+		strlcpy(ret->query, ref->query, sizeof(ret->query));
+	} else {
+		if (*ref->host != '\0') {
+			/* network-path reference: only the scheme is inherited */
+			strlcpy(ret->host, ref->host, sizeof(ret->host));
+			strlcpy(ret->port, ref->port, sizeof(ret->port));
+			ret->dec_port = ref->dec_port;
+			strlcpy(ret->path, ref->path, sizeof(ret->path));
+			path_clean(ret);
+			strlcpy(ret->query, ref->query, sizeof(ret->query));
+		} else {
+			if (*ref->path == '\0') {
+				/* same document, maybe with another query */
+				strlcpy(ret->path, base->path, sizeof(ret->path));
+				if (*ref->query != '\0')
+					strlcpy(ret->query, ref->query, sizeof(ret->query));
+				else
+					strlcpy(ret->query, base->query, sizeof(ret->query));
+			} else {
+				if (*ref->path == '/')
+					strlcpy(ret->path, ref->path, sizeof(ret->path));
+				else {
+					if (!merge_path(ret, base, ref))
+						return 0;
+				}
+				path_clean(ret);
+
+				strlcpy(ret->query, ref->query, sizeof(ret->query));
+			}
+
+			strlcpy(ret->host, base->host, sizeof(ret->host));
+			strlcpy(ret->port, base->port, sizeof(ret->port));
+			ret->dec_port = base->dec_port;
+		}
+
+		strlcpy(ret->scheme, base->scheme, sizeof(ret->scheme));
+	}
+
+	/* the fragment always comes from the reference */
+	strlcpy(ret->fragment, ref->fragment, sizeof(ret->fragment));
+
+	return 1;
+}
+
+/*
+ * Parse refstr as a URI reference, without normalizing it, and resolve
+ * it against base; the outcome is stored in ret.  Returns 1 on
+ * success, 0 if refstr can't be parsed in its entirety or the
+ * resolution fails.
+ */
+int
+phos_resolve_uri_from_str(const struct phos_uri *base, const char *refstr,
+    struct phos_uri *ret)
+{
+	struct phos_uri ref;
+
+	memset(&ref, 0, sizeof(ref));
+
+	if ((refstr = parse_uri_reference(refstr, &ref)) == NULL)
+		return 0;
+
+	/* trailing garbage */
+	if (*refstr != '\0')
+		return 0;
+
+	return phos_resolve_uri_from(base, &ref, ret);
+}
+
+/*
+ * Collapse every run of consecutive slashes in the path of uri into a
+ * single slash, in place.
+ */
+void
+phos_uri_drop_empty_segments(struct phos_uri *uri)
+{
+	char *i;
+
+	for (i = uri->path; *i != '\0';) {
+		if (i[0] == '/' && i[1] == '/') {
+			/*
+			 * Drop one slash (this moves the \0 too) and
+			 * look at the same spot again, there may be
+			 * more to drop.  Not advancing, rather than
+			 * stepping back and forth, never moves i in
+			 * front of the path.
+			 */
+			memmove(i, i+1, strlen(i));
+		} else
+			i++;
+	}
+}
+
+/*
+ * Write the textual form of uri into buf, which is len bytes wide:
+ * the scheme followed by ":", "//" and the host (with ":" and the
+ * port, if any), the path, "?" and the query, "#" and the fragment.
+ * Empty components are left out along with their delimiter.
+ *
+ * Returns 1 on success, 0 if buf is too small; in that case buf holds
+ * a truncated URI.
+ */
+int
+phos_serialize_uri(const struct phos_uri *uri, char *buf, size_t len)
+{
+	/* append s to buf, or bail out if it doesn't fit */
+#define CAT(s)						\
+	do {						\
+		if (strlcat(buf, (s), len) >= len)	\
+			return 0;			\
+	} while (0)
+
+	strlcpy(buf, "", len);
+
+	if (*uri->scheme != '\0') {
+		CAT(uri->scheme);
+		CAT(":");
+	}
+
+	if (*uri->host != '\0') {
+		CAT("//");
+		CAT(uri->host);
+
+		/* a port is part of the authority: no host, no port */
+		if (*uri->port != '\0') {
+			CAT(":");
+			CAT(uri->port);
+		}
+	}
+
+	CAT(uri->path);
+
+	if (*uri->query != '\0') {
+		CAT("?");
+		CAT(uri->query);
+	}
+
+	if (*uri->fragment != '\0') {
+		CAT("#");
+		CAT(uri->fragment);
+	}
+
+	return 1;
+
+#undef CAT
+}