2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 * - distinguish between an empty component and an undefined one
35 static const char *sub_ip_literal(const char*);
36 static const char *sub_host_dummy(const char*);
37 static const char *sub_pchar(const char*);
39 static const char *sub_segment(const char*);
40 static const char *sub_segment_nz(const char*);
41 static const char *sub_segment_nz_nc(const char*);
42 static const char *sub_path_common(const char*);
44 static const char *parse_scheme(const char*, struct phos_uri*);
45 static const char *parse_host(const char*, struct phos_uri*);
46 static const char *parse_port(const char*, struct phos_uri*);
47 static const char *parse_authority(const char*, struct phos_uri*);
48 static const char *parse_path_abempty(const char*, struct phos_uri*);
49 static const char *parse_path_absolute(const char*, struct phos_uri*);
50 static const char *parse_path_noscheme(const char*, struct phos_uri*);
51 static const char *parse_path_rootless(const char*, struct phos_uri*);
52 static const char *parse_path_empty(const char*, struct phos_uri*);
53 static const char *parse_hier_part(const char*, struct phos_uri*);
54 static const char *parse_query(const char*, struct phos_uri*);
55 static const char *parse_fragment(const char*, struct phos_uri*);
56 static const char *parse_uri(const char*, struct phos_uri*);
57 static const char *parse_relative_part(const char*, struct phos_uri*);
58 static const char *parse_relative_ref(const char*, struct phos_uri*);
59 static const char *parse_uri_reference(const char*, struct phos_uri*);
61 static int hasprefix(const char*, const char*);
62 static char *dotdot(char*, char*);
63 static void path_clean(struct phos_uri*);
64 static int merge_path(struct phos_uri*, const struct phos_uri*, const struct phos_uri*);
66 static int phos_resolve_uri_from(const struct phos_uri*, const struct phos_uri*, struct phos_uri*);
102 return gen_delims(c) || sub_delims(c);
120 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
122 * in reality, we parse [.*]
125 sub_ip_literal(const char *s)
130 while (*s != '\0' && *s != ']')
139 * parse everything until : or / (or \0).
140 * NB: empty hosts are technically valid!
143 sub_host_dummy(const char *s)
145 while (*s != '\0' && *s != ':' && *s != '/')
151 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
154 sub_pchar(const char *s)
163 if (isxdigit(s[1]) && isxdigit(s[2]))
170 if (*s == ':' || *s == '@')
180 sub_segment(const char *s)
184 while ((t = sub_pchar(s)) != NULL)
189 /* segment-nz = 1*pchar */
191 sub_segment_nz(const char *s)
193 if ((s = sub_pchar(s)) == NULL)
195 return sub_segment(s);
199 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
201 * so, 1*pchar excluding ":"
204 sub_segment_nz_nc(const char *s)
211 while (*s != ':' && (t = sub_pchar(s)) != NULL)
216 /* *( "/" segment ) */
218 sub_path_common(const char *s)
232 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
235 parse_scheme(const char *s, struct phos_uri *parsed)
237 const char *start = s;
258 if (len >= sizeof(parsed->scheme))
261 memcpy(parsed->scheme, start, len);
266 * host = IP-literal / IPv4address / reg-name
268 * rules IPv4address and reg-name are relaxed into sub_host_dummy.
271 parse_host(const char *s, struct phos_uri *parsed)
276 if ((t = sub_ip_literal(s)) != NULL ||
277 (t = sub_host_dummy(s)) != NULL) {
/*
 * The bytes are copied into parsed->host, so the bound must be the
 * size of the host buffer (the check used to look at the scheme
 * buffer instead).
 */
279 if (len >= sizeof(parsed->host))
281 memcpy(parsed->host, s, len);
292 parse_port(const char *s, struct phos_uri *parsed)
294 const char *errstr, *start = s;
304 if (len >= sizeof(parsed->port))
307 memcpy(parsed->port, start, len);
309 parsed->dec_port = strtonum(parsed->port, 0, 65535, &errstr);
317 * authority = host [ ":" port ]
318 * (yep, blatantly ignore the userinfo stuff -- not relevant for Gemini)
321 parse_authority(const char *s, struct phos_uri *parsed)
323 if ((s = parse_host(s, parsed)) == NULL)
328 return parse_port(s, parsed);
334 static inline const char *
335 set_path(const char *start, const char *end, struct phos_uri *parsed)
343 if (len >= sizeof(parsed->path))
345 memcpy(parsed->path, start, len);
350 * path-abempty = *( "/" segment )
353 parse_path_abempty(const char *s, struct phos_uri *parsed)
357 t = sub_path_common(s);
358 return set_path(s, t, parsed);
362 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
365 parse_path_absolute(const char *s, struct phos_uri *parsed)
367 const char *t, *start = s;
373 if ((t = sub_segment_nz(s)) == NULL)
374 return set_path(start, s, parsed);
376 s = sub_path_common(t);
377 return set_path(start, s, parsed);
381 * path-noscheme = segment-nz-nc *( "/" segment )
384 parse_path_noscheme(const char *s, struct phos_uri *parsed)
386 const char *start = s;
388 if ((s = sub_segment_nz_nc(s)) == NULL)
390 s = sub_path_common(s);
391 return set_path(start, s, parsed);
395 * path-rootless = segment-nz *( "/" segment )
398 parse_path_rootless(const char *s, struct phos_uri *parsed)
400 const char *start = s;
402 if ((s = sub_segment_nz(s)) == NULL)
404 s = sub_path_common(s);
405 return set_path(start, s, parsed);
409 * path-empty = 0<pchar>
412 parse_path_empty(const char *s, struct phos_uri *parsed)
418 * hier-part = "//" authority path-abempty
424 parse_hier_part(const char *s, struct phos_uri *parsed)
428 if (s[0] == '/' && s[1] == '/') {
430 if ((s = parse_authority(s, parsed)) == NULL)
432 return parse_path_abempty(s, parsed);
435 if ((t = parse_path_absolute(s, parsed)) != NULL)
438 if ((t = parse_path_rootless(s, parsed)) != NULL)
441 return parse_path_empty(s, parsed);
445 * query = *( pchar / "/" / "?" )
448 parse_query(const char *s, struct phos_uri *parsed)
450 const char *t, *start = s;
454 if (*s == '/' || *s == '?') {
459 if ((t = sub_pchar(s)) == NULL)
465 if (len >= sizeof(parsed->query))
468 memcpy(parsed->query, start, len);
473 * fragment = *( pchar / "/" / "?" )
476 parse_fragment(const char *s, struct phos_uri *parsed)
478 const char *start = s;
485 if (*s == '/' || *s == '?') {
490 if ((s = sub_pchar(s)) == NULL)
495 if (len >= sizeof(parsed->fragment))
498 memcpy(parsed->fragment, start, len);
503 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
506 parse_uri(const char *s, struct phos_uri *parsed)
508 if ((s = parse_scheme(s, parsed)) == NULL)
515 if ((s = parse_hier_part(s, parsed)) == NULL)
520 if ((s = parse_query(s, parsed)) == NULL)
526 if ((s = parse_fragment(s, parsed)) == NULL)
534 * relative-part = "//" authority path-abempty
540 parse_relative_part(const char *s, struct phos_uri *parsed)
544 if (s[0] == '/' && s[1] == '/') {
546 if ((s = parse_authority(s, parsed)) == NULL)
548 return parse_path_abempty(s, parsed);
551 if ((t = parse_path_absolute(s, parsed)) != NULL)
554 if ((t = parse_path_noscheme(s, parsed)) != NULL)
557 return parse_path_empty(s, parsed);
561 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
564 parse_relative_ref(const char *s, struct phos_uri *parsed)
566 if ((s = parse_relative_part(s, parsed)) == NULL)
571 if ((s = parse_query(s, parsed)) == NULL)
577 if ((s = parse_fragment(s, parsed)) == NULL)
585 * URI-reference = URI / relative-ref
588 parse_uri_reference(const char *s, struct phos_uri *parsed)
592 if ((t = parse_uri(s, parsed)) != NULL)
594 memset(parsed, 0, sizeof(*parsed));
595 return parse_relative_ref(s, parsed);
600 * absolute-URI = scheme ":" hier-part [ "?" query ]
603 parse_absolute_uri(const char *s, struct phos_uri *parsed)
605 if ((s = parse_scheme(s, parsed)) == NULL)
612 if ((s = parse_hier_part(s, parsed)) == NULL)
617 if ((s = parse_query(s, parsed)) == NULL)
625 /* normalizing fns */
628 hasprefix(const char *str, const char *prfx)
630 for (; *str == *prfx && *prfx != '\0'; str++, prfx++)
633 return *prfx == '\0';
637 dotdot(char *point, char *start)
641 for (t = point-1; t > start; --t) {
648 memmove(t, point, strlen(point)+1);
653 * This is the "Remove Dot Segments" straight outta RFC3986, section
657 path_clean(struct phos_uri *uri)
659 char *in = uri->path;
661 while (in != NULL && *in != '\0') {
662 assert(in >= uri->path);
664 /* A) drop leading ../ or ./ */
665 if (hasprefix(in, "../"))
666 memmove(in, &in[3], strlen(&in[3])+1);
667 else if (hasprefix(in, "./"))
668 memmove(in, &in[2], strlen(&in[2])+1);
670 /* B) replace /./ or /. with / */
671 else if (hasprefix(in, "/./"))
672 memmove(&in[1], &in[3], strlen(&in[3])+1);
673 else if (!strcmp(in, "/."))
676 /* C) resolve dot-dot */
677 else if (hasprefix(in, "/../")) {
678 in = dotdot(in, uri->path);
679 memmove(&in[1], &in[4], strlen(&in[4])+1);
680 } else if (!strcmp(in, "/..")) {
681 in = dotdot(in, uri->path);
687 else if (!strcmp(in, "."))
689 else if (!strcmp(in, ".."))
694 in = strchr(in+1, '/');
699 * see RFC3986 5.3.3 "Merge Paths".
702 merge_path(struct phos_uri *ret, const struct phos_uri *base,
703 const struct phos_uri *ref)
708 len = sizeof(ret->path);
710 s = strrchr(base->path, '/');
711 if ((*base->host != '\0' && *base->path == '\0') || s == NULL) {
712 strlcpy(ret->path, "/", len);
715 memcpy(ret->path, base->path, s - base->path + 1);
718 return strlcat(ret->path, ref->path, len) < len;
722 /* public interface */
725 phos_parse_absolute_uri(const char *s, struct phos_uri *uri)
727 memset(uri, 0, sizeof(*uri));
729 if ((s = parse_absolute_uri(s, uri)) == NULL)
738 phos_parse_uri_reference(const char *s, struct phos_uri *uri)
740 memset(uri, 0, sizeof(*uri));
742 if ((s = parse_uri_reference(s, uri)) == NULL)
751 * Implementation of the "transform references" algorithm from
752 * RFC3986, see 5.2.2.
754 * We expect base and ref to be URIs constructed by this library
755 * (because we emit only normalized URIs).
757 * ATM this is marked as private because:
758 * - let's say the URI is "."
759 * - one calls phos_parse_uri_reference
760 * - it exits with success, but the path becomes ""
761 * - this routine does the right thing, but the outcome is not what is expected.
763 * so users for now have to use phos_resolve_uri_from_str, which parses
764 * the URI but does not normalize it, and then calls into us.
767 phos_resolve_uri_from(const struct phos_uri *base, const struct phos_uri *ref,
768 struct phos_uri *ret)
770 memset(ret, 0, sizeof(*ret));
772 if (*ref->scheme != '\0') {
773 strlcpy(ret->scheme, ref->scheme, sizeof(ret->scheme));
774 strlcpy(ret->host, ref->host, sizeof(ret->host));
775 strlcpy(ret->port, ref->port, sizeof(ret->port));
/*
 * Take the numeric port from ref together with its textual form; ret
 * was zeroed above, so assigning ret->dec_port to itself left it at 0.
 */
776 ret->dec_port = ref->dec_port;
777 strlcpy(ret->path, ref->path, sizeof(ret->path));
778 strlcpy(ret->query, ref->query, sizeof(ret->query));
780 if (*ref->host != '\0') {
781 strlcpy(ret->host, ref->host, sizeof(ret->host));
782 strlcpy(ret->port, ref->port, sizeof(ret->port));
/* same as above: the authority comes from ref, numeric port included */
783 ret->dec_port = ref->dec_port;
784 strlcpy(ret->path, ref->path, sizeof(ret->path));
785 strlcpy(ret->query, ref->query, sizeof(ret->query));
787 if (*ref->path == '\0') {
788 strlcpy(ret->path, base->path, sizeof(ret->path));
789 if (*ref->query != '\0')
790 strlcpy(ret->query, ref->query, sizeof(ret->query));
792 strlcpy(ret->query, base->query, sizeof(ret->query));
794 if (*ref->path == '/')
795 strlcpy(ret->path, ref->path, sizeof(ret->path));
797 if (!merge_path(ret, base, ref))
802 strlcpy(ret->query, ref->query, sizeof(ret->query));
805 strlcpy(ret->host, base->host, sizeof(ret->host));
806 strlcpy(ret->port, base->port, sizeof(ret->port));
807 ret->dec_port = base->dec_port;
810 strlcpy(ret->scheme, base->scheme, sizeof(ret->scheme));
813 strlcpy(ret->fragment, ref->fragment, sizeof(ret->fragment));
819 phos_resolve_uri_from_str(const struct phos_uri *base, const char *refstr,
820 struct phos_uri *ret)
824 memset(&ref, 0, sizeof(ref));
826 if ((refstr = parse_uri_reference(refstr, &ref)) == NULL)
832 return phos_resolve_uri_from(base, &ref, ret);
836 phos_uri_drop_empty_segments(struct phos_uri *uri)
840 for (i = uri->path; *i; ++i) {
841 if (*i == '/' && *(i+1) == '/') {
842 memmove(i, i+1, strlen(i)); /* move also the \0 */
849 phos_uri_set_query(struct phos_uri *uri, const char *query)
855 len = sizeof(uri->query);
857 memset(uri->query, 0, len);
859 for (; *query != '\0' && len > 0; ++query) {
864 unreserved(*query) ||
865 sub_delims(*query)) {
874 sprintf(out, "%02X", t);
879 return *query == '\0';
883 phos_serialize_uri(const struct phos_uri *uri, char *buf, size_t len)
886 if (strlcat(buf, s, len) >= len) \
889 strlcpy(buf, "", len);
891 if (*uri->scheme != '\0') {
896 if (*uri->host != '\0') {
901 if (*uri->port != '\0' && strcmp(uri->port, "1965")) {
908 if (*uri->query != '\0') {
913 if (*uri->fragment) {