2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
19 * - distinguish between an empty component and an undefined one
34 static const char *sub_ip_literal(const char*);
35 static const char *sub_host_dummy(const char*);
36 static const char *sub_pchar(const char*);
37 static const char *sub_segment(const char*);
38 static const char *sub_segment_nz(const char*);
39 static const char *sub_segment_nz_nc(const char*);
40 static const char *sub_path_common(const char*);
42 static const char *parse_scheme(const char*, struct phos_uri*);
43 static const char *parse_host(const char*, struct phos_uri*);
44 static const char *parse_port(const char*, struct phos_uri*);
45 static const char *parse_authority(const char*, struct phos_uri*);
46 static const char *parse_path_abempty(const char*, struct phos_uri*);
47 static const char *parse_path_absolute(const char*, struct phos_uri*);
48 static const char *parse_path_noscheme(const char*, struct phos_uri*);
49 static const char *parse_path_rootless(const char*, struct phos_uri*);
50 static const char *parse_path_empty(const char*, struct phos_uri*);
51 static const char *parse_hier_part(const char*, struct phos_uri*);
52 static const char *parse_query(const char*, struct phos_uri*);
53 static const char *parse_fragment(const char*, struct phos_uri*);
54 static const char *parse_uri(const char*, struct phos_uri*);
55 static const char *parse_relative_part(const char*, struct phos_uri*);
56 static const char *parse_relative_ref(const char*, struct phos_uri*);
57 static const char *parse_uri_reference(const char*, struct phos_uri*);
59 static int hasprefix(const char*, const char*);
60 static char *dotdot(char*, char*);
61 static void path_clean(struct phos_uri*);
62 static int merge_path(struct phos_uri*, const struct phos_uri*, const struct phos_uri*);
64 static int phos_resolve_uri_from(const struct phos_uri*, const struct phos_uri*, struct phos_uri*);
100 return gen_delims(c) || sub_delims(c);
118 * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
120 * in reality, we parse [.*]
123 sub_ip_literal(const char *s)
128 while (*s != '\0' && *s != ']')
137 * parse everything until : or / (or \0).
138 * NB: empty hosts are technically valid!
141 sub_host_dummy(const char *s)
143 while (*s != '\0' && *s != ':' && *s != '/')
149 * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
152 sub_pchar(const char *s)
161 if (isxdigit((unsigned char)s[1]) && isxdigit((unsigned char)s[2])) /* <ctype.h> needs unsigned char values; plain char may be negative */
168 if (*s == ':' || *s == '@')
178 sub_segment(const char *s)
182 while ((t = sub_pchar(s)) != NULL)
187 /* segment-nz = 1*pchar */
189 sub_segment_nz(const char *s)
191 if ((s = sub_pchar(s)) == NULL)
193 return sub_segment(s);
197 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
199 * so, 1*pchar excluding ":"
202 sub_segment_nz_nc(const char *s)
209 while (*s != ':' && (t = sub_pchar(s)) != NULL)
214 /* *( "/" segment ) */
216 sub_path_common(const char *s)
230 * scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
233 parse_scheme(const char *s, struct phos_uri *parsed)
235 const char *start = s;
256 if (len >= sizeof(parsed->scheme))
259 memcpy(parsed->scheme, start, len);
264 * host = IP-literal / IPv4address / reg-name
266 * rules IPv4address and reg-name are relaxed into sub_host_dummy.
269 parse_host(const char *s, struct phos_uri *parsed)
274 if ((t = sub_ip_literal(s)) != NULL ||
275 (t = sub_host_dummy(s)) != NULL) {
277 if (len >= sizeof(parsed->host)) /* bound by the destination buffer (host), leaving room for the NUL */
279 memcpy(parsed->host, s, len);
290 parse_port(const char *s, struct phos_uri *parsed)
292 const char *errstr, *start = s;
302 if (len >= sizeof(parsed->port))
305 memcpy(parsed->port, start, len);
307 parsed->dec_port = strtonum(parsed->port, 0, 65535, &errstr);
315 * authority = host [ ":" port ]
316 * (yep, blatantly ignore the userinfo stuff -- not relevant for Gemini)
319 parse_authority(const char *s, struct phos_uri *parsed)
321 if ((s = parse_host(s, parsed)) == NULL)
326 return parse_port(s, parsed);
332 static inline const char *
333 set_path(const char *start, const char *end, struct phos_uri *parsed)
341 if (len >= sizeof(parsed->path))
343 memcpy(parsed->path, start, len);
348 * path-abempty = *( "/" segment )
351 parse_path_abempty(const char *s, struct phos_uri *parsed)
355 t = sub_path_common(s);
356 return set_path(s, t, parsed);
360 * path-absolute = "/" [ segment-nz *( "/" segment ) ]
363 parse_path_absolute(const char *s, struct phos_uri *parsed)
365 const char *t, *start = s;
371 if ((t = sub_segment_nz(s)) == NULL)
372 return set_path(start, s, parsed);
374 s = sub_path_common(t);
375 return set_path(start, s, parsed);
379 * path-noscheme = segment-nz-nc *( "/" segment )
382 parse_path_noscheme(const char *s, struct phos_uri *parsed)
384 const char *start = s;
386 if ((s = sub_segment_nz_nc(s)) == NULL)
388 s = sub_path_common(s);
389 return set_path(start, s, parsed);
393 * path-rootless = segment-nz *( "/" segment )
396 parse_path_rootless(const char *s, struct phos_uri *parsed)
398 const char *start = s;
400 if ((s = sub_segment_nz(s)) == NULL)
402 s = sub_path_common(s);
403 return set_path(start, s, parsed);
407 * path-empty = 0<pchar>
410 parse_path_empty(const char *s, struct phos_uri *parsed)
416 * hier-part = "//" authority path-abempty
422 parse_hier_part(const char *s, struct phos_uri *parsed)
426 if (s[0] == '/' && s[1] == '/') {
428 if ((s = parse_authority(s, parsed)) == NULL)
430 return parse_path_abempty(s, parsed);
433 if ((t = parse_path_absolute(s, parsed)) != NULL)
436 if ((t = parse_path_rootless(s, parsed)) != NULL)
439 return parse_path_empty(s, parsed);
443 * query = *( pchar / "/" / "?" )
446 parse_query(const char *s, struct phos_uri *parsed)
448 const char *t, *start = s;
452 if (*s == '/' || *s == '?') {
457 if ((t = sub_pchar(s)) == NULL)
463 if (len >= sizeof(parsed->query))
466 memcpy(parsed->query, start, len);
471 * fragment = *( pchar / "/" / "?" )
474 parse_fragment(const char *s, struct phos_uri *parsed)
476 const char *start = s;
483 if (*s == '/' || *s == '?') {
488 if ((s = sub_pchar(s)) == NULL)
493 if (len >= sizeof(parsed->fragment))
496 memcpy(parsed->fragment, start, len);
501 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
504 parse_uri(const char *s, struct phos_uri *parsed)
506 if ((s = parse_scheme(s, parsed)) == NULL)
513 if ((s = parse_hier_part(s, parsed)) == NULL)
518 if ((s = parse_query(s, parsed)) == NULL)
524 if ((s = parse_fragment(s, parsed)) == NULL)
532 * relative-part = "//" authority path-abempty
538 parse_relative_part(const char *s, struct phos_uri *parsed)
542 if (s[0] == '/' && s[1] == '/') {
544 if ((s = parse_authority(s, parsed)) == NULL)
546 return parse_path_abempty(s, parsed);
549 if ((t = parse_path_absolute(s, parsed)) != NULL)
552 if ((t = parse_path_noscheme(s, parsed)) != NULL)
555 return parse_path_empty(s, parsed);
559 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
562 parse_relative_ref(const char *s, struct phos_uri *parsed)
564 if ((s = parse_relative_part(s, parsed)) == NULL)
569 if ((s = parse_query(s, parsed)) == NULL)
575 if ((s = parse_fragment(s, parsed)) == NULL)
583 * URI-reference = URI / relative-ref
586 parse_uri_reference(const char *s, struct phos_uri *parsed)
590 if ((t = parse_uri(s, parsed)) != NULL)
592 memset(parsed, 0, sizeof(*parsed));
593 return parse_relative_ref(s, parsed);
598 * absolute-URI = scheme ":" hier-part [ "?" query ]
601 parse_absolute_uri(const char *s, struct phos_uri *parsed)
603 if ((s = parse_scheme(s, parsed)) == NULL)
610 if ((s = parse_hier_part(s, parsed)) == NULL)
615 if ((s = parse_query(s, parsed)) == NULL)
623 /* normalizing fns */
626 hasprefix(const char *str, const char *prfx)
628 for (; *str == *prfx && *prfx != '\0'; str++, prfx++)
631 return *prfx == '\0';
635 dotdot(char *point, char *start)
639 for (t = point-1; t > start; --t) {
646 memmove(t, point, strlen(point)+1);
651 * This is the "Remove Dot Segments" straight outta RFC3986, section
655 path_clean(struct phos_uri *uri)
657 char *in = uri->path;
659 while (in != NULL && *in != '\0') {
660 assert(in >= uri->path);
662 /* A) drop leading ../ or ./ */
663 if (hasprefix(in, "../"))
664 memmove(in, &in[3], strlen(&in[3])+1);
665 else if (hasprefix(in, "./"))
666 memmove(in, &in[2], strlen(&in[2])+1);
668 /* B) replace /./ or /. with / */
669 else if (hasprefix(in, "/./"))
670 memmove(&in[1], &in[3], strlen(&in[3])+1);
671 else if (!strcmp(in, "/."))
674 /* C) resolve dot-dot */
675 else if (hasprefix(in, "/../")) {
676 in = dotdot(in, uri->path);
677 memmove(&in[1], &in[4], strlen(&in[4])+1);
678 } else if (!strcmp(in, "/..")) {
679 in = dotdot(in, uri->path);
685 else if (!strcmp(in, "."))
687 else if (!strcmp(in, ".."))
692 in = strchr(in+1, '/');
697 * see RFC3986 5.3.3 "Merge Paths".
700 merge_path(struct phos_uri *ret, const struct phos_uri *base,
701 const struct phos_uri *ref)
706 len = sizeof(ret->path);
708 s = strrchr(base->path, '/');
709 if ((*base->host != '\0' && *base->path == '\0') || s == NULL) {
710 strlcpy(ret->path, "/", len);
713 memcpy(ret->path, base->path, s - base->path + 1);
716 return strlcat(ret->path, ref->path, len) < len;
720 /* public interface */
723 phos_parse_absolute_uri(const char *s, struct phos_uri *uri)
725 memset(uri, 0, sizeof(*uri));
727 if ((s = parse_absolute_uri(s, uri)) == NULL)
736 phos_parse_uri_reference(const char *s, struct phos_uri *uri)
738 memset(uri, 0, sizeof(*uri));
740 if ((s = parse_uri_reference(s, uri)) == NULL)
749 * Implementation of the "transform references" algorithm from
750 * RFC3986, see 5.2.2.
752 * We expect base and ref to be URIs constructed by this library
753 * (because we emit only normalized URIs).
755 * ATM this is marked as private because:
756 * - let's say the URI is "."
757 * - one calls phos_parse_uri_reference
758 * - it exits with success, but the path becomes ""
759 * - this routine does the right thing, but the outcome is not what expected.
761 * so users for now have to use phos_resolve_uri_from_str, which parses
762 * the URI but does not normalize it, and then calls into us.
765 phos_resolve_uri_from(const struct phos_uri *base, const struct phos_uri *ref,
766 struct phos_uri *ret)
768 memset(ret, 0, sizeof(*ret));
770 if (*ref->scheme != '\0') {
771 strlcpy(ret->scheme, ref->scheme, sizeof(ret->scheme));
772 strlcpy(ret->host, ref->host, sizeof(ret->host));
773 strlcpy(ret->port, ref->port, sizeof(ret->port));
774 ret->dec_port = ref->dec_port; /* numeric port comes from ref, like the textual port above */
775 strlcpy(ret->path, ref->path, sizeof(ret->path));
776 strlcpy(ret->query, ref->query, sizeof(ret->query));
778 if (*ref->host != '\0') {
779 strlcpy(ret->host, ref->host, sizeof(ret->host));
780 strlcpy(ret->port, ref->port, sizeof(ret->port));
781 ret->dec_port = ref->dec_port; /* ref carries an authority: take its numeric port too */
782 strlcpy(ret->path, ref->path, sizeof(ret->path));
783 strlcpy(ret->query, ref->query, sizeof(ret->query));
785 if (*ref->path == '\0') {
786 strlcpy(ret->path, base->path, sizeof(ret->path));
787 if (*ref->query != '\0')
788 strlcpy(ret->query, ref->query, sizeof(ret->query));
790 strlcpy(ret->query, base->query, sizeof(ret->query));
792 if (*ref->path == '/')
793 strlcpy(ret->path, ref->path, sizeof(ret->path));
795 if (!merge_path(ret, base, ref))
800 strlcpy(ret->query, ref->query, sizeof(ret->query));
803 strlcpy(ret->host, base->host, sizeof(ret->host));
804 strlcpy(ret->port, base->port, sizeof(ret->port));
805 ret->dec_port = base->dec_port;
808 strlcpy(ret->scheme, base->scheme, sizeof(ret->scheme));
811 strlcpy(ret->fragment, ref->fragment, sizeof(ret->fragment));
817 phos_resolve_uri_from_str(const struct phos_uri *base, const char *refstr,
818 struct phos_uri *ret)
822 memset(&ref, 0, sizeof(ref));
824 if ((refstr = parse_uri_reference(refstr, &ref)) == NULL)
830 return phos_resolve_uri_from(base, &ref, ret);
834 phos_uri_drop_empty_segments(struct phos_uri *uri)
838 for (i = uri->path; *i; ++i) {
839 if (*i == '/' && *(i+1) == '/') {
840 memmove(i, i+1, strlen(i)); /* move also the \0 */
847 phos_serialize_uri(const struct phos_uri *uri, char *buf, size_t len)
850 if (strlcat(buf, s, len) >= len) \
853 strlcpy(buf, "", len);
855 if (*uri->scheme != '\0') {
860 if (*uri->host != '\0') {
867 if (*uri->query != '\0') {
872 if (*uri->fragment) {