2 * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
26 /* TODO: URI -> IRI.  Accept IRIs but always emit URIs. */
/*
 * Copy the bytes in [start, till) into buf, a buffer of len bytes.
 * Callers in this file treat a return value of -1 as failure.
 * NOTE(review): only the length computation and the memcpy() are
 * visible in this excerpt; the bounds check against len and the NUL
 * termination are presumably on the omitted lines -- confirm.
 */
29 cpstr(const char *start, const char *till, char *buf, size_t len)
31 size_t slen = till - start;
35 memcpy(buf, start, slen);
/*
 * Start of a character-class test: ASCII letters and digits are
 * accepted (isalnum() on the value cast to unsigned char, so that a
 * negative char is never passed to <ctype.h>).  The trailing `||' shows
 * that more alternatives follow on lines not visible here.
 * NOTE(review): presumably the body of the predicate called as
 * unreserved() elsewhere in this file (RFC 3986 "unreserved") -- the
 * function header is not part of this excerpt; confirm.
 */
43 return (isalnum((unsigned char)c) ||
/*
 * True when t points at a percent-encoded triplet: a '%' followed by
 * two hexadecimal digits.  The && chain short-circuits, so t[1] is read
 * only after a '%' was seen and t[2] only when t[1] is a hex digit.
 * NOTE(review): the enclosing function header and the definition of t
 * are not visible in this excerpt.
 */
55 return (t[0] == '%' &&
56 isxdigit((unsigned char)t[1]) &&
57 isxdigit((unsigned char)t[2]));
/*
 * True when c is one of the eleven characters
 *	! $ & ' ( ) * + , ; =
 * which is the "sub-delims" set of RFC 3986.
 * NOTE(review): presumably the body of the predicate called as
 * sub_delims() elsewhere in this file -- the function header is not
 * part of this excerpt; confirm.
 */
63 return (c == '!' || c == '$' || c == '&' || c == '\'' ||
64 c == '(' || c == ')' || c == '*' || c == '+' || c == ',' ||
65 c == ';' || c == '=');
/*
 * Try to step over one path character ("pchar") at s.  The visible test
 * matches unreserved characters, sub-delims, ':' and '@'; what is
 * returned for a match, and any further cases, are on lines not shown
 * here.  Callers compare the result with NULL to detect a non-match.
 */
68 static inline const char *
69 advance_pchar(const char *s)
71 if (unreserved(*s) || sub_delims(*s) || *s == ':' || *s == '@')
/*
 * Scan a (possibly empty) run of pchars starting at s: advance_pchar()
 * is called repeatedly until it reports a non-match with NULL.
 * NOTE(review): the loop body and the return statement are not visible
 * in this excerpt.
 */
78 static inline const char *
79 advance_segment(const char *s)
83 while ((t = advance_pchar(s)) != NULL)
/*
 * Non-empty variant of advance_segment(): the first character at s must
 * be accepted by advance_pchar(); the remainder of the run is then
 * handed to advance_segment().
 * NOTE(review): the statement executed when the first pchar is missing
 * is not visible here; callers test the result against NULL.
 */
88 static inline const char *
89 advance_segment_nz(const char *s)
93 if ((t = advance_pchar(s)) == NULL)
95 return (advance_segment(t));
/*
 * Scan a non-empty run of characters in which ':' is not accepted: the
 * visible test admits unreserved characters, sub-delims and '@' only.
 * Returns the position reached, or NULL when nothing was consumed.
 * NOTE(review): the surrounding loop (and any percent-encoding case) is
 * on lines not shown in this excerpt.
 */
98 static inline const char *
99 advance_segment_nz_nc(const char *s)
104 if (unreserved(*t) || sub_delims(*t) || *t == '@')
112 return (t != s ? t : NULL);
/*
 * Parse the scheme at the start of s.  The first character must be a
 * letter; alphanumerics (plus whatever the continuation of the loop
 * condition, not shown here, allows) extend it.  The text [s, t) is
 * copied into iri->iri_scheme and IH_SCHEME is set in iri->iri_flags.
 * A cpstr() result of -1 (scheme does not fit) is checked for.
 * Callers treat a NULL return as failure.
 */
116 parse_scheme(const char *s, struct iri *iri)
120 if (!isalpha((unsigned char)*t))
123 while (isalnum((unsigned char)*t) ||
129 if (cpstr(s, t, iri->iri_scheme, sizeof(iri->iri_scheme)) == -1)
132 iri->iri_flags |= IH_SCHEME;
136 /* userinfo is always optional */
/*
 * Parse the userinfo part of an authority starting at s.  The visible
 * test accepts unreserved characters, sub-delims and ':'.  When a
 * userinfo is recognised it is copied into iri->iri_uinfo and IH_UINFO
 * is set.  parse_authority() treats a NULL return as failure.
 * NOTE(review): how the terminating '@' is detected, and what is
 * returned when there is no userinfo, is on lines not shown here.
 */
138 parse_uinfo(const char *s, struct iri *iri)
143 if (unreserved(*t) || sub_delims(*t) || *t == ':')
154 if (cpstr(s, t, iri->iri_uinfo, sizeof(iri->iri_uinfo)) == -1)
156 iri->iri_flags |= IH_UINFO;
/*
 * Parse the host part of an authority into iri->iri_host and set
 * IH_HOST.  Two paths are visible: one scans everything up to a closing
 * ']' (presumably a bracketed IP literal -- the test for the opening
 * '[' is not shown), the other accepts unreserved characters and
 * sub-delims.  Both copy the text with cpstr() and check it for -1.
 * parse_authority() treats a NULL return as failure.
 */
161 parse_host(const char *s, struct iri *iri)
166 * cheating a bit by relaxing and merging the rule for
167 * IPv6address and IPvFuture and by merging IPv4address and
172 while (*t && *t != ']')
177 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
179 iri->iri_flags |= IH_HOST;
184 if (unreserved(*t) || sub_delims(*t))
192 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
194 iri->iri_flags |= IH_HOST;
/*
 * Parse a run of decimal digits as the port.  The digits are kept as
 * text in iri->iri_portstr and converted with strtonum(), which is
 * asked to accept only 1..UINT16_MAX, into iri->iri_port; IH_PORT is
 * then set.
 * NOTE(review): the inspection of errstr after strtonum() is not
 * visible in this excerpt -- confirm that conversion failures (empty
 * digit run, 0, values above 65535) are rejected before the flag is
 * set.
 */
199 parse_port(const char *s, struct iri *iri)
204 while (isdigit((unsigned char)*t))
206 if (cpstr(s, t, iri->iri_portstr, sizeof(iri->iri_portstr)) == -1)
208 iri->iri_port = strtonum(iri->iri_portstr, 1, UINT16_MAX, &errstr);
211 iri->iri_flags |= IH_PORT;
/*
 * Parse an authority component: userinfo, then host, then port, each
 * parser continuing from the position the previous one returned.  A
 * NULL from parse_uinfo() or parse_host() is tested for.
 * NOTE(review): any condition guarding the final parse_port() call is
 * on lines not shown in this excerpt.
 */
216 parse_authority(const char *s, struct iri *iri)
220 if ((t = parse_uinfo(s, iri)) == NULL)
223 if ((t = parse_host(t, iri)) == NULL)
227 return (parse_port(t, iri));
/*
 * Parse the path that follows an authority ("path-abempty" in RFC 3986
 * terms).  Segments are scanned with advance_segment(t + 1), i.e. one
 * character is skipped before each segment (presumably the '/'; the
 * loop header is not visible here).  The text [s, t) is stored in
 * iri->iri_path and IH_PATH is set.
 */
233 parse_path_abempty(const char *s, struct iri *iri)
238 t = advance_segment(t + 1);
240 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
242 iri->iri_flags |= IH_PATH;
/*
 * Parse an absolute path: the segment starting at s + 1 must be
 * non-empty (advance_segment_nz), further segments are scanned with
 * advance_segment(t + 1).  The text [s, t) is stored in iri->iri_path
 * and IH_PATH is set.  Callers take a NULL return as "not this kind of
 * path" and try the next alternative.
 * NOTE(review): the check of the leading character at s and the loop
 * header are on lines not shown in this excerpt.
 */
247 parse_path_absolute(const char *s, struct iri *iri)
254 if ((t = advance_segment_nz(s + 1)) == NULL)
258 t = advance_segment(t + 1);
260 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
262 iri->iri_flags |= IH_PATH;
/*
 * Parse a rootless path: it starts directly with a non-empty segment
 * at s (advance_segment_nz), followed by further segments scanned with
 * advance_segment(t + 1).  The text [s, t) is stored in iri->iri_path
 * and IH_PATH is set.  parse_hier() takes a NULL return as "no match".
 * NOTE(review): the loop header around the second scan is not visible.
 */
267 parse_path_rootless(const char *s, struct iri *iri)
271 if ((t = advance_segment_nz(s)) == NULL)
275 t = advance_segment(t + 1);
277 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
279 iri->iri_flags |= IH_PATH;
/*
 * Parse a "noscheme" path, used for relative references: the first
 * segment is scanned with advance_segment_nz_nc(), which does not
 * accept ':', so it cannot be mistaken for a scheme.  Later segments
 * are scanned with advance_segment(t + 1).  The text [s, t) is stored
 * in iri->iri_path and IH_PATH is set.  parse_relative() takes a NULL
 * return as "no match".
 * NOTE(review): the loop header around the second scan is not visible.
 */
284 parse_path_noscheme(const char *s, struct iri *iri)
288 if ((t = advance_segment_nz_nc(s)) == NULL)
292 t = advance_segment(t + 1);
294 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
296 iri->iri_flags |= IH_PATH;
/*
 * Record an empty path: iri->iri_path becomes the empty string and
 * IH_PATH is set, so "path present but empty" can be told apart from
 * "no path parsed".  Used as the last alternative by parse_hier() and
 * parse_relative().
 * NOTE(review): the return statement is not visible in this excerpt.
 */
301 parse_path_empty(const char *s, struct iri *iri)
303 iri->iri_path[0] = '\0';
304 iri->iri_flags |= IH_PATH;
/*
 * Parse the hierarchical part that follows "scheme:".  When s starts
 * with "//" an authority is parsed from s + 2, followed by the path
 * that may come after an authority.  Otherwise the alternatives are
 * tried in order: absolute path, rootless path, and finally the empty
 * path.
 * NOTE(review): the statements taken on the success/failure branches
 * are on lines not shown in this excerpt.
 */
309 parse_hier(const char *s, struct iri *iri)
313 if (!strncmp(s, "//", 2)) {
314 if ((t = parse_authority(s + 2, iri)) == NULL)
316 return (parse_path_abempty(t, iri));
319 if ((t = parse_path_absolute(s, iri)) != NULL)
322 if ((t = parse_path_rootless(s, iri)) != NULL)
325 return (parse_path_empty(s, iri));
/*
 * Parse the leading part of a relative reference (no scheme).  Same
 * shape as parse_hier(): "//" introduces an authority followed by a
 * path; otherwise an absolute path, then a noscheme path (first segment
 * without ':'), then the empty path are tried in that order.
 * NOTE(review): the statements taken on the success/failure branches
 * are on lines not shown in this excerpt.
 */
329 parse_relative(const char *s, struct iri *iri)
333 if (!strncmp(s, "//", 2)) {
334 if ((t = parse_authority(s + 2, iri)) == NULL)
336 return (parse_path_abempty(t, iri));
339 if ((t = parse_path_absolute(s, iri)) != NULL)
342 if ((t = parse_path_noscheme(s, iri)) != NULL)
345 return (parse_path_empty(s, iri));
/*
 * Parse a query starting at s (callers pass the position just after
 * the '?').  Accepted characters are pchars, as recognised by
 * advance_pchar(), plus '/' and '?'.  The text [s, t) is stored in
 * iri->iri_query and IH_QUERY is set.  Callers treat NULL as failure.
 * NOTE(review): the loop header and the cursor updates are on lines
 * not shown in this excerpt.
 */
349 parse_query(const char *s, struct iri *iri)
351 const char *n, *t = s;
354 if ((n = advance_pchar(t)) != NULL)
356 else if (*t == '/' || *t == '?')
362 if (cpstr(s, t, iri->iri_query, sizeof(iri->iri_query)) == -1)
364 iri->iri_flags |= IH_QUERY;
/*
 * Parse a fragment starting at s (callers pass the position just after
 * the '#').  Accepted characters are the same as for a query: pchars
 * plus '/' and '?'.  The text [s, t) is stored in iri->iri_fragment and
 * IH_FRAGMENT is set.  Callers treat NULL as failure.
 * NOTE(review): the loop header and the cursor updates are on lines
 * not shown in this excerpt.
 */
369 parse_fragment(const char *s, struct iri *iri)
371 const char *n, *t = s;
374 if ((n = advance_pchar(t)) != NULL)
376 else if (*t == '/' || *t == '?')
382 if (cpstr(s, t, iri->iri_fragment, sizeof(iri->iri_fragment)) == -1)
384 iri->iri_flags |= IH_FRAGMENT;
/*
 * Parse s as a full URI: scheme, hierarchical part, then an optional
 * "?query" and an optional "#fragment".  parse_hier() is started at
 * s + 1, i.e. one character after the end of the scheme (presumably
 * the ':'; the check for it is not visible here).  Callers compare the
 * result with -1 to detect failure.
 * NOTE(review): the final check for trailing garbage and the return
 * statements are on lines not shown in this excerpt.
 */
389 parse_uri(const char *s, struct iri *iri)
391 if ((s = parse_scheme(s, iri)) == NULL)
397 if ((s = parse_hier(s + 1, iri)) == NULL)
400 if (*s == '?' && (s = parse_query(s + 1, iri)) == NULL)
403 if (*s == '#' && (s = parse_fragment(s + 1, iri)) == NULL)
/*
 * Parse s as a relative reference: the relative part (see
 * parse_relative()), then an optional "?query" and an optional
 * "#fragment".  parse() compares the result with -1 to detect failure.
 * NOTE(review): the final check for trailing garbage and the return
 * statements are on lines not shown in this excerpt.
 */
413 parse_relative_ref(const char *s, struct iri *iri)
415 if ((s = parse_relative(s, iri)) == NULL)
418 if (*s == '?' && (s = parse_query(s + 1, iri)) == NULL)
421 if (*s == '#' && (s = parse_fragment(s + 1, iri)) == NULL)
/*
 * Parse s as either kind of reference: a full URI is tried first and,
 * only when that fails, s is parsed again as a relative reference.
 * iri_parse() compares the result with -1 to detect failure.
 * NOTE(review): any resetting of *iri between the two attempts is on
 * lines not shown in this excerpt.
 */
431 parse(const char *s, struct iri *iri)
438 if (parse_uri(s, iri) == -1) {
440 if (parse_relative_ref(s, iri) == -1)
/*
 * Lower-case the character s points at, in place.  The cast to
 * unsigned char keeps negative char values away from tolower().
 * NOTE(review): presumably the loop body of the helper called as
 * lowerify() by cpfields(); the function header and the loop are not
 * part of this excerpt -- confirm.
 */
451 *s = tolower((unsigned char)*s);
455 cpfields(struct iri *dest, const struct iri *src, int flags)
457 if (flags & IH_SCHEME) {
458 dest->iri_flags |= IH_SCHEME;
459 if (src->iri_flags & IH_SCHEME)
460 memcpy(dest->iri_scheme, src->iri_scheme,
461 sizeof(dest->iri_scheme));
462 lowerify(dest->iri_scheme);
464 if (flags & IH_UINFO) {
465 if (src->iri_flags & IH_UINFO) {
466 memcpy(dest->iri_uinfo, src->iri_uinfo,
467 sizeof(dest->iri_uinfo));
468 dest->iri_flags |= IH_UINFO;
471 if (flags & IH_HOST) {
472 dest->iri_flags |= IH_HOST;
473 if (src->iri_flags & IH_HOST)
474 memcpy(dest->iri_host, src->iri_host,
475 sizeof(dest->iri_host));
476 lowerify(dest->iri_host);
478 if (flags & IH_PORT) {
479 if (src->iri_flags & IH_PORT) {
480 dest->iri_port = src->iri_port;
481 dest->iri_flags |= IH_PORT;
484 if (flags & IH_PATH) {
485 dest->iri_flags |= IH_PATH;
486 if (src->iri_flags & IH_PATH)
487 memcpy(dest->iri_path, src->iri_path,
488 sizeof(dest->iri_path));
490 if (flags & IH_QUERY) {
491 if (src->iri_flags & IH_QUERY) {
492 dest->iri_flags |= IH_QUERY;
493 memcpy(dest->iri_query, src->iri_query,
494 sizeof(dest->iri_query));
497 if (flags & IH_FRAGMENT) {
498 if (src->iri_flags & IH_FRAGMENT) {
499 dest->iri_flags |= IH_FRAGMENT;
500 memcpy(dest->iri_fragment, src->iri_fragment,
501 sizeof(dest->iri_fragment));
/*
 * Remove "." and ".." segments from i->iri_path, working inside the
 * path buffer itself: buf, p and q all start at i->iri_path.  p is
 * what the tests read from; q is bounded by bufsize and is the pointer
 * that gets backed up, so it appears to be the write position (the
 * writes themselves are on lines not shown here).
 *
 * Visible cases:
 *  - "/." followed by '/' or the end of the string;
 *  - "/.." followed by '/' or the end of the string, which also moves
 *    q back to the previous '/' (never before buf).
 *
 * The function succeeds only if the whole input was consumed while q
 * stayed inside the buffer; otherwise errno is set to ENAMETOOLONG.
 * iri_parse() treats a return of -1 as failure.
 */
507 remove_dot_segments(struct iri *i)
512 buf = p = q = i->iri_path;
513 bufsize = sizeof(i->iri_path);
514 while (*p && (q - buf < bufsize)) {
515 if (p[0] == '/' && p[1] == '.' &&
516 (p[2] == '/' || p[2] == '\0')) {
520 } else if (p[0] == '/' && p[1] == '.' && p[2] == '.' &&
521 (p[3] == '/' || p[3] == '\0')) {
523 while (q > buf && *--q != '/')
525 if (*p != '/' && (q > buf && q[-1] != '/'))
530 if ((*p == '\0') && (q - buf < bufsize)) {
535 errno = ENAMETOOLONG;
/*
 * Build i->iri_path by merging the path of the reference r onto the
 * path of base.  A side that has no IH_PATH flag is treated as "/".
 * The result always gets IH_PATH and starts out empty.
 *
 * Visible logic:
 *  - base has an IH_AUTHORITY bit set and its path is empty or "/":
 *    the result is "/" followed by the reference path;
 *  - otherwise the base path up to and including its last '/' is kept
 *    (if it contains one) and the reference path is appended.
 *
 * If the final strlcat() reports truncation, errno is set to
 * ENAMETOOLONG.  iri_parse() treats a return of -1 as failure.
 * NOTE(review): the results of the strlcpy()/strlcat() pair in the
 * first case and of cpstr() in the second are not checked on the
 * visible lines; some statements of this function are not shown.
 */
540 mergepath(struct iri *i, struct iri *base, struct iri *r)
542 const char *bpath, *rpath, *s;
544 bpath = (base->iri_flags & IH_PATH) ? base->iri_path : "/";
545 rpath = (r->iri_flags & IH_PATH) ? r->iri_path : "/";
547 i->iri_flags |= IH_PATH;
548 i->iri_path[0] = '\0';
550 if ((base->iri_flags & IH_AUTHORITY) &&
551 (*bpath == '\0' || !strcmp(bpath, "/"))) {
554 strlcpy(i->iri_path, "/", sizeof(i->iri_path));
555 strlcat(i->iri_path, rpath, sizeof(i->iri_path));
559 if ((s = strrchr(bpath, '/')) != NULL) {
560 cpstr(bpath, s + 1, i->iri_path, sizeof(i->iri_path));
564 if (strlcat(i->iri_path, rpath, sizeof(i->iri_path)) >=
565 sizeof(i->iri_path)) {
566 errno = ENAMETOOLONG;
/*
 * Parse str, resolve it against base, and store the result in *iri,
 * which is zeroed first.
 *
 * Two entry paths are visible: one parses str alone as a full URI, the
 * other parses base as a full URI and str as either a URI or a relative
 * reference (the condition choosing between them is not shown;
 * presumably whether base is NULL).
 *
 * The result is then assembled with cpfields(), in a shape that
 * appears to follow RFC 3986 section 5.2.2:
 *  - the fragment always comes from the reference;
 *  - reference has a scheme: every component comes from the reference;
 *  - otherwise the scheme comes from base, and
 *      - reference has a host: authority, path and query come from the
 *        reference;
 *      - otherwise the authority comes from base, and
 *          - reference path is empty: path from base; query from the
 *            reference if it has one, else from base;
 *          - otherwise: query from the reference; a reference path
 *            starting with '/' is used as is, any other is merged
 *            onto the base path with mergepath().
 * Dot segments are removed from the resulting path.
 *
 * The intermediate parse results live in function-local static
 * structures, so every call shares and overwrites the same scratch
 * state.
 */
574 iri_parse(const char *base, const char *str, struct iri *iri)
576 static struct iri ibase, iparsed;
578 memset(iri, 0, sizeof(*iri));
582 if (parse_uri(str, &iparsed) == -1) {
587 if (parse_uri(base, &ibase) == -1 ||
588 parse(str, &iparsed) == -1) {
594 cpfields(iri, &iparsed, IH_FRAGMENT);
596 if (iparsed.iri_flags & IH_SCHEME) {
597 cpfields(iri, &iparsed, iparsed.iri_flags);
598 remove_dot_segments(iri);
602 cpfields(iri, &ibase, IH_SCHEME);
604 if (iparsed.iri_flags & IH_HOST) {
605 cpfields(iri, &iparsed, IH_AUTHORITY|IH_PATH|IH_QUERY);
606 remove_dot_segments(iri);
610 cpfields(iri, &ibase, IH_AUTHORITY);
612 if ((iparsed.iri_flags & IH_PATH) && *iparsed.iri_path == '\0') {
613 cpfields(iri, &ibase, IH_PATH);
614 if (iparsed.iri_flags & IH_QUERY)
615 cpfields(iri, &iparsed, IH_QUERY);
617 cpfields(iri, &ibase, IH_QUERY);
621 cpfields(iri, &iparsed, IH_QUERY);
622 if ((iparsed.iri_flags & IH_PATH) && *iparsed.iri_path == '/')
623 cpfields(iri, &iparsed, IH_PATH);
625 if (!(ibase.iri_flags & IH_PATH))
626 ibase.iri_path[0] = '\0';
627 if (!(iparsed.iri_flags & IH_PATH))
628 iparsed.iri_path[0] = '\0';
629 if (mergepath(iri, &ibase, &iparsed) == -1)
632 if (remove_dot_segments(iri) == -1)
/*
 * Serialise i into buf (buflen bytes) by appending, in order, every
 * component whose flag is set:
 *	"scheme:", "//" when any IH_AUTHORITY bit is set, "userinfo@",
 *	host, ":port", path, "?query", "#fragment".
 * The port is emitted from its textual form (iri_portstr), not from
 * the numeric iri_port.  When an authority is present and the path
 * does not start with '/', a '/' is inserted before the path.
 * No percent-encoding is applied (see the TODO below).
 *
 * Every strlcat() result is compared with buflen to detect truncation.
 * NOTE(review): the initialisation of buf, the truncation handling and
 * the return value are on lines not shown in this excerpt.
 */
638 iri_unparse(const struct iri *i, char *buf, size_t buflen)
643 /* TODO: should %enc octets if needed */
647 if (i->iri_flags & IH_SCHEME) {
648 if (strlcat(buf, i->iri_scheme, buflen) >= buflen ||
649 strlcat(buf, ":", buflen) >= buflen)
653 if (i->iri_flags & IH_AUTHORITY) {
654 if (strlcat(buf, "//", buflen) >= buflen)
658 if (i->iri_flags & IH_UINFO) {
659 if (strlcat(buf, i->iri_uinfo, buflen) >= buflen ||
660 strlcat(buf, "@", buflen) >= buflen)
663 if (i->iri_flags & IH_HOST) {
664 if (strlcat(buf, i->iri_host, buflen) >= buflen)
667 if (i->iri_flags & IH_PORT) {
668 if (strlcat(buf, ":", buflen) >= buflen ||
669 strlcat(buf, i->iri_portstr, buflen) >= buflen)
673 if (i->iri_flags & IH_PATH) {
674 if (i->iri_flags & IH_AUTHORITY &&
675 i->iri_path[0] != '/' &&
676 strlcat(buf, "/", buflen) >= buflen)
678 if (strlcat(buf, i->iri_path, buflen) >= buflen)
682 if (i->iri_flags & IH_QUERY) {
683 if (strlcat(buf, "?", buflen) >= buflen ||
684 strlcat(buf, i->iri_query, buflen) >= buflen)
688 if (i->iri_flags & IH_FRAGMENT) {
689 if (strlcat(buf, "#", buflen) >= buflen ||
690 strlcat(buf, i->iri_fragment, buflen) >= buflen)
/*
 * Produce a human-oriented rendering of iri in buf (buflen bytes).
 * The only visible statement zero-fills the whole output buffer.
 * NOTE(review): the rest of the body, including the return value, is
 * not part of this excerpt -- confirm what, if anything, is written
 * after the memset().
 */
702 iri_human(const struct iri *iri, char *buf, size_t buflen)
704 memset(buf, 0, buflen);
709 iri_setquery(struct iri *iri, const char *text)