2 * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
27 /* TODO: URI -> IRI. Accept IRIs but always emit URIs. */
/*
 * cpstr: copy the bytes in [start, till) into buf.  Callers pass the
 * size of the destination array as len and treat a -1 result as
 * failure.
 * NOTE(review): this extract shows only the length computation and
 * the memcpy; the bounds check against len and the NUL termination
 * are not visible here -- confirm against the full file.
 */
30 cpstr(const char *start, const char *till, char *buf, size_t len)
32 size_t slen = till - start;
36 memcpy(buf, start, slen);
/*
 * NOTE(review): isolated line of a character-class predicate,
 * presumably unreserved(), which the scanners in this file call; only
 * the first alternative of the return expression is visible.  The
 * unsigned char cast keeps the argument within the range the
 * <ctype.h> functions accept.
 */
44 return (isalnum((unsigned char)c) ||
/*
 * True when t points at a percent-encoded octet: '%' followed by two
 * hexadecimal digits.  The && chain short-circuits, so t[1] and t[2]
 * are only read when the preceding byte matched, which keeps the test
 * from reading past a terminating NUL.
 * NOTE(review): the enclosing function header is not visible in this
 * extract.
 */
56 return (t[0] == '%' &&
57 isxdigit((unsigned char)t[1]) &&
58 isxdigit((unsigned char)t[2]));
/*
 * True when c is one of the RFC 3986 "sub-delims" characters:
 * ! $ & ' ( ) * + , ; =
 * NOTE(review): presumably the body of sub_delims(), which the
 * scanners in this file call; its header is not visible in this
 * extract.
 */
64 return (c == '!' || c == '$' || c == '&' || c == '\'' ||
65 c == '(' || c == ')' || c == '*' || c == '+' || c == ',' ||
66 c == ';' || c == '=');
/*
 * advance_pchar: test whether s starts with a path character.  The
 * visible condition accepts an unreserved character, a sub-delimiter,
 * ':' or '@'; callers treat a NULL result as "no pchar here".
 * NOTE(review): the return statements and any handling of
 * percent-encoded octets are not visible in this extract.
 */
69 static inline const char *
70 advance_pchar(const char *s)
72 if (unreserved(*s) || sub_delims(*s) || *s == ':' || *s == '@')
/*
 * advance_segment: keep calling advance_pchar() for as long as it
 * succeeds, i.e. scan over a possibly empty run of path characters.
 * NOTE(review): the loop body and the return are not visible here;
 * presumably the position where the run stops is returned.
 */
79 static inline const char *
80 advance_segment(const char *s)
84 while ((t = advance_pchar(s)) != NULL)
/*
 * advance_segment_nz: like advance_segment() but the first pchar is
 * mandatory.  Once it matched, the rest of the segment is scanned by
 * advance_segment() starting from the pointer advance_pchar()
 * returned.
 * NOTE(review): what is returned when the first pchar is missing is
 * not visible in this extract; callers test the result for NULL.
 */
89 static inline const char *
90 advance_segment_nz(const char *s)
94 if ((t = advance_pchar(s)) == NULL)
96 return (advance_segment(t));
/*
 * advance_segment_nz_nc: scan a non-empty segment that contains no
 * ':'.  The visible character test is the pchar one without ':', and
 * NULL is returned when t never moved past s (empty segment).
 * NOTE(review): the loop around the test is not visible in this
 * extract.
 */
99 static inline const char *
100 advance_segment_nz_nc(const char *s)
105 if (unreserved(*t) || sub_delims(*t) || *t == '@')
113 return (t != s ? t : NULL);
/*
 * parse_scheme: scan a scheme starting at s.  The first character
 * must be alphabetic; the scan then continues over alphanumerics
 * (any further accepted characters are not visible in this extract).
 * The text [s, t) is copied into iri->iri_scheme and IH_SCHEME is
 * set.  Callers treat a NULL result as failure.
 */
117 parse_scheme(const char *s, struct iri *iri)
121 if (!isalpha((unsigned char)*t))
124 while (isalnum((unsigned char)*t) ||
130 if (cpstr(s, t, iri->iri_scheme, sizeof(iri->iri_scheme)) == -1)
133 iri->iri_flags |= IH_SCHEME;
/*
 * parse_uinfo: scan a userinfo component made of unreserved
 * characters, sub-delimiters and ':'.  The text [s, t) is copied
 * into iri->iri_uinfo and IH_UINFO is set.
 * NOTE(review): how the terminator and the "no userinfo" case are
 * handled is not visible in this extract; parse_authority() tests
 * the result for NULL.
 */
137 /* userinfo is always optional */
139 parse_uinfo(const char *s, struct iri *iri)
144 if (unreserved(*t) || sub_delims(*t) || *t == ':')
155 if (cpstr(s, t, iri->iri_uinfo, sizeof(iri->iri_uinfo)) == -1)
157 iri->iri_flags |= IH_UINFO;
/*
 * parse_host: scan the host, copy it into iri->iri_host and set
 * IH_HOST.  Two paths are visible: one scans up to a closing ']' (or
 * the end of the string), the other accepts unreserved characters and
 * sub-delimiters.  As the comment inside the body says, the grammar
 * rules are deliberately relaxed and merged.
 * NOTE(review): the conditions selecting each path are not visible in
 * this extract.
 */
162 parse_host(const char *s, struct iri *iri)
167 * cheating a bit by relaxing and merging the rule for
168 * IPv6address and IPvFuture and by merging IPv4address and
173 while (*t && *t != ']')
178 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
180 iri->iri_flags |= IH_HOST;
185 if (unreserved(*t) || sub_delims(*t))
193 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
195 iri->iri_flags |= IH_HOST;
/*
 * parse_port: scan a run of decimal digits at s, keep the text in
 * iri->iri_portstr and its numeric value in iri->iri_port, then set
 * IH_PORT.  strtonum() is asked for the range 1..UINT16_MAX, so "0",
 * an empty digit run and anything above 65535 are reported through
 * errstr.
 * NOTE(review): the errstr check is not visible in this extract --
 * confirm it happens before IH_PORT is set.
 */
200 parse_port(const char *s, struct iri *iri)
205 while (isdigit((unsigned char)*t))
207 if (cpstr(s, t, iri->iri_portstr, sizeof(iri->iri_portstr)) == -1)
209 iri->iri_port = strtonum(iri->iri_portstr, 1, UINT16_MAX, &errstr);
212 iri->iri_flags |= IH_PORT;
/*
 * parse_authority: parse the userinfo and then the host, testing
 * each result for NULL.  The visible tail call skips one character
 * after the host and hands the rest to parse_port().
 * NOTE(review): the check guarding that call (presumably for ':') is
 * not visible in this extract.
 */
217 parse_authority(const char *s, struct iri *iri)
221 if ((t = parse_uinfo(s, iri)) == NULL)
224 if ((t = parse_host(t, iri)) == NULL)
228 return (parse_port(t + 1, iri));
/*
 * parse_path_abempty: scan segments with advance_segment(), skipping
 * one character before each, then copy [s, t) into iri->iri_path and
 * set IH_PATH.
 * NOTE(review): the loop condition (presumably a test for a slash at
 * t) is not visible in this extract.
 */
234 parse_path_abempty(const char *s, struct iri *iri)
239 t = advance_segment(t + 1);
241 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
243 iri->iri_flags |= IH_PATH;
/*
 * parse_path_absolute: try a non-empty first segment at s + 1, then
 * further segments via advance_segment(), each after skipping one
 * character.  The text [s, t) is copied into iri->iri_path and
 * IH_PATH is set.  Callers treat a NULL result as "not this path
 * form".
 * NOTE(review): the check on the first character and what happens
 * when the first segment is missing are not visible in this extract.
 */
248 parse_path_absolute(const char *s, struct iri *iri)
255 if ((t = advance_segment_nz(s + 1)) == NULL)
259 t = advance_segment(t + 1);
261 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
263 iri->iri_flags |= IH_PATH;
/*
 * parse_path_rootless: the path must begin, directly at s, with a
 * non-empty segment; further segments are scanned with
 * advance_segment(), each after skipping one character.  The text
 * [s, t) is copied into iri->iri_path and IH_PATH is set.  Callers
 * treat a NULL result as "not this path form".
 * NOTE(review): the loop condition between segments is not visible in
 * this extract.
 */
268 parse_path_rootless(const char *s, struct iri *iri)
272 if ((t = advance_segment_nz(s)) == NULL)
276 t = advance_segment(t + 1);
278 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
280 iri->iri_flags |= IH_PATH;
/*
 * parse_path_noscheme: same shape as the rootless form, except that
 * the first segment is scanned with advance_segment_nz_nc() and so
 * cannot contain ':'.  Later segments go through advance_segment().
 * The text [s, t) is copied into iri->iri_path and IH_PATH is set.
 * Callers treat a NULL result as "not this path form".
 * NOTE(review): the loop condition between segments is not visible in
 * this extract.
 */
285 parse_path_noscheme(const char *s, struct iri *iri)
289 if ((t = advance_segment_nz_nc(s)) == NULL)
293 t = advance_segment(t + 1);
295 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
297 iri->iri_flags |= IH_PATH;
/*
 * parse_path_empty: record an empty path: iri->iri_path becomes the
 * empty string and IH_PATH is set.
 * NOTE(review): the return statement is not visible in this extract.
 */
302 parse_path_empty(const char *s, struct iri *iri)
304 iri->iri_path[0] = '\0';
305 iri->iri_flags |= IH_PATH;
/*
 * parse_hier: parse the part that follows the scheme.  When s starts
 * with "//" an authority is parsed, followed by a path-abempty.
 * Otherwise the absolute and then the rootless path forms are tried
 * in that order, falling back to the empty path.
 */
310 parse_hier(const char *s, struct iri *iri)
314 if (!strncmp(s, "//", 2)) {
315 if ((t = parse_authority(s + 2, iri)) == NULL)
317 return (parse_path_abempty(t, iri));
320 if ((t = parse_path_absolute(s, iri)) != NULL)
323 if ((t = parse_path_rootless(s, iri)) != NULL)
326 return (parse_path_empty(s, iri));
/*
 * parse_relative: parse the leading part of a relative reference.
 * When s starts with "//" an authority is parsed, followed by a
 * path-abempty.  Otherwise the absolute and then the noscheme path
 * forms are tried in that order, falling back to the empty path.
 */
330 parse_relative(const char *s, struct iri *iri)
334 if (!strncmp(s, "//", 2)) {
335 if ((t = parse_authority(s + 2, iri)) == NULL)
337 return (parse_path_abempty(t, iri));
340 if ((t = parse_path_absolute(s, iri)) != NULL)
343 if ((t = parse_path_noscheme(s, iri)) != NULL)
346 return (parse_path_empty(s, iri));
/*
 * parse_query: scan the query text at s.  A pchar (tested through
 * advance_pchar()) or a literal slash or question mark is accepted at
 * each step.  The text [s, t) is copied into iri->iri_query and
 * IH_QUERY is set.
 * NOTE(review): the loop header, the cursor updates and the exit
 * branch are not visible in this extract.
 */
350 parse_query(const char *s, struct iri *iri)
352 const char *n, *t = s;
355 if ((n = advance_pchar(t)) != NULL)
357 else if (*t == '/' || *t == '?')
363 if (cpstr(s, t, iri->iri_query, sizeof(iri->iri_query)) == -1)
365 iri->iri_flags |= IH_QUERY;
/*
 * parse_fragment: scan the fragment text at s with the same
 * character test parse_query() uses: a pchar or a literal slash or
 * question mark.  The text [s, t) is copied into iri->iri_fragment
 * and IH_FRAGMENT is set.
 * NOTE(review): the loop header, the cursor updates and the exit
 * branch are not visible in this extract.
 */
370 parse_fragment(const char *s, struct iri *iri)
372 const char *n, *t = s;
375 if ((n = advance_pchar(t)) != NULL)
377 else if (*t == '/' || *t == '?')
383 if (cpstr(s, t, iri->iri_fragment, sizeof(iri->iri_fragment)) == -1)
385 iri->iri_flags |= IH_FRAGMENT;
/*
 * parse_uri: parse an absolute URI: the scheme, then -- after
 * skipping one character -- the hierarchical part, then an optional
 * "?query" and an optional "#fragment".  Callers compare the result
 * with -1 to detect failure.
 * NOTE(review): the check on the skipped character (presumably ':')
 * and any end-of-string check are not visible in this extract.
 */
390 parse_uri(const char *s, struct iri *iri)
394 if ((s = parse_scheme(s, iri)) == NULL)
400 if ((s = parse_hier(s + 1, iri)) == NULL)
403 if (*s == '?' && (s = parse_query(s + 1, iri)) == NULL)
406 if (*s == '#' && (s = parse_fragment(s + 1, iri)) == NULL)
/*
 * parse_relative_ref: parse a relative reference: the relative part,
 * then an optional "?query" and an optional "#fragment".  The caller
 * compares the result with -1 to detect failure.
 * NOTE(review): any end-of-string check is not visible in this
 * extract.
 */
416 parse_relative_ref(const char *s, struct iri *iri)
418 if ((s = parse_relative(s, iri)) == NULL)
421 if (*s == '?' && (s = parse_query(s + 1, iri)) == NULL)
424 if (*s == '#' && (s = parse_fragment(s + 1, iri)) == NULL)
/*
 * parse: try s as a full URI first and, when parse_uri() reports -1,
 * retry it as a relative reference.
 * NOTE(review): whatever state reset happens between the two attempts
 * is not visible in this extract.
 */
434 parse(const char *s, struct iri *iri)
441 if (parse_uri(s, iri) == -1) {
443 if (parse_relative_ref(s, iri) == -1)
/*
 * NOTE(review): isolated line, presumably the loop body of
 * lowerify(), which cpfields() calls on the scheme and the host: it
 * lower-cases the character s points at, in place.  The unsigned char
 * cast keeps the tolower() argument in the valid range.
 */
454 *s = tolower((unsigned char)*s);
/*
 * cpfields: copy from src to dest the components selected by flags.
 *
 * Scheme, host and path: the IH_* bit is set in dest whenever the
 * component is requested, even if src lacks it; the data is copied
 * only when src has it.  Scheme and host are lower-cased afterwards.
 * Userinfo, port, query and fragment: both the data and the IH_* bit
 * are copied only when src has the component.
 */
458 cpfields(struct iri *dest, const struct iri *src, int flags)
460 if (flags & IH_SCHEME) {
461 dest->iri_flags |= IH_SCHEME;
462 if (src->iri_flags & IH_SCHEME)
463 memcpy(dest->iri_scheme, src->iri_scheme,
464 sizeof(dest->iri_scheme));
465 lowerify(dest->iri_scheme);
467 if (flags & IH_UINFO) {
468 if (src->iri_flags & IH_UINFO) {
469 memcpy(dest->iri_uinfo, src->iri_uinfo,
470 sizeof(dest->iri_uinfo));
471 dest->iri_flags |= IH_UINFO;
474 if (flags & IH_HOST) {
475 dest->iri_flags |= IH_HOST;
476 if (src->iri_flags & IH_HOST)
477 memcpy(dest->iri_host, src->iri_host,
478 sizeof(dest->iri_host));
479 lowerify(dest->iri_host);
/*
 * NOTE(review): only the numeric iri_port is copied below;
 * iri_portstr, which iri_unparse() prints, is left untouched in
 * dest -- confirm whether it should be copied as well.
 */
481 if (flags & IH_PORT) {
482 if (src->iri_flags & IH_PORT) {
483 dest->iri_port = src->iri_port;
484 dest->iri_flags |= IH_PORT;
487 if (flags & IH_PATH) {
488 dest->iri_flags |= IH_PATH;
489 if (src->iri_flags & IH_PATH)
490 memcpy(dest->iri_path, src->iri_path,
491 sizeof(dest->iri_path));
493 if (flags & IH_QUERY) {
494 if (src->iri_flags & IH_QUERY) {
495 dest->iri_flags |= IH_QUERY;
496 memcpy(dest->iri_query, src->iri_query,
497 sizeof(dest->iri_query));
500 if (flags & IH_FRAGMENT) {
501 if (src->iri_flags & IH_FRAGMENT) {
502 dest->iri_flags |= IH_FRAGMENT;
503 memcpy(dest->iri_fragment, src->iri_fragment,
504 sizeof(dest->iri_fragment));
/*
 * remove_dot_segments: process i->iri_path in place.  p and q both
 * start at the beginning of the path; p is the read cursor and q,
 * kept below bufsize, is presumably the write cursor.  A "/." and a
 * "/.." segment (each followed by a slash or the end of the string)
 * get special treatment; for "/.." q is walked back to the previous
 * slash.  The visible failure path sets errno to ENAMETOOLONG, and
 * iri_parse() compares the result with -1.
 * NOTE(review): the statements inside each branch are only partly
 * visible in this extract.
 */
510 remove_dot_segments(struct iri *i)
515 buf = p = q = i->iri_path;
516 bufsize = sizeof(i->iri_path);
517 while (*p && (q - buf < bufsize)) {
518 if (p[0] == '/' && p[1] == '.' &&
519 (p[2] == '/' || p[2] == '\0')) {
523 } else if (p[0] == '/' && p[1] == '.' && p[2] == '.' &&
524 (p[3] == '/' || p[3] == '\0')) {
526 while (q > buf && *--q != '/')
528 if (*p != '/' && (q > buf && q[-1] != '/'))
533 if ((*p == '\0') && (q - buf < bufsize)) {
538 errno = ENAMETOOLONG;
/*
 * mergepath: build in i->iri_path the combination of the base path
 * and the path of the reference r, and set IH_PATH on i.  A missing
 * path on either side is treated as "/".  When base has any
 * IH_AUTHORITY bit set and an empty or "/" path, the result is "/"
 * followed by rpath; otherwise the base path up to and including its
 * last slash is kept and rpath is appended.  Overflow of that final
 * strlcat() sets errno to ENAMETOOLONG; iri_parse() compares the
 * result with -1.
 * NOTE(review): the strlcpy()/strlcat() pair in the authority branch
 * and the cpstr() call are not checked for truncation, unlike the
 * final strlcat() -- confirm this is intended.
 */
543 mergepath(struct iri *i, struct iri *base, struct iri *r)
545 const char *bpath, *rpath, *s;
547 bpath = (base->iri_flags & IH_PATH) ? base->iri_path : "/";
548 rpath = (r->iri_flags & IH_PATH) ? r->iri_path : "/";
550 i->iri_flags |= IH_PATH;
551 i->iri_path[0] = '\0';
553 if ((base->iri_flags & IH_AUTHORITY) &&
554 (*bpath == '\0' || !strcmp(bpath, "/"))) {
557 strlcpy(i->iri_path, "/", sizeof(i->iri_path));
558 strlcat(i->iri_path, rpath, sizeof(i->iri_path));
562 if ((s = strrchr(bpath, '/')) != NULL) {
563 cpstr(bpath, s + 1, i->iri_path, sizeof(i->iri_path));
567 if (strlcat(i->iri_path, rpath, sizeof(i->iri_path)) >=
568 sizeof(i->iri_path)) {
569 errno = ENAMETOOLONG;
/*
 * iri_parse: parse str, resolving it against base, into iri.
 *
 * iri is zeroed first.  Two parse forms are visible: str alone via
 * parse_uri(), or base via parse_uri() plus str via parse() (the
 * condition choosing between them is not visible; presumably whether
 * base is NULL).  The result is then assembled with cpfields():
 *  - the fragment always comes from str;
 *  - if str has a scheme, all of its components are used;
 *  - otherwise the scheme comes from base and, if str has a host,
 *    authority, path and query come from str;
 *  - otherwise the authority comes from base; an empty str path
 *    selects the base path, with the query taken from str when it
 *    has one and from base otherwise (else branch presumed);
 *  - otherwise the query comes from str; a str path starting with a
 *    slash is used as is, any other path is combined with the base
 *    path through mergepath().
 * remove_dot_segments() is applied at three visible points.
 *
 * The two scratch structs are static, so every call shares them.
 * NOTE(review): the result of remove_dot_segments() is ignored in the
 * scheme and host branches but checked at the end -- confirm.
 */
577 iri_parse(const char *base, const char *str, struct iri *iri)
579 static struct iri ibase, iparsed;
581 memset(iri, 0, sizeof(*iri));
585 if (parse_uri(str, &iparsed) == -1) {
590 if (parse_uri(base, &ibase) == -1 ||
591 parse(str, &iparsed) == -1) {
597 cpfields(iri, &iparsed, IH_FRAGMENT);
599 if (iparsed.iri_flags & IH_SCHEME) {
600 cpfields(iri, &iparsed, iparsed.iri_flags);
601 remove_dot_segments(iri);
605 cpfields(iri, &ibase, IH_SCHEME);
607 if (iparsed.iri_flags & IH_HOST) {
608 cpfields(iri, &iparsed, IH_AUTHORITY|IH_PATH|IH_QUERY);
609 remove_dot_segments(iri);
613 cpfields(iri, &ibase, IH_AUTHORITY);
615 if ((iparsed.iri_flags & IH_PATH) && *iparsed.iri_path == '\0') {
616 cpfields(iri, &ibase, IH_PATH);
617 if (iparsed.iri_flags & IH_QUERY)
618 cpfields(iri, &iparsed, IH_QUERY);
620 cpfields(iri, &ibase, IH_QUERY);
624 cpfields(iri, &iparsed, IH_QUERY);
625 if ((iparsed.iri_flags & IH_PATH) && *iparsed.iri_path == '/')
626 cpfields(iri, &iparsed, IH_PATH);
628 if (!(ibase.iri_flags & IH_PATH))
629 ibase.iri_path[0] = '\0';
630 if (!(iparsed.iri_flags & IH_PATH))
631 iparsed.iri_path[0] = '\0';
632 if (mergepath(iri, &ibase, &iparsed) == -1)
635 if (remove_dot_segments(iri) == -1)
/*
 * iri_unparse: serialise i into buf (capacity buflen) by appending,
 * in order and only for the components whose IH_* bit is set:
 * scheme and ":", a double slash when any IH_AUTHORITY bit is set,
 * userinfo and "@", host, ":" and the port text (iri_portstr), path,
 * "?" and query, "#" and fragment.  When an authority is present and
 * the path does not start with a slash, one is inserted before it.
 * Every strlcat() result is compared with buflen to detect
 * truncation.
 * NOTE(review): the initialisation of buf and the error path are not
 * visible in this extract.
 */
641 iri_unparse(const struct iri *i, char *buf, size_t buflen)
646 /* TODO: should %enc octets if needed */
650 if (i->iri_flags & IH_SCHEME) {
651 if (strlcat(buf, i->iri_scheme, buflen) >= buflen ||
652 strlcat(buf, ":", buflen) >= buflen)
656 if (i->iri_flags & IH_AUTHORITY) {
657 if (strlcat(buf, "//", buflen) >= buflen)
661 if (i->iri_flags & IH_UINFO) {
662 if (strlcat(buf, i->iri_uinfo, buflen) >= buflen ||
663 strlcat(buf, "@", buflen) >= buflen)
666 if (i->iri_flags & IH_HOST) {
667 if (strlcat(buf, i->iri_host, buflen) >= buflen)
670 if (i->iri_flags & IH_PORT) {
671 if (strlcat(buf, ":", buflen) >= buflen ||
672 strlcat(buf, i->iri_portstr, buflen) >= buflen)
676 if (i->iri_flags & IH_PATH) {
677 if (i->iri_flags & IH_AUTHORITY &&
678 i->iri_path[0] != '/' &&
679 strlcat(buf, "/", buflen) >= buflen)
681 if (strlcat(buf, i->iri_path, buflen) >= buflen)
685 if (i->iri_flags & IH_QUERY) {
686 if (strlcat(buf, "?", buflen) >= buflen ||
687 strlcat(buf, i->iri_query, buflen) >= buflen)
691 if (i->iri_flags & IH_FRAGMENT) {
692 if (strlcat(buf, "#", buflen) >= buflen ||
693 strlcat(buf, i->iri_fragment, buflen) >= buflen)
/*
 * iri_human: zero the whole output buffer (buflen bytes) before
 * producing the text.
 * NOTE(review): the rest of the body is not visible in this extract.
 */
705 iri_human(const struct iri *iri, char *buf, size_t buflen)
707 memset(buf, 0, buflen);
/*
 * iri_setquery: copy p into iri->iri_query, leaving unreserved
 * characters, sub-delimiters, ':', '@', slash and question mark as
 * they are and formatting every other byte as "%XX" through
 * snprintf().  IH_QUERY is set once all of p has been consumed and
 * the write cursor q is still inside the buffer.
 * NOTE(review): the definition continues past the end of this
 * extract; the copy statements and the return paths are not visible.
 */
712 iri_setquery(struct iri *iri, const char *p)
716 char *buf, *q, tmp[4];
718 buf = q = iri->iri_query;
719 bufsize = sizeof(iri->iri_query);
720 while (*p && (q - buf < bufsize)) {
721 if (unreserved(*p) || sub_delims(*p) || *p == ':' || *p == '@' ||
722 *p == '/' || *p == '?')
725 if (q - buf >= bufsize - 3)
/*
 * NOTE(review): *p is a plain char; where char is signed, a byte
 * >= 0x80 becomes a negative int and "%02X" prints it sign-extended
 * (eight hex digits with a 32-bit int), which does not fit tmp.
 * Casting through unsigned char would avoid that.  Also, snprintf()
 * reports truncation when r >= sizeof(tmp), not r > sizeof(tmp).
 */
727 r = snprintf(tmp, sizeof(tmp), "%%%02X", (int)*p);
728 if (r < 0 || (size_t)r > sizeof(tmp))
736 if ((*p == '\0') && (q - buf < bufsize)) {
737 iri->iri_flags |= IH_QUERY;