2 * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
24 /* TODO: URI -> IRI. Accept IRIs, but always emit URIs. */
/*
 * cpstr: copy the bytes in the half-open range [start, till) into buf.
 * len is presumably the capacity of buf (callers in this file pass the
 * sizeof of the destination field) and callers compare the result
 * against -1 to detect failure.
 * NOTE(review): the check of slen against len, the terminator write and
 * the return statements are not visible in this excerpt -- confirm that
 * the bound is verified before the memcpy below.
 */
27 cpstr(const char *start, const char *till, char *buf, size_t len)
/* number of bytes to copy; assumes till >= start */
29 size_t slen = till - start;
33 memcpy(buf, start, slen);
/*
 * Character-class predicate; the function header is not visible in this
 * excerpt (presumably unreserved(), which later code calls).  Accepts
 * any alphanumeric; the expression continues on lines that are not
 * shown.  The cast to unsigned char keeps the ctype call well defined
 * for negative char values.
 */
41 return (isalnum((unsigned char)c) ||
/*
 * Percent-encoding test; the function header is not visible in this
 * excerpt (name presumed).  True when t points at a percent sign
 * followed by two hexadecimal digits.  The && chain short-circuits, so
 * t[1] is read only after t[0] matched and t[2] only after t[1] was a
 * hex digit; a NUL terminator therefore stops the reads.
 */
53 return (t[0] == '%' &&
54 isxdigit((unsigned char)t[1]) &&
55 isxdigit((unsigned char)t[2]));
/*
 * Character-class predicate; the function header is not visible in this
 * excerpt (presumably sub_delims(), which later code calls).  True when
 * c is one of the eleven punctuation characters compared below, which
 * is the sub-delims set of RFC 3986.
 */
61 return (c == '!' || c == '$' || c == '&' || c == '\'' ||
62 c == '(' || c == ')' || c == '*' || c == '+' || c == ',' ||
63 c == ';' || c == '=');
/*
 * advance_pchar: try to consume one path character at s.  The visible
 * test accepts unreserved characters, sub-delims, a colon and an at
 * sign.  Callers in this file treat a NULL result as no match and any
 * other result as the position to continue from.
 * NOTE(review): the return statements and any percent-encoded branch
 * are not visible in this excerpt.
 */
66 static inline const char *
67 advance_pchar(const char *s)
69 if (unreserved(*s) || sub_delims(*s) || *s == ':' || *s == '@')
/*
 * advance_segment: consume a possibly empty run of path characters by
 * calling advance_pchar until it reports NULL.  Being a while loop, it
 * may run zero times.
 * NOTE(review): the loop body and the return statement are not visible
 * in this excerpt; presumably the position of the first non-pchar is
 * returned.
 */
76 static inline const char *
77 advance_segment(const char *s)
81 while ((t = advance_pchar(s)) != NULL)
/*
 * advance_segment_nz: like advance_segment, but at least one path
 * character is required.  The first advance_pchar call decides whether
 * the segment is non-empty; on success the remainder is delegated to
 * advance_segment.
 * NOTE(review): the statement taken when the first character does not
 * match is not visible here; callers test the result against NULL.
 */
86 static inline const char *
87 advance_segment_nz(const char *s)
91 if ((t = advance_pchar(s)) == NULL)
/* first pchar consumed: the rest may be empty */
93 return (advance_segment(t));
/*
 * advance_segment_nz_nc: consume a non-empty segment that contains no
 * colon.  The visible accept test is the advance_pchar one minus the
 * colon.  Returns the end position when at least one character was
 * consumed and NULL otherwise.
 * NOTE(review): the loop around the accept test is not visible in this
 * excerpt.
 */
96 static inline const char *
97 advance_segment_nz_nc(const char *s)
102 if (unreserved(*t) || sub_delims(*t) || *t == '@')
/* no progress means the segment was empty, which is not allowed */
110 return (t != s ? t : NULL);
/*
 * parse_scheme: parse the scheme at s.  The first character must be
 * alphabetic; the following loop accepts alphanumerics plus further
 * characters listed on continuation lines that are not shown.  The
 * matched range [s, t) is copied into iri->iri_scheme and IH_SCHEME is
 * set in iri->iri_flags.
 * NOTE(review): the failure paths and the returned pointer are not
 * visible here; parse_uri treats NULL as failure.
 */
114 parse_scheme(const char *s, struct iri *iri)
118 if (!isalpha((unsigned char)*t))
121 while (isalnum((unsigned char)*t) ||
/* cpstr reports -1 on failure (presumably: scheme too long) */
127 if (cpstr(s, t, iri->iri_scheme, sizeof(iri->iri_scheme)) == -1)
130 iri->iri_flags |= IH_SCHEME;
/*
 * parse_uinfo: parse the optional userinfo part of an authority.  The
 * visible accept test allows unreserved characters, sub-delims and a
 * colon.  On a match the range [s, t) is copied into iri->iri_uinfo
 * and IH_UINFO is set.
 * NOTE(review): the scanning loop, the check for the at-sign delimiter
 * and the return values are not visible in this excerpt;
 * parse_authority treats NULL as failure.
 */
134 /* userinfo is always optional */
136 parse_uinfo(const char *s, struct iri *iri)
141 if (unreserved(*t) || sub_delims(*t) || *t == ':')
152 if (cpstr(s, t, iri->iri_uinfo, sizeof(iri->iri_uinfo)) == -1)
154 iri->iri_flags |= IH_UINFO;
/*
 * parse_host: parse the host part of an authority into iri->iri_host
 * and set IH_HOST.  Two branches are visible: one scans up to a closing
 * square bracket (the relaxed IP-literal rule described by the original
 * comment fragment below), the other accepts unreserved characters and
 * sub-delims.
 * NOTE(review): the branch selection, the handling of a missing closing
 * bracket (the scan also stops at the NUL terminator) and the return
 * values are not visible in this excerpt.
 */
159 parse_host(const char *s, struct iri *iri)
164 * cheating a bit by relaxing and merging the rule for
165 * IPv6address and IPvFuture and by merging IPv4address and
/* stop at the closing bracket or at the end of the string */
170 while (*t && *t != ']')
175 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
177 iri->iri_flags |= IH_HOST;
182 if (unreserved(*t) || sub_delims(*t))
190 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
192 iri->iri_flags |= IH_HOST;
/*
 * parse_port: parse a decimal port.  The run of digits at t is copied
 * into iri->iri_portstr and converted with strtonum into iri->iri_port;
 * the accepted range is 1 to UINT16_MAX, so a port of zero is rejected
 * by the conversion.  IH_PORT is set afterwards.
 * NOTE(review): the check for the leading colon, the test of errstr and
 * the return values are not visible in this excerpt.
 */
197 parse_port(const char *s, struct iri *iri)
202 while (isdigit((unsigned char)*t))
204 if (cpstr(s, t, iri->iri_portstr, sizeof(iri->iri_portstr)) == -1)
/* strtonum reports range and syntax errors through errstr */
206 iri->iri_port = strtonum(iri->iri_portstr, 1, UINT16_MAX, &errstr);
209 iri->iri_flags |= IH_PORT;
/*
 * parse_authority: parse userinfo, host and port in that order, each
 * step continuing from the position returned by the previous one.  The
 * result of parse_port is returned as is.
 * NOTE(review): the statements executed when parse_uinfo or parse_host
 * return NULL are not visible in this excerpt.
 */
214 parse_authority(const char *s, struct iri *iri)
218 if ((t = parse_uinfo(s, iri)) == NULL)
221 if ((t = parse_host(t, iri)) == NULL)
225 return (parse_port(t, iri));
/*
 * parse_path_abempty: parse a path that is empty or made of segments
 * each introduced by one separator character.  Every step skips one
 * character and consumes the segment after it; the matched range is
 * copied into iri->iri_path and IH_PATH is set.
 * NOTE(review): the loop condition (presumably a test for a slash) and
 * the return values are not visible in this excerpt.
 */
231 parse_path_abempty(const char *s, struct iri *iri)
/* t + 1 skips the separator that introduced this segment */
236 t = advance_segment(t + 1);
238 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
240 iri->iri_flags |= IH_PATH;
/*
 * parse_path_absolute: parse a path whose first character is skipped
 * (presumably a slash, checked on a line that is not shown) and is
 * followed by a non-empty segment and then further segments.  The
 * matched range is copied into iri->iri_path and IH_PATH is set.
 * NOTE(review): what happens when advance_segment_nz returns NULL is
 * not visible here.  RFC 3986 makes the first segment optional in
 * path-absolute, so confirm that a lone slash is still accepted.
 */
245 parse_path_absolute(const char *s, struct iri *iri)
252 if ((t = advance_segment_nz(s + 1)) == NULL)
256 t = advance_segment(t + 1);
258 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
260 iri->iri_flags |= IH_PATH;
/*
 * parse_path_rootless: parse a path that starts directly with a
 * non-empty segment, followed by further segments.  The matched range
 * is copied into iri->iri_path and IH_PATH is set.
 * NOTE(review): the loop condition around the second advance and the
 * return values are not visible in this excerpt; parse_hier treats
 * NULL as no match.
 */
265 parse_path_rootless(const char *s, struct iri *iri)
269 if ((t = advance_segment_nz(s)) == NULL)
273 t = advance_segment(t + 1);
275 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
277 iri->iri_flags |= IH_PATH;
/*
 * parse_path_noscheme: parse a relative path whose first segment is
 * non-empty and contains no colon (so it cannot be mistaken for a
 * scheme), followed by further segments.  The matched range is copied
 * into iri->iri_path and IH_PATH is set.
 * NOTE(review): the loop condition around the second advance and the
 * return values are not visible in this excerpt; parse_relative treats
 * NULL as no match.
 */
282 parse_path_noscheme(const char *s, struct iri *iri)
286 if ((t = advance_segment_nz_nc(s)) == NULL)
290 t = advance_segment(t + 1);
292 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
294 iri->iri_flags |= IH_PATH;
/*
 * parse_path_empty: record an empty path.  iri->iri_path becomes the
 * empty string and IH_PATH is still set, so an empty path is
 * distinguishable from a path that was never parsed.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably s is returned unchanged since nothing is consumed.
 */
299 parse_path_empty(const char *s, struct iri *iri)
301 iri->iri_path[0] = '\0';
302 iri->iri_flags |= IH_PATH;
/*
 * parse_hier: parse the hierarchical part that follows a scheme.  When
 * s starts with two slashes an authority is parsed and must be followed
 * by a path-abempty.  Otherwise the alternatives are tried in order:
 * absolute path, rootless path and finally the empty path.
 * NOTE(review): the statements taken on a failed authority and on a
 * successful absolute or rootless match are not visible in this
 * excerpt.
 */
307 parse_hier(const char *s, struct iri *iri)
311 if (!strncmp(s, "//", 2)) {
312 if ((t = parse_authority(s + 2, iri)) == NULL)
314 return (parse_path_abempty(t, iri));
317 if ((t = parse_path_absolute(s, iri)) != NULL)
320 if ((t = parse_path_rootless(s, iri)) != NULL)
/* nothing else matched: the path is empty */
323 return (parse_path_empty(s, iri));
/*
 * parse_relative: parse the relative part of a relative reference.
 * Same structure as parse_hier, except that the third alternative is
 * parse_path_noscheme instead of parse_path_rootless, so the first
 * segment of a relative path cannot contain a colon.
 * NOTE(review): the statements taken on a failed authority and on a
 * successful absolute or noscheme match are not visible in this
 * excerpt.
 */
327 parse_relative(const char *s, struct iri *iri)
331 if (!strncmp(s, "//", 2)) {
332 if ((t = parse_authority(s + 2, iri)) == NULL)
334 return (parse_path_abempty(t, iri));
337 if ((t = parse_path_absolute(s, iri)) != NULL)
340 if ((t = parse_path_noscheme(s, iri)) != NULL)
/* nothing else matched: the path is empty */
343 return (parse_path_empty(s, iri));
/*
 * parse_query: parse the query component starting at s (callers pass
 * the position after the question mark).  Accepted input is any path
 * character as decided by advance_pchar, plus a slash or a question
 * mark.  The matched range is copied into iri->iri_query and IH_QUERY
 * is set.
 * NOTE(review): the enclosing loop, its exit and the return values are
 * not visible in this excerpt; callers treat NULL as failure.
 */
347 parse_query(const char *s, struct iri *iri)
349 const char *n, *t = s;
352 if ((n = advance_pchar(t)) != NULL)
354 else if (*t == '/' || *t == '?')
360 if (cpstr(s, t, iri->iri_query, sizeof(iri->iri_query)) == -1)
362 iri->iri_flags |= IH_QUERY;
/*
 * parse_uri: parse an absolute URI (scheme, hierarchical part and an
 * optional query) into iri.  After the optional query the input must
 * be at a hash sign or at the end of the string.  Callers in this file
 * compare the result against -1 to detect failure.
 * NOTE(review): the check of the character skipped by s + 1 (presumably
 * the colon after the scheme) and the return statements are not
 * visible in this excerpt.
 */
367 parse_uri(const char *s, struct iri *iri)
369 if ((s = parse_scheme(s, iri)) == NULL)
/* s + 1 skips the delimiter that follows the scheme */
375 if ((s = parse_hier(s + 1, iri)) == NULL)
378 if (*s == '?' && (s = parse_query(s + 1, iri)) == NULL)
/* a fragment or the end of input terminates the URI */
382 if (*s == '#' || *s == '\0')
/*
 * parse_relative_ref: parse a relative reference (relative part and an
 * optional query) into iri.  After the optional query the input must
 * be at a hash sign or at the end of the string.  The caller in this
 * file compares the result against -1 to detect failure.
 * NOTE(review): the return statements are not visible in this excerpt.
 */
389 parse_relative_ref(const char *s, struct iri *iri)
391 if ((s = parse_relative(s, iri)) == NULL)
394 if (*s == '?' && (s = parse_query(s + 1, iri)) == NULL)
/* a fragment or the end of input terminates the reference */
398 if (*s == '#' || *s == '\0')
/*
 * parse: parse s as an absolute URI first and, when that fails, as a
 * relative reference.
 * NOTE(review): the line between the two attempts is not visible in
 * this excerpt; confirm that fields and flags left in iri by the
 * failed parse_uri attempt are cleared before the second attempt.
 */
405 parse(const char *s, struct iri *iri)
412 if (parse_uri(s, iri) == -1) {
414 if (parse_relative_ref(s, iri) == -1)
/*
 * Body fragment of an in-place lower-casing helper; the function header
 * and the loop are not visible in this excerpt (presumably lowerify(),
 * which cpfields calls).  The cast to unsigned char keeps the tolower
 * call well defined for negative char values.
 */
425 *s = tolower((unsigned char)*s);
/*
 * cpfields: copy the components selected by the flags bitmask from src
 * to dest.  For every selected component the matching IH_ flag is set
 * in dest->iri_flags, and the data is copied when src has that flag.
 * Scheme and host are additionally passed through lowerify.
 * NOTE(review): braces and indentation are lost in this excerpt, so
 * whether the flag is set in dest unconditionally and whether lowerify
 * runs unconditionally cannot be confirmed from here.
 * NOTE(review): the port branch copies the numeric iri_port only; the
 * textual iri_portstr written by parse_port is not copied -- confirm
 * whether consumers of the resolved IRI read iri_portstr.
 */
429 cpfields(struct iri *dest, const struct iri *src, int flags)
431 if (flags & IH_SCHEME) {
432 dest->iri_flags |= IH_SCHEME;
433 if (src->iri_flags & IH_SCHEME)
434 memcpy(dest->iri_scheme, src->iri_scheme,
435 sizeof(dest->iri_scheme));
436 lowerify(dest->iri_scheme);
438 if (flags & IH_UINFO) {
439 dest->iri_flags |= IH_UINFO;
440 if (src->iri_flags & IH_UINFO)
441 memcpy(dest->iri_uinfo, src->iri_uinfo,
442 sizeof(dest->iri_uinfo));
444 if (flags & IH_HOST) {
445 dest->iri_flags |= IH_HOST;
446 if (src->iri_flags & IH_HOST)
447 memcpy(dest->iri_host, src->iri_host,
448 sizeof(dest->iri_host));
449 lowerify(dest->iri_host);
451 if (flags & IH_PORT) {
452 dest->iri_flags |= IH_PORT;
453 if (src->iri_flags & IH_PORT)
/* only the numeric port is copied here */
454 dest->iri_port = src->iri_port;
456 if (flags & IH_PATH) {
457 dest->iri_flags |= IH_PATH;
458 if (src->iri_flags & IH_PATH)
459 memcpy(dest->iri_path, src->iri_path,
460 sizeof(dest->iri_path));
462 if (flags & IH_QUERY) {
463 dest->iri_flags |= IH_QUERY;
464 if (src->iri_flags & IH_QUERY)
465 memcpy(dest->iri_query, src->iri_query,
466 sizeof(dest->iri_query));
/*
 * remove_dot_segments: placeholder for the normalisation of
 * iri->iri_path.  Only the TODO below is visible, so presumably the
 * path is currently left as is even though iri_parse already calls
 * this function.
 */
471 remove_dot_segments(struct iri *iri)
473 /* TODO: fixup iri->iri_path */
/*
 * mergepath: placeholder for merging the reference path b with the
 * base path a into out, which can hold len bytes.  Only the TODO below
 * is visible, so presumably out is not written yet even though
 * iri_parse already calls this function.
 */
478 mergepath(char *out, size_t len, const char *a, const char *b)
480 /* TODO: compute into out path `b' resolved from `a' */
/*
 * iri_parse: parse str into *iri, resolving it against base.  *iri is
 * zeroed first.  The visible steps appear to follow the reference
 * resolution algorithm of RFC 3986 section 5.2.2:
 *  - a reference with its own scheme is copied whole;
 *  - otherwise the scheme comes from the base, and a reference with a
 *    host supplies authority, path and query;
 *  - otherwise the authority comes from the base, and an empty
 *    reference path takes the base path with the reference query if
 *    present, else the base query;
 *  - otherwise the reference path is used directly or merged with the
 *    base path.
 * NOTE(review): the branch conditions, else arms and return statements
 * between the visible lines are missing from this excerpt (presumably
 * the lone parse_uri call on str handles a NULL base) -- confirm the
 * nesting against the full file.
 * NOTE(review): ibase and iparsed are function-local statics, so the
 * scratch state is shared by all calls; concurrent callers would race.
 * NOTE(review): RFC 3986 uses the reference path directly when it
 * starts with a slash, while the visible test requires the path to be
 * exactly one slash; an absolute path such as /foo would go to
 * mergepath instead.
 * NOTE(review): the final copy of the base query follows an earlier
 * copy of the reference query; if both run on the same path the base
 * query overwrites the reference query, which RFC 3986 does not do.
 */
486 iri_parse(const char *base, const char *str, struct iri *iri)
/* scratch structures with static storage, reused on every call */
488 static struct iri ibase, iparsed;
490 memset(iri, 0, sizeof(*iri));
494 if (parse_uri(str, &iparsed) == -1)
497 if (parse_uri(base, &ibase) == -1)
499 if (parse(str, &iparsed) == -1)
/* the reference is absolute: take every component from it */
503 if (iparsed.iri_flags & IH_SCHEME) {
504 cpfields(iri, &iparsed, iparsed.iri_flags);
505 remove_dot_segments(iri);
509 /* if fragments are supported, copy iparsed fragment to iri */
/* no scheme in the reference: inherit the one from the base */
511 cpfields(iri, &ibase, IH_SCHEME);
513 if (iparsed.iri_flags & IH_HOST) {
514 cpfields(iri, &iparsed, IH_AUTHORITY|IH_PATH|IH_QUERY);
515 remove_dot_segments(iri);
/* no authority in the reference: inherit the one from the base */
519 cpfields(iri, &ibase, IH_AUTHORITY);
521 if ((iparsed.iri_flags & IH_PATH) && *iparsed.iri_path == '\0') {
522 cpfields(iri, &ibase, IH_PATH);
523 if (iparsed.iri_flags & IH_QUERY)
524 cpfields(iri, &iparsed, IH_QUERY);
526 cpfields(iri, &ibase, IH_QUERY);
530 cpfields(iri, &iparsed, IH_QUERY);
531 if ((iparsed.iri_flags & IH_PATH) && !strcmp(iparsed.iri_path, "/"))
532 cpfields(iri, &iparsed, IH_PATH);
/* make both paths valid empty strings before merging them */
534 if (!(ibase.iri_flags & IH_PATH))
535 ibase.iri_path[0] = '\0';
536 if (!(iparsed.iri_flags & IH_PATH))
537 iparsed.iri_path[0] = '\0';
538 mergepath(iri->iri_path, sizeof(iri->iri_path),
539 ibase.iri_path, iparsed.iri_path);
541 remove_dot_segments(iri);
542 cpfields(iri, &ibase, IH_QUERY);
/*
 * iri_unparse: write iri into buf, which can hold buflen bytes.  Only
 * the initial zeroing of the whole buffer is visible in this excerpt;
 * the serialisation and the return value are not shown.
 */
547 iri_unparse(const struct iri *iri, char *buf, size_t buflen)
549 memset(buf, 0, buflen);
/*
 * iri_human: write iri into buf, which can hold buflen bytes
 * (presumably in a human-readable form, going by the name).  Only the
 * initial zeroing of the whole buffer is visible in this excerpt; the
 * formatting and the return value are not shown.
 */
554 iri_human(const struct iri *iri, char *buf, size_t buflen)
556 memset(buf, 0, buflen);
561 iri_setquery(struct iri *iri, const char *text)