Blob


1 /*
2 * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <ctype.h>
18 #include <errno.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <stdlib.h>
22 #include <string.h>
24 #include "iri.h"
/* TODO: URI -> IRI: accept IRIs on input, but always emit URIs. */
/*
 * Copy the bytes in [start, till) into buf and NUL-terminate the result.
 *
 * len is the total size of buf.  Returns 0 on success, or -1 (leaving
 * buf untouched) when the span plus its terminator does not fit.
 */
static inline int
cpstr(const char *start, const char *till, char *buf, size_t len)
{
	size_t	slen = till - start;

	/*
	 * slen bytes plus the NUL fit when slen + 1 <= len.  Spelled as
	 * slen >= len so the test cannot wrap around and a span that
	 * exactly fills the buffer is accepted.
	 */
	if (slen >= len)
		return (-1);
	memcpy(buf, start, slen);
	buf[slen] = '\0';
	return (0);
}
/*
 * RFC 3986 "unreserved" set: ALPHA / DIGIT / "-" / "." / "_" / "~".
 * Returns 1 when c belongs to the set, 0 otherwise.
 */
static inline int
unreserved(int c)
{
	switch (c) {
	case '-':
	case '.':
	case '_':
	case '~':
		return (1);
	default:
		return (isalnum((unsigned char)c) != 0);
	}
}
/*
 * Tell whether s starts with a percent-encoded octet, i.e. '%' followed
 * by two hexadecimal digits.  Returns 1 if so, 0 otherwise.
 *
 * The checks bail out at the first mismatch, so nothing past a
 * terminating NUL is read.
 */
static inline int
pctenc(const char *s)
{
	if (s[0] != '%')
		return (0);
	if (!isxdigit((unsigned char)s[1]))
		return (0);
	return (isxdigit((unsigned char)s[2]) != 0);
}
/*
 * RFC 3986 "sub-delims" set:
 *	"!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
 * Returns 1 when c belongs to the set, 0 otherwise.
 */
static inline int
sub_delims(int c)
{
	switch (c) {
	case '!':
	case '$':
	case '&':
	case '\'':
	case '(':
	case ')':
	case '*':
	case '+':
	case ',':
	case ';':
	case '=':
		return (1);
	default:
		return (0);
	}
}
/*
 * Step over one RFC 3986 "pchar":
 *	unreserved / pct-encoded / sub-delims / ":" / "@"
 *
 * Returns the position right after it, or NULL when s does not start
 * with a pchar.
 */
static inline const char *
advance_pchar(const char *s)
{
	int	c = *s;

	if (pctenc(s))
		return (s + 3);
	if (c == ':' || c == '@' || unreserved(c) || sub_delims(c))
		return (s + 1);
	return (NULL);
}
/*
 * segment = *pchar
 *
 * Skip as many pchars as possible; never fails.  Returns the position
 * of the first character that is not part of the segment (possibly s
 * itself).
 */
static inline const char *
advance_segment(const char *s)
{
	const char	*next;

	for (;;) {
		next = advance_pchar(s);
		if (next == NULL)
			return (s);
		s = next;
	}
}
/*
 * segment-nz = 1*pchar
 *
 * Like advance_segment() but at least one pchar is required: returns
 * NULL when s does not start with one.
 */
static inline const char *
advance_segment_nz(const char *s)
{
	const char	*next = advance_pchar(s);

	return (next == NULL ? NULL : advance_segment(next));
}
/*
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *
 * A non-empty segment that contains no ':'.  Returns the position after
 * the segment, or NULL when nothing was consumed.
 */
static inline const char *
advance_segment_nz_nc(const char *s)
{
	const char	*p = s;

	for (;;) {
		if (pctenc(p)) {
			p += 3;
			continue;
		}
		if (*p == '@' || unreserved(*p) || sub_delims(*p)) {
			p++;
			continue;
		}
		break;
	}

	if (p == s)
		return (NULL);
	return (p);
}
115 static const char *
116 parse_scheme(const char *s, struct iri *iri)
118 const char *t = s;
120 if (!isalpha((unsigned char)*t))
121 return (NULL);
123 while (isalnum((unsigned char)*t) ||
124 *t == '+' ||
125 *t == '-' ||
126 *t == '.')
127 t++;
129 if (cpstr(s, t, iri->iri_scheme, sizeof(iri->iri_scheme)) == -1)
130 return (NULL);
132 iri->iri_flags |= IH_SCHEME;
133 return (t);
136 /* userinfo is always optional */
137 static const char *
138 parse_uinfo(const char *s, struct iri *iri)
140 const char *t = s;
142 for (;;) {
143 if (unreserved(*t) || sub_delims(*t) || *t == ':')
144 t++;
145 else if (pctenc(t))
146 t += 3;
147 else
148 break;
151 if (*t != '@')
152 return (s);
154 if (cpstr(s, t, iri->iri_uinfo, sizeof(iri->iri_uinfo)) == -1)
155 return (NULL);
156 iri->iri_flags |= IH_UINFO;
157 return (t + 1);
160 static const char *
161 parse_host(const char *s, struct iri *iri)
163 const char *t = s;
165 /*
166 * cheating a bit by relaxing and merging the rule for
167 * IPv6address and IPvFuture and by merging IPv4address and
168 * reg-name.
169 */
171 if (*t == '[') {
172 while (*t && *t != ']')
173 ++t;
174 if (*t == '\0')
175 return (NULL);
176 t++;
177 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
178 return (NULL);
179 iri->iri_flags |= IH_HOST;
180 return (t);
183 for (;;) {
184 if (unreserved(*t) || sub_delims(*t))
185 t++;
186 else if (pctenc(t))
187 t += 3;
188 else
189 break;
192 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
193 return (NULL);
194 iri->iri_flags |= IH_HOST;
195 return (t);
198 static const char *
199 parse_port(const char *s, struct iri *iri)
201 const char *t = s;
202 const char *errstr;
204 while (isdigit((unsigned char)*t))
205 t++;
206 if (cpstr(s, t, iri->iri_portstr, sizeof(iri->iri_portstr)) == -1)
207 return (NULL);
208 iri->iri_port = strtonum(iri->iri_portstr, 1, UINT16_MAX, &errstr);
209 if (errstr)
210 return (NULL);
211 iri->iri_flags |= IH_PORT;
212 return (t);
/*
 * authority = [ userinfo "@" ] host [ ":" port ]
 *
 * s points right after the leading "//".  Returns the position after
 * the authority, or NULL if any of its components fails to parse.
 */
static const char *
parse_authority(const char *s, struct iri *iri)
{
	const char	*t;

	if ((t = parse_uinfo(s, iri)) == NULL)
		return (NULL);

	if ((t = parse_host(t, iri)) == NULL)
		return (NULL);

	/*
	 * parse_port() scans digits from the position it is given, so
	 * the ':' separator has to be skipped here; handing it the ':'
	 * makes it see an empty port and reject the whole authority.
	 */
	if (*t == ':')
		return (parse_port(t + 1, iri));

	return (t);
}
232 static const char *
233 parse_path_abempty(const char *s, struct iri *iri)
235 const char *t = s;
237 while (*t == '/')
238 t = advance_segment(t + 1);
240 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
241 return (NULL);
242 iri->iri_flags |= IH_PATH;
243 return (t);
246 static const char *
247 parse_path_absolute(const char *s, struct iri *iri)
249 const char *t;
251 if (*s != '/')
252 return (NULL);
254 if ((t = advance_segment_nz(s + 1)) == NULL)
255 return (s + 1);
257 while (*t == '/')
258 t = advance_segment(t + 1);
260 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
261 return (NULL);
262 iri->iri_flags |= IH_PATH;
263 return (t);
266 static const char *
267 parse_path_rootless(const char *s, struct iri *iri)
269 const char *t;
271 if ((t = advance_segment_nz(s)) == NULL)
272 return (NULL);
274 while (*t == '/')
275 t = advance_segment(t + 1);
277 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
278 return (NULL);
279 iri->iri_flags |= IH_PATH;
280 return (t);
283 static const char *
284 parse_path_noscheme(const char *s, struct iri *iri)
286 const char *t;
288 if ((t = advance_segment_nz_nc(s)) == NULL)
289 return (NULL);
291 while (*t == '/')
292 t = advance_segment(t + 1);
294 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
295 return (NULL);
296 iri->iri_flags |= IH_PATH;
297 return (t);
300 static const char *
301 parse_path_empty(const char *s, struct iri *iri)
303 iri->iri_path[0] = '\0';
304 iri->iri_flags |= IH_PATH;
305 return (s);
/*
 * hier-part = "//" authority path-abempty
 *	     / path-absolute
 *	     / path-rootless
 *	     / path-empty
 *
 * The alternatives are tried in that order.  Returns the position after
 * the hier-part, or NULL when the "//" form is selected and its
 * authority or path fails to parse.
 */
static const char *
parse_hier(const char *s, struct iri *iri)
{
	const char	*end;

	if (s[0] == '/' && s[1] == '/') {
		end = parse_authority(s + 2, iri);
		if (end == NULL)
			return (NULL);
		return (parse_path_abempty(end, iri));
	}

	end = parse_path_absolute(s, iri);
	if (end == NULL)
		end = parse_path_rootless(s, iri);
	if (end == NULL)
		end = parse_path_empty(s, iri);
	return (end);
}
/*
 * relative-part = "//" authority path-abempty
 *		 / path-absolute
 *		 / path-noscheme
 *		 / path-empty
 *
 * The alternatives are tried in that order.  Returns the position after
 * the relative-part, or NULL when the "//" form is selected and its
 * authority or path fails to parse.
 */
static const char *
parse_relative(const char *s, struct iri *iri)
{
	const char	*end;

	if (s[0] == '/' && s[1] == '/') {
		end = parse_authority(s + 2, iri);
		if (end == NULL)
			return (NULL);
		return (parse_path_abempty(end, iri));
	}

	end = parse_path_absolute(s, iri);
	if (end == NULL)
		end = parse_path_noscheme(s, iri);
	if (end == NULL)
		end = parse_path_empty(s, iri);
	return (end);
}
348 static const char *
349 parse_query(const char *s, struct iri *iri)
351 const char *n, *t = s;
353 for (;;) {
354 if ((n = advance_pchar(t)) != NULL)
355 t = n;
356 else if (*t == '/' || *t == '?')
357 t++;
358 else
359 break;
362 if (cpstr(s, t, iri->iri_query, sizeof(iri->iri_query)) == -1)
363 return (NULL);
364 iri->iri_flags |= IH_QUERY;
365 return (t);
/*
 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 *
 * Fills iri with the components found in s; iri_flags is only ever
 * or-ed into, never cleared here.  A fragment, if any, is accepted but
 * not inspected.  Returns 0 on success and -1 on a syntax error or when
 * a component does not fit in its field.
 */
static int
parse_uri(const char *s, struct iri *iri)
{
	const char	*p;

	p = parse_scheme(s, iri);
	if (p == NULL || *p != ':')
		return (-1);

	p = parse_hier(p + 1, iri);
	if (p == NULL)
		return (-1);

	if (*p == '?') {
		p = parse_query(p + 1, iri);
		if (p == NULL)
			return (-1);
	}

	/* skip fragments */
	return ((*p == '#' || *p == '\0') ? 0 : -1);
}
/*
 * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
 *
 * Fills iri with the components found in s.  A fragment, if any, is
 * accepted but not inspected.  Returns 0 on success and -1 on a syntax
 * error or when a component does not fit in its field.
 */
static int
parse_relative_ref(const char *s, struct iri *iri)
{
	const char	*p;

	p = parse_relative(s, iri);
	if (p == NULL)
		return (-1);

	if (*p == '?') {
		p = parse_query(p + 1, iri);
		if (p == NULL)
			return (-1);
	}

	/* skip fragments */
	return ((*p == '#' || *p == '\0') ? 0 : -1);
}
406 static int
407 parse(const char *s, struct iri *iri)
409 iri->iri_flags = 0;
411 if (s == NULL)
412 return (0);
414 if (parse_uri(s, iri) == -1) {
415 iri->iri_flags = 0;
416 if (parse_relative_ref(s, iri) == -1)
417 return (-1);
420 return (0);
/*
 * Convert the NUL-terminated string s to lowercase in place, using
 * tolower(3).
 */
static inline void
lowerify(char *s)
{
	char	*p = s;

	while (*p != '\0') {
		*p = tolower((unsigned char)*p);
		p++;
	}
}
430 static void
431 cpfields(struct iri *dest, const struct iri *src, int flags)
433 if (flags & IH_SCHEME) {
434 dest->iri_flags |= IH_SCHEME;
435 if (src->iri_flags & IH_SCHEME)
436 memcpy(dest->iri_scheme, src->iri_scheme,
437 sizeof(dest->iri_scheme));
438 lowerify(dest->iri_scheme);
440 if (flags & IH_UINFO) {
441 dest->iri_flags |= IH_UINFO;
442 if (src->iri_flags & IH_UINFO)
443 memcpy(dest->iri_uinfo, src->iri_uinfo,
444 sizeof(dest->iri_uinfo));
446 if (flags & IH_HOST) {
447 dest->iri_flags |= IH_HOST;
448 if (src->iri_flags & IH_HOST)
449 memcpy(dest->iri_host, src->iri_host,
450 sizeof(dest->iri_host));
451 lowerify(dest->iri_host);
453 if (flags & IH_PORT) {
454 dest->iri_flags |= IH_PORT;
455 if (src->iri_flags & IH_PORT)
456 dest->iri_port = src->iri_port;
458 if (flags & IH_PATH) {
459 dest->iri_flags |= IH_PATH;
460 if (src->iri_flags & IH_PATH)
461 memcpy(dest->iri_path, src->iri_path,
462 sizeof(dest->iri_path));
464 if (flags & IH_QUERY) {
465 dest->iri_flags |= IH_QUERY;
466 if (src->iri_flags & IH_QUERY)
467 memcpy(dest->iri_query, src->iri_query,
468 sizeof(dest->iri_query));
472 static inline int
473 remove_dot_segments(struct iri *iri)
475 char *p, *q, *buf, *s;
476 ptrdiff_t bufsize;
478 buf = p = q = iri->iri_path;
479 bufsize = sizeof(iri->iri_path);
481 while (*p && (q - buf < bufsize)) {
482 /* A */
483 if (!strncmp(p, "../", 3)) {
484 p += 3;
485 continue;
487 if (!strncmp(p, "./", 3)) {
488 p += 2;
489 continue;
491 /* B */
492 if (!strncmp(p, "/./", 3)) {
493 *q++ = '/';
494 p += 3;
495 continue;
497 if (!strcmp(p, "/.")) {
498 p += 2;
499 break;
501 /* C */
502 if (p[0] == '/' && p[1] == '.' && p[2] == '.' &&
503 (p[3] == '/' || p[3] == '\0')) {
504 p += 3;
505 while (q != buf && *--q != '/')
506 continue;
507 continue;
509 /* D */
510 if (!strcmp(p, ".")) {
511 p++;
512 break;
514 if (!strcmp(p, "..")) {
515 p += 2;
516 break;
518 /* E */
519 s = strchr(p + 1, '/');
520 while (*p && p != s && (q - buf < bufsize))
521 *q++ = *p++;
524 if (*p == '\0' && (q - buf < bufsize)) {
525 *q = '\0';
526 return (0);
529 errno = ENAMETOOLONG;
530 return (-1);
533 static inline int
534 mergepath(struct iri *i, struct iri *base, struct iri *r)
536 const char *bpath, *rpath, *s;
538 bpath = (base->iri_flags & IH_PATH) ? base->iri_path : "/";
539 rpath = (r->iri_flags & IH_PATH) ? r->iri_path : "/";
541 i->iri_flags |= IH_PATH;
542 i->iri_path[0] = '\0';
544 if ((base->iri_flags & IH_AUTHORITY) &&
545 (*bpath == '\0' || !strcmp(bpath, "/"))) {
546 if (*rpath == '/')
547 rpath++;
548 strlcpy(i->iri_path, "/", sizeof(i->iri_path));
549 strlcat(i->iri_path, rpath, sizeof(i->iri_path));
550 return (0);
553 if ((s = strrchr(bpath, '/')) != NULL) {
554 cpstr(bpath, s + 1, i->iri_path, sizeof(i->iri_path));
555 if (*rpath == '/')
556 rpath++;
558 if (strlcat(i->iri_path, rpath, sizeof(i->iri_path)) >=
559 sizeof(i->iri_path)) {
560 errno = ENAMETOOLONG;
561 return (-1);
564 return (0);
567 int
568 iri_parse(const char *base, const char *str, struct iri *iri)
570 static struct iri ibase, iparsed;
572 memset(iri, 0, sizeof(*iri));
574 if (base == NULL) {
575 ibase.iri_flags = 0;
576 if (parse_uri(str, &iparsed) == -1) {
577 errno = EINVAL;
578 return (-1);
580 } else {
581 if (parse_uri(base, &ibase) == -1 ||
582 parse(str, &iparsed) == -1) {
583 errno = EINVAL;
584 return (-1);
588 if (iparsed.iri_flags & IH_SCHEME) {
589 cpfields(iri, &iparsed, iparsed.iri_flags);
590 remove_dot_segments(iri);
591 return (0);
594 /* if fragments are supported, copy iparsed fragment to iri */
596 cpfields(iri, &ibase, IH_SCHEME);
598 if (iparsed.iri_flags & IH_HOST) {
599 cpfields(iri, &iparsed, IH_AUTHORITY|IH_PATH|IH_QUERY);
600 remove_dot_segments(iri);
601 return (0);
604 cpfields(iri, &ibase, IH_AUTHORITY);
606 if ((iparsed.iri_flags & IH_PATH) && *iparsed.iri_path == '\0') {
607 cpfields(iri, &ibase, IH_PATH);
608 if (iparsed.iri_flags & IH_QUERY)
609 cpfields(iri, &iparsed, IH_QUERY);
610 else
611 cpfields(iri, &ibase, IH_QUERY);
612 return (0);
615 cpfields(iri, &iparsed, IH_QUERY);
616 if ((iparsed.iri_flags & IH_PATH) && !strcmp(iparsed.iri_path, "/"))
617 cpfields(iri, &iparsed, IH_PATH);
618 else {
619 if (!(ibase.iri_flags & IH_PATH))
620 ibase.iri_path[0] = '\0';
621 if (!(iparsed.iri_flags & IH_PATH))
622 iparsed.iri_path[0] = '\0';
623 if (mergepath(iri, &ibase, &iparsed) == -1)
624 return (-1);
626 if (remove_dot_segments(iri) == -1)
627 return (-1);
628 cpfields(iri, &ibase, IH_QUERY);
629 return (0);
632 int
633 iri_unparse(const struct iri *i, char *buf, size_t buflen)
635 if (buflen == 0)
636 goto err;
638 /* TODO: should %enc octets if needed */
640 buf[0] = '\0';
642 if (i->iri_flags & IH_SCHEME) {
643 if (strlcat(buf, i->iri_scheme, buflen) >= buflen ||
644 strlcat(buf, ":", buflen) >= buflen)
645 goto err;
648 if (i->iri_flags & IH_AUTHORITY) {
649 if (strlcat(buf, "//", buflen) >= buflen)
650 goto err;
653 if (i->iri_flags & IH_UINFO) {
654 if (strlcat(buf, i->iri_uinfo, buflen) >= buflen ||
655 strlcat(buf, "@", buflen) >= buflen)
656 goto err;
658 if (i->iri_flags & IH_HOST) {
659 if (strlcat(buf, i->iri_host, buflen) >= buflen)
660 goto err;
662 if (i->iri_flags & IH_PORT) {
663 if (strlcat(buf, ":", buflen) >= buflen ||
664 strlcat(buf, i->iri_portstr, buflen) >= buflen)
665 goto err;
668 if (i->iri_flags & IH_PATH) {
669 if (!(i->iri_flags & IH_AUTHORITY) &&
670 i->iri_path[0] != '/' &&
671 strlcat(buf, "/", buflen) >= buflen)
672 goto err;
673 if (strlcat(buf, i->iri_path, buflen) >= buflen)
674 goto err;
677 if (i->iri_flags & IH_QUERY) {
678 if (strlcat(buf, "?", buflen) >= buflen ||
679 strlcat(buf, i->iri_query, buflen) >= buflen)
680 goto err;
683 return (0);
685 err:
686 errno = ENOBUFS;
687 return (-1);
/*
 * Placeholder: no conversion is performed yet.
 *
 * buf, if any, is zero-filled so that it always holds an empty string,
 * then the call fails: it always returns -1, with errno set to ENOSYS
 * like the other functions of this API that set errno on failure.
 */
int
iri_human(const struct iri *iri, char *buf, size_t buflen)
{
	(void)iri;

	if (buf != NULL && buflen > 0)
		memset(buf, 0, buflen);

	errno = ENOSYS;
	return (-1);
}
/*
 * Placeholder: the query of iri is not modified.
 *
 * Always fails: returns -1 with errno set to ENOSYS, like the other
 * functions of this API that set errno on failure.
 */
int
iri_setquery(struct iri *iri, const char *text)
{
	(void)iri;
	(void)text;

	errno = ENOSYS;
	return (-1);
}