Blob


1 /*
2 * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <ctype.h>
18 #include <errno.h>
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
25 #include "iri.h"
27 /* TODO: URI -> IRI. accept IRI but emit always URI */
/*
 * Copy the bytes in [start, till) into buf and NUL-terminate them.
 * len is the size of buf.  Returns 0 on success, or -1 (leaving buf
 * untouched) when the span is considered too long.  The check is
 * conservative: a span is accepted only when its length plus one is
 * strictly smaller than len.
 */
static inline int
cpstr(const char *start, const char *till, char *buf, size_t len)
{
	size_t n;

	n = till - start;
	if (n + 1 < len) {
		memcpy(buf, start, n);
		buf[n] = '\0';
		return (0);
	}
	return (-1);
}
/*
 * Return 1 if c is an "unreserved" character (an alphanumeric per
 * isalnum(), or one of '-', '.', '_', '~'), 0 otherwise.
 */
static inline int
unreserved(int c)
{
	switch (c) {
	case '-':
	case '.':
	case '_':
	case '~':
		return (1);
	default:
		return (isalnum((unsigned char)c) != 0);
	}
}
/*
 * Return 1 if s starts with a percent-encoded octet, i.e. '%' followed
 * by two hexadecimal digits, 0 otherwise.  Bytes are inspected left to
 * right and the scan stops at the first mismatch, so the NUL
 * terminator is never read past.
 */
static inline int
pctenc(const char *s)
{
	if (s[0] != '%')
		return (0);
	if (!isxdigit((unsigned char)s[1]))
		return (0);
	return (isxdigit((unsigned char)s[2]) != 0);
}
/*
 * Return 1 if c is one of the "sub-delims" characters
 * ! $ & ' ( ) * + , ; =  and 0 otherwise.
 */
static inline int
sub_delims(int c)
{
	switch (c) {
	case '!':
	case '$':
	case '&':
	case '\'':
	case '(':
	case ')':
	case '*':
	case '+':
	case ',':
	case ';':
	case '=':
		return (1);
	default:
		return (0);
	}
}
/*
 * Consume one "pchar" at s: an unreserved character, a sub-delim, ':'
 * or '@' (one byte), or a percent-encoded octet (three bytes).
 * Returns the position just past it, or NULL if s does not start with
 * a pchar.
 */
static inline const char *
advance_pchar(const char *s)
{
	int c = *s;

	if (c == ':' || c == '@' || unreserved(c) || sub_delims(c))
		return (s + 1);
	return (pctenc(s) ? s + 3 : NULL);
}
/*
 * Skip a possibly empty run of pchars starting at s and return the
 * position of the first byte that is not part of it.  Never returns
 * NULL.
 */
static inline const char *
advance_segment(const char *s)
{
	const char *next;

	for (;;) {
		next = advance_pchar(s);
		if (next == NULL)
			return (s);
		s = next;
	}
}
/*
 * Like advance_segment() but the segment must hold at least one pchar;
 * returns NULL when s does not start with one.
 */
static inline const char *
advance_segment_nz(const char *s)
{
	const char *rest = advance_pchar(s);

	return (rest != NULL ? advance_segment(rest) : NULL);
}
/*
 * Skip a non-empty segment that contains no ':' ("no colon"): a run of
 * unreserved characters, sub-delims, '@' and percent-encoded octets.
 * Returns the position past the run, or NULL if the run is empty.
 */
static inline const char *
advance_segment_nz_nc(const char *s)
{
	const char *p = s;

	for (;;) {
		int c = *p;

		if (c == '@' || unreserved(c) || sub_delims(c)) {
			p++;
			continue;
		}
		if (!pctenc(p))
			break;
		p += 3;
	}

	if (p == s)
		return (NULL);
	return (p);
}
116 static const char *
117 parse_scheme(const char *s, struct iri *iri)
119 const char *t = s;
121 if (!isalpha((unsigned char)*t))
122 return (NULL);
124 while (isalnum((unsigned char)*t) ||
125 *t == '+' ||
126 *t == '-' ||
127 *t == '.')
128 t++;
130 if (cpstr(s, t, iri->iri_scheme, sizeof(iri->iri_scheme)) == -1)
131 return (NULL);
133 iri->iri_flags |= IH_SCHEME;
134 return (t);
137 /* userinfo is always optional */
138 static const char *
139 parse_uinfo(const char *s, struct iri *iri)
141 const char *t = s;
143 for (;;) {
144 if (unreserved(*t) || sub_delims(*t) || *t == ':')
145 t++;
146 else if (pctenc(t))
147 t += 3;
148 else
149 break;
152 if (*t != '@')
153 return (s);
155 if (cpstr(s, t, iri->iri_uinfo, sizeof(iri->iri_uinfo)) == -1)
156 return (NULL);
157 iri->iri_flags |= IH_UINFO;
158 return (t + 1);
161 static const char *
162 parse_host(const char *s, struct iri *iri)
164 const char *t = s;
166 /*
167 * cheating a bit by relaxing and merging the rule for
168 * IPv6address and IPvFuture and by merging IPv4address and
169 * reg-name.
170 */
172 if (*t == '[') {
173 while (*t && *t != ']')
174 ++t;
175 if (*t == '\0')
176 return (NULL);
177 t++;
178 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
179 return (NULL);
180 iri->iri_flags |= IH_HOST;
181 return (t);
184 for (;;) {
185 if (unreserved(*t) || sub_delims(*t))
186 t++;
187 else if (pctenc(t))
188 t += 3;
189 else
190 break;
193 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
194 return (NULL);
195 iri->iri_flags |= IH_HOST;
196 return (t);
199 static const char *
200 parse_port(const char *s, struct iri *iri)
202 const char *t = s;
203 const char *errstr;
205 while (isdigit((unsigned char)*t))
206 t++;
207 if (cpstr(s, t, iri->iri_portstr, sizeof(iri->iri_portstr)) == -1)
208 return (NULL);
209 iri->iri_port = strtonum(iri->iri_portstr, 1, UINT16_MAX, &errstr);
210 if (errstr)
211 return (NULL);
212 iri->iri_flags |= IH_PORT;
213 return (t);
/*
 * Parse an authority: optional userinfo, a host and, when the host is
 * followed by ':', a port.  Returns the position past the authority or
 * NULL if any component fails to parse.
 */
static const char *
parse_authority(const char *s, struct iri *iri)
{
	const char *p;

	p = parse_uinfo(s, iri);
	if (p == NULL)
		return (NULL);

	p = parse_host(p, iri);
	if (p == NULL)
		return (NULL);

	return (*p == ':' ? parse_port(p + 1, iri) : p);
}
233 static const char *
234 parse_path_abempty(const char *s, struct iri *iri)
236 const char *t = s;
238 while (*t == '/')
239 t = advance_segment(t + 1);
241 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
242 return (NULL);
243 iri->iri_flags |= IH_PATH;
244 return (t);
247 static const char *
248 parse_path_absolute(const char *s, struct iri *iri)
250 const char *t;
252 if (*s != '/')
253 return (NULL);
255 if ((t = advance_segment_nz(s + 1)) == NULL)
256 return (s + 1);
258 while (*t == '/')
259 t = advance_segment(t + 1);
261 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
262 return (NULL);
263 iri->iri_flags |= IH_PATH;
264 return (t);
267 static const char *
268 parse_path_rootless(const char *s, struct iri *iri)
270 const char *t;
272 if ((t = advance_segment_nz(s)) == NULL)
273 return (NULL);
275 while (*t == '/')
276 t = advance_segment(t + 1);
278 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
279 return (NULL);
280 iri->iri_flags |= IH_PATH;
281 return (t);
284 static const char *
285 parse_path_noscheme(const char *s, struct iri *iri)
287 const char *t;
289 if ((t = advance_segment_nz_nc(s)) == NULL)
290 return (NULL);
292 while (*t == '/')
293 t = advance_segment(t + 1);
295 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
296 return (NULL);
297 iri->iri_flags |= IH_PATH;
298 return (t);
301 static const char *
302 parse_path_empty(const char *s, struct iri *iri)
304 iri->iri_path[0] = '\0';
305 iri->iri_flags |= IH_PATH;
306 return (s);
/*
 * Parse the hierarchical part that follows "scheme:".  If it starts
 * with "//" an authority and its path are parsed; otherwise an
 * absolute, a rootless or, as a last resort, an empty path is tried in
 * that order.  Returns the position past what was parsed, or NULL if
 * the authority or its path is invalid.
 */
static const char *
parse_hier(const char *s, struct iri *iri)
{
	const char *t;

	if (s[0] == '/' && s[1] == '/') {
		t = parse_authority(s + 2, iri);
		return (t != NULL ? parse_path_abempty(t, iri) : NULL);
	}

	t = parse_path_absolute(s, iri);
	if (t == NULL)
		t = parse_path_rootless(s, iri);
	if (t == NULL)
		t = parse_path_empty(s, iri);
	return (t);
}
/*
 * Parse the leading part of a relative reference.  If it starts with
 * "//" an authority and its path are parsed; otherwise an absolute
 * path, a path whose first segment has no ':' or, as a last resort, an
 * empty path is tried in that order.  Returns the position past what
 * was parsed, or NULL if the authority or its path is invalid.
 */
static const char *
parse_relative(const char *s, struct iri *iri)
{
	const char *t;

	if (s[0] == '/' && s[1] == '/') {
		t = parse_authority(s + 2, iri);
		return (t != NULL ? parse_path_abempty(t, iri) : NULL);
	}

	t = parse_path_absolute(s, iri);
	if (t == NULL)
		t = parse_path_noscheme(s, iri);
	if (t == NULL)
		t = parse_path_empty(s, iri);
	return (t);
}
349 static const char *
350 parse_qf(const char *s, int flag, struct iri *iri, char *buf, size_t bufsize)
352 const char *n, *t = s;
354 for (;;) {
355 if ((n = advance_pchar(t)) != NULL)
356 t = n;
357 else if (*t == '/' || *t == '?')
358 t++;
359 else
360 break;
363 if (cpstr(s, t, buf, bufsize) == -1)
364 return (NULL);
365 iri->iri_flags |= flag;
366 return (t);
369 static int
370 parse_uri(const char *s, struct iri *iri)
372 iri->iri_flags = 0;
374 if ((s = parse_scheme(s, iri)) == NULL)
375 return (-1);
377 if (*s != ':')
378 return (-1);
380 if ((s = parse_hier(s + 1, iri)) == NULL)
381 return (-1);
383 if (*s == '?') {
384 s = parse_qf(s + 1, IH_QUERY, iri, iri->iri_query,
385 sizeof(iri->iri_query));
386 if (s == NULL)
387 return (-1);
390 if (*s == '#') {
391 s = parse_qf(s + 1, IH_FRAGMENT, iri, iri->iri_fragment,
392 sizeof(iri->iri_fragment));
393 if (s == NULL)
394 return (-1);
397 if (*s == '\0')
398 return (0);
400 return (-1);
403 static int
404 parse_relative_ref(const char *s, struct iri *iri)
406 if ((s = parse_relative(s, iri)) == NULL)
407 return (-1);
409 if (*s == '?') {
410 s = parse_qf(s + 1, IH_QUERY, iri, iri->iri_query,
411 sizeof(iri->iri_query));
412 if (s == NULL)
413 return (-1);
416 if (*s == '#') {
417 s = parse_qf(s + 1, IH_FRAGMENT, iri, iri->iri_fragment,
418 sizeof(iri->iri_fragment));
419 if (s == NULL)
420 return (-1);
423 if (*s == '\0')
424 return (0);
426 return (-1);
429 static int
430 parse(const char *s, struct iri *iri)
432 iri->iri_flags = 0;
434 if (s == NULL)
435 return (0);
437 if (parse_uri(s, iri) == -1) {
438 iri->iri_flags = 0;
439 if (parse_relative_ref(s, iri) == -1)
440 return (-1);
443 return (0);
/*
 * Convert the NUL-terminated string s to lower case in place, using
 * tolower() on each byte.
 */
static inline void
lowerify(char *s)
{
	while (*s != '\0') {
		*s = tolower((unsigned char)*s);
		s++;
	}
}
453 static void
454 cpfields(struct iri *dest, const struct iri *src, int flags)
456 if (flags & IH_SCHEME) {
457 dest->iri_flags |= IH_SCHEME;
458 if (src->iri_flags & IH_SCHEME)
459 memcpy(dest->iri_scheme, src->iri_scheme,
460 sizeof(dest->iri_scheme));
461 lowerify(dest->iri_scheme);
463 if (flags & IH_UINFO) {
464 if (src->iri_flags & IH_UINFO) {
465 memcpy(dest->iri_uinfo, src->iri_uinfo,
466 sizeof(dest->iri_uinfo));
467 dest->iri_flags |= IH_UINFO;
470 if (flags & IH_HOST) {
471 dest->iri_flags |= IH_HOST;
472 if (src->iri_flags & IH_HOST)
473 memcpy(dest->iri_host, src->iri_host,
474 sizeof(dest->iri_host));
475 lowerify(dest->iri_host);
477 if (flags & IH_PORT) {
478 if (src->iri_flags & IH_PORT) {
479 dest->iri_port = src->iri_port;
480 memcpy(dest->iri_portstr, src->iri_portstr,
481 sizeof(dest->iri_portstr));
482 dest->iri_flags |= IH_PORT;
485 if (flags & IH_PATH) {
486 dest->iri_flags |= IH_PATH;
487 if (src->iri_flags & IH_PATH)
488 memcpy(dest->iri_path, src->iri_path,
489 sizeof(dest->iri_path));
491 if (flags & IH_QUERY) {
492 if (src->iri_flags & IH_QUERY) {
493 dest->iri_flags |= IH_QUERY;
494 memcpy(dest->iri_query, src->iri_query,
495 sizeof(dest->iri_query));
498 if (flags & IH_FRAGMENT) {
499 if (src->iri_flags & IH_FRAGMENT) {
500 dest->iri_flags |= IH_FRAGMENT;
501 memcpy(dest->iri_fragment, src->iri_fragment,
502 sizeof(dest->iri_fragment));
/*
 * Remove the "." and ".." segments from the NUL-terminated path held
 * in buf, in place.  bufsize is the size of buf.
 *
 * p is the read cursor and q the write cursor; q never overtakes p, so
 * the rewrite can happen within the same buffer.
 *
 *  - "/." followed by '/' or the end of the string is dropped; if it
 *    was the last segment a trailing '/' is kept.
 *  - "/.." followed by '/' or the end of the string drops the previous
 *    output segment as well; if it was the last segment a trailing '/'
 *    is kept unless the output already ends with one.  This includes
 *    the case where the output has been emptied, so that "/a/.." and
 *    "/.." collapse to "/" rather than to the empty string.
 *
 * Returns 0 on success, or -1 with errno set to ENAMETOOLONG if the
 * end of the buffer is reached before the end of the string.
 */
static inline int
remove_dot_segments(char *buf, ptrdiff_t bufsize)
{
	char *p, *q;

	p = q = buf;
	while (*p && (q - buf < bufsize)) {
		if (p[0] == '/' && p[1] == '.' &&
		    (p[2] == '/' || p[2] == '\0')) {
			p += 2;
			if (*p != '/')
				*q++ = '/';
		} else if (p[0] == '/' && p[1] == '.' && p[2] == '.' &&
		    (p[3] == '/' || p[3] == '\0')) {
			p += 3;
			/* rewind the output to the previous '/' */
			while (q > buf && *--q != '/')
				continue;
			if (*p != '/' && (q == buf || q[-1] != '/'))
				*q++ = '/';
		} else
			*q++ = *p++;
	}

	if ((*p == '\0') && (q - buf < bufsize)) {
		*q = '\0';
		return (0);
	}

	errno = ENAMETOOLONG;
	return (-1);
}
/*
 * Merge the relative path r with the base path, writing the result to
 * buf (of size bufsize).  A NULL or empty base or r is treated as "/".
 *
 * If abs is non-zero and the base path is "/", the result is "/"
 * followed by r.  Otherwise the result is the base path up to and
 * including its last '/', followed by r; if the base has no '/' the
 * result is just r.  Whenever something precedes r, a leading '/' in r
 * is dropped so that it is not doubled.
 *
 * Returns 0 on success, or -1 with errno set to ENAMETOOLONG if the
 * result does not fit in buf; truncated output is never reported as a
 * success.
 */
static inline int
mergepath(char *buf, size_t bufsize, int abs, const char *base, const char *r)
{
	const char *s;

	if (base == NULL || *base == '\0')
		base = "/";
	if (r == NULL || *r == '\0')
		r = "/";

	if (bufsize == 0)
		goto toolong;
	buf[0] = '\0';

	if (abs && !strcmp(base, "/")) {
		if (*r == '/')
			r++;
		if (strlcpy(buf, "/", bufsize) >= bufsize ||
		    strlcat(buf, r, bufsize) >= bufsize)
			goto toolong;
		return (0);
	}

	if ((s = strrchr(base, '/')) != NULL) {
		/* keep the base "directory", trailing slash included */
		if (cpstr(base, s + 1, buf, bufsize) == -1)
			goto toolong;
		if (*r == '/')
			r++;
	}
	if (strlcat(buf, r, bufsize) >= bufsize)
		goto toolong;

	return (0);

 toolong:
	errno = ENAMETOOLONG;
	return (-1);
}
573 int
574 iri_parse(const char *base, const char *str, struct iri *iri)
576 static struct iri ibase, iparsed;
578 memset(iri, 0, sizeof(*iri));
580 if (base == NULL) {
581 ibase.iri_flags = 0;
582 if (parse_uri(str, &iparsed) == -1) {
583 errno = EINVAL;
584 return (-1);
586 } else {
587 if (parse_uri(base, &ibase) == -1 ||
588 parse(str, &iparsed) == -1) {
589 errno = EINVAL;
590 return (-1);
594 cpfields(iri, &iparsed, IH_FRAGMENT);
596 if (iparsed.iri_flags & IH_SCHEME) {
597 cpfields(iri, &iparsed, iparsed.iri_flags);
598 remove_dot_segments(iri->iri_path, sizeof(iri->iri_path));
599 return (0);
602 cpfields(iri, &ibase, IH_SCHEME);
604 if (iparsed.iri_flags & IH_HOST) {
605 cpfields(iri, &iparsed, IH_AUTHORITY|IH_PATH|IH_QUERY);
606 remove_dot_segments(iri->iri_path, sizeof(iri->iri_path));
607 return (0);
610 cpfields(iri, &ibase, IH_AUTHORITY);
612 if ((iparsed.iri_flags & IH_PATH) && *iparsed.iri_path == '\0') {
613 cpfields(iri, &ibase, IH_PATH);
614 if (iparsed.iri_flags & IH_QUERY)
615 cpfields(iri, &iparsed, IH_QUERY);
616 else
617 cpfields(iri, &ibase, IH_QUERY);
618 return (0);
621 cpfields(iri, &iparsed, IH_QUERY);
622 if ((iparsed.iri_flags & IH_PATH) && *iparsed.iri_path == '/')
623 cpfields(iri, &iparsed, IH_PATH);
624 else {
625 if (!(ibase.iri_flags & IH_PATH))
626 ibase.iri_path[0] = '\0';
627 if (!(iparsed.iri_flags & IH_PATH))
628 iparsed.iri_path[0] = '\0';
629 if (mergepath(iri->iri_path, sizeof(iri->iri_path),
630 ibase.iri_flags & IH_AUTHORITY, ibase.iri_path,
631 iparsed.iri_path) == -1)
632 return (-1);
633 iri->iri_flags |= IH_PATH;
635 if (remove_dot_segments(iri->iri_path, sizeof(iri->iri_path)) == -1)
636 return (-1);
637 return (0);
640 int
641 iri_unparse(const struct iri *i, char *buf, size_t buflen)
643 if (buflen == 0)
644 goto err;
646 /* TODO: should %enc octets if needed */
648 buf[0] = '\0';
650 if (i->iri_flags & IH_SCHEME) {
651 if (strlcat(buf, i->iri_scheme, buflen) >= buflen ||
652 strlcat(buf, ":", buflen) >= buflen)
653 goto err;
656 if (i->iri_flags & IH_AUTHORITY) {
657 if (strlcat(buf, "//", buflen) >= buflen)
658 goto err;
661 if (i->iri_flags & IH_UINFO) {
662 if (strlcat(buf, i->iri_uinfo, buflen) >= buflen ||
663 strlcat(buf, "@", buflen) >= buflen)
664 goto err;
666 if (i->iri_flags & IH_HOST) {
667 if (strlcat(buf, i->iri_host, buflen) >= buflen)
668 goto err;
670 if (i->iri_flags & IH_PORT) {
671 if (strlcat(buf, ":", buflen) >= buflen ||
672 strlcat(buf, i->iri_portstr, buflen) >= buflen)
673 goto err;
676 if (i->iri_flags & IH_PATH) {
677 if (i->iri_flags & IH_AUTHORITY &&
678 i->iri_path[0] != '/' &&
679 strlcat(buf, "/", buflen) >= buflen)
680 goto err;
681 if (strlcat(buf, i->iri_path, buflen) >= buflen)
682 goto err;
685 if (i->iri_flags & IH_QUERY) {
686 if (strlcat(buf, "?", buflen) >= buflen ||
687 strlcat(buf, i->iri_query, buflen) >= buflen)
688 goto err;
691 if (i->iri_flags & IH_FRAGMENT) {
692 if (strlcat(buf, "#", buflen) >= buflen ||
693 strlcat(buf, i->iri_fragment, buflen) >= buflen)
694 goto err;
697 return (0);
699 err:
700 errno = ENOBUFS;
701 return (-1);
/*
 * Placeholder: no conversion is performed.  The buflen bytes of buf
 * are zeroed, iri is not inspected, and -1 is always returned.
 */
int
iri_human(const struct iri *iri, char *buf, size_t buflen)
{
	(void)iri;

	memset(buf, 0, buflen);
	return (-1);
}
711 int
712 iri_setquery(struct iri *iri, const char *p)
714 ptrdiff_t bufsize;
715 int r;
716 char *buf, *q, tmp[4];
718 buf = q = iri->iri_query;
719 bufsize = sizeof(iri->iri_query);
720 while (*p && (q - buf < bufsize)) {
721 if (unreserved(*p) || sub_delims(*p) || *p == ':' || *p == '@' ||
722 *p == '/' || *p == '?')
723 *q++ = *p++;
724 else {
725 if (q - buf >= bufsize - 3)
726 goto err;
727 r = snprintf(tmp, sizeof(tmp), "%%%02X", (int)*p);
728 if (r < 0 || (size_t)r > sizeof(tmp))
729 return (-1);
730 *q++ = tmp[0];
731 *q++ = tmp[1];
732 *q++ = tmp[2];
733 p++;
736 if ((*p == '\0') && (q - buf < bufsize)) {
737 iri->iri_flags |= IH_QUERY;
738 *q = '\0';
739 return (0);
742 err:
743 errno = ENOBUFS;
744 return (-1);