Blob


1 /*
2 * Copyright (c) 2022 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <ctype.h>
18 #include <stdint.h>
19 #include <stdlib.h>
20 #include <string.h>
22 #include "iri.h"
/* TODO: URI -> IRI: accept IRIs on input, but always emit URIs. */
/*
 * Copy the bytes in [start, till) into buf and NUL-terminate the result.
 * len is the total size of buf.  Returns 0 on success, or -1 (leaving
 * buf untouched) when the span plus its terminator does not fit.
 */
static inline int
cpstr(const char *start, const char *till, char *buf, size_t len)
{
	size_t	slen = till - start;

	/*
	 * slen bytes plus the NUL fit when slen + 1 <= len.  Spelled as
	 * slen >= len so that a span filling the buffer exactly is
	 * accepted and the comparison cannot wrap for a huge slen.
	 */
	if (slen >= len)
		return (-1);
	memcpy(buf, start, slen);
	buf[slen] = '\0';
	return (0);
}
/*
 * Return 1 if c is an alphanumeric character or one of "-", ".", "_",
 * "~"; 0 otherwise.
 */
static inline int
unreserved(int c)
{
	switch (c) {
	case '-':
	case '.':
	case '_':
	case '~':
		return (1);
	default:
		return (isalnum((unsigned char)c) != 0);
	}
}
/*
 * Return 1 if s starts with a percent-encoded triplet ("%" followed by
 * two hexadecimal digits), 0 otherwise.  Each byte is inspected only
 * after the previous one matched, so the scan never runs past a NUL.
 */
static inline int
pctenc(const char *s)
{
	if (s[0] != '%')
		return (0);
	if (!isxdigit((unsigned char)s[1]))
		return (0);
	return (isxdigit((unsigned char)s[2]) != 0);
}
/*
 * Return 1 if c is one of the characters
 *	! $ & ' ( ) * + , ; =
 * and 0 otherwise.
 */
static inline int
sub_delims(int c)
{
	switch (c) {
	case '!':
	case '$':
	case '&':
	case '\'':
	case '(':
	case ')':
	case '*':
	case '+':
	case ',':
	case ';':
	case '=':
		return (1);
	default:
		return (0);
	}
}
/*
 * Consume one path character at s: an unreserved character, a
 * sub-delimiter, ":" or "@" (one byte), or a percent-encoded triplet
 * (three bytes).  Returns the position just past it, or NULL when s
 * does not start with a path character.
 */
static inline const char *
advance_pchar(const char *s)
{
	int	c = *s;

	if (c == ':' || c == '@' || unreserved(c) || sub_delims(c))
		return (s + 1);
	return (pctenc(s) ? s + 3 : NULL);
}
/*
 * Skip a possibly empty run of path characters starting at s and
 * return the position of the first byte that is not one.  Never
 * returns NULL.
 */
static inline const char *
advance_segment(const char *s)
{
	const char	*next;

	for (next = advance_pchar(s); next != NULL; next = advance_pchar(s))
		s = next;
	return (s);
}
/*
 * Like advance_segment(), but the run must contain at least one path
 * character: returns NULL when s does not start with one.
 */
static inline const char *
advance_segment_nz(const char *s)
{
	const char	*rest = advance_pchar(s);

	return (rest != NULL ? advance_segment(rest) : NULL);
}
/*
 * Skip a non-empty run of path characters that contains no ":" (that
 * is: unreserved characters, sub-delimiters, "@" and percent-encoded
 * triplets).  Returns the position past the run, or NULL when the run
 * is empty.
 */
static inline const char *
advance_segment_nz_nc(const char *s)
{
	const char	*p = s;

	for (;;) {
		if (pctenc(p))
			p += 3;
		else if (*p == '@' || unreserved(*p) || sub_delims(*p))
			p++;
		else
			break;
	}

	if (p == s)
		return (NULL);
	return (p);
}
113 static const char *
114 parse_scheme(const char *s, struct iri *iri)
116 const char *t = s;
118 if (!isalpha((unsigned char)*t))
119 return (NULL);
121 while (isalnum((unsigned char)*t) ||
122 *t == '+' ||
123 *t == '-' ||
124 *t == '.')
125 t++;
127 if (cpstr(s, t, iri->iri_scheme, sizeof(iri->iri_scheme)) == -1)
128 return (NULL);
130 iri->iri_flags |= IH_SCHEME;
131 return (t);
134 /* userinfo is always optional */
135 static const char *
136 parse_uinfo(const char *s, struct iri *iri)
138 const char *t = s;
140 for (;;) {
141 if (unreserved(*t) || sub_delims(*t) || *t == ':')
142 t++;
143 else if (pctenc(t))
144 t += 3;
145 else
146 break;
149 if (*t != '@')
150 return (s);
152 if (cpstr(s, t, iri->iri_uinfo, sizeof(iri->iri_uinfo)) == -1)
153 return (NULL);
154 iri->iri_flags |= IH_UINFO;
155 return (t + 1);
158 static const char *
159 parse_host(const char *s, struct iri *iri)
161 const char *t = s;
163 /*
164 * cheating a bit by relaxing and merging the rule for
165 * IPv6address and IPvFuture and by merging IPv4address and
166 * reg-name.
167 */
169 if (*t == '[') {
170 while (*t && *t != ']')
171 ++t;
172 if (*t == '\0')
173 return (NULL);
174 t++;
175 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
176 return (NULL);
177 iri->iri_flags |= IH_HOST;
178 return (t);
181 for (;;) {
182 if (unreserved(*t) || sub_delims(*t))
183 t++;
184 else if (pctenc(t))
185 t += 3;
186 else
187 break;
190 if (cpstr(s, t, iri->iri_host, sizeof(iri->iri_host)) == -1)
191 return (NULL);
192 iri->iri_flags |= IH_HOST;
193 return (t);
196 static const char *
197 parse_port(const char *s, struct iri *iri)
199 const char *t = s;
200 const char *errstr;
202 while (isdigit((unsigned char)*t))
203 t++;
204 if (cpstr(s, t, iri->iri_portstr, sizeof(iri->iri_portstr)) == -1)
205 return (NULL);
206 iri->iri_port = strtonum(iri->iri_portstr, 1, UINT16_MAX, &errstr);
207 if (errstr)
208 return (NULL);
209 iri->iri_flags |= IH_PORT;
210 return (t);
/*
 * Parse an authority component at s:
 *
 *	[ userinfo "@" ] host [ ":" port ]
 *
 * The pieces are stored in iri by parse_uinfo(), parse_host() and
 * parse_port().  Returns the position past the authority, or NULL if
 * any of the pieces fails to parse.
 */
static const char *
parse_authority(const char *s, struct iri *iri)
{
	const char	*t;

	if ((t = parse_uinfo(s, iri)) == NULL)
		return (NULL);

	if ((t = parse_host(t, iri)) == NULL)
		return (NULL);

	/*
	 * parse_port() scans digits from the pointer it is given, so
	 * step over the ":" separator first; handing it the ":" itself
	 * yields an empty digit run and rejects every port.
	 */
	if (*t == ':')
		return (parse_port(t + 1, iri));

	return (t);
}
230 static const char *
231 parse_path_abempty(const char *s, struct iri *iri)
233 const char *t = s;
235 while (*t == '/')
236 t = advance_segment(t + 1);
238 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
239 return (NULL);
240 iri->iri_flags |= IH_PATH;
241 return (t);
244 static const char *
245 parse_path_absolute(const char *s, struct iri *iri)
247 const char *t;
249 if (*s != '/')
250 return (NULL);
252 if ((t = advance_segment_nz(s + 1)) == NULL)
253 return (s + 1);
255 while (*t == '/')
256 t = advance_segment(t + 1);
258 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
259 return (NULL);
260 iri->iri_flags |= IH_PATH;
261 return (t);
264 static const char *
265 parse_path_rootless(const char *s, struct iri *iri)
267 const char *t;
269 if ((t = advance_segment_nz(s)) == NULL)
270 return (NULL);
272 while (*t == '/')
273 t = advance_segment(t + 1);
275 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
276 return (NULL);
277 iri->iri_flags |= IH_PATH;
278 return (t);
281 static const char *
282 parse_path_noscheme(const char *s, struct iri *iri)
284 const char *t;
286 if ((t = advance_segment_nz_nc(s)) == NULL)
287 return (NULL);
289 while (*t == '/')
290 t = advance_segment(t + 1);
292 if (cpstr(s, t, iri->iri_path, sizeof(iri->iri_path)) == -1)
293 return (NULL);
294 iri->iri_flags |= IH_PATH;
295 return (t);
298 static const char *
299 parse_path_empty(const char *s, struct iri *iri)
301 iri->iri_path[0] = '\0';
302 iri->iri_flags |= IH_PATH;
303 return (s);
/*
 * Parse the hierarchical part that follows "scheme:".  Alternatives
 * are tried in this order:
 *
 *	"//" authority path-abempty
 *	path-absolute
 *	path-rootless
 *	path-empty
 *
 * Returns the position past what was consumed, or NULL when the text
 * starts with "//" and the authority or the path after it fails.
 */
static const char *
parse_hier(const char *s, struct iri *iri)
{
	const char	*t;

	if (s[0] == '/' && s[1] == '/') {
		t = parse_authority(s + 2, iri);
		if (t == NULL)
			return (NULL);
		return (parse_path_abempty(t, iri));
	}

	t = parse_path_absolute(s, iri);
	if (t == NULL)
		t = parse_path_rootless(s, iri);
	if (t == NULL)
		t = parse_path_empty(s, iri);
	return (t);
}
/*
 * Parse the relative part of a reference that has no scheme.
 * Alternatives are tried in this order:
 *
 *	"//" authority path-abempty
 *	path-absolute
 *	path-noscheme
 *	path-empty
 *
 * Returns the position past what was consumed, or NULL when the text
 * starts with "//" and the authority or the path after it fails.
 */
static const char *
parse_relative(const char *s, struct iri *iri)
{
	const char	*t;

	if (s[0] == '/' && s[1] == '/') {
		t = parse_authority(s + 2, iri);
		if (t == NULL)
			return (NULL);
		return (parse_path_abempty(t, iri));
	}

	t = parse_path_absolute(s, iri);
	if (t == NULL)
		t = parse_path_noscheme(s, iri);
	if (t == NULL)
		t = parse_path_empty(s, iri);
	return (t);
}
346 static const char *
347 parse_query(const char *s, struct iri *iri)
349 const char *n, *t = s;
351 for (;;) {
352 if ((n = advance_pchar(t)) != NULL)
353 t = n;
354 else if (*t == '/' || *t == '?')
355 t++;
356 else
357 break;
360 if (cpstr(s, t, iri->iri_query, sizeof(iri->iri_query)) == -1)
361 return (NULL);
362 iri->iri_flags |= IH_QUERY;
363 return (t);
/*
 * Parse s as a full URI:
 *
 *	scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 *
 * The components are stored in iri by the parse_* helpers; the
 * fragment is not parsed, everything from "#" on is ignored.  Flags
 * already set in iri->iri_flags are not cleared here.
 *
 * Returns 0 on success, -1 if s is not a valid URI.
 */
static int
parse_uri(const char *s, struct iri *iri)
{
	const char	*p;

	p = parse_scheme(s, iri);
	if (p == NULL || *p != ':')
		return (-1);

	p = parse_hier(p + 1, iri);
	if (p == NULL)
		return (-1);

	if (*p == '?') {
		p = parse_query(p + 1, iri);
		if (p == NULL)
			return (-1);
	}

	/* skip fragments */
	return ((*p == '#' || *p == '\0') ? 0 : -1);
}
/*
 * Parse s as a relative reference:
 *
 *	relative-part [ "?" query ] [ "#" fragment ]
 *
 * The components are stored in iri by the parse_* helpers; the
 * fragment is not parsed, everything from "#" on is ignored.
 *
 * Returns 0 on success, -1 if s is not a valid relative reference.
 */
static int
parse_relative_ref(const char *s, struct iri *iri)
{
	const char	*p;

	p = parse_relative(s, iri);
	if (p == NULL)
		return (-1);

	if (*p == '?') {
		p = parse_query(p + 1, iri);
		if (p == NULL)
			return (-1);
	}

	/* skip fragments */
	return ((*p == '#' || *p == '\0') ? 0 : -1);
}
404 static int
405 parse(const char *s, struct iri *iri)
407 iri->iri_flags = 0;
409 if (s == NULL)
410 return (0);
412 if (parse_uri(s, iri) == -1) {
413 iri->iri_flags = 0;
414 if (parse_relative_ref(s, iri) == -1)
415 return (-1);
418 return (0);
/*
 * Convert the NUL-terminated string s to lower case in place, one byte
 * at a time via tolower().
 */
static inline void
lowerify(char *s)
{
	size_t	i;

	for (i = 0; s[i] != '\0'; i++)
		s[i] = tolower((unsigned char)s[i]);
}
428 static void
429 cpfields(struct iri *dest, const struct iri *src, int flags)
431 if (flags & IH_SCHEME) {
432 dest->iri_flags |= IH_SCHEME;
433 if (src->iri_flags & IH_SCHEME)
434 memcpy(dest->iri_scheme, src->iri_scheme,
435 sizeof(dest->iri_scheme));
436 lowerify(dest->iri_scheme);
438 if (flags & IH_UINFO) {
439 dest->iri_flags |= IH_UINFO;
440 if (src->iri_flags & IH_UINFO)
441 memcpy(dest->iri_uinfo, src->iri_uinfo,
442 sizeof(dest->iri_uinfo));
444 if (flags & IH_HOST) {
445 dest->iri_flags |= IH_HOST;
446 if (src->iri_flags & IH_HOST)
447 memcpy(dest->iri_host, src->iri_host,
448 sizeof(dest->iri_host));
449 lowerify(dest->iri_host);
451 if (flags & IH_PORT) {
452 dest->iri_flags |= IH_PORT;
453 if (src->iri_flags & IH_PORT)
454 dest->iri_port = src->iri_port;
456 if (flags & IH_PATH) {
457 dest->iri_flags |= IH_PATH;
458 if (src->iri_flags & IH_PATH)
459 memcpy(dest->iri_path, src->iri_path,
460 sizeof(dest->iri_path));
462 if (flags & IH_QUERY) {
463 dest->iri_flags |= IH_QUERY;
464 if (src->iri_flags & IH_QUERY)
465 memcpy(dest->iri_query, src->iri_query,
466 sizeof(dest->iri_query));
/*
 * Placeholder for the normalisation of "." and ".." segments in
 * iri->iri_path.  Not implemented yet: iri is left untouched.
 */
static inline void
remove_dot_segments(struct iri *iri)
{
	/* TODO: fixup iri->iri_path */
	(void)iri;
}
/*
 * Placeholder for resolving path `b' against path `a' into out.
 * Not implemented yet: a and b are ignored and the len bytes of out
 * are zeroed, leaving an empty string.
 */
static inline void
mergepath(char *out, size_t len, const char *a, const char *b)
{
	/* TODO: compute into out path `b' resolved from `a' */
	(void)a;
	(void)b;
	memset(out, 0, len);
}
485 int
486 iri_parse(const char *base, const char *str, struct iri *iri)
488 static struct iri ibase, iparsed;
490 memset(iri, 0, sizeof(*iri));
492 if (base == NULL) {
493 ibase.iri_flags = 0;
494 if (parse_uri(str, &iparsed) == -1)
495 return (-1);
496 } else {
497 if (parse_uri(base, &ibase) == -1)
498 return (-1);
499 if (parse(str, &iparsed) == -1)
500 return (-1);
503 if (iparsed.iri_flags & IH_SCHEME) {
504 cpfields(iri, &iparsed, iparsed.iri_flags);
505 remove_dot_segments(iri);
506 return (0);
509 /* if fragments are supported, copy iparsed fragment to iri */
511 cpfields(iri, &ibase, IH_SCHEME);
513 if (iparsed.iri_flags & IH_HOST) {
514 cpfields(iri, &iparsed, IH_AUTHORITY|IH_PATH|IH_QUERY);
515 remove_dot_segments(iri);
516 return (0);
519 cpfields(iri, &ibase, IH_AUTHORITY);
521 if ((iparsed.iri_flags & IH_PATH) && *iparsed.iri_path == '\0') {
522 cpfields(iri, &ibase, IH_PATH);
523 if (iparsed.iri_flags & IH_QUERY)
524 cpfields(iri, &iparsed, IH_QUERY);
525 else
526 cpfields(iri, &ibase, IH_QUERY);
527 return (0);
530 cpfields(iri, &iparsed, IH_QUERY);
531 if ((iparsed.iri_flags & IH_PATH) && !strcmp(iparsed.iri_path, "/"))
532 cpfields(iri, &iparsed, IH_PATH);
533 else {
534 if (!(ibase.iri_flags & IH_PATH))
535 ibase.iri_path[0] = '\0';
536 if (!(iparsed.iri_flags & IH_PATH))
537 iparsed.iri_path[0] = '\0';
538 mergepath(iri->iri_path, sizeof(iri->iri_path),
539 ibase.iri_path, iparsed.iri_path);
541 remove_dot_segments(iri);
542 cpfields(iri, &ibase, IH_QUERY);
543 return (0);
/*
 * Placeholder for serialising iri into buf.  Not implemented yet: iri
 * is ignored, the buflen bytes of buf are zeroed and -1 is returned.
 */
int
iri_unparse(const struct iri *iri, char *buf, size_t buflen)
{
	(void)iri;
	memset(buf, 0, buflen);
	return (-1);
}
/*
 * Placeholder for rendering iri into buf in a human-friendly form.
 * Not implemented yet: iri is ignored, the buflen bytes of buf are
 * zeroed and -1 is returned.
 */
int
iri_human(const struct iri *iri, char *buf, size_t buflen)
{
	(void)iri;
	memset(buf, 0, buflen);
	return (-1);
}
/*
 * Placeholder for replacing the query of iri with text.  Not
 * implemented yet: both arguments are ignored and -1 is returned.
 */
int
iri_setquery(struct iri *iri, const char *text)
{
	(void)iri;
	(void)text;
	return (-1);
}