Blob


1 /*
2 * Copyright (c) 2020, 2022, 2024 Omar Polo <op@omarpolo.com>
3 * Copyright (c) 2015 Theo de Raadt <deraadt@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
18 #include "gmid.h"
20 #include <ctype.h>
21 #include <string.h>
/*
 * Tell whether `p' belongs to the "unreserved" character class:
 * ASCII letters and digits (as reported by isalnum()) plus the four
 * marks '-', '.', '_' and '~'.
 *
 * Returns 1 on a match and 0 otherwise.
 */
static inline int
unreserved(int p)
{
	switch (p) {
	case '-':
	case '.':
	case '_':
	case '~':
		return 1;
	default:
		/* the cast keeps negative chars out of isalnum() */
		return isalnum((unsigned char)p) != 0;
	}
}
/*
 * Tell whether `p' is one of the "sub-delims" characters:
 *	! $ & ' ( ) * + , ; =
 *
 * Returns 1 on a match and 0 otherwise.
 */
static inline int
sub_delimiters(int p)
{
	switch (p) {
	case '!':
	case '$':
	case '&':
	case '\'':
	case '(':
	case ')':
	case '*':
	case '+':
	case ',':
	case ';':
	case '=':
		return 1;
	default:
		return 0;
	}
}
/*
 * Check the percent-escape that may start at `s'.
 *
 * Returns 1 when `s' does not start with '%' (nothing to check) or
 * when it starts with '%' followed by two hexadecimal digits other
 * than "00".  Returns 0 for a malformed escape and for "%00".
 */
static int
valid_pct_enc_string(char *s)
{
	const unsigned char	*u = (const unsigned char *)s;

	if (u[0] != '%')
		return 1;

	/*
	 * Test the digits one at a time: stopping at the first
	 * failure means a NUL right after '%' is never stepped over.
	 */
	if (!isxdigit(u[1]))
		return 0;
	if (!isxdigit(u[2]))
		return 0;

	/* an encoded NUL byte is refused */
	return !(u[1] == '0' && u[2] == '0');
}
65 static int
66 valid_pct_encoded(struct parser *p)
67 {
68 if (p->iri[0] != '%')
69 return 0;
71 if (!valid_pct_enc_string(p->iri)) {
72 p->err = "illegal percent-encoding";
73 return 0;
74 }
76 p->iri += 2;
77 return 1;
78 }
/*
 * Decode in place the percent-escape starting at `s'.
 *
 * `s' must point at the '%' of a "%XX" sequence.  The decoded byte
 * is stored in s[0] and the remainder of the string (terminator
 * included) is shifted left by two characters.
 *
 * Both call sites in this file validate the escape with
 * valid_pct_enc_string() first.  Should the two characters after '%'
 * not be hexadecimal digits anyway, the string is left untouched
 * rather than being shortened or read past its terminator.
 */
static void
pct_decode(char *s)
{
	unsigned int	 val = 0;
	int		 i, c;

	for (i = 1; i <= 2; i++) {
		c = (unsigned char)s[i];
		/* also stops at a premature NUL before s+3 is touched */
		if (!isxdigit(c))
			return;

		val <<= 4;
		if (isdigit(c))
			val |= (unsigned int)(c - '0');
		else if (c >= 'a')
			val |= (unsigned int)(c - 'a' + 10);
		else
			val |= (unsigned int)(c - 'A' + 10);
	}

	s[0] = (char)val;
	/* +1 so the terminating NUL moves too */
	memmove(s + 1, s + 3, strlen(s + 3) + 1);
}
87 static int
88 parse_pct_encoded(struct parser *p)
89 {
90 if (p->iri[0] != '%')
91 return 0;
93 if (!valid_pct_enc_string(p->iri)) {
94 p->err = "illegal percent-encoding";
95 return 0;
96 }
98 pct_decode(p->iri);
99 if (*p->iri == '\0') {
100 p->err = "illegal percent-encoding";
101 return 0;
104 return 1;
107 /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) "://" */
108 static int
109 parse_scheme(struct parser *p)
111 p->parsed->schema = p->iri;
113 if (!isalpha((unsigned char)*p->iri)) {
114 p->err = "illegal character in scheme";
115 return 0;
118 do {
119 /*
120 * normalize the scheme (i.e. lowercase it)
122 * XXX: since we cannot have good things, tolower
123 * behaviour depends on the LC_CTYPE locale. The good
124 * news is that we're sure p->iri points to something
125 * that's in the ASCII range, so tolower can't
126 * mis-behave on some systems due to the locale.
127 */
128 *p->iri = tolower(*p->iri);
129 p->iri++;
130 } while (isalnum((unsigned char)*p->iri)
131 || *p->iri == '+'
132 || *p->iri == '-'
133 || *p->iri == '.');
135 if (*p->iri != ':') {
136 p->err = "illegal character in scheme";
137 return 0;
140 *p->iri = '\0';
141 if (p->iri[1] != '/' || p->iri[2] != '/') {
142 p->err = "invalid marker after scheme";
143 return 0;
146 p->iri += 3;
147 return 1;
150 /* *DIGIT */
151 static int
152 parse_port(struct parser *p)
154 uint32_t i = 0;
156 p->parsed->port = p->iri;
158 for (; isdigit((unsigned char)*p->iri); p->iri++) {
159 i = i * 10 + *p->iri - '0';
160 if (i > UINT16_MAX) {
161 p->err = "port number too large";
162 return 0;
166 if (*p->iri != '/' && *p->iri != '\0') {
167 p->err = "illegal character in port number";
168 return 0;
171 p->parsed->port_no = i;
173 if (*p->iri != '\0') {
174 *p->iri = '\0';
175 p->iri++;
178 return 1;
/*
 * *( unreserved / sub-delims / pct-encoded )
 *
 * Parse the authority (host and optional port) at the parser's
 * current position.
 *
 * A host in square brackets is handed to getaddrinfo() with
 * AI_NUMERICHOST and rejected if that fails.  Any other host is
 * scanned character by character: percent-escapes are decoded in
 * place and ASCII characters are lowercased.  The host is recorded
 * in p->parsed->host and NUL-terminated by overwriting the delimiter
 * that follows it.
 *
 * When a ':' follows the host the rest is delegated to parse_port();
 * otherwise the port number defaults to 1965.
 *
 * Returns 1 on success, 0 with p->err set otherwise.
 */
static int
parse_authority(struct parser *p)
{
	struct addrinfo	 hints, *ai;
	char		*end;
	int		 err;

	if (*p->iri == '[') {
		p->iri++;
		p->parsed->host = p->iri;
		if ((end = strchr(p->iri, ']')) == NULL) {
			p->err = "invalid IPv6 address";
			return 0;
		}
		*end++ = '\0';
		p->iri = end;

		memset(&hints, 0, sizeof(hints));
		hints.ai_flags = AI_NUMERICHOST;
		err = getaddrinfo(p->parsed->host, NULL, &hints, &ai);
		if (err != 0) {
			p->err = "invalid IPv6 address";
			return 0;
		}
		freeaddrinfo(ai);
	} else {
		p->parsed->host = p->iri;
		/*
		 * NOTE(review): valid_multibyte_utf8() is defined
		 * elsewhere; presumably it accepts a multi-byte UTF-8
		 * sequence at the current position.
		 */
		while (unreserved(*p->iri)
		    || sub_delimiters(*p->iri)
		    || parse_pct_encoded(p)
		    || valid_multibyte_utf8(p)) {
			/*
			 * Normalize the host name.  Compare and
			 * convert as unsigned char: where char is
			 * signed, a byte >= 0x80 is negative and would
			 * otherwise pass the range test and be handed
			 * to tolower().
			 */
			if ((unsigned char)*p->iri < 0x7F)
				*p->iri = tolower((unsigned char)*p->iri);
			p->iri++;
		}

		if (p->err != NULL)
			return 0;
	}

	if (*p->iri == ':') {
		*p->iri = '\0';
		p->iri++;
		return parse_port(p);
	} else
		p->parsed->port_no = 1965;

	if (*p->iri == '/') {
		*p->iri = '\0';
		p->iri++;
		return 1;
	}

	if (*p->iri == '\0')
		return 1;

	p->err = "illegal character in authority section";
	return 0;
}
/*
 * Normalize `path' in place, using an algorithm based on canonpath()
 * from kern_pledge.c.
 *
 * The twist is that the paths handled here are absolute even though
 * they don't start with '/': `dst == path' is how we know we're
 * still at the very beginning of the output.
 *
 * Leading slashes, "." and ".." components and repeated slashes are
 * dropped; a ".." in the middle also drops the component written
 * before it.  Always returns 1.
 */
static int
path_clean(char *path)
{
	char	*src = path;	/* read cursor */
	char	*dst = path;	/* write cursor, never ahead of src */

	while (*src != '\0') {
		int	 at_start = (dst == path);

		if (at_start && src[0] == '/') {
			/* nothing emitted yet: drop the slash */
			src++;
			continue;
		}

		if (at_start && src[0] == '.' && src[1] == '.' &&
		    (src[2] == '/' || src[2] == '\0')) {
			/* "../" at the start of the path */
			src += 2;
			if (*src == '/')
				src++;
			continue;
		}

		if (at_start && src[0] == '.' &&
		    (src[1] == '/' || src[1] == '\0')) {
			/* "./" at the start of the path */
			src++;
			if (*src == '/')
				src++;
			continue;
		}

		if (src[0] == '/' && src[1] == '/') {
			/* collapse a run of slashes */
			src++;
			continue;
		}

		if (src[0] == '/' && src[1] == '.' && src[2] == '.' &&
		    (src[3] == '/' || src[3] == '\0')) {
			/* "/../": rewind over the previous component */
			while (dst > path) {
				dst--;
				if (*dst == '/')
					break;
			}
			src += 3;
			if (dst == path && *src == '/')
				src++;
			continue;
		}

		if (src[0] == '/' && src[1] == '.' &&
		    (src[2] == '/' || src[2] == '\0')) {
			/* "/./": skip it */
			src += 2;
			continue;
		}

		*dst++ = *src++;
	}

	/* the loop only ends on the terminator, so this can't fail */
	*dst = '\0';
	return 1;
}
297 static int
298 parse_query(struct parser *p)
300 p->parsed->query = p->iri;
301 if (*p->iri == '\0')
302 return 1;
304 while (unreserved(*p->iri)
305 || sub_delimiters(*p->iri)
306 || *p->iri == '/'
307 || *p->iri == '?'
308 || *p->iri == ':'
309 || *p->iri == '@'
310 || valid_pct_encoded(p)
311 || valid_multibyte_utf8(p))
312 p->iri++;
314 if (p->err != NULL)
315 return 0;
317 if (*p->iri != '\0' && *p->iri != '#') {
318 p->err = "illegal character in query";
319 return 0;
322 if (*p->iri != '\0') {
323 *p->iri = '\0';
324 p->iri++;
327 return 1;
330 /* don't even bother */
331 static int
332 parse_fragment(struct parser *p)
334 p->parsed->fragment = p->iri;
335 return 1;
338 /* XXX: is it too broad? */
339 /* *(pchar / "/") */
340 static int
341 parse_path(struct parser *p)
343 char c;
345 /* trim initial slashes */
346 while (*p->iri == '/')
347 p->iri++;
349 p->parsed->path = p->iri;
350 if (*p->iri == '\0') {
351 p->parsed->query = p->parsed->fragment = p->iri;
352 return 1;
355 while (unreserved(*p->iri)
356 || sub_delimiters(*p->iri)
357 || *p->iri == '@'
358 || *p->iri == ':'
359 || *p->iri == '/'
360 || parse_pct_encoded(p)
361 || valid_multibyte_utf8(p))
362 p->iri++;
364 if (p->err != NULL)
365 return 0;
367 if (*p->iri != '\0' && *p->iri != '?' && *p->iri != '#') {
368 p->err = "illegal character in path";
369 return 0;
372 if (*p->iri != '\0') {
373 c = *p->iri;
374 *p->iri = '\0';
375 p->iri++;
377 if (c == '#') {
378 if (!parse_fragment(p))
379 return 0;
380 } else
381 if (!parse_query(p) || !parse_fragment(p))
382 return 0;
385 if (!path_clean(p->parsed->path)) {
386 p->err = "illegal path";
387 return 0;
390 return 1;
393 int
394 parse_iri(char *iri, struct iri *ret, const char **err_ret)
396 char *end;
397 struct parser p = {
398 .iri = iri,
399 .parsed = ret,
400 .err = NULL,
401 };
403 memset(ret, 0, sizeof(*ret));
405 /* initialize optional stuff to the empty string */
406 end = iri + strlen(iri);
407 p.parsed->host = end;
408 p.parsed->port = end;
409 p.parsed->path = end;
410 p.parsed->query = end;
411 p.parsed->fragment = end;
413 if (!parse_scheme(&p) || !parse_authority(&p) || !parse_path(&p)) {
414 *err_ret = p.err;
415 return 0;
418 *err_ret = NULL;
419 return 1;
422 int
423 serialize_iri(struct iri *i, char *buf, size_t len)
425 size_t l = 0;
427 /* in ex.c we receive empty "" strings as NULL */
428 if (i->schema == NULL || i->host == NULL) {
429 memset(buf, 0, len);
430 return 0;
433 strlcpy(buf, i->schema, len);
434 strlcat(buf, "://", len);
435 strlcat(buf, i->host, len);
436 strlcat(buf, "/", len);
438 if (i->path != NULL)
439 l = strlcat(buf, i->path, len);
441 if (i->query != NULL && *i->query != '\0') {
442 strlcat(buf, "?", len);
443 l = strlcat(buf, i->query, len);
446 return l < len;
/*
 * Percent-encode `path' into `buf', a buffer of `len' bytes.
 *
 * Unreserved characters, sub-delimiters, '@', ':' and '/' are copied
 * verbatim; every other byte is written as "%XX" with uppercase
 * hexadecimal digits.  The buffer is zeroed first, so the output is
 * always NUL-terminated.
 *
 * Returns 0 on success, -1 if `buf' is too small (a zero-sized
 * buffer included, since it can't even hold the terminator).
 */
int
encode_path(char *buf, size_t len, const char *path)
{
	static const char	 hex[] = "0123456789ABCDEF";
	unsigned char		 c;
	char			*p = buf;

	/* without this, `len' would wrap around in the loop below */
	if (len == 0)
		return -1;

	memset(buf, 0, len);
	while (*path != '\0') {
		if (unreserved(*path) ||
		    sub_delimiters(*path) ||
		    *path == '@' ||
		    *path == ':' ||
		    *path == '/') {
			/* one byte plus the NUL */
			if (len < 2)
				return -1;
			*p++ = *path++;
			len--;
		} else {
			/* three bytes plus the NUL */
			if (len < 4)
				return -1;

			c = (unsigned char)*path++;
			*p++ = '%';
			*p++ = hex[c >> 4];
			*p++ = hex[c & 0x0F];
			len -= 3;
		}
	}

	return 0;
}
/*
 * Decode `s' in place: every '+' becomes a space and every valid
 * percent-escape is replaced by the byte it encodes.  Invalid escapes
 * are left as they are.
 *
 * Returns `s'.
 */
char *
pct_decode_str(char *s)
{
	char	*cur = s;

	while (*cur != '\0') {
		switch (*cur) {
		case '+':
			*cur = ' ';
			break;
		case '%':
			if (valid_pct_enc_string(cur))
				pct_decode(cur);
			break;
		default:
			break;
		}
		/*
		 * Step over the (possibly just decoded) byte so that
		 * a decoded '%' or '+' is not processed again.
		 */
		cur++;
	}

	return s;
}