2 33d32d1f 2020-12-25 op * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
4 33d32d1f 2020-12-25 op * Permission to use, copy, modify, and distribute this software for any
5 33d32d1f 2020-12-25 op * purpose with or without fee is hereby granted, provided that the above
6 33d32d1f 2020-12-25 op * copyright notice and this permission notice appear in all copies.
8 33d32d1f 2020-12-25 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 33d32d1f 2020-12-25 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 33d32d1f 2020-12-25 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 33d32d1f 2020-12-25 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 33d32d1f 2020-12-25 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 33d32d1f 2020-12-25 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 33d32d1f 2020-12-25 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 33d32d1f 2020-12-25 op #include <err.h>
18 33d32d1f 2020-12-25 op #include <stdio.h>
19 33d32d1f 2020-12-25 op #include <string.h>
21 33d32d1f 2020-12-25 op #include "gmid.h"
/*
 * TEST(uri, fail, exp, descr): call run_test(uri, fail, exp) and, when it
 * returns 0, print "<file>:<line>: error: <descr>" on stderr.
 *
 * NOTE(review): the expansion starts with a bare `if` instead of being
 * wrapped in do { ... } while (0), so an invocation followed by `else`
 * would bind unexpectedly -- confirm against the complete macro text.
 */
23 33d32d1f 2020-12-25 op #define TEST(uri, fail, exp, descr) \
24 33d32d1f 2020-12-25 op if (!run_test(uri, fail, exp)) { \
25 33d32d1f 2020-12-25 op fprintf(stderr, "%s:%d: error: %s\n", \
26 33d32d1f 2020-12-25 op __FILE__, __LINE__, descr); \
/*
 * URI(schema, host, port, path, query, frag): build a `struct uri` compound
 * literal from six values.  The initializers are positional; the fourth
 * member (between port and path) is always set to 0.
 * NOTE(review): positional initialization depends on the member order of
 * struct uri declared in gmid.h -- confirm if that struct changes.
 */
30 33d32d1f 2020-12-25 op #define URI(schema, host, port, path, query, frag) \
31 33d32d1f 2020-12-25 op ((struct uri){schema, host, port, 0, path, query, frag})
/*
 * DIFF(wanted, got, field): compare the string member `field` of the two
 * struct pointers.  The branch is taken when either side is NULL or when
 * strcmp() reports a difference, and it prints both values on stderr
 * labelled with the field name.
 *
 * NOTE(review): when the branch is taken because one side is NULL, that NULL
 * pointer is passed to a "%s" conversion, which is undefined behaviour in
 * standard C (some libcs print "(null)", others crash).
 * NOTE(review): `wanted` and `got` are used unparenthesised, so only plain
 * identifiers are safe as arguments.
 */
33 33d32d1f 2020-12-25 op #define DIFF(wanted, got, field) \
34 33d32d1f 2020-12-25 op if (wanted->field == NULL || got->field == NULL || \
35 33d32d1f 2020-12-25 op strcmp(wanted->field, got->field)) { \
36 33d32d1f 2020-12-25 op fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n", \
37 33d32d1f 2020-12-25 op got->field, wanted->field); \
/*
 * Values for the `fail` argument of TEST() / `should_fail` of run_test():
 * PASS (0) means the URI is expected to parse, FAIL (1) that it is expected
 * to be rejected.
 */
41 33d32d1f 2020-12-25 op #define PASS 0
42 33d32d1f 2020-12-25 op #define FAIL 1
/*
 * Compare two URIs field by field (schema, host, port, path, query,
 * fragment) using DIFF, which reports the first mismatching field on stderr.
 * `p` is used as the "wanted" side and `exp` as the "got" side.
 *
 * NOTE(review): run_test() calls this as diff_uri(&expected, &parsed), so
 * `p` actually holds the expected URI and `exp` the parsed one; the
 * parameter names suggest the opposite.
 */
45 33d32d1f 2020-12-25 op diff_uri(struct uri *p, struct uri *exp)
47 33d32d1f 2020-12-25 op DIFF(p, exp, schema);
48 33d32d1f 2020-12-25 op DIFF(p, exp, host);
49 33d32d1f 2020-12-25 op DIFF(p, exp, port);
50 33d32d1f 2020-12-25 op DIFF(p, exp, path);
51 33d32d1f 2020-12-25 op DIFF(p, exp, query);
52 33d32d1f 2020-12-25 op DIFF(p, exp, fragment);
/*
 * Run one parser test case.
 *
 * uri:         the string handed to parse_uri() (through a private copy).
 * should_fail: non-zero when the parser is expected to reject `uri`.
 * expected:    the URI the parser should produce; compared with diff_uri().
 *
 * The input and any parser error message are logged on stderr.
 */
57 33d32d1f 2020-12-25 op run_test(const char *uri, int should_fail, struct uri expected)
59 33d32d1f 2020-12-25 op int failed, ok = 1;
60 33d32d1f 2020-12-25 op char *uri_copy;
61 33d32d1f 2020-12-25 op struct uri parsed;
62 33d32d1f 2020-12-25 op const char *error;
/*
 * parse_uri() is given a non-const buffer, so work on a heap copy of the
 * (const) test string; presumably the parser edits it in place -- confirm.
 */
64 33d32d1f 2020-12-25 op if ((uri_copy = strdup(uri)) == NULL)
65 33d32d1f 2020-12-25 op err(1, "strdup");
67 33d32d1f 2020-12-25 op fprintf(stderr, "=> %s\n", uri);
/* failed is non-zero when parse_uri() returns 0. */
68 33d32d1f 2020-12-25 op failed = !parse_uri(uri_copy, &parsed, &error);
/* A rejection that was expected. */
70 33d32d1f 2020-12-25 op if (failed && should_fail)
/*
 * NOTE(review): `error` has no initializer in this function, so this read
 * relies on parse_uri() always storing through its third argument -- confirm.
 */
73 33d32d1f 2020-12-25 op if (error != NULL)
74 33d32d1f 2020-12-25 op fprintf(stderr, "> %s\n", error);
/* True only when the parse succeeded and success was expected. */
76 33d32d1f 2020-12-25 op ok = !failed && !should_fail;
/* Expected value goes first: it becomes the "wanted" side of DIFF. */
78 33d32d1f 2020-12-25 op ok = diff_uri(&expected, &parsed);
/* Release the private copy made with strdup() above. */
81 33d32d1f 2020-12-25 op free(uri_copy);
/*
 * Table of parser test cases, one TEST() invocation each.
 *
 * `empty` is the placeholder "expected" URI passed by the FAIL cases; all of
 * its string members are "".
 * NOTE(review): its fourth initializer is spelled PASS, which only works
 * because PASS is 0 (the value URI() puts in that position); PASS is a
 * test-outcome flag, so a literal 0 would read more clearly.
 */
88 33d32d1f 2020-12-25 op struct uri empty = {"", "", "", PASS, "", "", ""};
/*
 * NOTE(review): same URI and description as the "rejects %00" case in the
 * percent-encoding group further down; one of the two looks redundant.
 */
90 df6ca41d 2020-12-25 op TEST("foo://bar.com/foo%00?baz",
93 df6ca41d 2020-12-25 op "rejects %00");
/* scheme and "://" marker handling */
96 33d32d1f 2020-12-25 op TEST("http://omarpolo.com",
98 33d32d1f 2020-12-25 op URI("http", "omarpolo.com", "", "", "", ""),
99 33d32d1f 2020-12-25 op "can parse uri with empty path");
102 33d32d1f 2020-12-25 op TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
103 33d32d1f 2020-12-25 op TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
104 33d32d1f 2020-12-25 op TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
105 33d32d1f 2020-12-25 op TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
107 33d32d1f 2020-12-25 op /* authority */
108 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com",
110 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "", "", "", ""),
111 33d32d1f 2020-12-25 op "can parse authority with empty path");
/* NOTE(review): no ';' after this TEST(...), unlike the other invocations. */
112 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com/",
114 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "", "", "", ""),
115 33d32d1f 2020-12-25 op "can parse authority with empty path (alt)")
116 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com:1965",
118 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "1965", "", "", ""),
119 33d32d1f 2020-12-25 op "can parse with port and empty path");
/* NOTE(review): no ';' after this TEST(...), unlike the other invocations. */
120 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com:1965/",
122 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "1965", "", "", ""),
123 33d32d1f 2020-12-25 op "can parse with port and empty path")
124 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com:196s",
127 33d32d1f 2020-12-25 op "FAIL with invalid port number");
/*
 * path cleaning: the expected path has no leading '/', repeated slashes are
 * collapsed and "." / ".." elements are resolved.
 */
130 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com/foo/bar/baz",
132 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
133 33d32d1f 2020-12-25 op "parse simple paths");
134 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com/foo//bar///baz",
136 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
137 33d32d1f 2020-12-25 op "parse paths with multiple slashes");
138 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com/foo/./bar/./././baz",
140 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
141 33d32d1f 2020-12-25 op "parse paths with . elements");
142 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
144 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
145 33d32d1f 2020-12-25 op "parse paths with .. elements");
146 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
148 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
149 33d32d1f 2020-12-25 op "parse paths with multiple .. elements");
150 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com/foo/..",
152 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "", "", "", ""),
153 33d32d1f 2020-12-25 op "parse paths with a trailing ..");
154 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com/foo/../",
156 33d32d1f 2020-12-25 op URI("gemini", "omarpolo.com", "", "", "", ""),
157 33d32d1f 2020-12-25 op "parse paths with a trailing ..");
158 33d32d1f 2020-12-25 op TEST("gemini://omarpolo.com/foo/../..",
161 33d32d1f 2020-12-25 op "reject paths that would escape the root");
162 df6ca41d 2020-12-25 op TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
164 df6ca41d 2020-12-25 op URI("gemini", "omarpolo.com", "", "", "", ""),
165 df6ca41d 2020-12-25 op "parse path with lots of cleaning available");
/* query strings: the expected query is the percent-decoded text after '?' */
168 33d32d1f 2020-12-25 op TEST("foo://example.com/foo/?gne",
170 33d32d1f 2020-12-25 op URI("foo", "example.com", "", "foo/", "gne", ""),
171 33d32d1f 2020-12-25 op "parse query strings");
172 33d32d1f 2020-12-25 op TEST("foo://example.com/foo/?gne&foo",
174 33d32d1f 2020-12-25 op URI("foo", "example.com", "", "foo/", "gne&foo", ""),
175 33d32d1f 2020-12-25 op "parse query strings");
176 33d32d1f 2020-12-25 op TEST("foo://example.com/foo/?gne%2F",
178 33d32d1f 2020-12-25 op URI("foo", "example.com", "", "foo/", "gne/", ""),
179 33d32d1f 2020-12-25 op "parse query strings");
181 33d32d1f 2020-12-25 op /* fragment */
182 33d32d1f 2020-12-25 op TEST("foo://bar.co/#foo",
184 33d32d1f 2020-12-25 op URI("foo", "bar.co", "", "", "", "foo"),
185 33d32d1f 2020-12-25 op "can recognize fragments");
187 33d32d1f 2020-12-25 op /* percent encoding */
188 33d32d1f 2020-12-25 op TEST("foo://bar.com/caf%C3%A8.gmi",
190 33d32d1f 2020-12-25 op URI("foo", "bar.com", "", "cafè.gmi", "", ""),
191 33d32d1f 2020-12-25 op "can decode");
192 df6ca41d 2020-12-25 op TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
194 df6ca41d 2020-12-25 op URI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
195 df6ca41d 2020-12-25 op "can decode");
/* '+' is expected to stay a literal '+', not to become a space. */
196 df6ca41d 2020-12-25 op TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
198 df6ca41d 2020-12-25 op URI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
199 df6ca41d 2020-12-25 op "can decode");
/* %2F is an encoded '/', so the decoded path reads foo/../.. */
200 df6ca41d 2020-12-25 op TEST("foo://bar.com/foo%2F..%2F..",
203 df6ca41d 2020-12-25 op "conversion and checking are done in the correct order");
204 df6ca41d 2020-12-25 op TEST("foo://bar.com/foo%00?baz",
207 df6ca41d 2020-12-25 op "rejects %00");
/* IRI: raw (not percent-encoded) UTF-8 bytes in the path */
210 df6ca41d 2020-12-25 op TEST("foo://bar.com/cafè.gmi",
212 df6ca41d 2020-12-25 op URI("foo", "bar.com", "", "cafè.gmi", "" , ""),
213 df6ca41d 2020-12-25 op "decode IRI (with a 2-byte utf8 seq)");
214 df6ca41d 2020-12-25 op TEST("foo://bar.com/世界.gmi",
216 df6ca41d 2020-12-25 op URI("foo", "bar.com", "", "世界.gmi", "" , ""),
217 df6ca41d 2020-12-25 op "decode IRI");
/*
 * NOTE(review): the character in this case is U+1F63C, which UTF-8 encodes
 * in 4 bytes, not 3 as the description says.
 */
218 df6ca41d 2020-12-25 op TEST("foo://bar.com/😼.gmi",
220 df6ca41d 2020-12-25 op URI("foo", "bar.com", "", "😼.gmi", "" , ""),
221 df6ca41d 2020-12-25 op "decode IRI (with a 3-byte utf8 seq)");
/*
 * NOTE(review): both characters here (U+1F63C and U+24B62) are 4-byte UTF-8
 * sequences; the description mentions a 3-byte one.
 */
222 df6ca41d 2020-12-25 op TEST("foo://bar.com/😼/𤭢.gmi",
224 df6ca41d 2020-12-25 op URI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
225 df6ca41d 2020-12-25 op "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
/* 0xC0 0x80 is a two-byte (overlong) encoding of U+0000. */
226 df6ca41d 2020-12-25 op TEST("foo://bar.com/世界/\xC0\x80",
229 df6ca41d 2020-12-25 op "reject invalid sequence (overlong NUL)");