Blame


1 5c2e310e 2021-01-22 op /*
2 a555e0d6 2022-07-04 op * Copyright (c) 2020, 2022 Omar Polo <op@omarpolo.com>
3 5c2e310e 2021-01-22 op *
4 5c2e310e 2021-01-22 op * Permission to use, copy, modify, and distribute this software for any
5 5c2e310e 2021-01-22 op * purpose with or without fee is hereby granted, provided that the above
6 5c2e310e 2021-01-22 op * copyright notice and this permission notice appear in all copies.
7 5c2e310e 2021-01-22 op *
8 5c2e310e 2021-01-22 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 5c2e310e 2021-01-22 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 5c2e310e 2021-01-22 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 5c2e310e 2021-01-22 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 5c2e310e 2021-01-22 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 5c2e310e 2021-01-22 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 5c2e310e 2021-01-22 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 5c2e310e 2021-01-22 op */
16 5c2e310e 2021-01-22 op
17 5c2e310e 2021-01-22 op #include <err.h>
18 5c2e310e 2021-01-22 op #include <stdio.h>
19 5c2e310e 2021-01-22 op #include <string.h>
20 5c2e310e 2021-01-22 op
21 5c2e310e 2021-01-22 op #include "../gmid.h"
22 617ae385 2022-07-04 op
/*
 * ENCTEST(buf, len, raw, exp): encode `raw' into `buf' (of size `len')
 * with encode_path() and compare the result against `exp'.
 *
 * If encode_path() returns -1, or if the encoded string differs from
 * `exp', the call site is reported on stderr and the program exits
 * with status 1.
 *
 * The body is wrapped in do { } while (0) so that both checks expand
 * as one statement; invoke it with a trailing semicolon.
 */
#define ENCTEST(buf, len, raw, exp) \
	do { \
		if (encode_path((buf), (len), (raw)) == -1) { \
			fprintf(stderr, "%s:%d: failed to encode: %s\n", \
			    __FILE__, __LINE__, (raw)); \
			exit(1); \
		} \
		if (strcmp((buf), (exp)) != 0) { \
			fprintf(stderr, "%s:%d: error: " \
			    "unexpected encoding: got %s, want %s\n", \
			    __FILE__, __LINE__, (buf), (exp)); \
			exit(1); \
		} \
	} while (0)
35 5c2e310e 2021-01-22 op
/*
 * TEST(iri, fail, exp, descr): run a single parser case through
 * run_test().  When run_test() returns 0 the call site and `descr'
 * are printed on stderr and the program exits with status 1.
 *
 * Expands to a plain `if' block, so an invocation compiles with or
 * without a trailing semicolon.
 */
#define TEST(iri, fail, exp, descr) \
	if (run_test(iri, fail, exp) == 0) { \
		fprintf(stderr, "%s:%d: error: %s\n", \
		    __FILE__, __LINE__, descr); \
		exit(1); \
	}
42 5c2e310e 2021-01-22 op
/*
 * IRI(schema, host, port, path, query, frag): build a `struct iri'
 * compound literal from the given strings.
 *
 * The initializer is positional: the first three members receive
 * schema, host and port, the fourth is always 0, and the next three
 * receive path, query and frag.  Every argument is cast to `char *'
 * and parenthesized so the cast applies to the whole expression.
 */
#define IRI(schema, host, port, path, query, frag) \
	((struct iri){ (char *)(schema), (char *)(host), (char *)(port), \
	    0, (char *)(path), (char *)(query), \
	    (char *)(frag) })
47 5c2e310e 2021-01-22 op
/*
 * DIFF(wanted, got, field): compare the string member `field' of the
 * two structures pointed to by `wanted' and `got'.
 *
 * If either member is NULL or the two strings differ, both values are
 * printed on stderr and the *enclosing function* returns 0.  NULL
 * members are printed as "(null)" instead of being handed to %s.
 *
 * Wrapped in do { } while (0) so it behaves as a single statement;
 * invoke it with a trailing semicolon.
 */
#define DIFF(wanted, got, field) \
	do { \
		if ((wanted)->field == NULL || (got)->field == NULL || \
		    strcmp((wanted)->field, (got)->field) != 0) { \
			fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n", \
			    (got)->field != NULL ? (got)->field : "(null)", \
			    (wanted)->field != NULL ? \
			    (wanted)->field : "(null)"); \
			return 0; \
		} \
	} while (0)
55 5c2e310e 2021-01-22 op
56 5c2e310e 2021-01-22 op #define PASS 0
57 5c2e310e 2021-01-22 op #define FAIL 1
58 5c2e310e 2021-01-22 op
/* Forward declarations of the helpers defined further down. */
int	diff_iri(struct iri *wanted, struct iri *got);
int	run_test(const char *iri, int should_fail, struct iri expected);
61 5c2e310e 2021-01-22 op
62 5c2e310e 2021-01-22 op int
63 5c2e310e 2021-01-22 op diff_iri(struct iri *p, struct iri *exp)
64 5c2e310e 2021-01-22 op {
65 5c2e310e 2021-01-22 op DIFF(p, exp, schema);
66 5c2e310e 2021-01-22 op DIFF(p, exp, host);
67 5c2e310e 2021-01-22 op DIFF(p, exp, port);
68 5c2e310e 2021-01-22 op DIFF(p, exp, path);
69 5c2e310e 2021-01-22 op DIFF(p, exp, query);
70 5c2e310e 2021-01-22 op DIFF(p, exp, fragment);
71 5c2e310e 2021-01-22 op return 1;
72 5c2e310e 2021-01-22 op }
73 5c2e310e 2021-01-22 op
74 5c2e310e 2021-01-22 op int
75 5c2e310e 2021-01-22 op run_test(const char *iri, int should_fail, struct iri expected)
76 5c2e310e 2021-01-22 op {
77 5c2e310e 2021-01-22 op int failed, ok = 1;
78 5c2e310e 2021-01-22 op char *iri_copy;
79 5c2e310e 2021-01-22 op struct iri parsed;
80 5c2e310e 2021-01-22 op const char *error;
81 5c2e310e 2021-01-22 op
82 5c2e310e 2021-01-22 op if ((iri_copy = strdup(iri)) == NULL)
83 5c2e310e 2021-01-22 op err(1, "strdup");
84 5c2e310e 2021-01-22 op
85 5c2e310e 2021-01-22 op fprintf(stderr, "=> %s\n", iri);
86 5c2e310e 2021-01-22 op failed = !parse_iri(iri_copy, &parsed, &error);
87 5c2e310e 2021-01-22 op
88 5c2e310e 2021-01-22 op if (failed && should_fail)
89 5c2e310e 2021-01-22 op goto done;
90 5c2e310e 2021-01-22 op
91 5c2e310e 2021-01-22 op if (error != NULL)
92 5c2e310e 2021-01-22 op fprintf(stderr, "> %s\n", error);
93 5c2e310e 2021-01-22 op
94 5c2e310e 2021-01-22 op ok = !failed && !should_fail;
95 5c2e310e 2021-01-22 op if (ok)
96 5c2e310e 2021-01-22 op ok = diff_iri(&expected, &parsed);
97 5c2e310e 2021-01-22 op
98 5c2e310e 2021-01-22 op done:
99 5c2e310e 2021-01-22 op free(iri_copy);
100 5c2e310e 2021-01-22 op return ok;
101 5c2e310e 2021-01-22 op }
102 5c2e310e 2021-01-22 op
103 5c2e310e 2021-01-22 op int
104 5c2e310e 2021-01-22 op main(void)
105 5c2e310e 2021-01-22 op {
106 617ae385 2022-07-04 op char buf[32];
107 5c2e310e 2021-01-22 op struct iri empty = IRI("", "", "", "", "", "");
108 5c2e310e 2021-01-22 op
109 617ae385 2022-07-04 op ENCTEST(buf, sizeof(buf), "hello world", "hello%20world");
110 617ae385 2022-07-04 op ENCTEST(buf, sizeof(buf), "hello\nworld", "hello%0Aworld");
111 617ae385 2022-07-04 op ENCTEST(buf, sizeof(buf), "hello\r\nworld", "hello%0D%0Aworld");
112 617ae385 2022-07-04 op
113 5c2e310e 2021-01-22 op TEST("http://omarpolo.com",
114 5c2e310e 2021-01-22 op PASS,
115 5c2e310e 2021-01-22 op IRI("http", "omarpolo.com", "", "", "", ""),
116 5c2e310e 2021-01-22 op "can parse iri with empty path");
117 5c2e310e 2021-01-22 op
118 5c2e310e 2021-01-22 op /* schema */
119 5c2e310e 2021-01-22 op TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
120 5c2e310e 2021-01-22 op TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
121 5c2e310e 2021-01-22 op TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
122 5c2e310e 2021-01-22 op TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
123 5c2e310e 2021-01-22 op TEST("GEMINI://omarpolo.com",
124 5c2e310e 2021-01-22 op PASS,
125 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
126 5c2e310e 2021-01-22 op "Schemas are case insensitive.");
127 5c2e310e 2021-01-22 op
128 5c2e310e 2021-01-22 op /* authority */
129 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com",
130 5c2e310e 2021-01-22 op PASS,
131 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
132 5c2e310e 2021-01-22 op "can parse authority with empty path");
133 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/",
134 5c2e310e 2021-01-22 op PASS,
135 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
136 5c2e310e 2021-01-22 op "can parse authority with empty path (alt)")
137 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com:1965",
138 5c2e310e 2021-01-22 op PASS,
139 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "1965", "", "", ""),
140 5c2e310e 2021-01-22 op "can parse with port and empty path");
141 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com:1965/",
142 5c2e310e 2021-01-22 op PASS,
143 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "1965", "", "", ""),
144 5c2e310e 2021-01-22 op "can parse with port and empty path")
145 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com:196s",
146 5c2e310e 2021-01-22 op FAIL,
147 5c2e310e 2021-01-22 op empty,
148 5c2e310e 2021-01-22 op "FAIL with invalid port number");
149 5c2e310e 2021-01-22 op TEST("gemini://OmArPoLo.CoM",
150 5c2e310e 2021-01-22 op PASS,
151 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
152 5c2e310e 2021-01-22 op "host is case-insensitive");
153 e7c7f19c 2021-01-29 op TEST("gemini://xn--nave-6pa.omarpolo.com",
154 e7c7f19c 2021-01-29 op PASS,
155 e7c7f19c 2021-01-29 op IRI("gemini", "xn--nave-6pa.omarpolo.com", "", "", "", ""),
156 e7c7f19c 2021-01-29 op "Can parse punycode-encoded hostnames");
157 e7c7f19c 2021-01-29 op TEST("gemini://naïve.omarpolo.com",
158 e7c7f19c 2021-01-29 op PASS,
159 e7c7f19c 2021-01-29 op IRI("gemini", "naïve.omarpolo.com", "", "", "", ""),
160 e7c7f19c 2021-01-29 op "Accept non punycode-encoded hostnames");
161 e7c7f19c 2021-01-29 op TEST("gemini://na%c3%afve.omarpolo.com",
162 e7c7f19c 2021-01-29 op PASS,
163 e7c7f19c 2021-01-29 op IRI("gemini", "naïve.omarpolo.com", "", "", "", ""),
164 e7c7f19c 2021-01-29 op "Can percent decode hostnames");
165 5c2e310e 2021-01-22 op
166 5c2e310e 2021-01-22 op /* path */
167 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/bar/baz",
168 5c2e310e 2021-01-22 op PASS,
169 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
170 5c2e310e 2021-01-22 op "parse simple paths");
171 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo//bar///baz",
172 5c2e310e 2021-01-22 op PASS,
173 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
174 5c2e310e 2021-01-22 op "parse paths with multiple slashes");
175 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/./bar/./././baz",
176 5c2e310e 2021-01-22 op PASS,
177 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
178 5c2e310e 2021-01-22 op "parse paths with . elements");
179 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
180 5c2e310e 2021-01-22 op PASS,
181 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
182 5c2e310e 2021-01-22 op "parse paths with .. elements");
183 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
184 5c2e310e 2021-01-22 op PASS,
185 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
186 5c2e310e 2021-01-22 op "parse paths with multiple .. elements");
187 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/..",
188 5c2e310e 2021-01-22 op PASS,
189 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
190 5c2e310e 2021-01-22 op "parse paths with a trailing ..");
191 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/../",
192 5c2e310e 2021-01-22 op PASS,
193 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
194 5c2e310e 2021-01-22 op "parse paths with a trailing ..");
195 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/../..",
196 5c2e310e 2021-01-22 op FAIL,
197 5c2e310e 2021-01-22 op empty,
198 5c2e310e 2021-01-22 op "reject paths that would escape the root");
199 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/../../",
200 5c2e310e 2021-01-22 op FAIL,
201 5c2e310e 2021-01-22 op empty,
202 5c2e310e 2021-01-22 op "reject paths that would escape the root")
203 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
204 5c2e310e 2021-01-22 op PASS,
205 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
206 5c2e310e 2021-01-22 op "parse path with lots of cleaning available");
207 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com//foo",
208 5c2e310e 2021-01-22 op PASS,
209 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo", "", ""),
210 5c2e310e 2021-01-22 op "Trim initial slashes");
211 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/////foo",
212 5c2e310e 2021-01-22 op PASS,
213 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo", "", ""),
214 5c2e310e 2021-01-22 op "Trim initial slashes (pt. 2)");
215 9d092b60 2021-04-12 op TEST("http://a/b/c/../..",
216 9d092b60 2021-04-12 op PASS,
217 9d092b60 2021-04-12 op IRI("http", "a", "", "", "", ""),
218 9d092b60 2021-04-12 op "avoid infinite loops (see v1.6.1)");
219 5e41063f 2022-07-04 op TEST("gemini://example.com/@f:b!(z$&)/baz",
220 5e41063f 2022-07-04 op PASS,
221 5e41063f 2022-07-04 op IRI("gemini", "example.com", "", "@f:b!(z$&)/baz", "", ""),
222 5e41063f 2022-07-04 op "allow @, :, !, (), $ and & in paths");
223 5c2e310e 2021-01-22 op
224 5c2e310e 2021-01-22 op /* query */
225 5c2e310e 2021-01-22 op TEST("foo://example.com/foo/?gne",
226 5c2e310e 2021-01-22 op PASS,
227 5c2e310e 2021-01-22 op IRI("foo", "example.com", "", "foo/", "gne", ""),
228 5c2e310e 2021-01-22 op "parse query strings");
229 5c2e310e 2021-01-22 op TEST("foo://example.com/foo/?gne&foo",
230 5c2e310e 2021-01-22 op PASS,
231 5c2e310e 2021-01-22 op IRI("foo", "example.com", "", "foo/", "gne&foo", ""),
232 5c2e310e 2021-01-22 op "parse query strings");
233 8404ec30 2021-02-05 op /* TEST("foo://example.com/foo/?gne%2F", */
234 8404ec30 2021-02-05 op /* PASS, */
235 8404ec30 2021-02-05 op /* IRI("foo", "example.com", "", "foo/", "gne/", ""), */
236 8404ec30 2021-02-05 op /* "parse query strings"); */
237 4125c94f 2021-02-06 op TEST("foo://ex.com/robots.txt?name=foobar&url=https://foo.com",
238 4125c94f 2021-02-06 op PASS,
239 4125c94f 2021-02-06 op IRI("foo", "ex.com", "", "robots.txt", "name=foobar&url=https://foo.com", ""),
240 4125c94f 2021-02-06 op "Accepts : in queries");
241 4125c94f 2021-02-06 op TEST("foo://ex.com/foo?email=foo@bar.com#quuz",
242 4125c94f 2021-02-06 op PASS,
243 4125c94f 2021-02-06 op IRI("foo", "ex.com", "", "foo", "email=foo@bar.com", "quuz"),
244 4125c94f 2021-02-06 op "Accepts @ in queries");
245 5c2e310e 2021-01-22 op
246 5c2e310e 2021-01-22 op /* fragment */
247 5c2e310e 2021-01-22 op TEST("foo://bar.co/#foo",
248 5c2e310e 2021-01-22 op PASS,
249 5c2e310e 2021-01-22 op IRI("foo", "bar.co", "", "", "", "foo"),
250 5c2e310e 2021-01-22 op "can recognize fragments");
251 5c2e310e 2021-01-22 op
252 5c2e310e 2021-01-22 op /* percent encoding */
253 5c2e310e 2021-01-22 op TEST("foo://bar.com/caf%C3%A8.gmi",
254 5c2e310e 2021-01-22 op PASS,
255 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
256 5c2e310e 2021-01-22 op "can decode");
257 5c2e310e 2021-01-22 op TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
258 5c2e310e 2021-01-22 op PASS,
259 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
260 5c2e310e 2021-01-22 op "can decode");
261 5c2e310e 2021-01-22 op TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
262 5c2e310e 2021-01-22 op PASS,
263 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
264 5c2e310e 2021-01-22 op "can decode");
265 5c2e310e 2021-01-22 op TEST("foo://bar.com/foo%2F..%2F..",
266 5c2e310e 2021-01-22 op FAIL,
267 5c2e310e 2021-01-22 op empty,
268 5c2e310e 2021-01-22 op "conversion and checking are done in the correct order");
269 5c2e310e 2021-01-22 op TEST("foo://bar.com/foo%00?baz",
270 5c2e310e 2021-01-22 op FAIL,
271 5c2e310e 2021-01-22 op empty,
272 5c2e310e 2021-01-22 op "rejects %00");
273 5c2e310e 2021-01-22 op
274 5c2e310e 2021-01-22 op /* IRI */
275 5c2e310e 2021-01-22 op TEST("foo://bar.com/cafè.gmi",
276 5c2e310e 2021-01-22 op PASS,
277 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "cafè.gmi", "" , ""),
278 5c2e310e 2021-01-22 op "decode IRI (with a 2-byte utf8 seq)");
279 5c2e310e 2021-01-22 op TEST("foo://bar.com/世界.gmi",
280 5c2e310e 2021-01-22 op PASS,
281 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "世界.gmi", "" , ""),
282 5c2e310e 2021-01-22 op "decode IRI");
283 5c2e310e 2021-01-22 op TEST("foo://bar.com/😼.gmi",
284 5c2e310e 2021-01-22 op PASS,
285 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "😼.gmi", "" , ""),
286 5c2e310e 2021-01-22 op "decode IRI (with a 3-byte utf8 seq)");
287 5c2e310e 2021-01-22 op TEST("foo://bar.com/😼/𤭢.gmi",
288 5c2e310e 2021-01-22 op PASS,
289 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
290 5c2e310e 2021-01-22 op "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
291 5c2e310e 2021-01-22 op TEST("foo://bar.com/世界/\xC0\x80",
292 5c2e310e 2021-01-22 op FAIL,
293 5c2e310e 2021-01-22 op empty,
294 5c2e310e 2021-01-22 op "reject invalid sequence (overlong NUL)");
295 5c2e310e 2021-01-22 op
296 5c2e310e 2021-01-22 op return 0;
297 5c2e310e 2021-01-22 op }