Blob


1 /*
2 * Copyright (c) 2020, 2022 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <err.h>
18 #include <stdio.h>
19 #include <string.h>
21 #include "../gmid.h"
/*
 * ENCTEST: encode `raw' into `buf' (capacity `len') with encode_path()
 * and compare the result against `exp'.  If encode_path() returns -1,
 * or the content of `buf' differs from `exp', report the file and line
 * of the invocation on stderr and exit with status 1.
 *
 * The body is wrapped in do { } while (0) so that the expansion is one
 * single statement and behaves correctly inside an unbraced if/else.
 * Invocations must therefore be terminated by a semicolon.
 */
#define ENCTEST(buf, len, raw, exp)					\
	do {								\
		if (encode_path((buf), (len), (raw)) == -1) {		\
			fprintf(stderr,					\
			    "%s:%d: failed to encode: %s\n",		\
			    __FILE__, __LINE__, (raw));			\
			exit(1);					\
		}							\
		if (strcmp((buf), (exp)) != 0) {			\
			fprintf(stderr, "%s:%d: error: "		\
			    "unexpected encoding: got %s, want %s\n",	\
			    __FILE__, __LINE__, (buf), (exp));		\
			exit(1);					\
		}							\
	} while (0)
/*
 * TEST: hand `iri', the expected outcome `fail' and the expected parse
 * result `exp' to run_test().  When run_test() returns 0 the test did
 * not pass: print `descr', prefixed by the file and line of the
 * invocation, on stderr and exit with status 1.
 *
 * The expansion is a plain braced `if', so an invocation is accepted
 * both with and without a trailing semicolon.
 */
#define TEST(iri, fail, exp, descr)					\
	if (run_test(iri, fail, exp) == 0) {				\
		fprintf(stderr, "%s:%d: error: %s\n", __FILE__,		\
		    __LINE__, descr);					\
		exit(1);						\
	}
/*
 * IRI: build a `struct iri' value (as a compound literal) out of six
 * strings.  The initialisers are positional: schema, host and port
 * fill the first three members, the fourth member is always set to 0
 * (the macro takes no value for it), then path, query and fragment
 * follow.  Every string is cast to `char *'.
 *
 * Each argument is parenthesised so that the cast applies to the whole
 * argument expression and not just to its first operand.
 */
#define IRI(schema, host, port, path, query, frag)			\
	((struct iri){							\
		(char *)(schema), (char *)(host), (char *)(port),	\
		0,							\
		(char *)(path), (char *)(query), (char *)(frag)		\
	})
/*
 * DIFF: compare the string member `field' of the two structures
 * pointed to by `wanted' and `got'.  The members are considered
 * different when either of them is NULL or when strcmp() reports a
 * difference; in that case both values are printed on stderr and the
 * *enclosing function* returns 0.  Must only be used inside a function
 * returning int.
 *
 * Each member is read once into a local so the arguments are evaluated
 * a single time, and a NULL member is printed as "(null)" instead of
 * being passed to %s, which would be undefined behaviour.  The body is
 * wrapped in do { } while (0) so the expansion is a single statement;
 * invocations must be terminated by a semicolon.
 */
#define DIFF(wanted, got, field)					\
	do {								\
		const char *want_ = (wanted)->field;			\
		const char *got_ = (got)->field;			\
									\
		if (want_ == NULL || got_ == NULL ||			\
		    strcmp(want_, got_) != 0) {				\
			fprintf(stderr,					\
			    #field ":\n\tgot: %s\n\twanted: %s\n",	\
			    got_ != NULL ? got_ : "(null)",		\
			    want_ != NULL ? want_ : "(null)");		\
			return 0;					\
		}							\
	} while (0)
/*
 * Expected outcome of a test case, passed as the second argument of
 * TEST: PASS when parsing is expected to succeed, FAIL when it is
 * expected to be rejected.
 */
enum {
	PASS = 0,
	FAIL = 1
};
/* Forward declarations for the helpers defined below. */
int	diff_iri(struct iri *wanted, struct iri *got);
int	run_test(const char *iri, int should_fail, struct iri expected);
62 int
63 diff_iri(struct iri *p, struct iri *exp)
64 {
65 DIFF(p, exp, schema);
66 DIFF(p, exp, host);
67 DIFF(p, exp, port);
68 DIFF(p, exp, path);
69 DIFF(p, exp, query);
70 DIFF(p, exp, fragment);
71 return 1;
72 }
74 int
75 run_test(const char *iri, int should_fail, struct iri expected)
76 {
77 int failed, ok = 1;
78 char *iri_copy;
79 struct iri parsed;
80 const char *error;
82 if ((iri_copy = strdup(iri)) == NULL)
83 err(1, "strdup");
85 fprintf(stderr, "=> %s\n", iri);
86 failed = !parse_iri(iri_copy, &parsed, &error);
88 if (failed && should_fail)
89 goto done;
91 if (error != NULL)
92 fprintf(stderr, "> %s\n", error);
94 ok = !failed && !should_fail;
95 if (ok)
96 ok = diff_iri(&expected, &parsed);
98 done:
99 free(iri_copy);
100 return ok;
103 int
104 main(void)
106 char buf[32];
107 struct iri empty = IRI("", "", "", "", "", "");
109 ENCTEST(buf, sizeof(buf), "hello world", "hello%20world");
110 ENCTEST(buf, sizeof(buf), "hello\nworld", "hello%0Aworld");
111 ENCTEST(buf, sizeof(buf), "hello\r\nworld", "hello%0D%0Aworld");
113 TEST("http://omarpolo.com",
114 PASS,
115 IRI("http", "omarpolo.com", "", "", "", ""),
116 "can parse iri with empty path");
118 /* schema */
119 TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
120 TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
121 TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
122 TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
123 TEST("GEMINI://omarpolo.com",
124 PASS,
125 IRI("gemini", "omarpolo.com", "", "", "", ""),
126 "Schemas are case insensitive.");
128 /* authority */
129 TEST("gemini://omarpolo.com",
130 PASS,
131 IRI("gemini", "omarpolo.com", "", "", "", ""),
132 "can parse authority with empty path");
133 TEST("gemini://omarpolo.com/",
134 PASS,
135 IRI("gemini", "omarpolo.com", "", "", "", ""),
136 "can parse authority with empty path (alt)")
137 TEST("gemini://omarpolo.com:1965",
138 PASS,
139 IRI("gemini", "omarpolo.com", "1965", "", "", ""),
140 "can parse with port and empty path");
141 TEST("gemini://omarpolo.com:1965/",
142 PASS,
143 IRI("gemini", "omarpolo.com", "1965", "", "", ""),
144 "can parse with port and empty path")
145 TEST("gemini://omarpolo.com:196s",
146 FAIL,
147 empty,
148 "FAIL with invalid port number");
149 TEST("gemini://OmArPoLo.CoM",
150 PASS,
151 IRI("gemini", "omarpolo.com", "", "", "", ""),
152 "host is case-insensitive");
153 TEST("gemini://xn--nave-6pa.omarpolo.com",
154 PASS,
155 IRI("gemini", "xn--nave-6pa.omarpolo.com", "", "", "", ""),
156 "Can parse punycode-encoded hostnames");
157 TEST("gemini://naïve.omarpolo.com",
158 PASS,
159 IRI("gemini", "naïve.omarpolo.com", "", "", "", ""),
160 "Accept non punycode-encoded hostnames");
161 TEST("gemini://na%c3%afve.omarpolo.com",
162 PASS,
163 IRI("gemini", "naïve.omarpolo.com", "", "", "", ""),
164 "Can percent decode hostnames");
166 /* path */
167 TEST("gemini://omarpolo.com/foo/bar/baz",
168 PASS,
169 IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
170 "parse simple paths");
171 TEST("gemini://omarpolo.com/foo//bar///baz",
172 PASS,
173 IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
174 "parse paths with multiple slashes");
175 TEST("gemini://omarpolo.com/foo/./bar/./././baz",
176 PASS,
177 IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
178 "parse paths with . elements");
179 TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
180 PASS,
181 IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
182 "parse paths with .. elements");
183 TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
184 PASS,
185 IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
186 "parse paths with multiple .. elements");
187 TEST("gemini://omarpolo.com/foo/..",
188 PASS,
189 IRI("gemini", "omarpolo.com", "", "", "", ""),
190 "parse paths with a trailing ..");
191 TEST("gemini://omarpolo.com/foo/../",
192 PASS,
193 IRI("gemini", "omarpolo.com", "", "", "", ""),
194 "parse paths with a trailing ..");
195 TEST("gemini://omarpolo.com/foo/../..",
196 FAIL,
197 empty,
198 "reject paths that would escape the root");
199 TEST("gemini://omarpolo.com/foo/../../",
200 FAIL,
201 empty,
202 "reject paths that would escape the root")
203 TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
204 PASS,
205 IRI("gemini", "omarpolo.com", "", "", "", ""),
206 "parse path with lots of cleaning available");
207 TEST("gemini://omarpolo.com//foo",
208 PASS,
209 IRI("gemini", "omarpolo.com", "", "foo", "", ""),
210 "Trim initial slashes");
211 TEST("gemini://omarpolo.com/////foo",
212 PASS,
213 IRI("gemini", "omarpolo.com", "", "foo", "", ""),
214 "Trim initial slashes (pt. 2)");
215 TEST("http://a/b/c/../..",
216 PASS,
217 IRI("http", "a", "", "", "", ""),
218 "avoid infinite loops (see v1.6.1)");
219 TEST("gemini://example.com/@f:b!(z$&)/baz",
220 PASS,
221 IRI("gemini", "example.com", "", "@f:b!(z$&)/baz", "", ""),
222 "allow @, :, !, (), $ and & in paths");
224 /* query */
225 TEST("foo://example.com/foo/?gne",
226 PASS,
227 IRI("foo", "example.com", "", "foo/", "gne", ""),
228 "parse query strings");
229 TEST("foo://example.com/foo/?gne&foo",
230 PASS,
231 IRI("foo", "example.com", "", "foo/", "gne&foo", ""),
232 "parse query strings");
233 /* TEST("foo://example.com/foo/?gne%2F", */
234 /* PASS, */
235 /* IRI("foo", "example.com", "", "foo/", "gne/", ""), */
236 /* "parse query strings"); */
237 TEST("foo://ex.com/robots.txt?name=foobar&url=https://foo.com",
238 PASS,
239 IRI("foo", "ex.com", "", "robots.txt", "name=foobar&url=https://foo.com", ""),
240 "Accepts : in queries");
241 TEST("foo://ex.com/foo?email=foo@bar.com#quuz",
242 PASS,
243 IRI("foo", "ex.com", "", "foo", "email=foo@bar.com", "quuz"),
244 "Accepts @ in queries");
246 /* fragment */
247 TEST("foo://bar.co/#foo",
248 PASS,
249 IRI("foo", "bar.co", "", "", "", "foo"),
250 "can recognize fragments");
252 /* percent encoding */
253 TEST("foo://bar.com/caf%C3%A8.gmi",
254 PASS,
255 IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
256 "can decode");
257 TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
258 PASS,
259 IRI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
260 "can decode");
261 TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
262 PASS,
263 IRI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
264 "can decode");
265 TEST("foo://bar.com/foo%2F..%2F..",
266 FAIL,
267 empty,
268 "conversion and checking are done in the correct order");
269 TEST("foo://bar.com/foo%00?baz",
270 FAIL,
271 empty,
272 "rejects %00");
274 /* IRI */
275 TEST("foo://bar.com/cafè.gmi",
276 PASS,
277 IRI("foo", "bar.com", "", "cafè.gmi", "" , ""),
278 "decode IRI (with a 2-byte utf8 seq)");
279 TEST("foo://bar.com/世界.gmi",
280 PASS,
281 IRI("foo", "bar.com", "", "世界.gmi", "" , ""),
282 "decode IRI");
283 TEST("foo://bar.com/😼.gmi",
284 PASS,
285 IRI("foo", "bar.com", "", "😼.gmi", "" , ""),
286 "decode IRI (with a 3-byte utf8 seq)");
287 TEST("foo://bar.com/😼/𤭢.gmi",
288 PASS,
289 IRI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
290 "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
291 TEST("foo://bar.com/世界/\xC0\x80",
292 FAIL,
293 empty,
294 "reject invalid sequence (overlong NUL)");
296 return 0;