Blob


1 /*
2 * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <err.h>
18 #include <stdio.h>
19 #include <string.h>
21 #include "../gmid.h"
/*
 * TEST: run a single parser test case and stop the program on failure.
 *
 * Forwards iri, fail and exp to run_test(); when run_test() returns 0
 * the macro prints "<file>:<line>: error: <descr>" on stderr and calls
 * exit(1).
 *
 * The expansion is a bare `if' block and is not wrapped in
 * do { } while (0): some invocations in main() are written without a
 * trailing semicolon and would stop compiling otherwise.
 */
#define TEST(iri, fail, exp, descr)					\
	if (run_test((iri), (fail), (exp)) == 0) {			\
		fprintf(stderr, "%s:%d: error: %s\n",			\
		    __FILE__, __LINE__, (descr));			\
		exit(1);						\
	}
/*
 * IRI: build a `struct iri' compound literal from six strings.
 *
 * The initializers are positional: schema, host and port fill the
 * first three members, the fourth member is always set to 0 and is not
 * taken from the arguments, then path, query and frag fill the next
 * three.  Every argument is cast to `char *', so string literals can
 * be passed directly.
 */
#define IRI(schema, host, port, path, query, frag)			\
	((struct iri){							\
		(char *)(schema), (char *)(host), (char *)(port),	\
		0,							\
		(char *)(path), (char *)(query), (char *)(frag)		\
	})
/*
 * DIFF: compare one string member of two structs, given as pointers.
 *
 * The members are considered different when either one is NULL or
 * when strcmp() reports a mismatch.  In that case both values are
 * printed on stderr, labelled with the member name, and the ENCLOSING
 * function returns 0; the macro can therefore only be used inside a
 * function returning int.
 *
 * A NULL member is printed as "(null)": handing a null pointer to
 * the %s conversion is undefined behaviour, and the NULL case is
 * precisely one of the conditions that reaches the fprintf().
 */
#define DIFF(wanted, got, field)					\
	if ((wanted)->field == NULL || (got)->field == NULL ||		\
	    strcmp((wanted)->field, (got)->field) != 0) {		\
		fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n",	\
		    (got)->field != NULL ? (got)->field : "(null)",	\
		    (wanted)->field != NULL ? (wanted)->field : "(null)"); \
		return 0;						\
	}
/*
 * Expected outcome of a test case; passed by TEST() as the second
 * argument of run_test() (its `should_fail' flag).
 */
#define PASS	0
#define FAIL	1

int	diff_iri(struct iri *wanted, struct iri *got);
int	run_test(const char *iri, int should_fail, struct iri expected);
49 int
50 diff_iri(struct iri *p, struct iri *exp)
51 {
52 DIFF(p, exp, schema);
53 DIFF(p, exp, host);
54 DIFF(p, exp, port);
55 DIFF(p, exp, path);
56 DIFF(p, exp, query);
57 DIFF(p, exp, fragment);
58 return 1;
59 }
61 int
62 run_test(const char *iri, int should_fail, struct iri expected)
63 {
64 int failed, ok = 1;
65 char *iri_copy;
66 struct iri parsed;
67 const char *error;
69 if ((iri_copy = strdup(iri)) == NULL)
70 err(1, "strdup");
72 fprintf(stderr, "=> %s\n", iri);
73 failed = !parse_iri(iri_copy, &parsed, &error);
75 if (failed && should_fail)
76 goto done;
78 if (error != NULL)
79 fprintf(stderr, "> %s\n", error);
81 ok = !failed && !should_fail;
82 if (ok)
83 ok = diff_iri(&expected, &parsed);
85 done:
86 free(iri_copy);
87 return ok;
88 }
90 int
91 main(void)
92 {
93 struct iri empty = IRI("", "", "", "", "", "");
95 TEST("http://omarpolo.com",
96 PASS,
97 IRI("http", "omarpolo.com", "", "", "", ""),
98 "can parse iri with empty path");
100 /* schema */
101 TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
102 TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
103 TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
104 TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
105 TEST("GEMINI://omarpolo.com",
106 PASS,
107 IRI("gemini", "omarpolo.com", "", "", "", ""),
108 "Schemas are case insensitive.");
110 /* authority */
111 TEST("gemini://omarpolo.com",
112 PASS,
113 IRI("gemini", "omarpolo.com", "", "", "", ""),
114 "can parse authority with empty path");
115 TEST("gemini://omarpolo.com/",
116 PASS,
117 IRI("gemini", "omarpolo.com", "", "", "", ""),
118 "can parse authority with empty path (alt)")
119 TEST("gemini://omarpolo.com:1965",
120 PASS,
121 IRI("gemini", "omarpolo.com", "1965", "", "", ""),
122 "can parse with port and empty path");
123 TEST("gemini://omarpolo.com:1965/",
124 PASS,
125 IRI("gemini", "omarpolo.com", "1965", "", "", ""),
126 "can parse with port and empty path")
127 TEST("gemini://omarpolo.com:196s",
128 FAIL,
129 empty,
130 "FAIL with invalid port number");
131 TEST("gemini://OmArPoLo.CoM",
132 PASS,
133 IRI("gemini", "omarpolo.com", "", "", "", ""),
134 "host is case-insensitive");
135 TEST("gemini://xn--nave-6pa.omarpolo.com",
136 PASS,
137 IRI("gemini", "xn--nave-6pa.omarpolo.com", "", "", "", ""),
138 "Can parse punycode-encoded hostnames");
139 TEST("gemini://naïve.omarpolo.com",
140 PASS,
141 IRI("gemini", "naïve.omarpolo.com", "", "", "", ""),
142 "Accept non punycode-encoded hostnames");
143 TEST("gemini://na%c3%afve.omarpolo.com",
144 PASS,
145 IRI("gemini", "naïve.omarpolo.com", "", "", "", ""),
146 "Can percent decode hostnames");
148 /* path */
149 TEST("gemini://omarpolo.com/foo/bar/baz",
150 PASS,
151 IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
152 "parse simple paths");
153 TEST("gemini://omarpolo.com/foo//bar///baz",
154 PASS,
155 IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
156 "parse paths with multiple slashes");
157 TEST("gemini://omarpolo.com/foo/./bar/./././baz",
158 PASS,
159 IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
160 "parse paths with . elements");
161 TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
162 PASS,
163 IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
164 "parse paths with .. elements");
165 TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
166 PASS,
167 IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
168 "parse paths with multiple .. elements");
169 TEST("gemini://omarpolo.com/foo/..",
170 PASS,
171 IRI("gemini", "omarpolo.com", "", "", "", ""),
172 "parse paths with a trailing ..");
173 TEST("gemini://omarpolo.com/foo/../",
174 PASS,
175 IRI("gemini", "omarpolo.com", "", "", "", ""),
176 "parse paths with a trailing ..");
177 TEST("gemini://omarpolo.com/foo/../..",
178 FAIL,
179 empty,
180 "reject paths that would escape the root");
181 TEST("gemini://omarpolo.com/foo/../../",
182 FAIL,
183 empty,
184 "reject paths that would escape the root")
185 TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
186 PASS,
187 IRI("gemini", "omarpolo.com", "", "", "", ""),
188 "parse path with lots of cleaning available");
189 TEST("gemini://omarpolo.com//foo",
190 PASS,
191 IRI("gemini", "omarpolo.com", "", "foo", "", ""),
192 "Trim initial slashes");
193 TEST("gemini://omarpolo.com/////foo",
194 PASS,
195 IRI("gemini", "omarpolo.com", "", "foo", "", ""),
196 "Trim initial slashes (pt. 2)");
197 TEST("http://a/b/c/../..",
198 PASS,
199 IRI("http", "a", "", "", "", ""),
200 "avoid infinite loops (see v1.6.1)");
202 /* query */
203 TEST("foo://example.com/foo/?gne",
204 PASS,
205 IRI("foo", "example.com", "", "foo/", "gne", ""),
206 "parse query strings");
207 TEST("foo://example.com/foo/?gne&foo",
208 PASS,
209 IRI("foo", "example.com", "", "foo/", "gne&foo", ""),
210 "parse query strings");
211 /* TEST("foo://example.com/foo/?gne%2F", */
212 /* PASS, */
213 /* IRI("foo", "example.com", "", "foo/", "gne/", ""), */
214 /* "parse query strings"); */
215 TEST("foo://ex.com/robots.txt?name=foobar&url=https://foo.com",
216 PASS,
217 IRI("foo", "ex.com", "", "robots.txt", "name=foobar&url=https://foo.com", ""),
218 "Accepts : in queries");
219 TEST("foo://ex.com/foo?email=foo@bar.com#quuz",
220 PASS,
221 IRI("foo", "ex.com", "", "foo", "email=foo@bar.com", "quuz"),
222 "Accepts @ in queries");
224 /* fragment */
225 TEST("foo://bar.co/#foo",
226 PASS,
227 IRI("foo", "bar.co", "", "", "", "foo"),
228 "can recognize fragments");
230 /* percent encoding */
231 TEST("foo://bar.com/caf%C3%A8.gmi",
232 PASS,
233 IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
234 "can decode");
235 TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
236 PASS,
237 IRI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
238 "can decode");
239 TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
240 PASS,
241 IRI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
242 "can decode");
243 TEST("foo://bar.com/foo%2F..%2F..",
244 FAIL,
245 empty,
246 "conversion and checking are done in the correct order");
247 TEST("foo://bar.com/foo%00?baz",
248 FAIL,
249 empty,
250 "rejects %00");
252 /* IRI */
253 TEST("foo://bar.com/cafè.gmi",
254 PASS,
255 IRI("foo", "bar.com", "", "cafè.gmi", "" , ""),
256 "decode IRI (with a 2-byte utf8 seq)");
257 TEST("foo://bar.com/世界.gmi",
258 PASS,
259 IRI("foo", "bar.com", "", "世界.gmi", "" , ""),
260 "decode IRI");
261 TEST("foo://bar.com/😼.gmi",
262 PASS,
263 IRI("foo", "bar.com", "", "😼.gmi", "" , ""),
264 "decode IRI (with a 3-byte utf8 seq)");
265 TEST("foo://bar.com/😼/𤭢.gmi",
266 PASS,
267 IRI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
268 "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
269 TEST("foo://bar.com/世界/\xC0\x80",
270 FAIL,
271 empty,
272 "reject invalid sequence (overlong NUL)");
274 return 0;