2 5c2e310e 2021-01-22 op * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
4 5c2e310e 2021-01-22 op * Permission to use, copy, modify, and distribute this software for any
5 5c2e310e 2021-01-22 op * purpose with or without fee is hereby granted, provided that the above
6 5c2e310e 2021-01-22 op * copyright notice and this permission notice appear in all copies.
8 5c2e310e 2021-01-22 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 5c2e310e 2021-01-22 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 5c2e310e 2021-01-22 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 5c2e310e 2021-01-22 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 5c2e310e 2021-01-22 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 5c2e310e 2021-01-22 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 5c2e310e 2021-01-22 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 5c2e310e 2021-01-22 op #include <err.h>
18 5c2e310e 2021-01-22 op #include <stdio.h>
19 5c2e310e 2021-01-22 op #include <string.h>
21 5c2e310e 2021-01-22 op #include "../gmid.h"
/*
 * TEST: run one parser case through run_test(iri, fail, exp) and, when
 * run_test returns zero, print `descr` on stderr prefixed with the file
 * name and line number of the invocation.
 * NOTE(review): the tail of this macro is not visible in this listing
 * (the brace opened below is never closed here); presumably it stops
 * the test run -- confirm against the full file.
 */
23 5c2e310e 2021-01-22 op #define TEST(iri, fail, exp, descr) \
24 5c2e310e 2021-01-22 op if (!run_test(iri, fail, exp)) { \
25 5c2e310e 2021-01-22 op fprintf(stderr, "%s:%d: error: %s\n", \
26 5c2e310e 2021-01-22 op __FILE__, __LINE__, descr); \
/*
 * IRI: build a `struct iri` compound literal using positional
 * initializers.  schema, host, port, path and query are each cast to
 * char*; the fourth member is initialized to 0.
 * NOTE(review): which member receives the 0 depends on the layout of
 * struct iri in ../gmid.h, and the line that consumes `frag` is not
 * visible in this listing -- confirm both against the full sources.
 */
30 5c2e310e 2021-01-22 op #define IRI(schema, host, port, path, query, frag) \
31 5c2e310e 2021-01-22 op ((struct iri){(char*)schema, (char*)host, (char*)port, \
32 5c2e310e 2021-01-22 op 0, (char*)path, (char*)query, \
/*
 * DIFF: compare the string member `field` of two struct iri pointers.
 * The branch is taken when either side is NULL or when strcmp() reports
 * a difference; it prints the field name followed by both values.
 * NOTE(review): when the branch is taken because a member is NULL, that
 * NULL is handed to a %s conversion, which is undefined behaviour in
 * ISO C.  The macro arguments are also used unparenthesized.
 * NOTE(review): the tail of the macro (what happens after the message)
 * is not visible in this listing.
 */
35 5c2e310e 2021-01-22 op #define DIFF(wanted, got, field) \
36 5c2e310e 2021-01-22 op if (wanted->field == NULL || got->field == NULL || \
37 5c2e310e 2021-01-22 op strcmp(wanted->field, got->field)) { \
38 5c2e310e 2021-01-22 op fprintf(stderr, #field ":\n\tgot: %s\n\twanted: %s\n", \
39 5c2e310e 2021-01-22 op got->field, wanted->field); \
/*
 * Values for the `fail` argument of TEST, which is forwarded to
 * run_test() as should_fail: FAIL (non-zero) marks input the parser is
 * expected to reject, PASS (zero) marks input it is expected to accept.
 */
43 5c2e310e 2021-01-22 op #define PASS 0
44 5c2e310e 2021-01-22 op #define FAIL 1
/* Forward declarations for the two helpers used by the TEST macro. */
46 5c2e310e 2021-01-22 op int diff_iri(struct iri*, struct iri*);
47 5c2e310e 2021-01-22 op int run_test(const char*, int, struct iri);
/*
 * diff_iri: compare two struct iri values member by member (schema,
 * host, port, path, query, fragment) using the DIFF macro.
 * `p` is passed in DIFF's `wanted` position and `exp` in its `got`
 * position.
 * NOTE(review): run_test() calls this as diff_iri(&expected, &parsed),
 * so despite its name `exp` is the parsed result and `p` the expected
 * value; the printed got/wanted labels are consistent with that, only
 * the parameter names read backwards.
 * NOTE(review): the return type line, the braces and the return
 * statement are not visible in this listing.
 */
50 5c2e310e 2021-01-22 op diff_iri(struct iri *p, struct iri *exp)
52 5c2e310e 2021-01-22 op DIFF(p, exp, schema);
53 5c2e310e 2021-01-22 op DIFF(p, exp, host);
54 5c2e310e 2021-01-22 op DIFF(p, exp, port);
55 5c2e310e 2021-01-22 op DIFF(p, exp, path);
56 5c2e310e 2021-01-22 op DIFF(p, exp, query);
57 5c2e310e 2021-01-22 op DIFF(p, exp, fragment);
/*
 * run_test: parse `iri` with parse_iri() and check the outcome against
 * the expectation.
 *
 *   iri          IRI text to parse; a strdup()ed copy is given to the
 *                parser and freed afterwards.
 *   should_fail  non-zero when the parser is expected to reject `iri`.
 *   expected     components handed to diff_iri() together with the
 *                parsed result.
 *
 * The input is echoed to stderr as "=> <iri>" and a parser error
 * message, when present, as "> <message>".
 * NOTE(review): several lines of this function, including the return
 * statement, are not visible in this listing; `ok` starts at 1 and is
 * presumably the return value -- confirm against the full file.
 */
62 5c2e310e 2021-01-22 op run_test(const char *iri, int should_fail, struct iri expected)
64 5c2e310e 2021-01-22 op int failed, ok = 1;
65 5c2e310e 2021-01-22 op char *iri_copy;
66 5c2e310e 2021-01-22 op struct iri parsed;
67 5c2e310e 2021-01-22 op const char *error;
/* work on a private copy; presumably parse_iri() modifies its input */
69 5c2e310e 2021-01-22 op if ((iri_copy = strdup(iri)) == NULL)
70 5c2e310e 2021-01-22 op err(1, "strdup");
72 5c2e310e 2021-01-22 op fprintf(stderr, "=> %s\n", iri);
/* parse_iri() result is inverted: failed is non-zero on a zero return */
73 5c2e310e 2021-01-22 op failed = !parse_iri(iri_copy, &parsed, &error);
75 5c2e310e 2021-01-22 op if (failed && should_fail)
/*
 * NOTE(review): `error` is declared without an initializer, so the read
 * below is only well defined if parse_iri() always stores through its
 * third argument -- confirm, or initialize error to NULL.
 */
78 5c2e310e 2021-01-22 op if (error != NULL)
79 5c2e310e 2021-01-22 op fprintf(stderr, "> %s\n", error);
/* the parse must have succeeded and success must have been expected */
81 5c2e310e 2021-01-22 op ok = !failed && !should_fail;
83 5c2e310e 2021-01-22 op ok = diff_iri(&expected, &parsed);
/*
 * NOTE(review): free() is declared in <stdlib.h>, which is not among
 * the includes visible in this listing; presumably ../gmid.h pulls it
 * in -- confirm.
 */
86 5c2e310e 2021-01-22 op free(iri_copy);
/*
 * Test cases.  Each TEST invocation gives the input IRI, PASS or FAIL,
 * the expected components and a description printed on failure.
 * NOTE(review): the enclosing function header and some argument lines
 * of the invocations below are not visible in this listing.
 *
 * `empty` is the expected value passed along with FAIL cases.
 */
93 5c2e310e 2021-01-22 op struct iri empty = IRI("", "", "", "", "", "");
95 5c2e310e 2021-01-22 op TEST("http://omarpolo.com",
97 5c2e310e 2021-01-22 op IRI("http", "omarpolo.com", "", "", "", ""),
98 5c2e310e 2021-01-22 op "can parse iri with empty path");
/* schema: missing schema, malformed "://" marker, bad characters, case */
101 5c2e310e 2021-01-22 op TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
102 5c2e310e 2021-01-22 op TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
103 5c2e310e 2021-01-22 op TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
104 5c2e310e 2021-01-22 op TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
105 5c2e310e 2021-01-22 op TEST("GEMINI://omarpolo.com",
107 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
108 5c2e310e 2021-01-22 op "Schemas are case insensitive.");
110 5c2e310e 2021-01-22 op /* authority */
111 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com",
113 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
114 5c2e310e 2021-01-22 op "can parse authority with empty path");
115 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/",
117 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
118 5c2e310e 2021-01-22 op "can parse authority with empty path (alt)")
/*
 * NOTE(review): the invocation above has no trailing semicolon, unlike
 * most others; harmless as long as TEST expands to a braced if
 * statement, but inconsistent.
 */
119 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com:1965",
121 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "1965", "", "", ""),
122 5c2e310e 2021-01-22 op "can parse with port and empty path");
123 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com:1965/",
125 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "1965", "", "", ""),
126 5c2e310e 2021-01-22 op "can parse with port and empty path")
/* NOTE(review): no trailing semicolon on the invocation above either. */
127 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com:196s",
130 5c2e310e 2021-01-22 op "FAIL with invalid port number");
131 5c2e310e 2021-01-22 op TEST("gemini://OmArPoLo.CoM",
133 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
134 5c2e310e 2021-01-22 op "host is case-insensitive");
/*
 * path: expected paths carry no leading slash; repeated slashes and
 * "." / ".." elements are expected to be cleaned up by the parser.
 */
137 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/bar/baz",
139 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
140 5c2e310e 2021-01-22 op "parse simple paths");
141 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo//bar///baz",
143 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
144 5c2e310e 2021-01-22 op "parse paths with multiple slashes");
145 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/./bar/./././baz",
147 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
148 5c2e310e 2021-01-22 op "parse paths with . elements");
149 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
151 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
152 5c2e310e 2021-01-22 op "parse paths with .. elements");
153 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
155 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
156 5c2e310e 2021-01-22 op "parse paths with multiple .. elements");
157 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/..",
159 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
160 5c2e310e 2021-01-22 op "parse paths with a trailing ..");
161 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/../",
163 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
164 5c2e310e 2021-01-22 op "parse paths with a trailing ..");
165 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/../..",
168 5c2e310e 2021-01-22 op "reject paths that would escape the root");
169 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/../../",
172 5c2e310e 2021-01-22 op "reject paths that would escape the root")
/* NOTE(review): no trailing semicolon on the invocation above. */
173 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
175 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "", "", ""),
176 5c2e310e 2021-01-22 op "parse path with lots of cleaning available");
177 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com//foo",
179 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo", "", ""),
180 5c2e310e 2021-01-22 op "Trim initial slashes");
181 5c2e310e 2021-01-22 op TEST("gemini://omarpolo.com/////foo",
183 5c2e310e 2021-01-22 op IRI("gemini", "omarpolo.com", "", "foo", "", ""),
184 5c2e310e 2021-01-22 op "Trim initial slashes (pt. 2)");
/* query: the expected query is the text after '?', percent-decoded */
187 5c2e310e 2021-01-22 op TEST("foo://example.com/foo/?gne",
189 5c2e310e 2021-01-22 op IRI("foo", "example.com", "", "foo/", "gne", ""),
190 5c2e310e 2021-01-22 op "parse query strings");
191 5c2e310e 2021-01-22 op TEST("foo://example.com/foo/?gne&foo",
193 5c2e310e 2021-01-22 op IRI("foo", "example.com", "", "foo/", "gne&foo", ""),
194 5c2e310e 2021-01-22 op "parse query strings");
195 5c2e310e 2021-01-22 op TEST("foo://example.com/foo/?gne%2F",
197 5c2e310e 2021-01-22 op IRI("foo", "example.com", "", "foo/", "gne/", ""),
198 5c2e310e 2021-01-22 op "parse query strings");
200 5c2e310e 2021-01-22 op /* fragment */
201 5c2e310e 2021-01-22 op TEST("foo://bar.co/#foo",
203 5c2e310e 2021-01-22 op IRI("foo", "bar.co", "", "", "", "foo"),
204 5c2e310e 2021-01-22 op "can recognize fragments");
206 5c2e310e 2021-01-22 op /* percent encoding */
207 5c2e310e 2021-01-22 op TEST("foo://bar.com/caf%C3%A8.gmi",
209 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
210 5c2e310e 2021-01-22 op "can decode");
211 5c2e310e 2021-01-22 op TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
213 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
214 5c2e310e 2021-01-22 op "can decode");
/* '+' is expected to stay a literal plus sign, not become a space */
215 5c2e310e 2021-01-22 op TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
217 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
218 5c2e310e 2021-01-22 op "can decode");
219 5c2e310e 2021-01-22 op TEST("foo://bar.com/foo%2F..%2F..",
222 5c2e310e 2021-01-22 op "conversion and checking are done in the correct order");
223 5c2e310e 2021-01-22 op TEST("foo://bar.com/foo%00?baz",
226 5c2e310e 2021-01-22 op "rejects %00");
/*
 * IRI: raw (not percent-encoded) UTF-8 in the path.
 * NOTE(review): some descriptions below misstate the encoded length.
 * U+1F63C (the cat emoji) and U+24B62 are both 4-byte UTF-8 sequences,
 * while the two CJK characters are 3 bytes each, so "3-byte" on the
 * emoji case and "3-byte and a 4-byte" on the last PASS case look
 * wrong.  The strings are left untouched here.
 */
229 5c2e310e 2021-01-22 op TEST("foo://bar.com/cafè.gmi",
231 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "cafè.gmi", "" , ""),
232 5c2e310e 2021-01-22 op "decode IRI (with a 2-byte utf8 seq)");
233 5c2e310e 2021-01-22 op TEST("foo://bar.com/世界.gmi",
235 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "世界.gmi", "" , ""),
236 5c2e310e 2021-01-22 op "decode IRI");
237 5c2e310e 2021-01-22 op TEST("foo://bar.com/😼.gmi",
239 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "😼.gmi", "" , ""),
240 5c2e310e 2021-01-22 op "decode IRI (with a 3-byte utf8 seq)");
241 5c2e310e 2021-01-22 op TEST("foo://bar.com/😼/𤭢.gmi",
243 5c2e310e 2021-01-22 op IRI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
244 5c2e310e 2021-01-22 op "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
/* 0xC0 0x80 is the overlong two-byte encoding of U+0000 */
245 5c2e310e 2021-01-22 op TEST("foo://bar.com/世界/\xC0\x80",
248 5c2e310e 2021-01-22 op "reject invalid sequence (overlong NUL)");