Blame


1 3c1cf9d0 2021-01-11 op /*
2 3c1cf9d0 2021-01-11 op * Copyright (c) 2020 Omar Polo <op@omarpolo.com>
3 3c1cf9d0 2021-01-11 op *
4 3c1cf9d0 2021-01-11 op * Permission to use, copy, modify, and distribute this software for any
5 3c1cf9d0 2021-01-11 op * purpose with or without fee is hereby granted, provided that the above
6 3c1cf9d0 2021-01-11 op * copyright notice and this permission notice appear in all copies.
7 3c1cf9d0 2021-01-11 op *
8 3c1cf9d0 2021-01-11 op * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 3c1cf9d0 2021-01-11 op * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 3c1cf9d0 2021-01-11 op * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 3c1cf9d0 2021-01-11 op * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 3c1cf9d0 2021-01-11 op * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 3c1cf9d0 2021-01-11 op * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 3c1cf9d0 2021-01-11 op * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 3c1cf9d0 2021-01-11 op */
16 3c1cf9d0 2021-01-11 op
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
20 3c1cf9d0 2021-01-11 op
21 3c1cf9d0 2021-01-11 op #include "gmid.h"
22 3c1cf9d0 2021-01-11 op
/*
 * Run a single test case through run_test() and terminate the program
 * with exit status 1 if it does not pass, after printing the source
 * location of the invocation and `descr` to stderr.
 *
 *   iri    string handed to run_test() as the IRI to parse
 *   fail   PASS or FAIL: whether parsing is expected to be rejected
 *   exp    struct iri holding the expected components
 *   descr  description printed when the test case fails
 *
 * The expansion is a complete if/else statement, so a trailing semicolon
 * at the call site is optional.  The `if` is deliberately closed with
 * its own `else`: that way an `else` written after an invocation can
 * never be captured by the macro's own `if`.
 */
#define TEST(iri, fail, exp, descr)					\
	if (run_test(iri, fail, exp)) {					\
		/* test passed: nothing to report */			\
	} else {							\
		fprintf(stderr, "%s:%d: error: %s\n",			\
		    __FILE__, __LINE__, descr);				\
		exit(1);						\
	}
29 3c1cf9d0 2021-01-11 op
/*
 * Build a struct iri compound literal from its six string components.
 * The initializer is positional; the member that sits between the port
 * string and the path is always set to 0 (presumably a numeric port,
 * which these tests never compare -- confirm against gmid.h).
 */
#define IRI(s, h, p, pth, q, f) \
	((struct iri){ (s), (h), (p), 0, (pth), (q), (f) })
32 3c1cf9d0 2021-01-11 op
/*
 * Compare the string member `field` of the structures pointed to by
 * `wanted` and `got`.  If either member is NULL or the two strings
 * differ, print both values to stderr and make the ENCLOSING function
 * return 0; otherwise fall through.
 *
 * Must be used inside a function returning int, and each invocation must
 * be terminated with a semicolon.
 *
 * A NULL member is printed as "(null)": passing a null pointer to the
 * %s conversion is undefined behaviour.
 */
#define DIFF(wanted, got, field)					\
	do {								\
		if ((wanted)->field == NULL || (got)->field == NULL ||	\
		    strcmp((wanted)->field, (got)->field) != 0) {	\
			fprintf(stderr,					\
			    #field ":\n\tgot: %s\n\twanted: %s\n",	\
			    (got)->field != NULL ?			\
				(got)->field : "(null)",		\
			    (wanted)->field != NULL ?			\
				(wanted)->field : "(null)");		\
			return 0;					\
		}							\
	} while (0)
40 3c1cf9d0 2021-01-11 op
/*
 * Expected outcome of a test case, given as the `fail` argument of TEST
 * (which forwards it to run_test() as `should_fail`).
 */
enum {
	PASS = 0,	/* the IRI is expected to parse */
	FAIL = 1	/* the IRI is expected to be rejected */
};
43 3c1cf9d0 2021-01-11 op
44 3c1cf9d0 2021-01-11 op int
45 3c1cf9d0 2021-01-11 op diff_iri(struct iri *p, struct iri *exp)
46 3c1cf9d0 2021-01-11 op {
47 3c1cf9d0 2021-01-11 op DIFF(p, exp, schema);
48 3c1cf9d0 2021-01-11 op DIFF(p, exp, host);
49 3c1cf9d0 2021-01-11 op DIFF(p, exp, port);
50 3c1cf9d0 2021-01-11 op DIFF(p, exp, path);
51 3c1cf9d0 2021-01-11 op DIFF(p, exp, query);
52 3c1cf9d0 2021-01-11 op DIFF(p, exp, fragment);
53 3c1cf9d0 2021-01-11 op return 1;
54 3c1cf9d0 2021-01-11 op }
55 3c1cf9d0 2021-01-11 op
56 3c1cf9d0 2021-01-11 op int
57 3c1cf9d0 2021-01-11 op run_test(const char *iri, int should_fail, struct iri expected)
58 3c1cf9d0 2021-01-11 op {
59 3c1cf9d0 2021-01-11 op int failed, ok = 1;
60 3c1cf9d0 2021-01-11 op char *iri_copy;
61 3c1cf9d0 2021-01-11 op struct iri parsed;
62 3c1cf9d0 2021-01-11 op const char *error;
63 3c1cf9d0 2021-01-11 op
64 3c1cf9d0 2021-01-11 op if ((iri_copy = strdup(iri)) == NULL)
65 3c1cf9d0 2021-01-11 op err(1, "strdup");
66 3c1cf9d0 2021-01-11 op
67 3c1cf9d0 2021-01-11 op fprintf(stderr, "=> %s\n", iri);
68 3c1cf9d0 2021-01-11 op failed = !parse_iri(iri_copy, &parsed, &error);
69 3c1cf9d0 2021-01-11 op
70 3c1cf9d0 2021-01-11 op if (failed && should_fail)
71 3c1cf9d0 2021-01-11 op goto done;
72 3c1cf9d0 2021-01-11 op
73 3c1cf9d0 2021-01-11 op if (error != NULL)
74 3c1cf9d0 2021-01-11 op fprintf(stderr, "> %s\n", error);
75 3c1cf9d0 2021-01-11 op
76 3c1cf9d0 2021-01-11 op ok = !failed && !should_fail;
77 3c1cf9d0 2021-01-11 op if (ok)
78 3c1cf9d0 2021-01-11 op ok = diff_iri(&expected, &parsed);
79 3c1cf9d0 2021-01-11 op
80 3c1cf9d0 2021-01-11 op done:
81 3c1cf9d0 2021-01-11 op free(iri_copy);
82 3c1cf9d0 2021-01-11 op return ok;
83 3c1cf9d0 2021-01-11 op }
84 3c1cf9d0 2021-01-11 op
85 3c1cf9d0 2021-01-11 op int
86 3c1cf9d0 2021-01-11 op main(void)
87 3c1cf9d0 2021-01-11 op {
88 3c1cf9d0 2021-01-11 op struct iri empty = {"", "", "", PASS, "", "", ""};
89 3c1cf9d0 2021-01-11 op
90 3c1cf9d0 2021-01-11 op TEST("http://omarpolo.com",
91 3c1cf9d0 2021-01-11 op PASS,
92 3c1cf9d0 2021-01-11 op IRI("http", "omarpolo.com", "", "", "", ""),
93 3c1cf9d0 2021-01-11 op "can parse iri with empty path");
94 3c1cf9d0 2021-01-11 op
95 3c1cf9d0 2021-01-11 op /* schema */
96 3c1cf9d0 2021-01-11 op TEST("omarpolo.com", FAIL, empty, "FAIL when the schema is missing");
97 3c1cf9d0 2021-01-11 op TEST("gemini:/omarpolo.com", FAIL, empty, "FAIL with invalid marker");
98 3c1cf9d0 2021-01-11 op TEST("gemini//omarpolo.com", FAIL, empty, "FAIL with invalid marker");
99 3c1cf9d0 2021-01-11 op TEST("h!!p://omarpolo.com", FAIL, empty, "FAIL with invalid schema");
100 3c1cf9d0 2021-01-11 op
101 3c1cf9d0 2021-01-11 op /* authority */
102 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com",
103 3c1cf9d0 2021-01-11 op PASS,
104 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "", "", "", ""),
105 3c1cf9d0 2021-01-11 op "can parse authority with empty path");
106 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com/",
107 3c1cf9d0 2021-01-11 op PASS,
108 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "", "", "", ""),
109 3c1cf9d0 2021-01-11 op "can parse authority with empty path (alt)")
110 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com:1965",
111 3c1cf9d0 2021-01-11 op PASS,
112 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "1965", "", "", ""),
113 3c1cf9d0 2021-01-11 op "can parse with port and empty path");
114 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com:1965/",
115 3c1cf9d0 2021-01-11 op PASS,
116 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "1965", "", "", ""),
117 3c1cf9d0 2021-01-11 op "can parse with port and empty path")
118 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com:196s",
119 3c1cf9d0 2021-01-11 op FAIL,
120 3c1cf9d0 2021-01-11 op empty,
121 3c1cf9d0 2021-01-11 op "FAIL with invalid port number");
122 3c1cf9d0 2021-01-11 op
123 3c1cf9d0 2021-01-11 op /* path */
124 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com/foo/bar/baz",
125 3c1cf9d0 2021-01-11 op PASS,
126 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
127 3c1cf9d0 2021-01-11 op "parse simple paths");
128 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com/foo//bar///baz",
129 3c1cf9d0 2021-01-11 op PASS,
130 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
131 3c1cf9d0 2021-01-11 op "parse paths with multiple slashes");
132 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com/foo/./bar/./././baz",
133 3c1cf9d0 2021-01-11 op PASS,
134 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
135 3c1cf9d0 2021-01-11 op "parse paths with . elements");
136 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com/foo/bar/../bar/baz",
137 3c1cf9d0 2021-01-11 op PASS,
138 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
139 3c1cf9d0 2021-01-11 op "parse paths with .. elements");
140 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com/foo/../foo/bar/../bar/baz/../baz",
141 3c1cf9d0 2021-01-11 op PASS,
142 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "", "foo/bar/baz", "", ""),
143 3c1cf9d0 2021-01-11 op "parse paths with multiple .. elements");
144 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com/foo/..",
145 3c1cf9d0 2021-01-11 op PASS,
146 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "", "", "", ""),
147 3c1cf9d0 2021-01-11 op "parse paths with a trailing ..");
148 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com/foo/../",
149 3c1cf9d0 2021-01-11 op PASS,
150 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "", "", "", ""),
151 3c1cf9d0 2021-01-11 op "parse paths with a trailing ..");
152 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com/foo/../..",
153 3c1cf9d0 2021-01-11 op FAIL,
154 3c1cf9d0 2021-01-11 op empty,
155 3c1cf9d0 2021-01-11 op "reject paths that would escape the root");
156 3c1cf9d0 2021-01-11 op TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
157 3c1cf9d0 2021-01-11 op PASS,
158 3c1cf9d0 2021-01-11 op IRI("gemini", "omarpolo.com", "", "", "", ""),
159 3c1cf9d0 2021-01-11 op "parse path with lots of cleaning available");
160 3c1cf9d0 2021-01-11 op
161 3c1cf9d0 2021-01-11 op /* query */
162 3c1cf9d0 2021-01-11 op TEST("foo://example.com/foo/?gne",
163 3c1cf9d0 2021-01-11 op PASS,
164 3c1cf9d0 2021-01-11 op IRI("foo", "example.com", "", "foo/", "gne", ""),
165 3c1cf9d0 2021-01-11 op "parse query strings");
166 3c1cf9d0 2021-01-11 op TEST("foo://example.com/foo/?gne&foo",
167 3c1cf9d0 2021-01-11 op PASS,
168 3c1cf9d0 2021-01-11 op IRI("foo", "example.com", "", "foo/", "gne&foo", ""),
169 3c1cf9d0 2021-01-11 op "parse query strings");
170 3c1cf9d0 2021-01-11 op TEST("foo://example.com/foo/?gne%2F",
171 3c1cf9d0 2021-01-11 op PASS,
172 3c1cf9d0 2021-01-11 op IRI("foo", "example.com", "", "foo/", "gne/", ""),
173 3c1cf9d0 2021-01-11 op "parse query strings");
174 3c1cf9d0 2021-01-11 op
175 3c1cf9d0 2021-01-11 op /* fragment */
176 3c1cf9d0 2021-01-11 op TEST("foo://bar.co/#foo",
177 3c1cf9d0 2021-01-11 op PASS,
178 3c1cf9d0 2021-01-11 op IRI("foo", "bar.co", "", "", "", "foo"),
179 3c1cf9d0 2021-01-11 op "can recognize fragments");
180 3c1cf9d0 2021-01-11 op
181 3c1cf9d0 2021-01-11 op /* percent encoding */
182 3c1cf9d0 2021-01-11 op TEST("foo://bar.com/caf%C3%A8.gmi",
183 3c1cf9d0 2021-01-11 op PASS,
184 3c1cf9d0 2021-01-11 op IRI("foo", "bar.com", "", "cafè.gmi", "", ""),
185 3c1cf9d0 2021-01-11 op "can decode");
186 3c1cf9d0 2021-01-11 op TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
187 3c1cf9d0 2021-01-11 op PASS,
188 3c1cf9d0 2021-01-11 op IRI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
189 3c1cf9d0 2021-01-11 op "can decode");
190 3c1cf9d0 2021-01-11 op TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
191 3c1cf9d0 2021-01-11 op PASS,
192 3c1cf9d0 2021-01-11 op IRI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
193 3c1cf9d0 2021-01-11 op "can decode");
194 3c1cf9d0 2021-01-11 op TEST("foo://bar.com/foo%2F..%2F..",
195 3c1cf9d0 2021-01-11 op FAIL,
196 3c1cf9d0 2021-01-11 op empty,
197 3c1cf9d0 2021-01-11 op "conversion and checking are done in the correct order");
198 3c1cf9d0 2021-01-11 op TEST("foo://bar.com/foo%00?baz",
199 3c1cf9d0 2021-01-11 op FAIL,
200 3c1cf9d0 2021-01-11 op empty,
201 3c1cf9d0 2021-01-11 op "rejects %00");
202 3c1cf9d0 2021-01-11 op
203 3c1cf9d0 2021-01-11 op /* IRI */
204 3c1cf9d0 2021-01-11 op TEST("foo://bar.com/cafè.gmi",
205 3c1cf9d0 2021-01-11 op PASS,
206 3c1cf9d0 2021-01-11 op IRI("foo", "bar.com", "", "cafè.gmi", "" , ""),
207 3c1cf9d0 2021-01-11 op "decode IRI (with a 2-byte utf8 seq)");
208 3c1cf9d0 2021-01-11 op TEST("foo://bar.com/世界.gmi",
209 3c1cf9d0 2021-01-11 op PASS,
210 3c1cf9d0 2021-01-11 op IRI("foo", "bar.com", "", "世界.gmi", "" , ""),
211 3c1cf9d0 2021-01-11 op "decode IRI");
212 3c1cf9d0 2021-01-11 op TEST("foo://bar.com/😼.gmi",
213 3c1cf9d0 2021-01-11 op PASS,
214 3c1cf9d0 2021-01-11 op IRI("foo", "bar.com", "", "😼.gmi", "" , ""),
215 3c1cf9d0 2021-01-11 op "decode IRI (with a 3-byte utf8 seq)");
216 3c1cf9d0 2021-01-11 op TEST("foo://bar.com/😼/𤭢.gmi",
217 3c1cf9d0 2021-01-11 op PASS,
218 3c1cf9d0 2021-01-11 op IRI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
219 3c1cf9d0 2021-01-11 op "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
220 3c1cf9d0 2021-01-11 op TEST("foo://bar.com/世界/\xC0\x80",
221 3c1cf9d0 2021-01-11 op FAIL,
222 3c1cf9d0 2021-01-11 op empty,
223 3c1cf9d0 2021-01-11 op "reject invalid sequence (overlong NUL)");
224 3c1cf9d0 2021-01-11 op
225 3c1cf9d0 2021-01-11 op return 0;
226 3c1cf9d0 2021-01-11 op }