commit - 043acc97b16be18d85bb1914da50f7ce2aa2623e
commit + df6ca41da36c3f617cbbf3302ab120721ebfcfd2
blob - 597391a9c578320b2e748a05ffc019358eac8a39
blob + 1c9b75f2c471e4d3b9e67dba136bfb081e1a8d7e
--- README.md
+++ README.md
and execute CGI scripts.
**gmid**
-will strip any sequence of
-*../*
-or trailing
-*..*
-in the requests made by clients and will refuse to follow symlinks.
+won't serve files outside the given directory and won't follow
+symlinks.
Furthermore, on
OpenBSD,
pledge(2)
doesn't do anything other than read files from the given directory,
accept network connections and, optionally, execute CGI scripts.
+**gmid**
+fully supports IRIs (Internationalized Resource Identifiers, see
+RFC3987).
+
It should be noted that
**gmid**
is very simple in its implementation, and so it may not be appropriate
blob - 77ef87db7e835803323eb5c04d6794dcfec67099
blob + edf67d5281ed871d651552902d513166ee9d2d2c
--- gmid.1
+++ gmid.1
and execute CGI scripts.
.Pp
.Nm
-will strip any sequence of
-.Pa ../
-or trailing
-.Pa ..
-in the requests made by clients and will refuse to follow symlinks.
+won't serve files outside the given directory and won't follow
+symlinks.
Furthermore, on
.Ox ,
.Xr pledge 2
doesn't do anything other than read files from the given directory,
accept network connections and, optionally, execute CGI scripts.
.Pp
+.Nm
+fully supports IRIs (Internationalized Resource Identifiers, see
+RFC3987).
+.Pp
It should be noted that
.Nm
is very simple in its implementation, and so it may not be appropriate
blob - 245928ac2a36f5594f0eda298dc56e4c1c8b7900
blob + 3f81b762a5c8152496864bf438e9f8c16be84c63
--- uri.c
+++ uri.c
const char *err;
};
+/* True when b is a UTF-8 continuation byte, i.e. has the form 10xxxxxx. */
+#define CONT_BYTE(b) (((b) & 0xC0) == 0x80)
+
/* XXX: these macros will expand multiple times their argument */
#define UNRESERVED(p) \
|| p == ',' \
|| p == ';' \
|| p == '=')
+
+/*
+ * Check whether p->uri points at a well-formed multi-byte UTF-8
+ * sequence (2, 3 or 4 bytes long).
+ *
+ * A sequence is rejected when a continuation byte is missing, when
+ * the decoded code point could have been encoded with fewer bytes
+ * (overlong form), when it is a UTF-16 surrogate (U+D800..U+DFFF) or
+ * when it is greater than U+10FFFF.
+ *
+ * Returns 1 on success and 0 otherwise.  On failure p->uri is left
+ * untouched.
+ *
+ * NOTE: on success p->uri is advanced by one byte less than the
+ * length of the sequence, because the caller will add one byte after
+ * we return.
+ */
+static int
+valid_multibyte_utf8(struct parser *p)
+{
+	uint32_t c, min;
+	uint8_t s;
+	int i, n;
+
+	s = *p->uri;
+
+	/*
+	 * From the lead byte derive the number of continuation bytes
+	 * (n), the payload bits it carries and the smallest code point
+	 * that legitimately needs a sequence of this length (min).
+	 */
+	if ((s & 0xE0) == 0xC0) {
+		n = 1;
+		min = 0x80;
+		c = s & 0x1F;
+	} else if ((s & 0xF0) == 0xE0) {
+		n = 2;
+		min = 0x800;
+		c = s & 0x0F;
+	} else if ((s & 0xF8) == 0xF0) {
+		n = 3;
+		min = 0x10000;
+		c = s & 0x07;
+	} else
+		return 0;
+
+	/*
+	 * Stop at the first byte that is not a continuation byte: the
+	 * terminating NUL is not one, so we never read past the end of
+	 * the string.
+	 */
+	for (i = 1; i <= n; ++i) {
+		if (!CONT_BYTE(p->uri[i]))
+			return 0;
+		c = (c << 6) | (p->uri[i] & 0x3F);
+	}
+
+	/* overlong encoding or beyond the Unicode range */
+	if (c < min || c > 0x10FFFF)
+		return 0;
+
+	/* surrogates are not valid scalar values */
+	if (0xD800 <= c && c <= 0xDFFF)
+		return 0;
+
+	p->uri += n;
+	return 1;
+}
+
static int
parse_pct_encoded(struct parser *p)
{
|| SUB_DELIMITERS(*p->uri)
|| *p->uri == '/'
|| *p->uri == '?'
- || parse_pct_encoded(p))
+ || parse_pct_encoded(p)
+ || valid_multibyte_utf8(p))
p->uri++;
if (*p->uri != '\0' && *p->uri != '#') {
while (UNRESERVED(*p->uri)
|| SUB_DELIMITERS(*p->uri)
|| *p->uri == '/'
- || parse_pct_encoded(p))
+ || parse_pct_encoded(p)
+ || valid_multibyte_utf8(p))
p->uri++;
if (*p->uri != '\0' && *p->uri != '?' && *p->uri != '#') {
blob - c6521f668c8263cd6b9162966dd9b212dc529948
blob + f322c1e1f6e7a85c1b328cd3825dbb091521f9d5
--- uri_test.c
+++ uri_test.c
{
struct uri empty = {"", "", "", PASS, "", "", ""};
+ TEST("foo://bar.com/foo%00?baz",
+ FAIL,
+ empty,
+ "rejects %00");
+ return 0;
+
TEST("http://omarpolo.com",
PASS,
URI("http", "omarpolo.com", "", "", "", ""),
FAIL,
empty,
"reject paths that would escape the root");
+ TEST("gemini://omarpolo.com/foo/../foo/../././/bar/baz/.././.././/",
+ PASS,
+ URI("gemini", "omarpolo.com", "", "", "", ""),
+ "parse path with lots of cleaning available");
/* query */
TEST("foo://example.com/foo/?gne",
PASS,
URI("foo", "bar.com", "", "cafè.gmi", "", ""),
"can decode");
+ TEST("foo://bar.com/caff%C3%A8%20macchiato.gmi",
+ PASS,
+ URI("foo", "bar.com", "", "caffè macchiato.gmi", "", ""),
+ "can decode");
+ TEST("foo://bar.com/caff%C3%A8+macchiato.gmi",
+ PASS,
+ URI("foo", "bar.com", "", "caffè+macchiato.gmi", "", ""),
+ "can decode");
+ TEST("foo://bar.com/foo%2F..%2F..",
+ FAIL,
+ empty,
+ "conversion and checking are done in the correct order");
+ TEST("foo://bar.com/foo%00?baz",
+ FAIL,
+ empty,
+ "rejects %00");
+ /* IRI */
+ TEST("foo://bar.com/cafè.gmi",
+ PASS,
+ URI("foo", "bar.com", "", "cafè.gmi", "" , ""),
+ "decode IRI (with a 2-byte utf8 seq)");
+ TEST("foo://bar.com/世界.gmi",
+ PASS,
+ URI("foo", "bar.com", "", "世界.gmi", "" , ""),
+ "decode IRI");
+ TEST("foo://bar.com/😼.gmi",
+ PASS,
+ URI("foo", "bar.com", "", "😼.gmi", "" , ""),
+ "decode IRI (with a 3-byte utf8 seq)");
+ TEST("foo://bar.com/😼/𤭢.gmi",
+ PASS,
+ URI("foo", "bar.com", "", "😼/𤭢.gmi", "" , ""),
+ "decode IRI (with a 3-byte and a 4-byte utf8 seq)");
+ TEST("foo://bar.com/世界/\xC0\x80",
+ FAIL,
+ empty,
+ "reject invalid sequence (overlong NUL)");
+
return 0;
}