commit 8b2b06b5122892c74e4dd58772be73ea2fd2933b from: Omar Polo date: Sun Dec 25 18:35:34 2022 UTC iri: rewrite remove_dot_segments to be clearer. The previous version was straight out of RFC3986 (more or less, it had a bug I didn't spot), but it's ugly and extremely long. Rewrite it to be somewhat inspired by canonpath (from kern_pledge.c) and way simpler. There are some small differences in how UNIX paths and URL paths are canonicalized (in UNIX paths you don't care much about a trailing '/', in URLs you do.) The new code is clearer and passes all the tests in RFC3986, so I'm calling it a win. commit - cb7621af57ac2abad12fa61893d00199daebdd29 commit + 8b2b06b5122892c74e4dd58772be73ea2fd2933b blob - 362ed324bf60c6a99a1d6f2c6ab318910bbaf212 blob + f2687041ed034ffc3566a0168819e0ec26fc5c1c --- iri.c +++ iri.c @@ -473,60 +473,30 @@ cpfields(struct iri *dest, const struct iri *src, int } static inline int -remove_dot_segments(struct iri *iri) +remove_dot_segments(struct iri *i) { - char *p, *q, *buf, *s; + char *p, *q, *buf; ptrdiff_t bufsize; - buf = p = q = iri->iri_path; - bufsize = sizeof(iri->iri_path); - + buf = p = q = i->iri_path; + bufsize = sizeof(i->iri_path); while (*p && (q - buf < bufsize)) { - /* A */ - if (!strncmp(p, "../", 3)) { - p += 3; - continue; - } - if (!strncmp(p, "./", 2)) { + if (p[0] == '/' && p[1] == '.' && + (p[2] == '/' || p[2] == '\0')) { p += 2; - continue; - } - /* B */ - if (!strncmp(p, "/./", 3)) { - *q++ = '/'; - p += 3; - continue; - } - if (!strcmp(p, "/.")) { - *q++ = '/'; - p += 2; - break; - } - /* C */ - if (p[0] == '/' && p[1] == '.' && p[2] == '.' && + if (*p != '/') + *q++ = '/'; + } else if (p[0] == '/' && p[1] == '.' && p[2] == '.' 
&& (p[3] == '/' || p[3] == '\0')) { p += 3; - while (q != buf && *--q != '/') + while (q > buf && *--q != '/') continue; - *q++ = '/'; - continue; - } - /* D */ - if (!strcmp(p, ".")) { - p++; - break; - } - if (!strcmp(p, "..")) { - p += 2; - break; - } - /* E */ - s = strchr(p + 1, '/'); - while (*p && p != s && (q - buf < bufsize)) + if (*p != '/' && (q > buf && q[-1] != '/')) + *q++ = '/'; + } else *q++ = *p++; } - - if (*p == '\0' && (q - buf < bufsize)) { + if ((*p == '\0') && (q - buf < bufsize)) { *q = '\0'; return (0); }