commit 7957cbd9aad6ace179287b2ddcbae0b14a25a90d from: Omar Polo date: Wed Jan 27 11:21:23 2021 UTC const-ify puny_decode (and add puny.c) commit - 42650adec078a7d3e885afbafa1fa4406d4823fb commit + 7957cbd9aad6ace179287b2ddcbae0b14a25a90d blob - b4e6b8448d5c945cf71a3c0f3df01ee8b505e4a3 blob + 47a053630219f8ad6fcf58063659192f45fcc53e --- gmid.h +++ gmid.h @@ -247,6 +247,6 @@ int parse_iri(char*, struct iri*, const char**); int trim_req_iri(char*); /* puny.c */ -int puny_decode(char*, char*, size_t); +int puny_decode(const char*, char*, size_t); #endif blob - b25f82a9f9f6e410156922ee3f9e588bc1a6fa9e blob + 26d65714f651ce49d330afaa1daa8555360d1e59 --- regress/puny-test.c +++ regress/puny-test.c @@ -48,16 +48,12 @@ main(int argc, char **argv) { struct suite *i; int failed; - char *hostname; char buf[64]; /* name len */ failed = 0; for (i = t; i->src != NULL; ++i) { - if ((hostname = strdup(i->src)) == NULL) - return 0; - memset(buf, 0, sizeof(buf)); - if (!puny_decode(hostname, buf, sizeof(buf))) { + if (!puny_decode(i->src, buf, sizeof(buf))) { printf("decode: failure with %s\n", i->src); failed = 1; continue; @@ -70,8 +66,6 @@ main(int argc, char **argv) continue; } else printf("OK: %s => %s\n", i->src, buf); - - free(hostname); } return failed; blob - /dev/null blob + f465198c4a46207ed74e1461add5cab140c0c60a (mode 644) --- /dev/null +++ puny.c @@ -0,0 +1,253 @@ +/* + * Copyright (c) 2021 Omar Polo + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include +#include + +#include "gmid.h" + +#define BASE 36 +#define TMIN 1 +#define TMAX 26 +#define SKEW 38 +#define DAMP 700 +#define IBIAS 72 +#define IN 128 + +static int +adapt(int delta, int numpoints, int firsttime) +{ + int k; + + if (firsttime) + delta = delta / DAMP; + else + delta = delta / 2; + + delta += (delta / numpoints); + + k = 0; + while (delta > ((BASE - TMIN) * TMAX) / 2) { + delta = delta / (BASE - TMIN); + k += BASE; + } + return k + (((BASE - TMIN + 1) * delta) / (delta + SKEW)); +} + +static const char * +copy_until_delimiter(const char *s, char *out, size_t len) +{ + char *end, *t; + + end = strchr(s, '\0'); + if (end - s > len) + return NULL; + + for (t = end; t >= s; --t) + if (*t == '-') + break; + + if (t < s) + t = end; + + for (; s < t; ++s, ++out) { + if (*s > 'z') + return NULL; + *out = *s; + } + + return s; +} + +static unsigned int +digit_value(char c) +{ + if ('A' <= c && c <= 'Z') + return c - 'A'; + + if ('a' <= c && c <= 'z') + return c - 'a'; + + if ('0' <= c && c <= '9') + return 26 + c - '0'; + + return c; +} + +static int +insert(char *out, size_t len, int codepoint, size_t i) +{ + int l; + size_t outlen; + char *t; + + if (codepoint <= 0x7F) + return 0; + else if (codepoint <= 0x7FF) + l = 2; + else if (codepoint <= 0xFFFF) + l = 3; + else if (codepoint <= 0x10FFFF) + l = 4; + else + return 0; + + if ((t = utf8_nth(out, i)) == NULL) + return 0; + if (t + l >= out + len) + return 0; + + memmove(t + l, t, strlen(t)); + + switch (l) { + case 2: + t[1] = ( codepoint & 0x3F) + 0x80; + t[0] = ((codepoint >> 6) & 0x1F) + 0xC0; + break; + case 3: + t[2] = ( codepoint & 0x3F) + 0x80; + t[1] = ((codepoint >> 6) & 0x3F) + 0x80; + t[0] = ((codepoint >> 12) & 0x0F) + 0xE0; + break; + case 4: + t[3] = ( codepoint & 0x3F) + 0x80; + t[2] = ((codepoint >> 6) & 0x3F) + 0x80; + t[1] = ((codepoint >> 12) & 0x3F) + 0x80; + t[0] = ((codepoint >> 18) & 0x07) + 0xF0; + break; + } + return 1; +} + +static int +decode(const char *str, char *out, size_t len) +{ + size_t i; + uint32_t n; + unsigned int oldi, bias, w, k, digit, t; + unsigned int numpoints; + const char *s; + + if (str == NULL || len <= 4) + return 0; + + /* todo: starts_with */ + if (strstr(str, "xn--") != str) { + strncpy(out, str, len); + return 1; + } + + /* skip the xn-- */ + str += 4; + + if (strchr(str, '-') != NULL) { + if ((s = copy_until_delimiter(str, out, len)) == NULL) + return 0; + if (*s == '-') + s++; + } else + s = str; + + numpoints = strlen(out); + + n = IN; + i = 0; + bias = IBIAS; + + while (*s != '\0') { + oldi = i; + w = 1; + + for (k = BASE; ; k += BASE) { + if (*s == '\0') + return 0; + /* fail eventually? */ + digit = digit_value(*s); + s++; + + /* fail on overflow */ + i += digit * w; + + if (k <= bias) + t = TMIN; + else if (k >= bias + TMAX) + t = TMAX; + else + t = k - bias; + + if (digit < t) + break; + w *= (BASE - t); + } + + bias = adapt(i - oldi, numpoints+1, oldi == 0); + n += i / (numpoints+1); /* fail on overflow */ + i = i % (numpoints+1); + + if (!insert(out, len, n, i)) + return 0; + numpoints++; + ++i; + } + + return 1; +} + +const char * +end_of_component(const char *hostname) +{ + for (; *hostname != '\0' && *hostname != '.'; ++hostname) + ; /* nop */ + return hostname; +} + +int +puny_decode(const char *hostname, char *out, size_t len) +{ + char comp[DOMAIN_NAME_LEN]; + const char *s, *end; + size_t l; + + memset(out, 0, len); + + s = hostname; + for (;;) { + end = end_of_component(s); + if (end - s >= sizeof(comp)) + return 0; + + memcpy(comp, s, end - s); + comp[end - s] = '\0'; + + if (!decode(comp, out, len)) + return 0; + + if (*end == '\0') + return 1; + + if (strlcat(out, ".", len) >= len) + return 0; + + l = strlen(out); + if (l >= len) + return 0; + out += l; + len -= l; + + s = end+1; + } +}