Commit Diff


commit - 42650adec078a7d3e885afbafa1fa4406d4823fb
commit + 7957cbd9aad6ace179287b2ddcbae0b14a25a90d
blob - b4e6b8448d5c945cf71a3c0f3df01ee8b505e4a3
blob + 47a053630219f8ad6fcf58063659192f45fcc53e
--- gmid.h
+++ gmid.h
@@ -247,6 +247,6 @@ int		 parse_iri(char*, struct iri*, const char**);
 int		 trim_req_iri(char*);
 
 /* puny.c */
-int		 puny_decode(char*, char*, size_t);
+int		 puny_decode(const char*, char*, size_t);
 
 #endif
blob - b25f82a9f9f6e410156922ee3f9e588bc1a6fa9e
blob + 26d65714f651ce49d330afaa1daa8555360d1e59
--- regress/puny-test.c
+++ regress/puny-test.c
@@ -48,16 +48,12 @@ main(int argc, char **argv)
 {
 	struct suite *i;
 	int failed;
-	char *hostname;
 	char buf[64];		/* name len */
 
 	failed = 0;
 	for (i = t; i->src != NULL; ++i) {
-		if ((hostname = strdup(i->src)) == NULL)
-			return 0;
-
 		memset(buf, 0, sizeof(buf));
-		if (!puny_decode(hostname, buf, sizeof(buf))) {
+		if (!puny_decode(i->src, buf, sizeof(buf))) {
                         printf("decode: failure with %s\n", i->src);
                         failed = 1;
 			continue;
@@ -70,8 +66,6 @@ main(int argc, char **argv)
 			continue;
 		} else
 			printf("OK:  %s => %s\n", i->src, buf);
-
-		free(hostname);
 	}
 
 	return failed;
blob - /dev/null
blob + f465198c4a46207ed74e1461add5cab140c0c60a (mode 644)
--- /dev/null
+++ puny.c
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include "gmid.h"
+
+#define BASE	36	/* digit values: 26 letters plus 10 decimal digits */
+#define TMIN	1	/* smallest per-digit threshold used by decode() */
+#define TMAX	26	/* largest per-digit threshold used by decode() */
+#define SKEW	38	/* added to delta in the divisor of adapt()'s result */
+#define DAMP	700	/* adapt() divides delta by this when firsttime is set */
+#define IBIAS	72	/* bias decode() starts each label with */
+#define IN	128	/* code point decode() starts from: first non-ASCII */
+
+/*
+ * Compute the bias to use for the next delta.  delta is how far the
+ * insertion index moved, numpoints the number of code points to spread
+ * it over (must not be zero); firsttime picks DAMP over 2 as divisor.
+ */
+static int
+adapt(int delta, int numpoints, int firsttime)
+{
+	const int limit = ((BASE - TMIN) * TMAX) / 2;
+	int bias = 0;
+
+	delta /= firsttime ? DAMP : 2;
+	delta += delta / numpoints;
+
+	for (; delta > limit; bias += BASE)
+		delta /= BASE - TMIN;
+	return bias + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
+}
+
+/*
+ * Copy the part of s before its last '-' (all of s if there is none)
+ * into out, a buffer of len bytes, and NUL-terminate it.  Returns where
+ * the copy stopped in s, or NULL if s is longer than len, if there's no
+ * room for the terminator or if a byte greater than 'z' is found.
+ */
+static const char *
+copy_until_delimiter(const char *s, char *out, size_t len)
+{
+	const char *stop, *end = s + strlen(s);
+
+	if ((stop = strrchr(s, '-')) == NULL)
+		stop = end;
+	if ((size_t)(end - s) > len || (size_t)(stop - s) >= len)
+		return NULL;
+
+	for (; s < stop; ++s) {
+		if ((unsigned char)*s > 'z')	/* char may be signed */
+			return NULL;
+		*out++ = *s;
+	}
+	*out = '\0';
+	return s;
+}
+
+/*
+ * Value of the punycode digit c: letters of either case are 0-25 and
+ * '0'-'9' are 26-35.  Any other character yields ~0U, never its code.
+ */
+static unsigned int
+digit_value(char c)
+{
+	if ('A' <= c && c <= 'Z')
+		return c - 'A';
+	if ('a' <= c && c <= 'z')
+		return c - 'a';
+
+	return ('0' <= c && c <= '9') ? 26U + (c - '0') : ~0U;
+}
+
+/*
+ * Insert codepoint, UTF-8 encoded, at the position utf8_nth(out, i)
+ * returns within the NUL-terminated string out (a buffer of len bytes),
+ * shifting the rest of the string right.  Returns 1 on success, 0 if
+ * codepoint is ASCII or above U+10FFFF, on utf8_nth() failure or no room.
+ */
+static int
+insert(char *out, size_t len, int codepoint, size_t i)
+{
+	static const unsigned char lead[] = { 0, 0, 0xC0, 0xE0, 0xF0 };
+	int j, l;
+	size_t off, tail;
+	char *t;
+
+	if (codepoint <= 0x7F)
+		return 0;
+	else if (codepoint <= 0x7FF)
+		l = 2;
+	else if (codepoint <= 0xFFFF)
+		l = 3;
+	else if (codepoint <= 0x10FFFF)
+		l = 4;
+	else
+		return 0;
+
+	if ((t = utf8_nth(out, i)) == NULL)
+		return 0;
+
+	/*
+	 * The bytes from t on move too: they and the terminator must
+	 * still fit in out, not just the new sequence.
+	 */
+	off = t - out;
+	tail = strlen(t);
+	if (off >= len || tail >= len - off || (size_t)l >= len - off - tail)
+		return 0;
+	memmove(t + l, t, tail + 1);
+
+	/* continuation bytes from last to first, then the lead byte */
+	for (j = l - 1; j > 0; --j, codepoint >>= 6)
+		t[j] = 0x80 | (codepoint & 0x3F);
+	t[0] = lead[l] | codepoint;
+	return 1;
+}
+
+/*
+ * Decode one hostname label from str into out, a buffer of len bytes
+ * expected to be zero-filled (puny_decode() clears it).  Labels without
+ * the "xn--" prefix are copied as they are, the others are decoded to
+ * UTF-8.  Returns 1 on success, 0 if str is NULL, len is less than 5,
+ * the label is malformed or the result doesn't fit in out.
+ */
+static int
+decode(const char *str, char *out, size_t len)
+{
+	size_t i = 0;
+	uint32_t n = IN;
+	unsigned int oldi, bias = IBIAS, w, k, digit, t, numpoints;
+	const char *s;
+
+	if (str == NULL || len <= 4)
+		return 0;
+
+	if (strncmp(str, "xn--", 4) != 0) {
+		/* strncpy() wouldn't NUL-terminate out on truncation */
+		if (strlen(str) >= len)
+			return 0;
+		memcpy(out, str, strlen(str) + 1);
+		return 1;
+	}
+	str += 4;		/* skip the xn-- */
+
+	if (strchr(str, '-') != NULL) {
+		if ((s = copy_until_delimiter(str, out, len)) == NULL)
+			return 0;
+		if (*s == '-')
+			s++;
+	} else
+		s = str;
+	numpoints = strlen(out);
+
+	while (*s != '\0') {
+		oldi = i;
+		w = 1;
+
+		for (k = BASE; ; k += BASE) {
+			if (*s == '\0')
+				return 0;
+			/* reject values that are no digit and overflows of i */
+			digit = digit_value(*s++);
+			if (digit >= BASE || digit > (INT32_MAX - i) / w)
+				return 0;
+			i += digit * w;
+
+			/* threshold for this digit, kept within TMIN..TMAX */
+			t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias;
+
+			if (digit < t)
+				break;
+			if (w > INT32_MAX / (BASE - t))
+				return 0;
+			w *= (BASE - t);
+		}
+
+		bias = adapt(i - oldi, numpoints+1, oldi == 0);
+		if (i / (numpoints+1) > INT32_MAX - n)
+			return 0;
+		n += i / (numpoints+1);
+		i = i % (numpoints+1);
+
+		if (!insert(out, len, n, i))
+			return 0;
+		numpoints++;
+		++i;
+	}
+
+	return 1;
+}
+
+/* Return the first '.' in hostname, or its terminating NUL if none. */
+const char *
+end_of_component(const char *hostname)
+{
+	hostname += strcspn(hostname, ".");
+	return hostname;
+}
+
+/*
+ * Decode hostname label by label into out, a buffer of len bytes that
+ * is zeroed first.  Each label goes through decode() and labels are
+ * joined by '.'.  Returns 1 on success, 0 if a label is too long for the
+ * DOMAIN_NAME_LEN scratch buffer, decode() fails or out is too small.
+ */
+int
+puny_decode(const char *hostname, char *out, size_t len)
+{
+	char label[DOMAIN_NAME_LEN];
+	const char *dot;
+	size_t n, used;
+
+	memset(out, 0, len);
+
+	for (;; hostname = dot + 1) {
+		dot = end_of_component(hostname);
+		n = dot - hostname;
+		if (n >= sizeof(label))
+			return 0;
+		memcpy(label, hostname, n);
+		label[n] = '\0';
+		if (!decode(label, out, len))
+			return 0;
+		if (*dot == '\0')
+			return 1;
+
+		/* append the separator, then continue past what's written */
+		if (strlcat(out, ".", len) >= len)
+			return 0;
+		if ((used = strlen(out)) >= len)
+			return 0;
+		out += used;
+		len -= used;
+	}
+}