Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include "gmid.h"
19 #include <stddef.h>
20 #include <stdint.h>
21 #include <string.h>
23 #define BASE 36
24 #define TMIN 1
25 #define TMAX 26
26 #define SKEW 38
27 #define DAMP 700
28 #define IBIAS 72
29 #define IN 128
31 static int
32 adapt(int delta, int numpoints, int firsttime)
33 {
34 int k;
36 if (firsttime)
37 delta = delta / DAMP;
38 else
39 delta = delta / 2;
41 delta += (delta / numpoints);
43 k = 0;
44 while (delta > ((BASE - TMIN) * TMAX) / 2) {
45 delta = delta / (BASE - TMIN);
46 k += BASE;
47 }
48 return k + (((BASE - TMIN + 1) * delta) / (delta + SKEW));
49 }
/*
 * Copy the basic (plain ASCII) part of the label `s' into `out', a
 * buffer of `len' bytes.
 *
 * The basic part is everything before the last '-' in `s'; when `s'
 * contains no '-' the whole string is copied.  On success `out' is
 * NUL-terminated.
 *
 * Returns a pointer into `s' to the first byte that was not copied
 * (the last '-' or the terminating NUL).  Returns NULL when `s' is
 * longer than `len', when the basic part plus its terminator does not
 * fit in `out', or when the basic part contains a byte greater than
 * 'z' (which includes every non-ASCII byte).
 */
static const char *
copy_label(const char *s, char *out, size_t len)
{
	const char	*delim;
	size_t		 total, n;

	total = strlen(s);
	if (total > len)
		return NULL;

	/*
	 * strrchr finds the last '-' without ever stepping before the
	 * start of the string.
	 */
	if ((delim = strrchr(s, '-')) == NULL)
		delim = s + total;

	/* the copied bytes need room for a terminating NUL as well */
	n = (size_t)(delim - s);
	if (n >= len)
		return NULL;

	for (; s < delim; ++s, ++out) {
		/*
		 * plain char may be signed: compare as unsigned so that
		 * bytes >= 0x80 are rejected instead of looking negative.
		 */
		if ((unsigned char)*s > 'z')
			return NULL;
		*out = *s;
	}
	*out = '\0';

	return s;
}
78 static unsigned int
79 digit_value(char c)
80 {
81 if ('A' <= c && c <= 'Z')
82 return c - 'A';
84 if ('a' <= c && c <= 'z')
85 return c - 'a';
87 if ('0' <= c && c <= '9')
88 return 26 + c - '0';
90 return c;
91 }
/*
 * Insert `codepoint', encoded as UTF-8, into the NUL-terminated string
 * held in `out' (a buffer of `len' bytes) at the position that
 * utf8_nth(out, i) reports.  The bytes from that position onwards,
 * terminator included, are shifted right to make room.
 *
 * Only code points in 0x80..0x10FFFF that are not UTF-16 surrogates
 * are accepted.
 *
 * Returns 1 on success.  On failure returns 0, stores a static
 * message in *err and leaves `out' untouched.
 */
static int
insert(char *out, size_t len, int codepoint, size_t i, const char **err)
{
	unsigned char	*u;
	size_t		 n, off, tail;
	char		*t;

	if (codepoint <= 0x7F) {
		*err = "puny: invalid decoded character (ASCII range)";
		return 0;
	} else if (codepoint <= 0x7FF) {
		n = 2;
	} else if (codepoint <= 0xFFFF) {
		/* surrogates have no valid UTF-8 encoding */
		if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
			*err = "puny: invalid decoded character";
			return 0;
		}
		n = 3;
	} else if (codepoint <= 0x10FFFF) {
		n = 4;
	} else {
		*err = "puny: invalid decoded character";
		return 0;
	}

	if ((t = utf8_nth(out, i)) == NULL) {
		*err = "puny: invalid insert position";
		return 0;
	}

	/*
	 * The whole result has to fit: the bytes before the insert
	 * position, the new sequence, the shifted tail and the NUL.
	 * Checking only the new sequence would let memmove() push the
	 * tail past the end of the buffer.
	 */
	off = (size_t)(t - out);
	tail = strlen(t);
	if (off + tail + n >= len) {
		*err = "puny: insert would overflow";
		return 0;
	}

	/* +1 so the terminator travels with the tail */
	memmove(t + n, t, tail + 1);

	/* write through unsigned char: every byte is >= 0x80 */
	u = (unsigned char *)t;
	switch (n) {
	case 2:
		u[1] = ( codepoint        & 0x3F) + 0x80;
		u[0] = ((codepoint >>  6) & 0x1F) + 0xC0;
		break;
	case 3:
		u[2] = ( codepoint        & 0x3F) + 0x80;
		u[1] = ((codepoint >>  6) & 0x3F) + 0x80;
		u[0] = ((codepoint >> 12) & 0x0F) + 0xE0;
		break;
	case 4:
		u[3] = ( codepoint        & 0x3F) + 0x80;
		u[2] = ((codepoint >>  6) & 0x3F) + 0x80;
		u[1] = ((codepoint >> 12) & 0x3F) + 0x80;
		u[0] = ((codepoint >> 18) & 0x07) + 0xF0;
		break;
	}

	return 1;
}
145 static int
146 decode(const char *str, char *out, size_t len, const char **err)
148 size_t i;
149 uint32_t n;
150 unsigned int oldi, bias, w, k, digit, t;
151 unsigned int numpoints;
152 const char *s;
154 if (!starts_with(str, "xn--")) {
155 strncpy(out, str, len);
156 return 1;
159 /* skip the xn-- */
160 str += 4;
162 if (strchr(str, '-') != NULL) {
163 if ((s = copy_label(str, out, len)) == NULL) {
164 *err = "puny: invalid label";
165 return 0;
167 if (*s == '-')
168 s++;
169 } else
170 s = str;
172 numpoints = strlen(out);
174 n = IN;
175 i = 0;
176 bias = IBIAS;
178 while (*s != '\0') {
179 oldi = i;
180 w = 1;
182 for (k = BASE; ; k += BASE) {
183 if (*s == '\0') {
184 *err = "puny: label truncated?";
185 return 0;
187 /* fail eventually? */
188 digit = digit_value(*s);
189 s++;
191 /* fail on overflow */
192 i += digit * w;
194 if (k <= bias)
195 t = TMIN;
196 else if (k >= bias + TMAX)
197 t = TMAX;
198 else
199 t = k - bias;
201 if (digit < t)
202 break;
203 w *= (BASE - t);
206 bias = adapt(i - oldi, numpoints+1, oldi == 0);
207 n += i / (numpoints+1); /* fail on overflow */
208 i = i % (numpoints+1);
210 if (!insert(out, len, n, i, err))
211 return 0;
212 numpoints++;
213 ++i;
216 return 1;
/*
 * Return a pointer to the first '.' in `hostname', or to its
 * terminating NUL when there is no '.'.
 */
static const char *
end_of_label(const char *hostname)
{
	return hostname + strcspn(hostname, ".");
}
227 int
228 puny_decode(const char *hostname, char *out, size_t len, const char **err)
230 char label[LABEL_LEN];
231 const char *s, *end;
232 size_t l;
234 memset(out, 0, len);
235 if (hostname == NULL)
236 return 1;
238 s = hostname;
239 for (;;) {
240 end = end_of_label(s);
241 l = end - s;
242 if (l >= sizeof(label)) {
243 *err = "label too long";
244 return 0;
247 memcpy(label, s, l);
248 label[l] = '\0';
250 if (!decode(label, out, len, err))
251 return 0;
253 if (*end == '\0')
254 return 1;
256 if (strlcat(out, ".", len) >= len) {
257 *err = "domain name too long";
258 return 0;
261 l = strlen(out);
262 if (l >= len) {
263 *err = "domain name too long";
264 return 0;
266 out += l;
267 len -= l;
269 s = end+1;