Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <stddef.h>
18 #include <stdint.h>
19 #include <string.h>
21 #include "gmid.h"
/*
 * Punycode decoding parameters.  The values match the ones given in
 * RFC 3492, section 5.
 */
#define BASE 36		/* digit base; also the step of the threshold index k */
#define TMIN 1		/* lower clamp for the per-digit threshold t */
#define TMAX 26		/* upper clamp for the per-digit threshold t */
#define SKEW 38		/* used by adapt() when computing the new bias */
#define DAMP 700	/* divisor applied to the very first delta in adapt() */
#define IBIAS 72	/* initial value of bias */
#define IN 128		/* initial value of n (first non-ASCII code point) */

/* to make the linker happy */
struct conf conf;
34 static int
35 adapt(int delta, int numpoints, int firsttime)
36 {
37 int k;
39 if (firsttime)
40 delta = delta / DAMP;
41 else
42 delta = delta / 2;
44 delta += (delta / numpoints);
46 k = 0;
47 while (delta > ((BASE - TMIN) * TMAX) / 2) {
48 delta = delta / (BASE - TMIN);
49 k += BASE;
50 }
51 return k + (((BASE - TMIN + 1) * delta) / (delta + SKEW));
52 }
/*
 * Copy the literal (basic code point) portion of the label s into out.
 *
 * The literal portion is everything before the last '-' in s; if s
 * has no '-' at all, the whole string is copied.  No NUL terminator is
 * written: the caller is expected to hand in a zero-filled buffer.
 *
 * Returns a pointer into s right after the copied portion (i.e. at
 * the last '-' or at the terminating NUL), or NULL if s is longer than
 * len or contains a byte greater than 'z'.  On failure out may have
 * been partially written.
 */
static const char *
copy_label(const char *s, char *out, size_t len)
{
	const char	*delim, *stop;
	size_t		 l;

	l = strlen(s);
	if (l > len)
		return NULL;

	/* the last delimiter separates the literal part from the digits */
	if ((delim = strrchr(s, '-')) != NULL)
		stop = delim;
	else
		stop = s + l;

	for (; s < stop; ++s, ++out) {
		/*
		 * Compare as unsigned: where plain char is signed, bytes
		 * >= 0x80 are negative and would otherwise pass the test.
		 */
		if ((unsigned char)*s > 'z')
			return NULL;
		*out = *s;
	}

	return s;
}
/*
 * Map a punycode digit to its numeric value: letters (either case)
 * are 0..25 and '0'..'9' are 26..35.
 *
 * Any other character yields (unsigned int)-1, which can never be
 * mistaken for a valid base-36 digit.  Returning the raw character
 * instead would make bytes below 36 look like legitimate digits.
 */
static unsigned int
digit_value(char c)
{
	if ('A' <= c && c <= 'Z')
		return c - 'A';

	if ('a' <= c && c <= 'z')
		return c - 'a';

	if ('0' <= c && c <= '9')
		return 26 + c - '0';

	return (unsigned int)-1;
}
/*
 * Insert codepoint, encoded as UTF-8, into the NUL-terminated string
 * out (a buffer of len bytes) at the position returned by
 * utf8_nth(out, i).
 *
 * Code points in the ASCII range, in the UTF-16 surrogate range and
 * above 0x10FFFF are rejected.  The insertion is refused when the
 * resulting string, terminator included, would not fit in len bytes.
 *
 * Returns 1 on success; on failure returns 0 and points *err at a
 * static message.
 */
static int
insert(char *out, size_t len, int codepoint, size_t i, const char **err)
{
	int	 l;
	char	*t;
	size_t	 used;

	if (codepoint <= 0x7F) {
		*err = "puny: invalid decoded character (ASCII range)";
		return 0;
	} else if (codepoint <= 0x7FF) {
		l = 2;
	} else if (codepoint <= 0xFFFF) {
		l = 3;
	} else if (codepoint <= 0x10FFFF) {
		l = 4;
	} else {
		*err = "puny: invalid decoded character";
		return 0;
	}

	/* surrogates are not valid scalar values and have no UTF-8 form */
	if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
		*err = "puny: invalid decoded character";
		return 0;
	}

	/* NOTE(review): assumes utf8_nth returns a pointer inside out */
	if ((t = utf8_nth(out, i)) == NULL) {
		*err = "puny: invalid insert position";
		return 0;
	}

	/*
	 * The whole tail starting at t is shifted right by l bytes, so
	 * what has to fit is the current string plus l bytes plus the
	 * terminator; checking only t + l is not enough.
	 */
	used = strlen(out);
	if (used + (size_t)l >= len) {
		*err = "puny: insert would overflow";
		return 0;
	}

	/* move the terminator too, so out is always a valid string */
	memmove(t + l, t, strlen(t) + 1);

	switch (l) {
	case 2:
		t[1] = ( codepoint        & 0x3F) + 0x80;
		t[0] = ((codepoint >>  6) & 0x1F) + 0xC0;
		break;
	case 3:
		t[2] = ( codepoint        & 0x3F) + 0x80;
		t[1] = ((codepoint >>  6) & 0x3F) + 0x80;
		t[0] = ((codepoint >> 12) & 0x0F) + 0xE0;
		break;
	case 4:
		t[3] = ( codepoint        & 0x3F) + 0x80;
		t[2] = ((codepoint >>  6) & 0x3F) + 0x80;
		t[1] = ((codepoint >> 12) & 0x3F) + 0x80;
		t[0] = ((codepoint >> 18) & 0x07) + 0xF0;
		break;
	}

	return 1;
}
148 static int
149 decode(const char *str, char *out, size_t len, const char **err)
151 size_t i;
152 uint32_t n;
153 unsigned int oldi, bias, w, k, digit, t;
154 unsigned int numpoints;
155 const char *s;
157 if (!starts_with(str, "xn--")) {
158 strncpy(out, str, len);
159 return 1;
162 /* skip the xn-- */
163 str += 4;
165 if (strchr(str, '-') != NULL) {
166 if ((s = copy_label(str, out, len)) == NULL) {
167 *err = "puny: invalid label";
168 return 0;
170 if (*s == '-')
171 s++;
172 } else
173 s = str;
175 numpoints = strlen(out);
177 n = IN;
178 i = 0;
179 bias = IBIAS;
181 while (*s != '\0') {
182 oldi = i;
183 w = 1;
185 for (k = BASE; ; k += BASE) {
186 if (*s == '\0') {
187 *err = "puny: label truncated?";
188 return 0;
190 /* fail eventually? */
191 digit = digit_value(*s);
192 s++;
194 /* fail on overflow */
195 i += digit * w;
197 if (k <= bias)
198 t = TMIN;
199 else if (k >= bias + TMAX)
200 t = TMAX;
201 else
202 t = k - bias;
204 if (digit < t)
205 break;
206 w *= (BASE - t);
209 bias = adapt(i - oldi, numpoints+1, oldi == 0);
210 n += i / (numpoints+1); /* fail on overflow */
211 i = i % (numpoints+1);
213 if (!insert(out, len, n, i, err))
214 return 0;
215 numpoints++;
216 ++i;
219 return 1;
/*
 * Return a pointer to the first '.' in hostname or, if there is none,
 * to its terminating NUL.
 */
static const char *
end_of_label(const char *hostname)
{
	return hostname + strcspn(hostname, ".");
}
230 int
231 puny_decode(const char *hostname, char *out, size_t len, const char **err)
233 char label[LABEL_LEN];
234 const char *s, *end;
235 size_t l;
237 memset(out, 0, len);
238 if (hostname == NULL)
239 return 1;
241 s = hostname;
242 for (;;) {
243 end = end_of_label(s);
244 l = end - s;
245 if (l >= sizeof(label)) {
246 *err = "label too long";
247 return 0;
250 memcpy(label, s, l);
251 label[l] = '\0';
253 if (!decode(label, out, len, err))
254 return 0;
256 if (*end == '\0')
257 return 1;
259 if (strlcat(out, ".", len) >= len) {
260 *err = "domain name too long";
261 return 0;
264 l = strlen(out);
265 if (l >= len) {
266 *err = "domain name too long";
267 return 0;
269 out += l;
270 len -= l;
272 s = end+1;