Blob


1 /*
2 * Copyright (c) 2021 Omar Polo <op@omarpolo.com>
3 *
4 * Permission to use, copy, modify, and distribute this software for any
5 * purpose with or without fee is hereby granted, provided that the above
6 * copyright notice and this permission notice appear in all copies.
7 *
8 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
9 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
10 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
11 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
12 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
13 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
14 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
15 */
17 #include <stddef.h>
18 #include <stdint.h>
19 #include <string.h>
21 #include "gmid.h"
/*
 * Bootstring parameters used by the decoder below; the values are
 * the ones RFC 3492 section 5 assigns to Punycode.
 */
#define BASE	36	/* number of distinct digit values */
#define TMIN	1	/* lowest threshold a digit is compared against */
#define TMAX	26	/* highest threshold a digit is compared against */
#define SKEW	38	/* divisor offset used by adapt() */
#define DAMP	700	/* divisor applied to the very first delta */
#define IBIAS	72	/* bias the decoder starts with */
#define IN	128	/* code point the decoder starts with (first non-ASCII) */
31 static int
32 adapt(int delta, int numpoints, int firsttime)
33 {
34 int k;
36 if (firsttime)
37 delta = delta / DAMP;
38 else
39 delta = delta / 2;
41 delta += (delta / numpoints);
43 k = 0;
44 while (delta > ((BASE - TMIN) * TMAX) / 2) {
45 delta = delta / (BASE - TMIN);
46 k += BASE;
47 }
48 return k + (((BASE - TMIN + 1) * delta) / (delta + SKEW));
49 }
/*
 * Copy the part of s that precedes its last '-' into out and
 * NUL-terminate it.  When s contains no '-' the whole string is
 * copied.
 *
 * len is the size of out in bytes.  The copy is refused when s is
 * longer than len or when the copied part plus its terminator would
 * not fit.  Every copied byte must be <= 'z'; bytes are compared as
 * unsigned char so that values >= 0x80 are rejected regardless of the
 * signedness of plain char.
 *
 * Returns a pointer into s just past the copied part (i.e. at the
 * last '-' or at the terminating NUL), or NULL on failure, in which
 * case out is left untouched.
 */
static const char *
copy_until_delimiter(const char *s, char *out, size_t len)
{
	const char	*stop;
	size_t		 slen, n, j;

	slen = strlen(s);
	if (slen > len)
		return NULL;

	/* strrchr avoids walking a pointer to before the start of s */
	if ((stop = strrchr(s, '-')) == NULL)
		stop = s + slen;

	n = stop - s;
	if (n >= len)
		return NULL;

	/* validate before writing so a failure has no side effects */
	for (j = 0; j < n; ++j)
		if ((unsigned char)s[j] > 'z')
			return NULL;

	memcpy(out, s, n);
	out[n] = '\0';

	return stop;
}
/*
 * Map a character to its digit value: letters of either case map to
 * 0..25 and the decimal digits '0'..'9' map to 26..35.
 *
 * Any other character yields (unsigned int)-1, which is larger than
 * every valid digit, so callers can detect bad input with a simple
 * range check.  (Returning the character itself would make bytes
 * 0..35 indistinguishable from real digits.)
 */
static unsigned int
digit_value(char c)
{
	if ('A' <= c && c <= 'Z')
		return c - 'A';

	if ('a' <= c && c <= 'z')
		return c - 'a';

	if ('0' <= c && c <= '9')
		return 26 + c - '0';

	return (unsigned int)-1;
}
/*
 * Insert the UTF-8 encoding of codepoint into the NUL-terminated
 * string out, at the position utf8_nth() reports for index i.
 *
 * len is the size in bytes of the buffer holding out.  The insertion
 * is refused unless the existing text, the new bytes and the
 * terminating NUL all fit.
 *
 * Only code points that need two to four UTF-8 bytes are accepted:
 * values up to 0x7F, values above 0x10FFFF and the UTF-16 surrogate
 * range 0xD800-0xDFFF (not encodable in UTF-8) are rejected.
 *
 * Returns 1 on success, 0 on failure; on failure out is not modified.
 */
static int
insert(char *out, size_t len, int codepoint, size_t i)
{
	size_t		 l, off, taillen;
	unsigned char	*p;
	char		*t;

	if (codepoint <= 0x7F)
		return 0;
	else if (codepoint <= 0x7FF)
		l = 2;
	else if (codepoint <= 0xFFFF) {
		if (0xD800 <= codepoint && codepoint <= 0xDFFF)
			return 0;
		l = 3;
	} else if (codepoint <= 0x10FFFF)
		l = 4;
	else
		return 0;

	if ((t = utf8_nth(out, i)) == NULL)
		return 0;

	/*
	 * Need off + taillen + l + 1 <= len.  Checked step by step so
	 * that none of the subtractions can wrap around.
	 */
	off = t - out;
	taillen = strlen(t);
	if (off >= len || taillen >= len - off ||
	    l >= len - off - taillen)
		return 0;

	/* shift the tail, terminator included, to open a gap of l bytes */
	memmove(t + l, t, taillen + 1);

	p = (unsigned char *)t;
	switch (l) {
	case 2:
		p[1] = ( codepoint        & 0x3F) + 0x80;
		p[0] = ((codepoint >>  6) & 0x1F) + 0xC0;
		break;
	case 3:
		p[2] = ( codepoint        & 0x3F) + 0x80;
		p[1] = ((codepoint >>  6) & 0x3F) + 0x80;
		p[0] = ((codepoint >> 12) & 0x0F) + 0xE0;
		break;
	default:	/* 4 */
		p[3] = ( codepoint        & 0x3F) + 0x80;
		p[2] = ((codepoint >>  6) & 0x3F) + 0x80;
		p[1] = ((codepoint >> 12) & 0x3F) + 0x80;
		p[0] = ((codepoint >> 18) & 0x07) + 0xF0;
		break;
	}

	return 1;
}
136 static int
137 decode(const char *str, char *out, size_t len)
139 size_t i;
140 uint32_t n;
141 unsigned int oldi, bias, w, k, digit, t;
142 unsigned int numpoints;
143 const char *s;
145 if (str == NULL || len <= 4)
146 return 0;
148 /* todo: starts_with */
149 if (strstr(str, "xn--") != str) {
150 strncpy(out, str, len);
151 return 1;
154 /* skip the xn-- */
155 str += 4;
157 if (strchr(str, '-') != NULL) {
158 if ((s = copy_until_delimiter(str, out, len)) == NULL)
159 return 0;
160 if (*s == '-')
161 s++;
162 } else
163 s = str;
165 numpoints = strlen(out);
167 n = IN;
168 i = 0;
169 bias = IBIAS;
171 while (*s != '\0') {
172 oldi = i;
173 w = 1;
175 for (k = BASE; ; k += BASE) {
176 if (*s == '\0')
177 return 0;
178 /* fail eventually? */
179 digit = digit_value(*s);
180 s++;
182 /* fail on overflow */
183 i += digit * w;
185 if (k <= bias)
186 t = TMIN;
187 else if (k >= bias + TMAX)
188 t = TMAX;
189 else
190 t = k - bias;
192 if (digit < t)
193 break;
194 w *= (BASE - t);
197 bias = adapt(i - oldi, numpoints+1, oldi == 0);
198 n += i / (numpoints+1); /* fail on overflow */
199 i = i % (numpoints+1);
201 if (!insert(out, len, n, i))
202 return 0;
203 numpoints++;
204 ++i;
207 return 1;
/*
 * Return a pointer to the first '.' in hostname or, when there is
 * none, to its terminating NUL.
 */
const char *
end_of_component(const char *hostname)
{
	return hostname + strcspn(hostname, ".");
}
218 int
219 puny_decode(const char *hostname, char *out, size_t len)
221 char comp[DOMAIN_NAME_LEN];
222 const char *s, *end;
223 size_t l;
225 memset(out, 0, len);
227 s = hostname;
228 for (;;) {
229 end = end_of_component(s);
230 if (end - s >= sizeof(comp))
231 return 0;
233 memcpy(comp, s, end - s);
234 comp[end - s] = '\0';
236 if (!decode(comp, out, len))
237 return 0;
239 if (*end == '\0')
240 return 1;
242 if (strlcat(out, ".", len) >= len)
243 return 0;
245 l = strlen(out);
246 if (l >= len)
247 return 0;
248 out += l;
249 len -= l;
251 s = end+1;