Blob


/* Quick and dirty RFC 2047 (MIME encoded-word) decoding */
#include "a.h"
/*
 * Return the value (0-15) of the hexadecimal digit c.
 * There is no error return: any character that is not a
 * hex digit yields 15.
 */
static int
unhex1(char c)
{
	if('0' <= c && c <= '9')
		return c-'0';
	if('a' <= c && c <= 'f')
		return c-'a'+10;
	if('A' <= c && c <= 'F')
		return c-'A'+10;
	return 15;
}
/*
 * Combine the two hex digits s[0] (high nibble) and s[1] (low nibble)
 * into one value.  Both characters are read unconditionally, so the
 * caller must make sure they exist.
 */
static int
unhex(char *s)
{
	return unhex1(s[0])*16+unhex1(s[1]);
}
23 int
24 _decqp(uchar *out, int lim, char *in, int n, int underscores)
25 {
26 char *p, *ep;
27 uchar *eout, *out0;
29 out0 = out;
30 eout = out+lim;
31 for(p=in, ep=in+n; p<ep && out<eout; ){
32 if(underscores && *p == '_'){
33 *out++ = ' ';
34 p++;
35 }
36 else if(*p == '='){
37 if(p+1 >= ep)
38 break;
39 if(*(p+1) == '\n'){
40 p += 2;
41 continue;
42 }
43 if(p+3 > ep)
44 break;
45 *out++ = unhex(p+1);
46 p += 3;
47 }else
48 *out++ = *p++;
49 }
50 return out-out0;
51 }
53 int
54 decqp(uchar *out, int lim, char *in, int n)
55 {
56 return _decqp(out, lim, in, n, 0);
57 }
59 char*
60 decode(int kind, char *s, int *len)
61 {
62 char *t;
63 int l;
65 if(s == nil)
66 return s;
67 switch(kind){
68 case QuotedPrintable:
69 case QuotedPrintableU:
70 l = strlen(s)+1;
71 t = emalloc(l);
72 l = _decqp((uchar*)t, l, s, l-1, kind==QuotedPrintableU);
73 *len = l;
74 t[l] = 0;
75 return t;
77 case Base64:
78 l = strlen(s)+1;
79 t = emalloc(l);
80 l = dec64((uchar*)t, l, s, l-1);
81 *len = l;
82 t[l] = 0;
83 return t;
85 default:
86 *len = strlen(s);
87 return estrdup(s);
88 }
89 }
/*
 * MIME charset names (mime) paired with the character-set name
 * handed to the tcs program's -f option (tcs).
 */
struct {
	char *mime;
	char *tcs;
} tcstab[] = {
	{ "iso-8859-2",		"8859-2" },
	{ "iso-8859-3",		"8859-3" },
	{ "iso-8859-4",		"8859-4" },
	{ "iso-8859-5",		"8859-5" },
	{ "iso-8859-6",		"8859-6" },
	{ "iso-8859-7",		"8859-7" },
	{ "iso-8859-8",		"8859-8" },
	{ "iso-8859-9",		"8859-9" },
	{ "iso-8859-10",	"8859-10" },
	{ "iso-8859-15",	"8859-15" },
	{ "big5",		"big5" },
	{ "iso-2022-jp",	"jis-kanji" },
	{ "windows-1250",	"windows-1250" },
	{ "windows-1251",	"windows-1251" },
	{ "windows-1252",	"windows-1252" },
	{ "windows-1253",	"windows-1253" },
	{ "windows-1254",	"windows-1254" },
	{ "windows-1255",	"windows-1255" },
	{ "windows-1256",	"windows-1256" },
	{ "windows-1257",	"windows-1257" },
	{ "windows-1258",	"windows-1258" },
	{ "koi8-r",		"koi8" }
};
/* Arguments handed to twriter: what to write and where. */
typedef struct Writeargs Writeargs;
struct Writeargs
{
	int fd;		/* descriptor written to, then closed, by twriter */
	char *s;	/* NUL-terminated text to write; freed by twriter */
};
126 static void
127 twriter(void *v)
129 Writeargs *w;
131 w = v;
132 write(w->fd, w->s, strlen(w->s));
133 close(w->fd);
134 free(w->s);
135 free(w);
138 char*
139 tcs(char *charset, char *s)
141 char *buf;
142 int i, n, nbuf;
143 int fd[3], p[2], pp[2];
144 uchar *us;
145 char *t, *u;
146 Rune r;
147 Writeargs *w;
149 if(s == nil || charset == nil || *s == 0)
150 return s;
152 if(cistrcmp(charset, "utf-8") == 0)
153 return s;
154 if(cistrcmp(charset, "iso-8859-1") == 0 || cistrcmp(charset, "us-ascii") == 0){
155 latin1:
156 n = 0;
157 for(us=(uchar*)s; *us; us++)
158 n += runelen(*us);
159 n++;
160 t = emalloc(n);
161 for(us=(uchar*)s, u=t; *us; us++){
162 r = *us;
163 u += runetochar(u, &r);
165 *u = 0;
166 free(s);
167 return t;
169 for(i=0; i<nelem(tcstab); i++)
170 if(cistrcmp(charset, tcstab[i].mime) == 0)
171 goto tcs;
172 goto latin1;
174 tcs:
175 if(pipe(p) < 0 || pipe(pp) < 0)
176 sysfatal("pipe: %r");
177 fd[0] = p[0];
178 fd[1] = pp[0];
179 fd[2] = dup(2, -1);
180 if(threadspawnl(fd, "tcs", "tcs", "-f", tcstab[i].tcs, nil) < 0){
181 close(p[0]);
182 close(p[1]);
183 close(pp[0]);
184 close(pp[1]);
185 close(fd[2]);
186 goto latin1;
188 close(p[0]);
189 close(pp[0]);
191 nbuf = UTFmax*strlen(s)+100; /* just a guess at worst case */
192 buf = emalloc(nbuf);
194 w = emalloc(sizeof *w);
195 w->fd = p[1];
196 w->s = estrdup(s);
197 proccreate(twriter, w, STACK);
199 n = readn(pp[1], buf, nbuf-1);
200 close(pp[1]);
201 if(n <= 0){
202 free(buf);
203 goto latin1;
205 buf[n] = 0;
206 free(s);
207 s = estrdup(buf);
208 free(buf);
209 return s;
212 char*
213 unrfc2047(char *s)
215 char *p, *q, *t, *u, *v;
216 int len;
217 Rune r;
218 Fmt fmt;
220 if(s == nil)
221 return nil;
223 if(strstr(s, "=?") == nil)
224 return s;
226 fmtstrinit(&fmt);
227 for(p=s; *p; ){
228 /* =?charset?e?text?= */
229 if(*p=='=' && *(p+1)=='?'){
230 p += 2;
231 q = strchr(p, '?');
232 if(q == nil)
233 goto emit;
234 q++;
235 if(*q == '?' || *(q+1) != '?')
236 goto emit;
237 t = q+2;
238 u = strchr(t, '?');
239 if(u == nil || *(u+1) != '=')
240 goto emit;
241 switch(*q){
242 case 'q':
243 case 'Q':
244 *u = 0;
245 v = decode(QuotedPrintableU, t, &len);
246 break;
247 case 'b':
248 case 'B':
249 *u = 0;
250 v = decode(Base64, t, &len);
251 break;
252 default:
253 goto emit;
255 *(q-1) = 0;
256 v = tcs(p, v);
257 fmtstrcpy(&fmt, v);
258 free(v);
259 p = u+2;
261 emit:
262 p += chartorune(&r, p);
263 fmtrune(&fmt, r);
265 p = fmtstrflush(&fmt);
266 if(p == nil)
267 sysfatal("out of memory");
268 free(s);
269 return p;
#ifdef TEST
/* Sample header values fed to unrfc2047 by the test driver below. */
char *test[] =
{
	"hello world",
	"hello =?iso-8859-1?q?this is some text?=",
	"=?US-ASCII?Q?Keith_Moore?=",
	"=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=",
	"=?ISO-8859-1?Q?Andr=E9?= Pirard",
	"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",
	"=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
	"=?ISO-8859-1?Q?Olle_J=E4rnefors?=",
	"=?iso-2022-jp?B?GyRCTTVKISRKP006SiRyS34kPyQ3JEZKcz03JCIkahsoQg==?=",
	"=?UTF-8?B?Ik5pbHMgTy4gU2Vsw6VzZGFsIg==?="
};

/*
 * Test driver: print each sample followed by its decoded form.
 * unrfc2047 is given a private copy because it takes ownership of
 * (and may modify and free) its argument.
 */
void
threadmain(int argc, char **argv)
{
	int i;

	for(i=0; i<nelem(test); i++)
		print("%s\n\t%s\n", test[i], unrfc2047(estrdup(test[i])));
	threadexitsall(0);
}

#endif