Blob


/* Quick and dirty RFC 2047 (MIME encoded-word) decoder */
3 #include "a.h"
/*
 * Value of a single hexadecimal digit, upper or lower case.
 * Any character that is not a hex digit yields 15.
 */
static int
unhex1(char c)
{
	int digit;

	digit = 15;	/* result for anything that is not a hex digit */
	if(c >= '0' && c <= '9')
		digit = c - '0';
	else if(c >= 'a' && c <= 'f')
		digit = 10 + (c - 'a');
	else if(c >= 'A' && c <= 'F')
		digit = 10 + (c - 'A');
	return digit;
}
/*
 * Combine the two hex digits s[0] (high nibble) and s[1] (low nibble)
 * into one value in the range 0-255.  Both bytes are read unconditionally.
 */
static int
unhex(char *s)
{
	int hi, lo;

	hi = unhex1(s[0]);
	lo = unhex1(s[1]);
	return hi*16 + lo;
}
23 int
24 _decqp(uchar *out, int lim, char *in, int n, int underscores)
25 {
26 char *p, *ep;
27 uchar *eout, *out0;
29 out0 = out;
30 eout = out+lim;
31 for(p=in, ep=in+n; p<ep && out<eout; ){
32 if(underscores && *p == '_'){
33 *out++ = ' ';
34 p++;
35 }
36 else if(*p == '='){
37 if(p+1 >= ep)
38 break;
39 if(*(p+1) == '\n'){
40 p += 2;
41 continue;
42 }
43 if(p+3 > ep)
44 break;
45 *out++ = unhex(p+1);
46 p += 3;
47 }else
48 *out++ = *p++;
49 }
50 return out-out0;
51 }
53 int
54 decqp(uchar *out, int lim, char *in, int n)
55 {
56 return _decqp(out, lim, in, n, 0);
57 }
59 char*
60 decode(int kind, char *s, int *len)
61 {
62 char *t;
63 int l;
65 if(s == nil)
66 return s;
67 switch(kind){
68 case QuotedPrintable:
69 case QuotedPrintableU:
70 l = strlen(s)+1;
71 t = emalloc(l);
72 l = _decqp((uchar*)t, l, s, l-1, kind==QuotedPrintableU);
73 *len = l;
74 t[l] = 0;
75 return t;
77 case Base64:
78 l = strlen(s)+1;
79 t = emalloc(l);
80 l = dec64((uchar*)t, l, s, l-1);
81 *len = l;
82 t[l] = 0;
83 return t;
85 default:
86 *len = strlen(s);
87 return estrdup(s);
88 }
89 }
/*
 * Character sets that are converted by running tcs: each row pairs the
 * MIME charset name with the name passed to "tcs -f".
 */
struct {
	char *mime;	/* charset name as written in the message */
	char *tcs;	/* corresponding name given to tcs */
} tcstab[] = {
	{ "iso-8859-2",		"8859-2" },
	{ "iso-8859-3",		"8859-3" },
	{ "iso-8859-4",		"8859-4" },
	{ "iso-8859-5",		"8859-5" },
	{ "iso-8859-6",		"8859-6" },
	{ "iso-8859-7",		"8859-7" },
	{ "iso-8859-8",		"8859-8" },
	{ "iso-8859-9",		"8859-9" },
	{ "iso-8859-10",	"8859-10" },
	{ "iso-8859-15",	"8859-15" },
	{ "big5",		"big5" },
	{ "iso-2022-jp",	"jis-kanji" },
	{ "windows-1250",	"windows-1250" },
	{ "windows-1251",	"windows-1251" },
	{ "windows-1252",	"windows-1252" },
	{ "windows-1253",	"windows-1253" },
	{ "windows-1254",	"windows-1254" },
	{ "windows-1255",	"windows-1255" },
	{ "windows-1256",	"windows-1256" },
	{ "windows-1257",	"windows-1257" },
	{ "windows-1258",	"windows-1258" },
	{ "koi8-r",		"koi8" }
};
/* Argument bundle passed to twriter. */
typedef struct Writeargs Writeargs;
struct Writeargs
{
	int fd;		/* descriptor twriter writes to, then closes */
	char *s;	/* NUL-terminated text to write; freed by twriter */
};
126 static void
127 twriter(void *v)
129 Writeargs *w;
131 w = v;
132 write(w->fd, w->s, strlen(w->s));
133 close(w->fd);
134 free(w->s);
135 free(w);
138 char*
139 tcs(char *charset, char *s)
141 char *buf;
142 int i, n, nbuf;
143 int fd[3], p[2], pp[2];
144 uchar *us;
145 char *t, *u;
146 char *argv[4];
147 Rune r;
148 Writeargs *w;
150 if(s == nil || charset == nil || *s == 0)
151 return s;
153 if(cistrcmp(charset, "utf-8") == 0)
154 return s;
155 if(cistrcmp(charset, "iso-8859-1") == 0 || cistrcmp(charset, "us-ascii") == 0){
156 latin1:
157 n = 0;
158 for(us=(uchar*)s; *us; us++)
159 n += runelen(*us);
160 n++;
161 t = emalloc(n);
162 for(us=(uchar*)s, u=t; *us; us++){
163 r = *us;
164 u += runetochar(u, &r);
166 *u = 0;
167 free(s);
168 return t;
170 for(i=0; i<nelem(tcstab); i++)
171 if(cistrcmp(charset, tcstab[i].mime) == 0)
172 goto tcs;
173 goto latin1;
175 tcs:
176 argv[0] = "tcs";
177 argv[1] = "-f";
178 argv[2] = charset;
179 argv[3] = nil;
181 if(pipe(p) < 0 || pipe(pp) < 0)
182 sysfatal("pipe: %r");
183 fd[0] = p[0];
184 fd[1] = pp[0];
185 fd[2] = dup(2, -1);
186 if(threadspawnl(fd, "tcs", "tcs", "-f", tcstab[i].tcs, nil) < 0){
187 close(p[0]);
188 close(p[1]);
189 close(pp[0]);
190 close(pp[1]);
191 close(fd[2]);
192 goto latin1;
194 close(p[0]);
195 close(pp[0]);
197 nbuf = UTFmax*strlen(s)+100; /* just a guess at worst case */
198 buf = emalloc(nbuf);
200 w = emalloc(sizeof *w);
201 w->fd = p[1];
202 w->s = estrdup(s);
203 proccreate(twriter, w, STACK);
205 n = readn(pp[1], buf, nbuf-1);
206 close(pp[1]);
207 if(n <= 0){
208 free(buf);
209 goto latin1;
211 buf[n] = 0;
212 free(s);
213 s = estrdup(buf);
214 free(buf);
215 return s;
218 char*
219 unrfc2047(char *s)
221 char *p, *q, *t, *u, *v;
222 int len;
223 Rune r;
224 Fmt fmt;
226 if(s == nil)
227 return nil;
229 if(strstr(s, "=?") == nil)
230 return s;
232 fmtstrinit(&fmt);
233 for(p=s; *p; ){
234 /* =?charset?e?text?= */
235 if(*p=='=' && *(p+1)=='?'){
236 p += 2;
237 q = strchr(p, '?');
238 if(q == nil)
239 goto emit;
240 q++;
241 if(*q == '?' || *(q+1) != '?')
242 goto emit;
243 t = q+2;
244 u = strchr(t, '?');
245 if(u == nil || *(u+1) != '=')
246 goto emit;
247 switch(*q){
248 case 'q':
249 case 'Q':
250 *u = 0;
251 v = decode(QuotedPrintableU, t, &len);
252 break;
253 case 'b':
254 case 'B':
255 *u = 0;
256 v = decode(Base64, t, &len);
257 break;
258 default:
259 goto emit;
261 *(q-1) = 0;
262 v = tcs(p, v);
263 fmtstrcpy(&fmt, v);
264 free(v);
265 p = u+2;
267 emit:
268 p += chartorune(&r, p);
269 fmtrune(&fmt, r);
271 p = fmtstrflush(&fmt);
272 if(p == nil)
273 sysfatal("out of memory");
274 free(s);
275 return p;
278 #ifdef TEST
/*
 * Inputs for the TEST driver: one plain string followed by Q- and
 * B-encoded words in several character sets.
 */
char *test[] =
{
	"hello world",
	"hello =?iso-8859-1?q?this is some text?=",
	"=?US-ASCII?Q?Keith_Moore?=",
	"=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=",
	"=?ISO-8859-1?Q?Andr=E9?= Pirard",
	"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",
	"=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
	"=?ISO-8859-1?Q?Olle_J=E4rnefors?=",
	"=?iso-2022-jp?B?GyRCTTVKISRKP006SiRyS34kPyQ3JEZKcz03JCIkahsoQg==?=",
	"=?UTF-8?B?Ik5pbHMgTy4gU2Vsw6VzZGFsIg==?="
};
293 void
294 threadmain(int argc, char **argv)
296 int i;
298 for(i=0; i<nelem(test); i++)
299 print("%s\n\t%s\n", test[i], unrfc2047(estrdup(test[i])));
300 threadexitsall(0);
303 #endif