Blob


/* Quick and dirty RFC 2047 (MIME encoded-word) decoder */
3 #include "a.h"
/*
 * Return the value of the single hexadecimal digit c.
 * Upper and lower case are both accepted; any character that is
 * not a hex digit yields 15.
 */
static int
unhex1(char c)
{
	int v;

	v = 15;		/* result for anything that is not a hex digit */
	if(c >= '0' && c <= '9')
		v = c - '0';
	else if(c >= 'a' && c <= 'f')
		v = 10 + (c - 'a');
	else if(c >= 'A' && c <= 'F')
		v = 10 + (c - 'A');
	return v;
}
/*
 * Combine the two hex digits s[0] (high nibble) and s[1] (low nibble)
 * into one value.  The caller must make sure both bytes are readable.
 */
static int
unhex(char *s)
{
	int hi, lo;

	hi = unhex1(s[0]);
	lo = unhex1(s[1]);
	return hi*16 + lo;
}
23 int
24 _decqp(uchar *out, int lim, char *in, int n, int underscores)
25 {
26 char *p, *ep;
27 uchar *eout, *out0;
29 out0 = out;
30 eout = out+lim;
31 for(p=in, ep=in+n; p<ep && out<eout; ){
32 if(underscores && *p == '_'){
33 *out++ = ' ';
34 p++;
35 }
36 else if(*p == '='){
37 if(p+1 >= ep)
38 break;
39 if(*(p+1) == '\n'){
40 p += 2;
41 continue;
42 }
43 if(p+3 > ep)
44 break;
45 *out++ = unhex(p+1);
46 p += 3;
47 }else
48 *out++ = *p++;
49 }
50 return out-out0;
51 }
53 int
54 decqp(uchar *out, int lim, char *in, int n)
55 {
56 return _decqp(out, lim, in, n, 0);
57 }
59 char*
60 decode(int kind, char *s, int *len)
61 {
62 char *t;
63 int l;
65 if(s == nil)
66 return s;
67 switch(kind){
68 case QuotedPrintable:
69 case QuotedPrintableU:
70 l = strlen(s)+1;
71 t = emalloc(l);
72 l = _decqp((uchar*)t, l, s, l-1, kind==QuotedPrintableU);
73 *len = l;
74 t[l] = 0;
75 return t;
77 case Base64:
78 l = strlen(s)+1;
79 t = emalloc(l);
80 l = dec64((uchar*)t, l, s, l-1);
81 *len = l;
82 t[l] = 0;
83 return t;
85 default:
86 *len = strlen(s);
87 return estrdup(s);
88 }
89 }
/*
 * MIME charset names and the corresponding character-set names
 * handed to the tcs command with -f.
 */
struct {
	char *mime;	/* name as it appears in the encoded-word */
	char *tcs;	/* name given to tcs -f */
} tcstab[] = {
	{ "iso-8859-2",		"8859-2" },
	{ "iso-8859-3",		"8859-3" },
	{ "iso-8859-4",		"8859-4" },
	{ "iso-8859-5",		"8859-5" },
	{ "iso-8859-6",		"8859-6" },
	{ "iso-8859-7",		"8859-7" },
	{ "iso-8859-8",		"8859-8" },
	{ "iso-8859-9",		"8859-9" },
	{ "iso-8859-10",	"8859-10" },
	{ "iso-8859-15",	"8859-15" },
	{ "big5",		"big5" },
	{ "iso-2022-jp",	"jis-kanji" },
	{ "windows-1251",	"cp1251" },
	{ "koi8-r",		"koi8" },
};
111 char*
112 tcs(char *charset, char *s)
114 static char buf[4096];
115 int i, n;
116 int fd[3], p[2], pp[2];
117 uchar *us;
118 char *t, *u;
119 char *argv[4];
120 Rune r;
122 if(s == nil || charset == nil || *s == 0)
123 return s;
125 if(cistrcmp(charset, "utf-8") == 0)
126 return s;
127 if(cistrcmp(charset, "iso-8859-1") == 0 || cistrcmp(charset, "us-ascii") == 0){
128 latin1:
129 n = 0;
130 for(us=(uchar*)s; *us; us++)
131 n += runelen(*us);
132 n++;
133 t = emalloc(n);
134 for(us=(uchar*)s, u=t; *us; us++){
135 r = *us;
136 u += runetochar(u, &r);
138 *u = 0;
139 free(s);
140 return t;
142 for(i=0; i<nelem(tcstab); i++)
143 if(cistrcmp(charset, tcstab[i].mime) == 0)
144 goto tcs;
145 goto latin1;
147 tcs:
148 argv[0] = "tcs";
149 argv[1] = "-f";
150 argv[2] = charset;
151 argv[3] = nil;
153 if(pipe(p) < 0 || pipe(pp) < 0)
154 sysfatal("pipe: %r");
155 fd[0] = p[0];
156 fd[1] = pp[0];
157 fd[2] = dup(2, -1);
158 if(threadspawnl(fd, "tcs", "tcs", "-f", tcstab[i].tcs, nil) < 0){
159 close(p[0]);
160 close(p[1]);
161 close(pp[0]);
162 close(pp[1]);
163 close(fd[2]);
164 goto latin1;
166 close(p[0]);
167 close(pp[0]);
168 write(p[1], s, strlen(s));
169 close(p[1]);
170 n = readn(pp[1], buf, sizeof buf-1);
171 close(pp[1]);
172 if(n <= 0)
173 goto latin1;
174 free(s);
175 buf[n] = 0;
176 return estrdup(buf);
179 char*
180 unrfc2047(char *s)
182 char *p, *q, *t, *u, *v;
183 int len;
184 Rune r;
185 Fmt fmt;
187 if(s == nil)
188 return nil;
190 if(strstr(s, "=?") == nil)
191 return s;
193 fmtstrinit(&fmt);
194 for(p=s; *p; ){
195 /* =?charset?e?text?= */
196 if(*p=='=' && *(p+1)=='?'){
197 p += 2;
198 q = strchr(p, '?');
199 if(q == nil)
200 goto emit;
201 q++;
202 if(*q == '?' || *(q+1) != '?')
203 goto emit;
204 t = q+2;
205 u = strchr(t, '?');
206 if(u == nil || *(u+1) != '=')
207 goto emit;
208 switch(*q){
209 case 'q':
210 case 'Q':
211 *u = 0;
212 v = decode(QuotedPrintableU, t, &len);
213 break;
214 case 'b':
215 case 'B':
216 *u = 0;
217 v = decode(Base64, t, &len);
218 break;
219 default:
220 goto emit;
222 *(q-1) = 0;
223 v = tcs(p, v);
224 fmtstrcpy(&fmt, v);
225 free(v);
226 p = u+2;
228 emit:
229 p += chartorune(&r, p);
230 fmtrune(&fmt, r);
232 p = fmtstrflush(&fmt);
233 if(p == nil)
234 sysfatal("out of memory");
235 free(s);
236 return p;
239 #ifdef TEST
/* Sample header values run through unrfc2047 by the TEST driver. */
char *test[] =
{
	"hello world",
	"hello =?iso-8859-1?q?this is some text?=",
	"=?US-ASCII?Q?Keith_Moore?=",
	"=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=",
	"=?ISO-8859-1?Q?Andr=E9?= Pirard",
	"=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",
	"=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",
	"=?ISO-8859-1?Q?Olle_J=E4rnefors?=",
	"=?iso-2022-jp?B?GyRCTTVKISRKP006SiRyS34kPyQ3JEZKcz03JCIkahsoQg==?=",
	"=?UTF-8?B?Ik5pbHMgTy4gU2Vsw6VzZGFsIg==?="
};
254 void
255 threadmain(int argc, char **argv)
257 int i;
259 for(i=0; i<nelem(test); i++)
260 print("%s\n\t%s\n", test[i], unrfc2047(estrdup(test[i])));
261 threadexitsall(0);
264 #endif