/* Blob */
1 /* Quick and dirty RFC 2047 */3 #include "a.h"5 static int6 unhex1(char c)7 {8 if('0' <= c && c <= '9')9 return c-'0';10 if('a' <= c && c <= 'f')11 return c-'a'+10;12 if('A' <= c && c <= 'F')13 return c-'A'+10;14 return 15;15 }17 static int18 unhex(char *s)19 {20 return unhex1(s[0])*16+unhex1(s[1]);21 }23 int24 _decqp(uchar *out, int lim, char *in, int n, int underscores)25 {26 char *p, *ep;27 uchar *eout, *out0;29 out0 = out;30 eout = out+lim;31 for(p=in, ep=in+n; p<ep && out<eout; ){32 if(underscores && *p == '_'){33 *out++ = ' ';34 p++;35 }36 else if(*p == '='){37 if(p+1 >= ep)38 break;39 if(*(p+1) == '\n'){40 p += 2;41 continue;42 }43 if(p+3 > ep)44 break;45 *out++ = unhex(p+1);46 p += 3;47 }else48 *out++ = *p++;49 }50 return out-out0;51 }53 int54 decqp(uchar *out, int lim, char *in, int n)55 {56 return _decqp(out, lim, in, n, 0);57 }59 char*60 decode(int kind, char *s, int *len)61 {62 char *t;63 int l;65 if(s == nil)66 return s;67 switch(kind){68 case QuotedPrintable:69 case QuotedPrintableU:70 l = strlen(s)+1;71 t = emalloc(l);72 l = _decqp((uchar*)t, l, s, l-1, kind==QuotedPrintableU);73 *len = l;74 t[l] = 0;75 return t;77 case Base64:78 l = strlen(s)+1;79 t = emalloc(l);80 l = dec64((uchar*)t, l, s, l-1);81 *len = l;82 t[l] = 0;83 return t;85 default:86 *len = strlen(s);87 return estrdup(s);88 }89 }91 struct {92 char *mime;93 char *tcs;94 } tcstab[] = {95 "iso-8859-2", "8859-2",96 "iso-8859-3", "8859-3",97 "iso-8859-4", "8859-4",98 "iso-8859-5", "8859-5",99 "iso-8859-6", "8859-6",100 "iso-8859-7", "8859-7",101 "iso-8859-8", "8859-8",102 "iso-8859-9", "8859-9",103 "iso-8859-10", "8859-10",104 "iso-8859-15", "8859-15",105 "big5", "big5",106 "iso-2022-jp", "jis-kanji",107 "windows-1251", "cp1251",108 "koi8-r", "koi8"109 };111 char*112 tcs(char *charset, char *s)113 {114 static char buf[4096];115 int i, n;116 int fd[3], p[2], pp[2];117 uchar *us;118 char *t, *u;119 char *argv[4];120 Rune r;122 if(s == nil || charset == nil || *s == 0)123 return s;125 
if(cistrcmp(charset, "utf-8") == 0)126 return s;127 if(cistrcmp(charset, "iso-8859-1") == 0 || cistrcmp(charset, "us-ascii") == 0){128 latin1:129 n = 0;130 for(us=(uchar*)s; *us; us++)131 n += runelen(*us);132 n++;133 t = emalloc(n);134 for(us=(uchar*)s, u=t; *us; us++){135 r = *us;136 u += runetochar(u, &r);137 }138 *u = 0;139 free(s);140 return t;141 }142 for(i=0; i<nelem(tcstab); i++)143 if(cistrcmp(charset, tcstab[i].mime) == 0)144 goto tcs;145 goto latin1;147 tcs:148 argv[0] = "tcs";149 argv[1] = "-f";150 argv[2] = charset;151 argv[3] = nil;153 if(pipe(p) < 0 || pipe(pp) < 0)154 sysfatal("pipe: %r");155 fd[0] = p[0];156 fd[1] = pp[0];157 fd[2] = dup(2, -1);158 if(threadspawnl(fd, "tcs", "tcs", "-f", tcstab[i].tcs, nil) < 0){159 close(p[0]);160 close(p[1]);161 close(pp[0]);162 close(pp[1]);163 close(fd[2]);164 goto latin1;165 }166 close(p[0]);167 close(pp[0]);168 write(p[1], s, strlen(s));169 close(p[1]);170 n = readn(pp[1], buf, sizeof buf-1);171 close(pp[1]);172 if(n <= 0)173 goto latin1;174 free(s);175 buf[n] = 0;176 return estrdup(buf);177 }179 char*180 unrfc2047(char *s)181 {182 char *p, *q, *t, *u, *v;183 int len;184 Rune r;185 Fmt fmt;187 if(s == nil)188 return nil;190 if(strstr(s, "=?") == nil)191 return s;193 fmtstrinit(&fmt);194 for(p=s; *p; ){195 /* =?charset?e?text?= */196 if(*p=='=' && *(p+1)=='?'){197 p += 2;198 q = strchr(p, '?');199 if(q == nil)200 goto emit;201 q++;202 if(*q == '?' 
|| *(q+1) != '?')203 goto emit;204 t = q+2;205 u = strchr(t, '?');206 if(u == nil || *(u+1) != '=')207 goto emit;208 switch(*q){209 case 'q':210 case 'Q':211 *u = 0;212 v = decode(QuotedPrintableU, t, &len);213 break;214 case 'b':215 case 'B':216 *u = 0;217 v = decode(Base64, t, &len);218 break;219 default:220 goto emit;221 }222 *(q-1) = 0;223 v = tcs(p, v);224 fmtstrcpy(&fmt, v);225 free(v);226 p = u+2;227 }228 emit:229 p += chartorune(&r, p);230 fmtrune(&fmt, r);231 }232 p = fmtstrflush(&fmt);233 if(p == nil)234 sysfatal("out of memory");235 free(s);236 return p;237 }239 #ifdef TEST240 char *test[] =241 {242 "hello world",243 "hello =?iso-8859-1?q?this is some text?=",244 "=?US-ASCII?Q?Keith_Moore?=",245 "=?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?=",246 "=?ISO-8859-1?Q?Andr=E9?= Pirard",247 "=?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?=",248 "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?=",249 "=?ISO-8859-1?Q?Olle_J=E4rnefors?=",250 "=?iso-2022-jp?B?GyRCTTVKISRKP006SiRyS34kPyQ3JEZKcz03JCIkahsoQg==?=",251 "=?UTF-8?B?Ik5pbHMgTy4gU2Vsw6VzZGFsIg==?="252 };254 void255 threadmain(int argc, char **argv)256 {257 int i;259 for(i=0; i<nelem(test); i++)260 print("%s\n\t%s\n", test[i], unrfc2047(estrdup(test[i])));261 threadexitsall(0);262 }264 #endif