2 * this is a filter that changes mime types and names of
3 * suspect executable attachments.
11 typedef struct Mtype Mtype;
12 typedef struct Hdef Hdef;
13 typedef struct Hline Hline;
14 typedef struct Part Part;
16 static int badfile(char *name); /* result is stored in p->badfile by setfilename */
17 static int badtype(char *type); /* checked by ctype on the parsed content type */
18 static void ctype(Part*, Hdef*, char*); /* parse content type header */
19 static void cencoding(Part*, Hdef*, char*); /* parse content encoding header */
20 static void cdisposition(Part*, Hdef*, char*); /* parse content disposition header */
21 static int decquoted(char *out, char *in, char *e); /* used by tokenconvert for "q?" encoded tokens */
22 static char* getstring(char *p, String *s, int dolower); /* parse a possibly quoted string from a header */
23 static void init_hdefs(void);
24 static int isattribute(char **pp, char *attr);
25 static int latin1toutf(char *out, char *in, char *e); /* translate latin1 directly to utf */
26 static String* mkboundary(void); /* create a new boundary */
27 static Part* part(Part *pp); /* parse a part; pp is presumably the parent part -- confirm at definition */
28 static Part* passbody(Part *p, int dobound); /* pass a body through; returns the ancestor whose boundary was hit, nil on EOF */
29 static void passnotheader(void);
30 static void passunixheader(void); /* pass unix header lines */
31 static Part* problemchild(Part *p); /* emit a multipart Part that explains the problem */
32 static void readheader(Part *p); /* read and parse a complete header */
33 static Hline* readhl(void); /* read a possibly multiline header line */
34 static void readmtypes(void);
35 static int save(Part *p, char *file); /* save the message somewhere */
36 static void setfilename(Part *p, char *name); /* parse name into p->filename and set p->badfile */
37 static char* skiptosemi(char *p);
38 static char* skipwhite(char *p);
39 static String* tokenconvert(String *t); /* convert to UTF if need be */
40 static void writeheader(Part *p, int); /* write out a complete header */
49 // disposition possibilities
59 * a message part; either the whole message or a subpart
63 Part *pp; /* parent part */
64 Hline *hl; /* linked list of header lines */
69 String *boundary; /* boundary for multiparts */
71 String *charset; /* character set */
72 String *type; /* content type */
73 String *filename; /* file name */
74 Biobuf *tmpbuf; /* diversion input buffer */
78 * a (multi)line header
87 * header definitions for parsing
92 void (*f)(Part*, Hdef*, char*);
98 { "content-type:", ctype, },
99 { "content-transfer-encoding:", cencoding, },
100 { "content-disposition:", cdisposition, },
105 * acceptable content types and their extensions
109 char *ext; /* extension */
110 char *gtype; /* generic content type */
111 char *stype; /* specific content type */
120 main(int argc, char **argv)
133 Binit(&in, 0, OREAD);
134 Binit(&out, 1, OWRITE);
139 /* pass through our standard 'From ' line */
142 /* parse with the top level part */
151 postnote(PNGROUP, getpid(), "mail refused: we don't accept executable attachments");
152 exits("mail refused: we don't accept executable attachments");
157 * parse a part; returns the ancestor whose boundary terminated
158 * this part or nil on EOF.
165 p = mallocz(sizeof *p, 1);
169 if(p->boundary != nil){
170 /* the format of a multipart part is always:
172 * null or ignored body
190 /* may still be multipart if this is a forwarded message */
191 if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
192 /* the format of forwarded message is:
202 * This is the meat. This may be an executable.
203 * if so, wrap it and change its type
205 if(p->badtype || p->badfile){
209 syslog(0, "vf", "vf rejected %s %s", p->type?s_to_c(p->type):"?",
210 p->filename?s_to_c(p->filename):"?");
211 fprint(2, "The mail contained an executable attachment.\n");
212 fprint(2, "We refuse all mail containing such.\n");
215 np = problemchild(p);
218 /* if problemchild returns p, it turns out p is okay: fall thru */
221 return passbody(p, 1);
227 * read and parse a complete header
243 for(hd = hdefs; hd->type != nil; hd++){
244 if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
245 (*hd->f)(p, hd, s_to_c(hl->s));
253 * read a possibly multiline header line
263 p = Brdline(&in, '\n');
267 if(memchr(p, ':', n) == nil){
271 s = s_nappend(s_new(), p, n);
273 p = Brdline(&in, '\n');
277 if(*p != ' ' && *p != '\t'){
281 s = s_nappend(s, p, n);
283 hl = malloc(sizeof *hl);
290 * write out a complete header
293 writeheader(Part *p, int xfree)
297 for(hl = p->hl; hl != nil; hl = next){
298 Bprint(&out, "%s", s_to_c(hl->s));
310 * pass a body through. return if we hit one of our ancestors'
311 * boundaries or EOF. if we hit a boundary, return a pointer to
312 * that ancestor. if we hit EOF, return nil.
315 passbody(Part *p, int dobound)
324 cp = Brdline(b, '\n');
333 cp = Brdline(b, '\n');
337 for(pp = p; pp != nil; pp = pp->pp)
338 if(pp->boundary != nil
339 && strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
341 Bwrite(&out, cp, Blinelen(b));
343 Bseek(b, -Blinelen(b), 1);
346 Bwrite(&out, cp, Blinelen(b));
352 * save the message somewhere
354 static vlong bodyoff; /* clumsy hack */
356 save(Part *p, char *file)
362 memset(&out, 0, sizeof(out));
364 fd = open(file, OWRITE);
368 Binit(&out, fd, OWRITE);
371 Bprint(&out, "From virusfilter %s\n", cp);
373 bodyoff = Boffset(&out);
379 memset(&out, 0, sizeof out);
380 Binit(&out, 1, OWRITE);
385 * write to a file but save the fd for passbody.
393 strcpy(buf, "/tmp/vf.XXXXXXXXXXX");
395 if((fd = create(name, OWRITE|OEXCL, 0666)) < 0){
396 fprint(2, "error creating temporary file: %r\n");
400 if(save(p, name) < 0){
401 fprint(2, "error saving temporary file: %r\n");
405 fprint(2, "error in savetmp: already have tmp file!\n");
408 p->tmpbuf = Bopen(name, OREAD|ORCLOSE);
409 if(p->tmpbuf == nil){
410 fprint(2, "error reading tempoary file: %r\n");
413 Bseek(p->tmpbuf, bodyoff, 0);
418 * XXX save the decoded file, run 9 unzip -tf on it, and then
419 * look at the file list.
428 if(access("/mail/lib/validateattachment", AEXEC) < 0)
432 fprint(2, "run checker %s\n", name);
433 switch(pid = fork()){
435 sysfatal("fork: %r");
438 execl("/mail/lib/validateattachment", "validateattachment", name, nil);
439 _exits("exec failed");
443 * Okay to return on error - will let mail through but wrapped.
447 syslog(0, "mail", "vf wait failed: %r");
451 syslog(0, "mail", "vf wrong pid %d != %d", w->pid, pid);
455 name = s_to_c(p->filename);
456 if(strstr(w->msg, "discard")){
457 syslog(0, "mail", "vf validateattachment rejected %s", name);
460 if(strstr(w->msg, "accept")){
461 syslog(0, "mail", "vf validateattachment accepted %s", name);
469 * emit a multipart Part that explains the problem
472 problemchild(Part *p)
480 * We don't know whether the attachment is okay.
481 * If there's an external checker, let it have a crack at it.
483 if(runchecker(p) > 0)
487 syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
488 p->filename?s_to_c(p->filename):"?");
491 boundary = mkboundary();
493 /* print out non-mime headers */
494 for(hl = p->hl; hl != nil; hl = hl->next)
495 if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
496 Bprint(&out, "%s", s_to_c(hl->s));
499 /* add in our own multipart headers and message */
500 Bprint(&out, "Content-Type: multipart/mixed;\n");
501 Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
502 Bprint(&out, "Content-Disposition: inline\n");
504 Bprint(&out, "This is a multi-part message in MIME format.\n");
505 Bprint(&out, "--%s\n", s_to_c(boundary));
506 Bprint(&out, "Content-Disposition: inline\n");
507 Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
508 Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
510 Bprint(&out, "from postmaster@%s:\n", sysname());
511 Bprint(&out, "The following attachment had content that we can't\n");
512 Bprint(&out, "prove to be harmless. To avoid possible automatic\n");
513 Bprint(&out, "execution, we changed the content headers.\n");
514 Bprint(&out, "The original header was:\n\n");
516 /* print out original header lines */
517 for(hl = p->hl; hl != nil; hl = hl->next)
518 if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
519 Bprint(&out, "\t%s", s_to_c(hl->s));
520 Bprint(&out, "--%s\n", s_to_c(boundary));
522 /* change file name */
524 s_append(p->filename, ".suspect");
526 p->filename = s_copy("file.suspect");
528 /* print out new header */
529 Bprint(&out, "Content-Type: application/octet-stream\n");
530 Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
535 Bprint(&out, "Content-Transfer-Encoding: base64\n");
538 Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
547 /* add the new boundary and the original terminator */
548 Bprint(&out, "--%s--\n", s_to_c(boundary));
549 if(np && np->boundary){
550 cp = Brdline(&in, '\n');
551 Bwrite(&out, cp, Blinelen(&in));
554 fprint(2, "a %p\n", np);
559 isattribute(char **pp, char *attr)
566 if(cistrncmp(p, attr, n) != 0)
580 * parse content type header
583 ctype(Part *p, Hdef *h, char *cp)
591 cp = getstring(cp, p->type, 1);
592 if(badtype(s_to_c(p->type)))
596 if(isattribute(&cp, "boundary")){
598 cp = getstring(cp, s, 0);
599 p->boundary = s_reset(p->boundary);
600 s_append(p->boundary, "--");
601 s_append(p->boundary, s_to_c(s));
602 p->blen = s_len(p->boundary);
604 } else if(cistrncmp(cp, "multipart", 9) == 0){
606 * the first unbounded part of a multipart message,
607 * the preamble, is not displayed or saved
609 } else if(isattribute(&cp, "name")){
611 } else if(isattribute(&cp, "charset")){
612 if(p->charset == nil)
613 p->charset = s_new();
614 cp = getstring(cp, s_reset(p->charset), 0);
622 * parse content encoding header
625 cencoding(Part *m, Hdef *h, char *p)
629 if(cistrncmp(p, "base64", 6) == 0)
630 m->encoding = Ebase64;
631 else if(cistrncmp(p, "quoted-printable", 16) == 0)
632 m->encoding = Equoted;
636 * parse content disposition header
639 cdisposition(Part *p, Hdef *h, char *cp)
644 if(cistrncmp(cp, "inline", 6) == 0){
645 p->disposition = Dinline;
646 } else if(cistrncmp(cp, "attachment", 10) == 0){
647 p->disposition = Dfile;
648 } else if(cistrncmp(cp, "filename=", 9) == 0){
658 setfilename(Part *p, char *name)
660 if(p->filename == nil)
661 p->filename = s_new();
662 getstring(name, s_reset(p->filename), 0);
663 p->filename = tokenconvert(p->filename);
664 p->badfile = badfile(s_to_c(p->filename));
678 while(*p && *p != ';')
680 while(*p == ';' || isspace(*p))
686 * parse a possibly double-quoted string from a header. A quoted
687 * string ends at the closing quote; otherwise white space or ';' terminates it.
690 getstring(char *p, String *s, int dolower)
696 for(;*p && *p != '"'; p++)
698 s_putc(s, tolower(*p));
708 for(; *p && !isspace(*p) && *p != ';'; p++)
710 s_putc(s, tolower(*p));
728 for(hd = hdefs; hd->type != nil; hd++)
729 hd->len = strlen(hd->type);
733 * create a new boundary
743 srand((time(0)<<16)|getpid());
746 strcpy(buf, "upas-");
747 for(i = 5; i < sizeof(buf)-1; i++)
748 buf[i] = 'a' + nrand(26);
754 * skip blank lines till header
762 while((cp = Brdline(&in, '\n')) != nil){
764 for(i = 0; i < n-1; i++)
765 if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
774 * pass unix header lines
782 while((p = Brdline(&in, '\n')) != nil){
784 if(strncmp(p, "From ", 5) != 0){
804 b = Bopen(unsharp("#9/sys/lib/mimetype"), OREAD);
809 while((p = Brdline(b, '\n')) != nil){
812 p[Blinelen(b)-1] = 0;
813 if(tokenize(p, f, nelem(f)) < 5)
815 m = mallocz(sizeof *m, 1);
818 m->ext = strdup(f[0]);
821 m->gtype = strdup(f[1]);
824 m->stype = strdup(f[2]);
844 * if the class is 'm' or 'y', accept it
845 * if the class is 'p' check a previous extension
846 * otherwise, filename is bad
855 p = strrchr(name, '.');
859 for(m = mtypes; m != nil; m = m->next)
860 if(cistrcmp(p, m->ext) == 0){
880 * if the class is 'm' or 'y' or 'p', accept it
881 * otherwise, type is bad
893 fix = s = strchr(type, '/');
899 for(m = mtypes; m != nil; m = m->next){
900 if(cistrcmp(type, m->gtype) != 0)
902 if(cistrcmp(s, m->stype) != 0)
919 /* rfc2047 non-ascii */
920 typedef struct Charset Charset;
927 { "us-ascii", 8, 1, },
929 { "iso-8859-1", 10, 1, },
933 * convert to UTF if need be
936 tokenconvert(String *t)
948 if(token[0] != '=' || token[1] != '?' ||
949 token[len-2] != '?' || token[len-1] != '=')
954 // bail if we don't understand the character set
955 for(i = 0; i < nelem(charsets); i++)
956 if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
957 if(token[charsets[i].len] == '?'){
958 token += charsets[i].len + 1;
961 if(i >= nelem(charsets))
964 // bail if it doesn't fit
965 if(strlen(token) > sizeof(decoded)-1)
968 // bail if we don't understand the encoding
969 if(cistrncmp(token, "b?", 2) == 0){
971 len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
973 } else if(cistrncmp(token, "q?", 2) == 0){
975 len = decquoted(decoded, token, e);
976 if(len > 0 && decoded[len-1] == '\n')
983 switch(charsets[i].convert){
989 latin1toutf(utfbuf, decoded, decoded+len);
1014 memset(tableqp, 0, 256);
1015 for(c = ' '; c <= '<'; c++)
1017 for(c = '>'; c <= '~'; c++)
1019 tableqp['\t'] = Self;
1026 if(x >= '0' && x <= '9')
1028 if(x >= 'A' && x <= 'F')
1029 return (x - 'A') + 10;
1030 if(x >= 'a' && x <= 'f')
1031 return (x - 'a') + 10;
1036 decquotedline(char *out, char *in, char *e)
1040 /* dump trailing white space */
1041 while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
1044 /* trailing '=' means no newline */
1058 c = hex2int(*in++)<<4;
1059 c |= hex2int(*in++);
1072 decquoted(char *out, char *in, char *e)
1076 if(tableqp[' '] == 0)
1080 while((nl = strchr(in, '\n')) != nil && nl < e){
1081 p = decquotedline(p, in, nl);
1085 p = decquotedline(p, in, e-1);
1087 // make sure we end with a new line
1096 /* translate latin1 directly since it fits neatly in utf */
1098 latin1toutf(char *out, char *in, char *e)
1104 for(; in < e; in++){
1106 p += runetochar(p, &r);