2 * this is a filter that changes mime types and names of
3 * suspect executable attachments.
16 typedef struct Mtype Mtype;
17 typedef struct Hdef Hdef;
18 typedef struct Hline Hline;
19 typedef struct Part Part;
21 static int badfile(char *name);
22 static int badtype(char *type);
23 static void ctype(Part*, Hdef*, char*);
24 static void cencoding(Part*, Hdef*, char*);
25 static void cdisposition(Part*, Hdef*, char*);
26 static int decquoted(char *out, char *in, char *e);
27 static char* getstring(char *p, String *s, int dolower);
28 static void init_hdefs(void);
29 static int isattribute(char **pp, char *attr);
30 static int latin1toutf(char *out, char *in, char *e);
31 static String* mkboundary(void);
32 static Part* part(Part *pp);
33 static Part* passbody(Part *p, int dobound);
34 static void passnotheader(void);
35 static void passunixheader(void);
36 static Part* problemchild(Part *p);
37 static void readheader(Part *p);
38 static Hline* readhl(void);
39 static void readmtypes(void);
40 static int save(Part *p, char *file);
41 static void setfilename(Part *p, char *name);
42 static char* skiptosemi(char *p);
43 static char* skipwhite(char *p);
44 static String* tokenconvert(String *t);
45 static void writeheader(Part *p, int);
54 /* disposition possibilities */
64 * a message part; either the whole message or a subpart
68 Part *pp; /* parent part */
69 Hline *hl; /* linked list of header lines */
74 String *boundary; /* boundary for multiparts */
76 String *charset; /* character set */
77 String *type; /* content type */
78 String *filename; /* file name */
79 Biobuf *tmpbuf; /* diversion input buffer */
83 * a (multi)line header
92 * header definitions for parsing
97 void (*f)(Part*, Hdef*, char*);
103 { "content-type:", ctype, },
104 { "content-transfer-encoding:", cencoding, },
105 { "content-disposition:", cdisposition, },
110 * acceptable content types and their extensions
114 char *ext; /* extension */
115 char *gtype; /* generic content type */
116 char *stype; /* specific content type */
127 fprint(2, "usage: upas/vf [-r] [-s savefile]\n");
132 main(int argc, char **argv)
139 savefile = EARGF(usage());
148 Binit(&in, 0, OREAD);
149 Binit(&out, 1, OWRITE);
154 /* pass through our standard 'From ' line */
157 /* parse with the top level part */
166 postnote(PNGROUP, getpid(), "mail refused: we don't accept executable attachments");
167 exits("mail refused: we don't accept executable attachments");
172 * parse a part; returns the ancestor whose boundary terminated
173 * this part or nil on EOF.
180 p = mallocz(sizeof *p, 1);
184 if(p->boundary != nil){
185 /* the format of a multipart part is always:
187 * null or ignored body
205 /* may still be multipart if this is a forwarded message */
206 if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
207 /* the format of forwarded message is:
217 * This is the meat. This may be an executable.
218 * if so, wrap it and change its type
220 if(p->badtype || p->badfile){
224 syslog(0, "vf", "vf rejected %s %s", p->type?s_to_c(p->type):"?",
225 p->filename?s_to_c(p->filename):"?");
226 fprint(2, "The mail contained an executable attachment.\n");
227 fprint(2, "We refuse all mail containing such.\n");
230 np = problemchild(p);
233 /* if problemchild returns p, it turns out p is okay: fall thru */
236 return passbody(p, 1);
242 * read and parse a complete header
258 for(hd = hdefs; hd->type != nil; hd++){
259 if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
260 (*hd->f)(p, hd, s_to_c(hl->s));
268 * read a possibly multiline header line
278 p = Brdline(&in, '\n');
282 if(memchr(p, ':', n) == nil){
286 s = s_nappend(s_new(), p, n);
288 p = Brdline(&in, '\n');
292 if(*p != ' ' && *p != '\t'){
296 s = s_nappend(s, p, n);
298 hl = malloc(sizeof *hl);
305 * write out a complete header
308 writeheader(Part *p, int xfree)
312 for(hl = p->hl; hl != nil; hl = next){
313 Bprint(&out, "%s", s_to_c(hl->s));
325 * pass a body through. return if we hit one of our ancestors'
326 * boundaries or EOF. if we hit a boundary, return a pointer to
327 * that ancestor. if we hit EOF, return nil.
330 passbody(Part *p, int dobound)
339 cp = Brdline(b, '\n');
348 cp = Brdline(b, '\n');
352 for(pp = p; pp != nil; pp = pp->pp)
353 if(pp->boundary != nil
354 && strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
356 Bwrite(&out, cp, Blinelen(b));
358 Bseek(b, -Blinelen(b), 1);
361 Bwrite(&out, cp, Blinelen(b));
367 * save the message somewhere
369 static vlong bodyoff; /* clumsy hack */
371 save(Part *p, char *file)
377 memset(&out, 0, sizeof(out));
379 fd = open(file, OWRITE);
383 Binit(&out, fd, OWRITE);
386 Bprint(&out, "From virusfilter %s\n", cp);
388 bodyoff = Boffset(&out);
394 memset(&out, 0, sizeof out);
395 Binit(&out, 1, OWRITE);
400 * write to a file but save the fd for passbody.
408 strcpy(buf, "/var/tmp/vf.XXXXXXXXXXX");
409 if((fd = mkstemp(buf)) < 0){
410 fprint(2, "error creating temporary file: %r\n");
415 if(save(p, name) < 0){
416 fprint(2, "error saving temporary file: %r\n");
420 fprint(2, "error in savetmp: already have tmp file!\n");
423 p->tmpbuf = Bopen(name, OREAD|ORCLOSE);
424 if(p->tmpbuf == nil){
425 fprint(2, "error reading tempoary file: %r\n");
428 Bseek(p->tmpbuf, bodyoff, 0);
433 * Run the external checker to do content-based checks.
444 val = unsharp("#9/mail/lib/validateattachment");
445 if(val == nil || access(val, AEXEC) < 0)
449 fprint(2, "run checker %s\n", name);
450 switch(pid = fork()){
452 sysfatal("fork: %r");
455 execl(val, "validateattachment", name, nil);
456 _exits("exec failed");
460 * Okay to return on error - will let mail through but wrapped.
465 syslog(0, "mail", "vf wait failed: %r");
469 syslog(0, "mail", "vf wrong pid %d != %d", w->pid, pid);
473 name = s_to_c(p->filename);
474 if(atoi(w->msg) == Discard){
475 syslog(0, "mail", "vf validateattachment rejected %s", name);
478 if(atoi(w->msg) == Accept){
479 syslog(0, "mail", "vf validateattachment accepted %s", name);
487 * emit a multipart Part that explains the problem
490 problemchild(Part *p)
498 * We don't know whether the attachment is okay.
499 * If there's an external checker, let it have a crack at it.
501 if(runchecker(p) > 0)
507 syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
508 p->filename?s_to_c(p->filename):"?");
510 boundary = mkboundary();
511 /* print out non-mime headers */
512 for(hl = p->hl; hl != nil; hl = hl->next)
513 if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
514 Bprint(&out, "%s", s_to_c(hl->s));
516 /* add in our own multipart headers and message */
517 Bprint(&out, "Content-Type: multipart/mixed;\n");
518 Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
519 Bprint(&out, "Content-Disposition: inline\n");
521 Bprint(&out, "This is a multi-part message in MIME format.\n");
522 Bprint(&out, "--%s\n", s_to_c(boundary));
523 Bprint(&out, "Content-Disposition: inline\n");
524 Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
525 Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
527 Bprint(&out, "from postmaster@%s:\n", sysname());
528 Bprint(&out, "The following attachment had content that we can't\n");
529 Bprint(&out, "prove to be harmless. To avoid possible automatic\n");
530 Bprint(&out, "execution, we changed the content headers.\n");
531 Bprint(&out, "The original header was:\n\n");
533 /* print out original header lines */
534 for(hl = p->hl; hl != nil; hl = hl->next)
535 if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
536 Bprint(&out, "\t%s", s_to_c(hl->s));
537 Bprint(&out, "--%s\n", s_to_c(boundary));
539 /* change file name */
541 s_append(p->filename, ".suspect");
543 p->filename = s_copy("file.suspect");
545 /* print out new header */
546 Bprint(&out, "Content-Type: application/octet-stream\n");
547 Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
552 Bprint(&out, "Content-Transfer-Encoding: base64\n");
555 Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
562 /* add the new boundary and the original terminator */
563 Bprint(&out, "--%s--\n", s_to_c(boundary));
564 if(np && np->boundary){
565 cp = Brdline(&in, '\n');
566 Bwrite(&out, cp, Blinelen(&in));
573 isattribute(char **pp, char *attr)
580 if(cistrncmp(p, attr, n) != 0)
594 * parse content type header
597 ctype(Part *p, Hdef *h, char *cp)
605 cp = getstring(cp, p->type, 1);
606 if(badtype(s_to_c(p->type)))
610 if(isattribute(&cp, "boundary")){
612 cp = getstring(cp, s, 0);
613 p->boundary = s_reset(p->boundary);
614 s_append(p->boundary, "--");
615 s_append(p->boundary, s_to_c(s));
616 p->blen = s_len(p->boundary);
618 } else if(cistrncmp(cp, "multipart", 9) == 0){
620 * the first unbounded part of a multipart message,
621 * the preamble, is not displayed or saved
623 } else if(isattribute(&cp, "name")){
625 } else if(isattribute(&cp, "charset")){
626 if(p->charset == nil)
627 p->charset = s_new();
628 cp = getstring(cp, s_reset(p->charset), 0);
636 * parse content encoding header
639 cencoding(Part *m, Hdef *h, char *p)
643 if(cistrncmp(p, "base64", 6) == 0)
644 m->encoding = Ebase64;
645 else if(cistrncmp(p, "quoted-printable", 16) == 0)
646 m->encoding = Equoted;
650 * parse content disposition header
653 cdisposition(Part *p, Hdef *h, char *cp)
658 if(cistrncmp(cp, "inline", 6) == 0){
659 p->disposition = Dinline;
660 } else if(cistrncmp(cp, "attachment", 10) == 0){
661 p->disposition = Dfile;
662 } else if(cistrncmp(cp, "filename=", 9) == 0){
672 setfilename(Part *p, char *name)
674 if(p->filename == nil)
675 p->filename = s_new();
676 getstring(name, s_reset(p->filename), 0);
677 p->filename = tokenconvert(p->filename);
678 p->badfile = badfile(s_to_c(p->filename));
692 while(*p && *p != ';')
694 while(*p == ';' || isspace(*p))
700 * parse a possibly double-quoted string from a header. A
701 * ';' terminates the string.
704 getstring(char *p, String *s, int dolower)
710 for(;*p && *p != '"'; p++)
712 s_putc(s, tolower(*p));
722 for(; *p && !isspace(*p) && *p != ';'; p++)
724 s_putc(s, tolower(*p));
742 for(hd = hdefs; hd->type != nil; hd++)
743 hd->len = strlen(hd->type);
747 * create a new boundary
757 srand((time(0)<<16)|getpid());
760 strcpy(buf, "upas-");
761 for(i = 5; i < sizeof(buf)-1; i++)
762 buf[i] = 'a' + nrand(26);
768 * skip blank lines till header
776 while((cp = Brdline(&in, '\n')) != nil){
778 for(i = 0; i < n-1; i++)
779 if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
788 * pass unix header lines
796 while((p = Brdline(&in, '\n')) != nil){
798 if(strncmp(p, "From ", 5) != 0){
818 b = Bopen(unsharp("#9/lib/mimetype"), OREAD);
823 while((p = Brdline(b, '\n')) != nil){
826 p[Blinelen(b)-1] = 0;
827 if(tokenize(p, f, nelem(f)) < 5)
829 m = mallocz(sizeof *m, 1);
832 m->ext = strdup(f[0]);
835 m->gtype = strdup(f[1]);
838 m->stype = strdup(f[2]);
858 * if the class is 'm' or 'y', accept it
859 * if the class is 'p' check a previous extension
860 * otherwise, filename is bad
869 p = strrchr(name, '.');
873 for(m = mtypes; m != nil; m = m->next)
874 if(cistrcmp(p, m->ext) == 0){
892 * if the class is 'm' or 'y' or 'p', accept it
893 * otherwise, the content type is bad
902 fix = s = strchr(type, '/');
908 for(m = mtypes; m != nil; m = m->next){
909 if(cistrcmp(type, m->gtype) != 0)
911 if(cistrcmp(s, m->stype) != 0)
928 /* rfc2047 non-ascii */
929 typedef struct Charset Charset;
936 { "us-ascii", 8, 1, },
938 { "iso-8859-1", 10, 1, }
942 * convert to UTF if need be
945 tokenconvert(String *t)
957 if(token[0] != '=' || token[1] != '?' ||
958 token[len-2] != '?' || token[len-1] != '=')
963 /* bail if we don't understand the character set */
964 for(i = 0; i < nelem(charsets); i++)
965 if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
966 if(token[charsets[i].len] == '?'){
967 token += charsets[i].len + 1;
970 if(i >= nelem(charsets))
973 /* bail if it doesn't fit */
974 if(strlen(token) > sizeof(decoded)-1)
977 /* bail if we don't understand the encoding */
978 if(cistrncmp(token, "b?", 2) == 0){
980 len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
982 } else if(cistrncmp(token, "q?", 2) == 0){
984 len = decquoted(decoded, token, e);
985 if(len > 0 && decoded[len-1] == '\n')
992 switch(charsets[i].convert){
998 latin1toutf(utfbuf, decoded, decoded+len);
1023 memset(tableqp, 0, 256);
1024 for(c = ' '; c <= '<'; c++)
1026 for(c = '>'; c <= '~'; c++)
1028 tableqp['\t'] = Self;
1035 if(x >= '0' && x <= '9')
1037 if(x >= 'A' && x <= 'F')
1038 return (x - 'A') + 10;
1039 if(x >= 'a' && x <= 'f')
1040 return (x - 'a') + 10;
1045 decquotedline(char *out, char *in, char *e)
1049 /* dump trailing white space */
1050 while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
1053 /* trailing '=' means no newline */
1067 c = hex2int(*in++)<<4;
1068 c |= hex2int(*in++);
1081 decquoted(char *out, char *in, char *e)
1085 if(tableqp[' '] == 0)
1089 while((nl = strchr(in, '\n')) != nil && nl < e){
1090 p = decquotedline(p, in, nl);
1094 p = decquotedline(p, in, e-1);
1096 /* make sure we end with a new line */
1105 /* translate latin1 directly since it fits neatly in utf */
1107 latin1toutf(char *out, char *in, char *e)
1113 for(; in < e; in++){
1115 p += runetochar(p, &r);