2 * this is a filter that changes mime types and names of
3 * suspect executable attachments.
16 typedef struct Mtype Mtype;
17 typedef struct Hdef Hdef;
18 typedef struct Hline Hline;
19 typedef struct Part Part;
21 static int badfile(char *name);
22 static int badtype(char *type);
23 static void ctype(Part*, Hdef*, char*);
24 static void cencoding(Part*, Hdef*, char*);
25 static void cdisposition(Part*, Hdef*, char*);
26 static int decquoted(char *out, char *in, char *e);
27 static char* getstring(char *p, String *s, int dolower);
28 static void init_hdefs(void);
29 static int isattribute(char **pp, char *attr);
30 static int latin1toutf(char *out, char *in, char *e);
31 static String* mkboundary(void);
32 static Part* part(Part *pp);
33 static Part* passbody(Part *p, int dobound);
34 static void passnotheader(void);
35 static void passunixheader(void);
36 static Part* problemchild(Part *p);
37 static void readheader(Part *p);
38 static Hline* readhl(void);
39 static void readmtypes(void);
40 static int save(Part *p, char *file);
41 static void setfilename(Part *p, char *name);
42 static char* skiptosemi(char *p);
43 static char* skipwhite(char *p);
44 static String* tokenconvert(String *t);
45 static void writeheader(Part *p, int);
54 /* disposition possibilities */
64 * a message part; either the whole message or a subpart
68 Part *pp; /* parent part */
69 Hline *hl; /* linked list of header lines */
74 String *boundary; /* boundary for multiparts */
76 String *charset; /* character set */
77 String *type; /* content type */
78 String *filename; /* file name */
79 Biobuf *tmpbuf; /* diversion input buffer */
83 * a (multi)line header
92 * header definitions for parsing
97 void (*f)(Part*, Hdef*, char*);
103 { "content-type:", ctype, },
104 { "content-transfer-encoding:", cencoding, },
105 { "content-disposition:", cdisposition, },
110 * acceptable content types and their extensions
114 char *ext; /* extension */
115 char *gtype; /* generic content type */
116 char *stype; /* specific content type */
127 fprint(2, "usage: upas/vf [-r] [-s savefile]\n");
132 main(int argc, char **argv)
139 savefile = EARGF(usage());
148 Binit(&in, 0, OREAD);
149 Binit(&out, 1, OWRITE);
154 /* pass through our standard 'From ' line */
157 /* parse with the top level part */
166 postnote(PNGROUP, getpid(), "mail refused: we don't accept executable attachments");
167 exits("mail refused: we don't accept executable attachments");
172 * parse a part; returns the ancestor whose boundary terminated
173 * this part or nil on EOF.
180 p = mallocz(sizeof *p, 1);
184 if(p->boundary != nil){
185 /* the format of a multipart part is always:
187 * null or ignored body
205 /* may still be multipart if this is a forwarded message */
206 if(p->type && cistrcmp(s_to_c(p->type), "message/rfc822") == 0){
207 /* the format of forwarded message is:
217 * This is the meat. This may be an executable.
218 * if so, wrap it and change its type
220 if(p->badtype || p->badfile){
224 syslog(0, "vf", "vf rejected %s %s", p->type?s_to_c(p->type):"?",
225 p->filename?s_to_c(p->filename):"?");
226 fprint(2, "The mail contained an executable attachment.\n");
227 fprint(2, "We refuse all mail containing such.\n");
230 np = problemchild(p);
233 /* if problemchild returns p, it turns out p is okay: fall thru */
236 return passbody(p, 1);
242 * read and parse a complete header
258 for(hd = hdefs; hd->type != nil; hd++){
259 if(cistrncmp(s_to_c(hl->s), hd->type, hd->len) == 0){
260 (*hd->f)(p, hd, s_to_c(hl->s));
268 * read a possibly multiline header line
278 p = Brdline(&in, '\n');
282 if(memchr(p, ':', n) == nil){
286 s = s_nappend(s_new(), p, n);
288 p = Brdline(&in, '\n');
292 if(*p != ' ' && *p != '\t'){
296 s = s_nappend(s, p, n);
298 hl = malloc(sizeof *hl);
305 * write out a complete header
308 writeheader(Part *p, int xfree)
312 for(hl = p->hl; hl != nil; hl = next){
313 Bprint(&out, "%s", s_to_c(hl->s));
325 * pass a body through. return if we hit one of our ancestors'
326 * boundaries or EOF. if we hit a boundary, return a pointer to
327 * that ancestor. if we hit EOF, return nil.
330 passbody(Part *p, int dobound)
339 cp = Brdline(b, '\n');
348 cp = Brdline(b, '\n');
352 for(pp = p; pp != nil; pp = pp->pp)
353 if(pp->boundary != nil
354 && strncmp(cp, s_to_c(pp->boundary), pp->blen) == 0){
356 Bwrite(&out, cp, Blinelen(b));
358 Bseek(b, -Blinelen(b), 1);
361 Bwrite(&out, cp, Blinelen(b));
367 * save the message somewhere
369 static vlong bodyoff; /* clumsy hack */
371 save(Part *p, char *file)
377 memset(&out, 0, sizeof(out));
379 fd = open(file, OWRITE);
383 Binit(&out, fd, OWRITE);
386 Bprint(&out, "From virusfilter %s\n", cp);
388 bodyoff = Boffset(&out);
394 memset(&out, 0, sizeof out);
395 Binit(&out, 1, OWRITE);
400 * write to a file but save the fd for passbody.
408 strcpy(buf, "/var/tmp/vf.XXXXXXXXXXX");
409 if((fd = mkstemp(buf)) < 0){
410 fprint(2, "error creating temporary file: %r\n");
415 if(save(p, name) < 0){
416 fprint(2, "error saving temporary file: %r\n");
420 fprint(2, "error in savetmp: already have tmp file!\n");
423 p->tmpbuf = Bopen(name, OREAD|ORCLOSE);
424 if(p->tmpbuf == nil){
425 fprint(2, "error reading tempoary file: %r\n");
428 Bseek(p->tmpbuf, bodyoff, 0);
433 * Run the external checker to do content-based checks.
444 val = unsharp("#9/mail/lib/validateattachment");
445 if(val == nil || access(val, AEXEC) < 0)
449 fprint(2, "run checker %s\n", name);
450 switch(pid = fork()){
452 sysfatal("fork: %r");
455 execl(val, "validateattachment", name, nil);
456 _exits("exec failed");
460 * Okay to return on error - will let mail through but wrapped.
464 syslog(0, "mail", "vf wait failed: %r");
468 syslog(0, "mail", "vf wrong pid %d != %d", w->pid, pid);
472 name = s_to_c(p->filename);
473 if(atoi(w->msg) == Discard){
474 syslog(0, "mail", "vf validateattachment rejected %s", name);
477 if(atoi(w->msg) == Accept){
478 syslog(0, "mail", "vf validateattachment accepted %s", name);
486 * emit a multipart Part that explains the problem
489 problemchild(Part *p)
497 * We don't know whether the attachment is okay.
498 * If there's an external checker, let it have a crack at it.
500 if(runchecker(p) > 0)
506 syslog(0, "mail", "vf wrapped %s %s", p->type?s_to_c(p->type):"?",
507 p->filename?s_to_c(p->filename):"?");
509 boundary = mkboundary();
510 /* print out non-mime headers */
511 for(hl = p->hl; hl != nil; hl = hl->next)
512 if(cistrncmp(s_to_c(hl->s), "content-", 8) != 0)
513 Bprint(&out, "%s", s_to_c(hl->s));
515 /* add in our own multipart headers and message */
516 Bprint(&out, "Content-Type: multipart/mixed;\n");
517 Bprint(&out, "\tboundary=\"%s\"\n", s_to_c(boundary));
518 Bprint(&out, "Content-Disposition: inline\n");
520 Bprint(&out, "This is a multi-part message in MIME format.\n");
521 Bprint(&out, "--%s\n", s_to_c(boundary));
522 Bprint(&out, "Content-Disposition: inline\n");
523 Bprint(&out, "Content-Type: text/plain; charset=\"US-ASCII\"\n");
524 Bprint(&out, "Content-Transfer-Encoding: 7bit\n");
526 Bprint(&out, "from postmaster@%s:\n", sysname());
527 Bprint(&out, "The following attachment had content that we can't\n");
528 Bprint(&out, "prove to be harmless. To avoid possible automatic\n");
529 Bprint(&out, "execution, we changed the content headers.\n");
530 Bprint(&out, "The original header was:\n\n");
532 /* print out original header lines */
533 for(hl = p->hl; hl != nil; hl = hl->next)
534 if(cistrncmp(s_to_c(hl->s), "content-", 8) == 0)
535 Bprint(&out, "\t%s", s_to_c(hl->s));
536 Bprint(&out, "--%s\n", s_to_c(boundary));
538 /* change file name */
540 s_append(p->filename, ".suspect");
542 p->filename = s_copy("file.suspect");
544 /* print out new header */
545 Bprint(&out, "Content-Type: application/octet-stream\n");
546 Bprint(&out, "Content-Disposition: attachment; filename=\"%s\"\n", s_to_c(p->filename));
551 Bprint(&out, "Content-Transfer-Encoding: base64\n");
554 Bprint(&out, "Content-Transfer-Encoding: quoted-printable\n");
561 /* add the new boundary and the original terminator */
562 Bprint(&out, "--%s--\n", s_to_c(boundary));
563 if(np && np->boundary){
564 cp = Brdline(&in, '\n');
565 Bwrite(&out, cp, Blinelen(&in));
572 isattribute(char **pp, char *attr)
579 if(cistrncmp(p, attr, n) != 0)
593 * parse content type header
596 ctype(Part *p, Hdef *h, char *cp)
604 cp = getstring(cp, p->type, 1);
605 if(badtype(s_to_c(p->type)))
609 if(isattribute(&cp, "boundary")){
611 cp = getstring(cp, s, 0);
612 p->boundary = s_reset(p->boundary);
613 s_append(p->boundary, "--");
614 s_append(p->boundary, s_to_c(s));
615 p->blen = s_len(p->boundary);
617 } else if(cistrncmp(cp, "multipart", 9) == 0){
619 * the first unbounded part of a multipart message,
620 * the preamble, is not displayed or saved
622 } else if(isattribute(&cp, "name")){
624 } else if(isattribute(&cp, "charset")){
625 if(p->charset == nil)
626 p->charset = s_new();
627 cp = getstring(cp, s_reset(p->charset), 0);
635 * parse content encoding header
638 cencoding(Part *m, Hdef *h, char *p)
642 if(cistrncmp(p, "base64", 6) == 0)
643 m->encoding = Ebase64;
644 else if(cistrncmp(p, "quoted-printable", 16) == 0)
645 m->encoding = Equoted;
649 * parse content disposition header
652 cdisposition(Part *p, Hdef *h, char *cp)
657 if(cistrncmp(cp, "inline", 6) == 0){
658 p->disposition = Dinline;
659 } else if(cistrncmp(cp, "attachment", 10) == 0){
660 p->disposition = Dfile;
661 } else if(cistrncmp(cp, "filename=", 9) == 0){
671 setfilename(Part *p, char *name)
673 if(p->filename == nil)
674 p->filename = s_new();
675 getstring(name, s_reset(p->filename), 0);
676 p->filename = tokenconvert(p->filename);
677 p->badfile = badfile(s_to_c(p->filename));
691 while(*p && *p != ';')
693 while(*p == ';' || isspace(*p))
699 * parse a possibly double-quoted string from a header. A
700 * ';' terminates the string.
703 getstring(char *p, String *s, int dolower)
709 for(;*p && *p != '"'; p++)
711 s_putc(s, tolower(*p));
721 for(; *p && !isspace(*p) && *p != ';'; p++)
723 s_putc(s, tolower(*p));
741 for(hd = hdefs; hd->type != nil; hd++)
742 hd->len = strlen(hd->type);
746 * create a new boundary
756 srand((time(0)<<16)|getpid());
759 strcpy(buf, "upas-");
760 for(i = 5; i < sizeof(buf)-1; i++)
761 buf[i] = 'a' + nrand(26);
767 * skip blank lines till header
775 while((cp = Brdline(&in, '\n')) != nil){
777 for(i = 0; i < n-1; i++)
778 if(cp[i] != ' ' && cp[i] != '\t' && cp[i] != '\r'){
787 * pass unix header lines
795 while((p = Brdline(&in, '\n')) != nil){
797 if(strncmp(p, "From ", 5) != 0){
817 b = Bopen(unsharp("#9/lib/mimetype"), OREAD);
822 while((p = Brdline(b, '\n')) != nil){
825 p[Blinelen(b)-1] = 0;
826 if(tokenize(p, f, nelem(f)) < 5)
828 m = mallocz(sizeof *m, 1);
831 m->ext = strdup(f[0]);
834 m->gtype = strdup(f[1]);
837 m->stype = strdup(f[2]);
857 * if the class is 'm' or 'y', accept it
858 * if the class is 'p' check a previous extension
859 * otherwise, filename is bad
868 p = strrchr(name, '.');
872 for(m = mtypes; m != nil; m = m->next)
873 if(cistrcmp(p, m->ext) == 0){
891 * if the class is 'm' or 'y' or 'p', accept it
892 * otherwise, type is bad
901 fix = s = strchr(type, '/');
907 for(m = mtypes; m != nil; m = m->next){
908 if(cistrcmp(type, m->gtype) != 0)
910 if(cistrcmp(s, m->stype) != 0)
927 /* rfc2047 non-ascii */
928 typedef struct Charset Charset;
935 { "us-ascii", 8, 1, },
937 { "iso-8859-1", 10, 1, }
941 * convert to UTF if need be
944 tokenconvert(String *t)
956 if(token[0] != '=' || token[1] != '?' ||
957 token[len-2] != '?' || token[len-1] != '=')
962 /* bail if we don't understand the character set */
963 for(i = 0; i < nelem(charsets); i++)
964 if(cistrncmp(charsets[i].name, token, charsets[i].len) == 0)
965 if(token[charsets[i].len] == '?'){
966 token += charsets[i].len + 1;
969 if(i >= nelem(charsets))
972 /* bail if it doesn't fit */
973 if(strlen(token) > sizeof(decoded)-1)
976 /* bail if we don't understand the encoding */
977 if(cistrncmp(token, "b?", 2) == 0){
979 len = dec64((uchar*)decoded, sizeof(decoded), token, e-token);
981 } else if(cistrncmp(token, "q?", 2) == 0){
983 len = decquoted(decoded, token, e);
984 if(len > 0 && decoded[len-1] == '\n')
991 switch(charsets[i].convert){
997 latin1toutf(utfbuf, decoded, decoded+len);
1022 memset(tableqp, 0, 256);
1023 for(c = ' '; c <= '<'; c++)
1025 for(c = '>'; c <= '~'; c++)
1027 tableqp['\t'] = Self;
1034 if(x >= '0' && x <= '9')
1036 if(x >= 'A' && x <= 'F')
1037 return (x - 'A') + 10;
1038 if(x >= 'a' && x <= 'f')
1039 return (x - 'a') + 10;
1044 decquotedline(char *out, char *in, char *e)
1048 /* dump trailing white space */
1049 while(e >= in && (*e == ' ' || *e == '\t' || *e == '\r' || *e == '\n'))
1052 /* trailing '=' means no newline */
1066 c = hex2int(*in++)<<4;
1067 c |= hex2int(*in++);
1080 decquoted(char *out, char *in, char *e)
1084 if(tableqp[' '] == 0)
1088 while((nl = strchr(in, '\n')) != nil && nl < e){
1089 p = decquotedline(p, in, nl);
1093 p = decquotedline(p, in, e-1);
1095 /* make sure we end with a new line */
1104 /* translate latin1 directly since it fits neatly in utf */
1106 latin1toutf(char *out, char *in, char *e)
1112 for(; in < e; in++){
1114 p += runetochar(p, &r);