Blob


/*
 * C prologue: headers plus the state shared between the lexer,
 * the grammar actions and the helper functions below.
 */
1 %{
2 #include "common.h"
3 #include "smtp.h"
4 #include <ctype.h>
/* lexer position and limits; set up by yyinit() */
6 char *yylp; /* next character to be lex'd */
7 int yydone; /* tell yylex to give up */
8 char *yybuffer; /* first parsed character */
9 char *yyend; /* end of buffer to be parsed */
10 Node *root;
/* head and tail of the list of fields appended to by newfield() and missing() */
11 Field *firstfield;
12 Field *lastfield;
/* pieces of a leading "From ..." line, filled in by the unixfrom rule */
13 Node *usender;
14 Node *usys;
15 Node *udate;
/* updated on every newfield(): startfield becomes the current yylp, endfield the previous startfield */
16 char *startfield, *endfield;
/* set to 1 by the field rule when a field of that class has been parsed */
17 int originator;
18 int destination;
19 int date;
/* incremented once per Received field; reset by yyinit() */
20 int received;
/* set to 1 by the ignoredhdr rule when MESSAGEID is seen */
21 int messageid;
22 %}
/*
 * Tokens produced by yylex().  WORD is any text that does not match
 * an entry of key[]; every other token except BADTOKEN corresponds to
 * one key[] entry.  BADTOKEN is declared here but is not referenced by
 * the rules or the lexer in this file.
 */
24 %term WORD
25 %term DATE
26 %term RESENT_DATE
27 %term RETURN_PATH
28 %term FROM
29 %term SENDER
30 %term REPLY_TO
31 %term RESENT_FROM
32 %term RESENT_SENDER
33 %term RESENT_REPLY_TO
34 %term SUBJECT
35 %term TO
36 %term CC
37 %term BCC
38 %term RESENT_TO
39 %term RESENT_CC
40 %term RESENT_BCC
41 %term REMOTE
42 %term PRECEDENCE
43 %term MIMEVERSION
44 %term CONTENTTYPE
45 %term MESSAGEID
46 %term RECEIVED
47 %term MAILER
48 %term BADTOKEN
/* parsing starts at msg */
49 %start msg
50 %%
/* a header: a list of fields, optionally preceded by a "From ..." line */
52 msg : fields
53 | unixfrom '\n' fields
54 ;
/* a newline on its own sets yydone, after which yylex() returns 0 */
55 fields : '\n'
56 { yydone = 1; }
57 | field '\n'
58 | field '\n' fields
59 ;
/*
 * one header field; the actions note which classes of field were seen.
 * The error alternative skips to the next newline and then expects
 * another field.
 */
60 field : dates
61 { date = 1; }
62 | originator
63 { originator = 1; }
64 | destination
65 { destination = 1; }
66 | subject
67 | optional
68 | ignored
69 | received
70 | precedence
71 | error '\n' field
72 ;
/*
 * "From" line: the address, date and final word are kept in
 * usender, udate and usys; the three keyword nodes are freed.
 */
73 unixfrom : FROM route_addr unix_date_time REMOTE FROM word
74 { freenode($1); freenode($4); freenode($5);
75 usender = $2; udate = $3; usys = $6;
76 }
77 ;
/*
 * originator fields: each is linked into one node list and recorded
 * with newfield(..., 1), i.e. with the Field's source member set to 1
 */
78 originator : REPLY_TO ':' address_list
79 { newfield(link3($1, $2, $3), 1); }
80 | RETURN_PATH ':' route_addr
81 { newfield(link3($1, $2, $3), 1); }
82 | FROM ':' mailbox_list
83 { newfield(link3($1, $2, $3), 1); }
84 | SENDER ':' mailbox
85 { newfield(link3($1, $2, $3), 1); }
86 | RESENT_REPLY_TO ':' address_list
87 { newfield(link3($1, $2, $3), 1); }
88 | RESENT_SENDER ':' mailbox
89 { newfield(link3($1, $2, $3), 1); }
90 | RESENT_FROM ':' mailbox
91 { newfield(link3($1, $2, $3), 1); }
92 ;
/* date fields: the value is an arbitrary run of things (see date_time); source is 0 */
93 dates : DATE ':' date_time
94 { newfield(link3($1, $2, $3), 0); }
95 | RESENT_DATE ':' date_time
96 { newfield(link3($1, $2, $3), 0); }
97 ;
/*
 * destination fields: each may be empty (name and colon only) or
 * carry an address_list; all are recorded with source 0
 */
98 destination : TO ':'
99 { newfield(link2($1, $2), 0); }
100 | TO ':' address_list
101 { newfield(link3($1, $2, $3), 0); }
102 | RESENT_TO ':'
103 { newfield(link2($1, $2), 0); }
104 | RESENT_TO ':' address_list
105 { newfield(link3($1, $2, $3), 0); }
106 | CC ':'
107 { newfield(link2($1, $2), 0); }
108 | CC ':' address_list
109 { newfield(link3($1, $2, $3), 0); }
110 | RESENT_CC ':'
111 { newfield(link2($1, $2), 0); }
112 | RESENT_CC ':' address_list
113 { newfield(link3($1, $2, $3), 0); }
114 | BCC ':'
115 { newfield(link2($1, $2), 0); }
116 | BCC ':' address_list
117 { newfield(link3($1, $2, $3), 0); }
118 | RESENT_BCC ':'
119 { newfield(link2($1, $2), 0); }
120 | RESENT_BCC ':' address_list
121 { newfield(link3($1, $2, $3), 0); }
/* free-form fields: the value, if any, is a run of things; recorded with source 0 */
123 subject : SUBJECT ':' things
124 { newfield(link3($1, $2, $3), 0); }
125 | SUBJECT ':'
126 { newfield(link2($1, $2), 0); }
/* like subject, but also counts the field in received */
128 received : RECEIVED ':' things
129 { newfield(link3($1, $2, $3), 0); received++; }
130 | RECEIVED ':'
131 { newfield(link2($1, $2), 0); received++; }
133 precedence : PRECEDENCE ':' things
134 { newfield(link3($1, $2, $3), 0); }
135 | PRECEDENCE ':'
136 { newfield(link2($1, $2), 0); }
/* recognised header names whose values are not parsed any further */
138 ignored : ignoredhdr ':' things
139 { newfield(link3($1, $2, $3), 0); }
140 | ignoredhdr ':'
141 { newfield(link2($1, $2), 0); }
/* only MESSAGEID has an action: it sets the messageid flag */
143 ignoredhdr : MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
/*
 * any other field name.  If badfieldname() rejects the name, the
 * nodes are freed and the action executes return 1, leaving the
 * parser; otherwise the field is recorded with source 0.
 */
145 optional : fieldwords ':' things
146 { /* hack to allow same lex for field names and the rest */
147 if(badfieldname($1)){
148 freenode($1);
149 freenode($2);
150 freenode($3);
151 return 1;
153 newfield(link3($1, $2, $3), 0);
155 | fieldwords ':'
156 { /* hack to allow same lex for field names and the rest */
157 if(badfieldname($1)){
158 freenode($1);
159 freenode($2);
160 return 1;
162 newfield(link2($1, $2), 0);
/* comma-separated addresses, kept as one linked node list including the commas */
165 address_list : address
166 | address_list ',' address
167 { $$ = link3($1, $2, $3); }
169 address : mailbox
170 | group
/* phrase ':' optional address_list ';' */
172 group : phrase ':' address_list ';'
173 { $$ = link2($1, link3($2, $3, $4)); }
174 | phrase ':' ';'
175 { $$ = link3($1, $2, $3); }
177 mailbox_list : mailbox
178 | mailbox_list ',' mailbox
179 { $$ = link3($1, $2, $3); }
/* a bare address, or an angle-bracketed one with an optional leading phrase */
181 mailbox : route_addr
182 | phrase brak_addr
183 { $$ = link2($1, $2); }
184 | brak_addr
/* "<>" is replaced by the node nobody() builds from the '>' token; the '<' node is freed */
186 brak_addr : '<' route_addr '>'
187 { $$ = link3($1, $2, $3); }
188 | '<' '>'
189 { $$ = nobody($2); freenode($1); }
/* the pieces of an address are concatenated into a single node, which address() marks with addr = 1 */
191 route_addr : route ':' at_addr
192 { $$ = address(concat($1, concat($2, $3))); }
193 | addr_spec
/* '@' domain, repeated with commas in between */
195 route : '@' domain
196 { $$ = concat($1, $2); }
197 | route ',' '@' domain
198 { $$ = concat($1, concat($2, concat($3, $4))); }
200 addr_spec : local_part
201 { $$ = address($1); }
202 | at_addr
/* the left-recursive alternative accepts more than one '@' domain suffix */
204 at_addr : local_part '@' domain
205 { $$ = address(concat($1, concat($2, $3)));}
206 | at_addr '@' domain
207 { $$ = address(concat($1, concat($2, $3)));}
209 local_part : word
211 domain : word
/* one or more words, linked rather than concatenated */
213 phrase : word
214 | phrase word
215 { $$ = link2($1, $2); }
/* a non-empty run of tokens of any kind except newline, linked into one list */
217 things : thing
218 | things thing
219 { $$ = link2($1, $2); }
221 thing : word | '<' | '>' | '@' | ':' | ';' | ','
223 date_time : things
/* six pieces; the action relinks them in the order $1 $3 $2 $6 $4 $5 */
225 unix_date_time : word word word unix_time word word
226 { $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
/* words separated by ':' */
228 unix_time : word
229 | unix_time ':' word
230 { $$ = link3($1, $2, $3); }
/* inside a field value a header-name token counts as an ordinary word */
232 word : WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
233 | REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
234 | TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
235 | PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
/* name of an optional field: starts with a WORD or a fieldword, then any mix of the two */
237 fieldwords : fieldword
238 | WORD
239 | fieldwords fieldword
240 { $$ = link2($1, $2); }
241 | fieldwords word
242 { $$ = link2($1, $2); }
/* the single-character tokens other than ':' */
244 fieldword : '<' | '>' | '@' | ';' | ','
246 %%
248 /*
249 * Initialize the parsing. Done once for each header field.
250 */
251 void
252 yyinit(char *p, int len)
254 yybuffer = p;
255 yylp = p;
256 yyend = p + len;
257 firstfield = lastfield = 0;
258 received = 0;
261 /*
262 * keywords identifying header fields we care about
263 */
264 typedef struct Keyword Keyword;
265 struct Keyword {
266 char *rep;
267 int val;
268 };
270 /* field names that we need to recognize */
271 Keyword key[] = {
272 { "date", DATE },
273 { "resent-date", RESENT_DATE },
274 { "return_path", RETURN_PATH },
275 { "from", FROM },
276 { "sender", SENDER },
277 { "reply-to", REPLY_TO },
278 { "resent-from", RESENT_FROM },
279 { "resent-sender", RESENT_SENDER },
280 { "resent-reply-to", RESENT_REPLY_TO },
281 { "to", TO },
282 { "cc", CC },
283 { "bcc", BCC },
284 { "resent-to", RESENT_TO },
285 { "resent-cc", RESENT_CC },
286 { "resent-bcc", RESENT_BCC },
287 { "remote", REMOTE },
288 { "subject", SUBJECT },
289 { "precedence", PRECEDENCE },
290 { "mime-version", MIMEVERSION },
291 { "content-type", CONTENTTYPE },
292 { "message-id", MESSAGEID },
293 { "received", RECEIVED },
294 { "mailer", MAILER },
295 { "who-the-hell-cares", WORD }
296 };
298 /*
299 * Lexical analysis for an rfc822 header field. Continuation lines
300 * are handled in yywhite() when skipping over white space.
302 */
303 int
304 yylex(void)
306 String *t;
307 int quoting;
308 int escaping;
309 char *start;
310 Keyword *kp;
311 int c, d;
313 /* print("lexing\n"); /**/
314 if(yylp >= yyend)
315 return 0;
316 if(yydone)
317 return 0;
319 quoting = escaping = 0;
320 start = yylp;
321 yylval = malloc(sizeof(Node));
322 yylval->white = yylval->s = 0;
323 yylval->next = 0;
324 yylval->addr = 0;
325 yylval->start = yylp;
326 for(t = 0; yylp < yyend; yylp++){
327 c = *yylp & 0xff;
329 /* dump nulls, they can't be in header */
330 if(c == 0)
331 continue;
333 if(escaping) {
334 escaping = 0;
335 } else if(quoting) {
336 switch(c){
337 case '\\':
338 escaping = 1;
339 break;
340 case '\n':
341 d = (*(yylp+1))&0xff;
342 if(d != ' ' && d != '\t'){
343 quoting = 0;
344 yylp--;
345 continue;
347 break;
348 case '"':
349 quoting = 0;
350 break;
352 } else {
353 switch(c){
354 case '\\':
355 escaping = 1;
356 break;
357 case '(':
358 case ' ':
359 case '\t':
360 case '\r':
361 goto out;
362 case '\n':
363 if(yylp == start){
364 yylp++;
365 /* print("lex(c %c)\n", c); /**/
366 yylval->end = yylp;
367 return yylval->c = c;
369 goto out;
370 case '@':
371 case '>':
372 case '<':
373 case ':':
374 case ',':
375 case ';':
376 if(yylp == start){
377 yylp++;
378 yylval->white = yywhite();
379 /* print("lex(c %c)\n", c); /**/
380 yylval->end = yylp;
381 return yylval->c = c;
383 goto out;
384 case '"':
385 quoting = 1;
386 break;
387 default:
388 break;
391 if(t == 0)
392 t = s_new();
393 s_putc(t, c);
395 out:
396 yylval->white = yywhite();
397 if(t) {
398 s_terminate(t);
399 } else /* message begins with white-space! */
400 return yylval->c = '\n';
401 yylval->s = t;
402 for(kp = key; kp->val != WORD; kp++)
403 if(cistrcmp(s_to_c(t), kp->rep)==0)
404 break;
405 /* print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/
406 yylval->end = yylp;
407 return yylval->c = kp->val;
/*
 * Error hook called by the generated parser.  The message is
 * deliberately discarded.
 */
void
yyerror(char *x)
{
	/* mark the parameter as used; nothing is reported */
	USED(x);
}
418 /*
419 * parse white space and comments
420 */
421 String *
422 yywhite(void)
424 String *w;
425 int clevel;
426 int c;
427 int escaping;
429 escaping = clevel = 0;
430 for(w = 0; yylp < yyend; yylp++){
431 c = *yylp & 0xff;
433 /* dump nulls, they can't be in header */
434 if(c == 0)
435 continue;
437 if(escaping){
438 escaping = 0;
439 } else if(clevel) {
440 switch(c){
441 case '\n':
442 /*
443 * look for multiline fields
444 */
445 if(*(yylp+1)==' ' || *(yylp+1)=='\t')
446 break;
447 else
448 goto out;
449 case '\\':
450 escaping = 1;
451 break;
452 case '(':
453 clevel++;
454 break;
455 case ')':
456 clevel--;
457 break;
459 } else {
460 switch(c){
461 case '\\':
462 escaping = 1;
463 break;
464 case '(':
465 clevel++;
466 break;
467 case ' ':
468 case '\t':
469 case '\r':
470 break;
471 case '\n':
472 /*
473 * look for multiline fields
474 */
475 if(*(yylp+1)==' ' || *(yylp+1)=='\t')
476 break;
477 else
478 goto out;
479 default:
480 goto out;
483 if(w == 0)
484 w = s_new();
485 s_putc(w, c);
487 out:
488 if(w)
489 s_terminate(w);
490 return w;
493 /*
494 * link two parsed entries together
495 */
496 Node*
497 link2(Node *p1, Node *p2)
499 Node *p;
501 for(p = p1; p->next; p = p->next)
503 p->next = p2;
504 return p1;
507 /*
508 * link three parsed entries together
509 */
510 Node*
511 link3(Node *p1, Node *p2, Node *p3)
513 Node *p;
515 for(p = p2; p->next; p = p->next)
517 p->next = p3;
519 for(p = p1; p->next; p = p->next)
521 p->next = p2;
523 return p1;
526 /*
527 * make a:b, move all white space after both
528 */
529 Node*
530 colon(Node *p1, Node *p2)
532 if(p1->white){
533 if(p2->white)
534 s_append(p1->white, s_to_c(p2->white));
535 } else {
536 p1->white = p2->white;
537 p2->white = 0;
540 s_append(p1->s, ":");
541 if(p2->s)
542 s_append(p1->s, s_to_c(p2->s));
544 if(p1->end < p2->end)
545 p1->end = p2->end;
546 freenode(p2);
547 return p1;
550 /*
551 * concatenate two fields, move all white space after both
552 */
553 Node*
554 concat(Node *p1, Node *p2)
556 char buf[2];
558 if(p1->white){
559 if(p2->white)
560 s_append(p1->white, s_to_c(p2->white));
561 } else {
562 p1->white = p2->white;
563 p2->white = 0;
566 if(p1->s == nil){
567 buf[0] = p1->c;
568 buf[1] = 0;
569 p1->s = s_new();
570 s_append(p1->s, buf);
573 if(p2->s)
574 s_append(p1->s, s_to_c(p2->s));
575 else {
576 buf[0] = p2->c;
577 buf[1] = 0;
578 s_append(p1->s, buf);
581 if(p1->end < p2->end)
582 p1->end = p2->end;
583 freenode(p2);
584 return p1;
587 /*
588 * look for disallowed chars in the field name
589 */
590 int
591 badfieldname(Node *p)
593 for(; p; p = p->next){
594 /* field name can't contain white space */
595 if(p->white && p->next)
596 return 1;
598 return 0;
601 /*
602 * mark as an address
603 */
604 Node *
605 address(Node *p)
607 p->addr = 1;
608 return p;
611 /*
612 * case independent string compare
613 */
614 int
615 cistrcmp(char *s1, char *s2)
617 int c1, c2;
619 for(; *s1; s1++, s2++){
620 c1 = isupper(*s1) ? tolower(*s1) : *s1;
621 c2 = isupper(*s2) ? tolower(*s2) : *s2;
622 if (c1 != c2)
623 return -1;
625 return *s2;
628 /*
629 * free a node
630 */
631 void
632 freenode(Node *p)
634 Node *tp;
636 while(p){
637 tp = p->next;
638 if(p->s)
639 s_free(p->s);
640 if(p->white)
641 s_free(p->white);
642 free(p);
643 p = tp;
648 /*
649 * an anonymous user
650 */
651 Node*
652 nobody(Node *p)
654 if(p->s)
655 s_free(p->s);
656 p->s = s_copy("pOsTmAsTeR");
657 p->addr = 1;
658 return p;
661 /*
662 * add anything that was dropped because of a parse error
663 */
664 void
665 missing(Node *p)
667 Node *np;
668 char *start, *end;
669 Field *f;
670 String *s;
672 start = yybuffer;
673 if(lastfield != nil){
674 for(np = lastfield->node; np; np = np->next)
675 start = np->end+1;
678 end = p->start-1;
680 if(end <= start)
681 return;
683 if(strncmp(start, "From ", 5) == 0)
684 return;
686 np = malloc(sizeof(Node));
687 np->start = start;
688 np->end = end;
689 np->white = nil;
690 s = s_copy("BadHeader: ");
691 np->s = s_nappend(s, start, end-start);
692 np->next = nil;
694 f = malloc(sizeof(Field));
695 f->next = 0;
696 f->node = np;
697 f->source = 0;
698 if(firstfield)
699 lastfield->next = f;
700 else
701 firstfield = f;
702 lastfield = f;
705 /*
706 * create a new field
707 */
708 void
709 newfield(Node *p, int source)
711 Field *f;
713 missing(p);
715 f = malloc(sizeof(Field));
716 f->next = 0;
717 f->node = p;
718 f->source = source;
719 if(firstfield)
720 lastfield->next = f;
721 else
722 firstfield = f;
723 lastfield = f;
724 endfield = startfield;
725 startfield = yylp;
728 /*
729 * fee a list of fields
730 */
731 void
732 freefield(Field *f)
734 Field *tf;
736 while(f){
737 tf = f->next;
738 freenode(f->node);
739 free(f);
740 f = tf;
744 /*
745 * add some white space to a node
746 */
747 Node*
748 whiten(Node *p)
750 Node *tp;
752 for(tp = p; tp->next; tp = tp->next)
754 if(tp->white == 0)
755 tp->white = s_copy(" ");
756 return p;
759 void
760 yycleanup(void)
762 Field *f, *fnext;
763 Node *np, *next;
765 for(f = firstfield; f; f = fnext){
766 for(np = f->node; np; np = next){
767 if(np->s)
768 s_free(np->s);
769 if(np->white)
770 s_free(np->white);
771 next = np->next;
772 free(np);
774 fnext = f->next;
775 free(f);
777 firstfield = lastfield = 0;