op public repos

Blob

Date:: Sat Oct 29 16:26:44 2005 UTC
Message:: Thanks to John Cummings.
Actions:: History | Blame | Raw File
1 %{
2 #include "common.h"
3 #include "smtp.h"
4 #include <ctype.h>
5 
6 char	*yylp;		/* next character to be lex'd */
7 int	yydone;		/* tell yylex to give up */
8 char	*yybuffer;	/* first parsed character */
9 char	*yyend;		/* end of buffer to be parsed */
10 Node	*root;		/* not referenced by the code visible in this file */
11 Field	*firstfield;	/* head of the list of fields built by newfield()/missing() */
12 Field	*lastfield;	/* tail of that list */
13 Node	*usender;	/* route_addr taken from a unix "From " line */
14 Node	*usys;		/* word following "remote from" on a unix "From " line */
15 Node	*udate;		/* date taken from a unix "From " line */
16 char	*startfield, *endfield;	/* newfield() sets startfield to yylp and endfield to the old startfield */
17 int	originator;	/* set to 1 once an originator field has been parsed */
18 int	destination;	/* set to 1 once a destination field has been parsed */
19 int	date;		/* set to 1 once a Date or Resent-Date field has been parsed */
20 int	received;	/* number of Received fields parsed; zeroed by yyinit() */
21 int	messageid;	/* set to 1 once a MESSAGEID token has been reduced */
22 %}
23 
/*
 *  Tokens returned by yylex().  WORD is text that matches no name in
 *  key[]; every other token except BADTOKEN has an entry in key[].
 *  BADTOKEN is declared but is not used by the grammar or lexer
 *  visible in this file.
 */
24 %term WORD
25 %term DATE
26 %term RESENT_DATE
27 %term RETURN_PATH
28 %term FROM
29 %term SENDER
30 %term REPLY_TO
31 %term RESENT_FROM
32 %term RESENT_SENDER
33 %term RESENT_REPLY_TO
34 %term SUBJECT
35 %term TO
36 %term CC
37 %term BCC
38 %term RESENT_TO
39 %term RESENT_CC
40 %term RESENT_BCC
41 %term REMOTE
42 %term PRECEDENCE
43 %term MIMEVERSION
44 %term CONTENTTYPE
45 %term MESSAGEID
46 %term RECEIVED
47 %term MAILER
48 %term BADTOKEN
49 %start msg
50 %%
51 
/* a header: an optional unix "From " line, then the fields */
52 msg		: fields
53 		| unixfrom '\n' fields
54 		;
/* a lone newline ends the header; yydone makes yylex() return 0 from then on */
55 fields		: '\n'
56 			{ yydone = 1; }
57 		| field '\n'
58 		| field '\n' fields
59 		;
/* the flags record which classes of field have been seen */
60 field		: dates
61 			{ date = 1; }
62 		| originator
63 			{ originator = 1; }
64 		| destination
65 			{ destination = 1; }
66 		| subject
67 		| optional
68 		| ignored
69 		| received
70 		| precedence
/* error recovery: skip to the next newline and try the following field */
71 		| error '\n' field
72 		;
/* "From addr date remote from sys": keep addr, date and sys, free the keyword nodes */
73 unixfrom	: FROM route_addr unix_date_time REMOTE FROM word
74 			{ freenode($1); freenode($4); freenode($5);
75 			  usender = $2; udate = $3; usys = $6;
76 			}
77 		;
/* originator fields are the only ones passed to newfield() with source == 1 */
78 originator	: REPLY_TO ':' address_list
79 			{ newfield(link3($1, $2, $3), 1); }
80 		| RETURN_PATH ':' route_addr
81 			{ newfield(link3($1, $2, $3), 1); }
82 		| FROM ':' mailbox_list
83 			{ newfield(link3($1, $2, $3), 1); }
84 		| SENDER ':' mailbox
85 			{ newfield(link3($1, $2, $3), 1); }
86 		| RESENT_REPLY_TO ':' address_list
87 			{ newfield(link3($1, $2, $3), 1); }
88 		| RESENT_SENDER ':' mailbox
89 			{ newfield(link3($1, $2, $3), 1); }
90 		| RESENT_FROM ':' mailbox
91 			{ newfield(link3($1, $2, $3), 1); }
92 		;
93 dates 		: DATE ':' date_time
94 			{ newfield(link3($1, $2, $3), 0); }
95 		| RESENT_DATE ':' date_time
96 			{ newfield(link3($1, $2, $3), 0); }
97 		;
/* each destination field may be empty (name and colon only) */
98 destination	: TO ':'
99 			{ newfield(link2($1, $2), 0); }
100 		| TO ':' address_list
101 			{ newfield(link3($1, $2, $3), 0); }
102 		| RESENT_TO ':'
103 			{ newfield(link2($1, $2), 0); }
104 		| RESENT_TO ':' address_list
105 			{ newfield(link3($1, $2, $3), 0); }
106 		| CC ':'
107 			{ newfield(link2($1, $2), 0); }
108 		| CC ':' address_list
109 			{ newfield(link3($1, $2, $3), 0); }
110 		| RESENT_CC ':'
111 			{ newfield(link2($1, $2), 0); }
112 		| RESENT_CC ':' address_list
113 			{ newfield(link3($1, $2, $3), 0); }
114 		| BCC ':'
115 			{ newfield(link2($1, $2), 0); }
116 		| BCC ':' address_list
117 			{ newfield(link3($1, $2, $3), 0); }
118 		| RESENT_BCC ':' 
119 			{ newfield(link2($1, $2), 0); }
120 		| RESENT_BCC ':' address_list
121 			{ newfield(link3($1, $2, $3), 0); }
122 		;
123 subject		: SUBJECT ':' things
124 			{ newfield(link3($1, $2, $3), 0); }
125 		| SUBJECT ':'
126 			{ newfield(link2($1, $2), 0); }
127 		;
/* besides recording the field, count it in received */
128 received	: RECEIVED ':' things
129 			{ newfield(link3($1, $2, $3), 0); received++; }
130 		| RECEIVED ':'
131 			{ newfield(link2($1, $2), 0); received++; }
132 		;
133 precedence	: PRECEDENCE ':' things
134 			{ newfield(link3($1, $2, $3), 0); }
135 		| PRECEDENCE ':'
136 			{ newfield(link2($1, $2), 0); }
137 		;
/* recognized field names whose bodies are kept as unstructured text */
138 ignored		: ignoredhdr ':' things
139 			{ newfield(link3($1, $2, $3), 0); }
140 		| ignoredhdr ':'
141 			{ newfield(link2($1, $2), 0); }
142 		;
143 ignoredhdr	: MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
144 		;
/*
 *  any other field.  The name is checked after the fact with
 *  badfieldname(); on a bad name the nodes are freed and the action
 *  returns 1 (since actions run inside the parser function, this
 *  presumably ends the parse with status 1 -- confirm against the yacc in use).
 */
145 optional	: fieldwords ':' things
146 			{ /* hack to allow same lex for field names and the rest */
147 			 if(badfieldname($1)){
148 				freenode($1);
149 				freenode($2);
150 				freenode($3);
151 				return 1;
152 			 }
153 			 newfield(link3($1, $2, $3), 0);
154 			}
155 		| fieldwords ':'
156 			{ /* hack to allow same lex for field names and the rest */
157 			 if(badfieldname($1)){
158 				freenode($1);
159 				freenode($2);
160 				return 1;
161 			 }
162 			 newfield(link2($1, $2), 0);
163 			}
164 		;
/* address lists keep every token, including the commas, as linked nodes */
165 address_list	: address
166 		| address_list ',' address
167 			{ $$ = link3($1, $2, $3); }
168 		;
169 address		: mailbox
170 		| group
171 		;
172 group		: phrase ':' address_list ';'
173 			{ $$ = link2($1, link3($2, $3, $4)); }
174 		| phrase ':' ';'
175 			{ $$ = link3($1, $2, $3); }
176 		;
177 mailbox_list	: mailbox
178 		| mailbox_list ',' mailbox
179 			{ $$ = link3($1, $2, $3); }
180 		;
181 mailbox		: route_addr
182 		| phrase brak_addr
183 			{ $$ = link2($1, $2); }
184 		| brak_addr
185 		;
/* an empty "<>" is turned into the placeholder address made by nobody() */
186 brak_addr	: '<' route_addr '>'
187 			{ $$ = link3($1, $2, $3); }
188 		| '<' '>'
189 			{ $$ = nobody($2); freenode($1); }
190 		;
/* concat() folds the pieces of an address into one node; address() marks that node */
191 route_addr	: route ':' at_addr
192 			{ $$ = address(concat($1, concat($2, $3))); }
193 		| addr_spec
194 		;
195 route		: '@' domain
196 			{ $$ = concat($1, $2); }
197 		| route ',' '@' domain
198 			{ $$ = concat($1, concat($2, concat($3, $4))); }
199 		;
200 addr_spec	: local_part
201 			{ $$ = address($1); }
202 		| at_addr
203 		;
204 at_addr		: local_part '@' domain
205 			{ $$ = address(concat($1, concat($2, $3)));}
206 		| at_addr '@' domain
207 			{ $$ = address(concat($1, concat($2, $3)));}
208 		;
209 local_part	: word
210 		;
211 domain		: word
212 		;
213 phrase		: word
214 		| phrase word
215 			{ $$ = link2($1, $2); }
216 		;
/* unstructured field body: any run of words and punctuation */
217 things		: thing
218 		| things thing
219 			{ $$ = link2($1, $2); }
220 		;
221 thing		: word | '<' | '>' | '@' | ':' | ';' | ','
222 		;
223 date_time	: things
224 		;
/* the six parts are relinked in the order $1 $3 $2 $6 $4 $5 */
225 unix_date_time	: word word word unix_time word word
226 			{ $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
227 		;
228 unix_time	: word
229 		| unix_time ':' word
230 			{ $$ = link3($1, $2, $3); }
231 		;
/* outside the field-name position every keyword token is just a word */
232 word		: WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
233 		| REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
234 		| TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
235 		| PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
236 		;
/* name of an optional field: must start with a fieldword or WORD, not a keyword */
237 fieldwords	: fieldword
238 		| WORD
239 		| fieldwords fieldword
240 			{ $$ = link2($1, $2); }
241 		| fieldwords word
242 			{ $$ = link2($1, $2); }
243 		;
244 fieldword	: '<' | '>' | '@' | ';' | ','
245 		;
246 %%
247 
248 /*
249  *  Initialize the parsing.  Done once for each header field.
250  */
251 void
252 yyinit(char *p, int len)
253 {
254 	yybuffer = p;
255 	yylp = p;
256 	yyend = p + len;
257 	firstfield = lastfield = 0;
258 	received = 0;
259 }
260 
/*
 *  pairs the text of a header field name with the token that
 *  yylex() returns for it
 */
typedef struct Keyword {
	char	*rep;	/* field name as it appears in the header */
	int	val;	/* token value */
} Keyword;
269 
270 /* field names that we need to recognize */
271 Keyword key[] = {
272 	{ "date", DATE },
273 	{ "resent-date", RESENT_DATE },
274 	{ "return_path", RETURN_PATH },
275 	{ "from", FROM },
276 	{ "sender", SENDER },
277 	{ "reply-to", REPLY_TO },
278 	{ "resent-from", RESENT_FROM },
279 	{ "resent-sender", RESENT_SENDER },
280 	{ "resent-reply-to", RESENT_REPLY_TO },
281 	{ "to", TO },
282 	{ "cc", CC },
283 	{ "bcc", BCC },
284 	{ "resent-to", RESENT_TO },
285 	{ "resent-cc", RESENT_CC },
286 	{ "resent-bcc", RESENT_BCC },
287 	{ "remote", REMOTE },
288 	{ "subject", SUBJECT },
289 	{ "precedence", PRECEDENCE },
290 	{ "mime-version", MIMEVERSION },
291 	{ "content-type", CONTENTTYPE },
292 	{ "message-id", MESSAGEID },
293 	{ "received", RECEIVED },
294 	{ "mailer", MAILER },
295 	{ "who-the-hell-cares", WORD }
296 };
297 
298 /*
299  *  Lexical analysis for an rfc822 header field.  Continuation lines
300  *  are handled in yywhite() when skipping over white space.
301  *
302  */
303 int
304 yylex(void)
305 {
306 	String *t;
307 	int quoting;
308 	int escaping;
309 	char *start;
310 	Keyword *kp;
311 	int c, d;
312 
313 /*	print("lexing\n"); /**/
314 	if(yylp >= yyend)
315 		return 0;
316 	if(yydone)
317 		return 0;
318 
319 	quoting = escaping = 0;
320 	start = yylp;
321 	yylval = malloc(sizeof(Node));
322 	yylval->white = yylval->s = 0;
323 	yylval->next = 0;
324 	yylval->addr = 0;
325 	yylval->start = yylp;
326 	for(t = 0; yylp < yyend; yylp++){
327 		c = *yylp & 0xff;
328 
329 		/* dump nulls, they can't be in header */
330 		if(c == 0)
331 			continue;
332 
333 		if(escaping) {
334 			escaping = 0;
335 		} else if(quoting) {
336 			switch(c){
337 			case '\\':
338 				escaping = 1;
339 				break;
340 			case '\n':
341 				d = (*(yylp+1))&0xff;
342 				if(d != ' ' && d != '\t'){
343 					quoting = 0;
344 					yylp--;
345 					continue;
346 				}
347 				break;
348 			case '"':
349 				quoting = 0;
350 				break;
351 			}
352 		} else {
353 			switch(c){
354 			case '\\':
355 				escaping = 1;
356 				break;
357 			case '(':
358 			case ' ':
359 			case '\t':
360 			case '\r':
361 				goto out;
362 			case '\n':
363 				if(yylp == start){
364 					yylp++;
365 /*					print("lex(c %c)\n", c); /**/
366 					yylval->end = yylp;
367 					return yylval->c = c;
368 				}
369 				goto out;
370 			case '@':
371 			case '>':
372 			case '<':
373 			case ':':
374 			case ',':
375 			case ';':
376 				if(yylp == start){
377 					yylp++;
378 					yylval->white = yywhite();
379 /*					print("lex(c %c)\n", c); /**/
380 					yylval->end = yylp;
381 					return yylval->c = c;
382 				}
383 				goto out;
384 			case '"':
385 				quoting = 1;
386 				break;
387 			default:
388 				break;
389 			}
390 		}
391 		if(t == 0)
392 			t = s_new();
393 		s_putc(t, c);
394 	}
395 out:
396 	yylval->white = yywhite();
397 	if(t) {
398 		s_terminate(t);
399 	} else				/* message begins with white-space! */
400 		return yylval->c = '\n';
401 	yylval->s = t;
402 	for(kp = key; kp->val != WORD; kp++)
403 		if(cistrcmp(s_to_c(t), kp->rep)==0)
404 			break;
405 /*	print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/
406 	yylval->end = yylp;
407 	return yylval->c = kp->val;
408 }
409 
/*
 *  parser error hook: the message is deliberately not reported
 */
void
yyerror(char *x)
{
	/* presumably USED() only marks x as referenced -- its definition is not in this file */
	USED(x);
}
417 
418 /*
419  *  parse white space and comments
420  */
421 String *
422 yywhite(void)
423 {
424 	String *w;
425 	int clevel;
426 	int c;
427 	int escaping;
428 
429 	escaping = clevel = 0;
430 	for(w = 0; yylp < yyend; yylp++){
431 		c = *yylp & 0xff;
432 
433 		/* dump nulls, they can't be in header */
434 		if(c == 0)
435 			continue;
436 
437 		if(escaping){
438 			escaping = 0;
439 		} else if(clevel) {
440 			switch(c){
441 			case '\n':
442 				/*
443 				 *  look for multiline fields
444 				 */
445 				if(*(yylp+1)==' ' || *(yylp+1)=='\t')
446 					break;
447 				else
448 					goto out;
449 			case '\\':
450 				escaping = 1;
451 				break;
452 			case '(':
453 				clevel++;
454 				break;
455 			case ')':
456 				clevel--;
457 				break;
458 			}
459 		} else {
460 			switch(c){
461 			case '\\':
462 				escaping = 1;
463 				break;
464 			case '(':
465 				clevel++;
466 				break;
467 			case ' ':
468 			case '\t':
469 			case '\r':
470 				break;
471 			case '\n':
472 				/*
473 				 *  look for multiline fields
474 				 */
475 				if(*(yylp+1)==' ' || *(yylp+1)=='\t')
476 					break;
477 				else
478 					goto out;
479 			default:
480 				goto out;
481 			}
482 		}
483 		if(w == 0)
484 			w = s_new();
485 		s_putc(w, c);
486 	}
487 out:
488 	if(w)
489 		s_terminate(w);
490 	return w;
491 }
492 
493 /*
494  *  link two parsed entries together
495  */
496 Node*
497 link2(Node *p1, Node *p2)
498 {
499 	Node *p;
500 
501 	for(p = p1; p->next; p = p->next)
502 		;
503 	p->next = p2;
504 	return p1;
505 }
506 
507 /*
508  *  link three parsed entries together
509  */
510 Node*
511 link3(Node *p1, Node *p2, Node *p3)
512 {
513 	Node *p;
514 
515 	for(p = p2; p->next; p = p->next)
516 		;
517 	p->next = p3;
518 
519 	for(p = p1; p->next; p = p->next)
520 		;
521 	p->next = p2;
522 
523 	return p1;
524 }
525 
526 /*
527  *  make a:b, move all white space after both
528  */
529 Node*
530 colon(Node *p1, Node *p2)
531 {
532 	if(p1->white){
533 		if(p2->white)
534 			s_append(p1->white, s_to_c(p2->white));
535 	} else {
536 		p1->white = p2->white;
537 		p2->white = 0;
538 	}
539 
540 	s_append(p1->s, ":");
541 	if(p2->s)
542 		s_append(p1->s, s_to_c(p2->s));
543 
544 	if(p1->end < p2->end)
545 		p1->end = p2->end;
546 	freenode(p2);
547 	return p1;
548 }
549 
550 /*
551  *  concatenate two fields, move all white space after both
552  */
553 Node*
554 concat(Node *p1, Node *p2)
555 {
556 	char buf[2];
557 
558 	if(p1->white){
559 		if(p2->white)
560 			s_append(p1->white, s_to_c(p2->white));
561 	} else {
562 		p1->white = p2->white;
563 		p2->white = 0;
564 	}
565 
566 	if(p1->s == nil){
567 		buf[0] = p1->c;
568 		buf[1] = 0;
569 		p1->s = s_new();
570 		s_append(p1->s, buf);
571 	}
572 
573 	if(p2->s)
574 		s_append(p1->s, s_to_c(p2->s));
575 	else {
576 		buf[0] = p2->c;
577 		buf[1] = 0;
578 		s_append(p1->s, buf);
579 	}
580 
581 	if(p1->end < p2->end)
582 		p1->end = p2->end;
583 	freenode(p2);
584 	return p1;
585 }
586 
587 /*
588  *  look for disallowed chars in the field name
589  */
590 int
591 badfieldname(Node *p)
592 {
593 	for(; p; p = p->next){
594 		/* field name can't contain white space */
595 		if(p->white && p->next)
596 			return 1;
597 	}
598 	return 0;
599 }
600 
601 /*
602  *  mark as an address
603  */
604 Node *
605 address(Node *p)
606 {
607 	p->addr = 1;
608 	return p;
609 }
610 
/*
 *  case independent string compare
 *
 *  Returns 0 when s1 and s2 are equal ignoring case.  Otherwise the
 *  result is non-zero: -1 at the first differing character, or the
 *  next character of s2 when s1 is a proper prefix of it.  The result
 *  is not an ordering.
 */
int
cistrcmp(char *s1, char *s2)
{
	int c1, c2;

	for(; *s1; s1++, s2++){
		/*
		 *  the ctype functions are only defined for unsigned char
		 *  values and EOF, so bytes with the top bit set must not
		 *  be passed as (possibly negative) plain char
		 */
		c1 = tolower((unsigned char)*s1);
		c2 = tolower((unsigned char)*s2);
		if(c1 != c2)
			return -1;
	}
	return *s2;
}
627 
628 /*
629  *  free a node
630  */
631 void
632 freenode(Node *p)
633 {
634 	Node *tp;
635 
636 	while(p){
637 		tp = p->next;
638 		if(p->s)
639 			s_free(p->s);
640 		if(p->white)
641 			s_free(p->white);
642 		free(p);
643 		p = tp;
644 	}
645 }
646 
647 
648 /*
649  *  an anonymous user
650  */
651 Node*
652 nobody(Node *p)
653 {
654 	if(p->s)
655 		s_free(p->s);
656 	p->s = s_copy("pOsTmAsTeR");
657 	p->addr = 1;
658 	return p;
659 }
660 
661 /*
662  *  add anything that was dropped because of a parse error
663  */
664 void
665 missing(Node *p)
666 {
667 	Node *np;
668 	char *start, *end;
669 	Field *f;
670 	String *s;
671 
672 	start = yybuffer;
673 	if(lastfield != nil){
674 		for(np = lastfield->node; np; np = np->next)
675 			start = np->end+1;
676 	}
677 
678 	end = p->start-1;
679 
680 	if(end <= start)
681 		return;
682 
683 	if(strncmp(start, "From ", 5) == 0)
684 		return;
685 
686 	np = malloc(sizeof(Node));
687 	np->start = start;
688 	np->end = end;
689 	np->white = nil;
690 	s = s_copy("BadHeader: ");
691 	np->s = s_nappend(s, start, end-start);
692 	np->next = nil;
693 
694 	f = malloc(sizeof(Field));
695 	f->next = 0;
696 	f->node = np;
697 	f->source = 0;
698 	if(firstfield)
699 		lastfield->next = f;
700 	else
701 		firstfield = f;
702 	lastfield = f;
703 }
704 
705 /*
706  *  create a new field
707  */
708 void
709 newfield(Node *p, int source)
710 {
711 	Field *f;
712 
713 	missing(p);
714 
715 	f = malloc(sizeof(Field));
716 	f->next = 0;
717 	f->node = p;
718 	f->source = source;
719 	if(firstfield)
720 		lastfield->next = f;
721 	else
722 		firstfield = f;
723 	lastfield = f;
724 	endfield = startfield;
725 	startfield = yylp;
726 }
727 
728 /*
729  *  fee a list of fields
730  */
731 void
732 freefield(Field *f)
733 {
734 	Field *tf;
735 
736 	while(f){
737 		tf = f->next;
738 		freenode(f->node);
739 		free(f);
740 		f = tf;
741 	}
742 }
743 
744 /*
745  *  add some white space to a node
746  */
747 Node*
748 whiten(Node *p)
749 {
750 	Node *tp;
751 
752 	for(tp = p; tp->next; tp = tp->next)
753 		;
754 	if(tp->white == 0)
755 		tp->white = s_copy(" ");
756 	return p;
757 }
758 
759 void
760 yycleanup(void)
761 {
762 	Field *f, *fnext;
763 	Node *np, *next;
764 
765 	for(f = firstfield; f; f = fnext){
766 		for(np = f->node; np; np = next){
767 			if(np->s)
768 				s_free(np->s);
769 			if(np->white)
770 				s_free(np->white);
771 			next = np->next;
772 			free(np);
773 		}
774 		fnext = f->next;
775 		free(f);
776 	}
777 	firstfield = lastfield = 0;
778 }