6 char *yylp; /* next character to be lex'd */
7 int yydone; /* tell yylex to give up */
8 char *yybuffer; /* first parsed character */
9 char *yyend; /* end of buffer to be parsed */
16 char *startfield, *endfield;
53 | unixfrom '\n' fields
73 unixfrom : FROM route_addr unix_date_time REMOTE FROM word
74 { freenode($1); freenode($4); freenode($5);
75 usender = $2; udate = $3; usys = $6;
78 originator : REPLY_TO ':' address_list
79 { newfield(link3($1, $2, $3), 1); }
80 | RETURN_PATH ':' route_addr
81 { newfield(link3($1, $2, $3), 1); }
82 | FROM ':' mailbox_list
83 { newfield(link3($1, $2, $3), 1); }
85 { newfield(link3($1, $2, $3), 1); }
86 | RESENT_REPLY_TO ':' address_list
87 { newfield(link3($1, $2, $3), 1); }
88 | RESENT_SENDER ':' mailbox
89 { newfield(link3($1, $2, $3), 1); }
90 | RESENT_FROM ':' mailbox
91 { newfield(link3($1, $2, $3), 1); }
93 dates : DATE ':' date_time
94 { newfield(link3($1, $2, $3), 0); }
95 | RESENT_DATE ':' date_time
96 { newfield(link3($1, $2, $3), 0); }
99 { newfield(link2($1, $2), 0); }
100 | TO ':' address_list
101 { newfield(link3($1, $2, $3), 0); }
103 { newfield(link2($1, $2), 0); }
104 | RESENT_TO ':' address_list
105 { newfield(link3($1, $2, $3), 0); }
107 { newfield(link2($1, $2), 0); }
108 | CC ':' address_list
109 { newfield(link3($1, $2, $3), 0); }
111 { newfield(link2($1, $2), 0); }
112 | RESENT_CC ':' address_list
113 { newfield(link3($1, $2, $3), 0); }
115 { newfield(link2($1, $2), 0); }
116 | BCC ':' address_list
117 { newfield(link3($1, $2, $3), 0); }
119 { newfield(link2($1, $2), 0); }
120 | RESENT_BCC ':' address_list
121 { newfield(link3($1, $2, $3), 0); }
123 subject : SUBJECT ':' things
124 { newfield(link3($1, $2, $3), 0); }
126 { newfield(link2($1, $2), 0); }
128 received : RECEIVED ':' things
129 { newfield(link3($1, $2, $3), 0); received++; }
131 { newfield(link2($1, $2), 0); received++; }
133 precedence : PRECEDENCE ':' things
134 { newfield(link3($1, $2, $3), 0); }
136 { newfield(link2($1, $2), 0); }
138 ignored : ignoredhdr ':' things
139 { newfield(link3($1, $2, $3), 0); }
141 { newfield(link2($1, $2), 0); }
143 ignoredhdr : MIMEVERSION | CONTENTTYPE | MESSAGEID { messageid = 1; } | MAILER
145 optional : fieldwords ':' things
146 { /* hack to allow same lex for field names and the rest */
147 if(badfieldname($1)){
153 newfield(link3($1, $2, $3), 0);
156 { /* hack to allow same lex for field names and the rest */
157 if(badfieldname($1)){
162 newfield(link2($1, $2), 0);
165 address_list : address
166 | address_list ',' address
167 { $$ = link3($1, $2, $3); }
172 group : phrase ':' address_list ';'
173 { $$ = link2($1, link3($2, $3, $4)); }
175 { $$ = link3($1, $2, $3); }
177 mailbox_list : mailbox
178 | mailbox_list ',' mailbox
179 { $$ = link3($1, $2, $3); }
183 { $$ = link2($1, $2); }
186 brak_addr : '<' route_addr '>'
187 { $$ = link3($1, $2, $3); }
189 { $$ = nobody($2); freenode($1); }
191 route_addr : route ':' at_addr
192 { $$ = address(concat($1, concat($2, $3))); }
196 { $$ = concat($1, $2); }
197 | route ',' '@' domain
198 { $$ = concat($1, concat($2, concat($3, $4))); }
200 addr_spec : local_part
201 { $$ = address($1); }
204 at_addr : local_part '@' domain
205 { $$ = address(concat($1, concat($2, $3)));}
207 { $$ = address(concat($1, concat($2, $3)));}
215 { $$ = link2($1, $2); }
219 { $$ = link2($1, $2); }
221 thing : word | '<' | '>' | '@' | ':' | ';' | ','
225 unix_date_time : word word word unix_time word word
226 { $$ = link3($1, $3, link3($2, $6, link2($4, $5))); }
230 { $$ = link3($1, $2, $3); }
232 word : WORD | DATE | RESENT_DATE | RETURN_PATH | FROM | SENDER
233 | REPLY_TO | RESENT_FROM | RESENT_SENDER | RESENT_REPLY_TO
234 | TO | CC | BCC | RESENT_TO | RESENT_CC | RESENT_BCC | REMOTE | SUBJECT
235 | PRECEDENCE | MIMEVERSION | CONTENTTYPE | MESSAGEID | RECEIVED | MAILER
237 fieldwords : fieldword
239 | fieldwords fieldword
240 { $$ = link2($1, $2); }
242 { $$ = link2($1, $2); }
244 fieldword : '<' | '>' | '@' | ';' | ','
249 * Initialize the parsing. Done once for each header field.
252 yyinit(char *p, int len)
257 firstfield = lastfield = 0;
262 * keywords identifying header fields we care about
264 typedef struct Keyword Keyword;
270 /* field names that we need to recognize */
273 { "resent-date", RESENT_DATE },
274 { "return_path", RETURN_PATH },
276 { "sender", SENDER },
277 { "reply-to", REPLY_TO },
278 { "resent-from", RESENT_FROM },
279 { "resent-sender", RESENT_SENDER },
280 { "resent-reply-to", RESENT_REPLY_TO },
284 { "resent-to", RESENT_TO },
285 { "resent-cc", RESENT_CC },
286 { "resent-bcc", RESENT_BCC },
287 { "remote", REMOTE },
288 { "subject", SUBJECT },
289 { "precedence", PRECEDENCE },
290 { "mime-version", MIMEVERSION },
291 { "content-type", CONTENTTYPE },
292 { "message-id", MESSAGEID },
293 { "received", RECEIVED },
294 { "mailer", MAILER },
295 { "who-the-hell-cares", WORD }
299 * Lexical analysis for an rfc822 header field. Continuation lines
300 * are handled in yywhite() when skipping over white space.
313 /* print("lexing\n"); /**/
319 quoting = escaping = 0;
321 yylval = malloc(sizeof(Node));
322 yylval->white = yylval->s = 0;
325 yylval->start = yylp;
326 for(t = 0; yylp < yyend; yylp++){
329 /* dump nulls, they can't be in header */
341 d = (*(yylp+1))&0xff;
342 if(d != ' ' && d != '\t'){
365 /* print("lex(c %c)\n", c); /**/
367 return yylval->c = c;
378 yylval->white = yywhite();
379 /* print("lex(c %c)\n", c); /**/
381 return yylval->c = c;
396 yylval->white = yywhite();
399 } else /* message begins with white-space! */
400 return yylval->c = '\n';
402 for(kp = key; kp->val != WORD; kp++)
403 if(cistrcmp(s_to_c(t), kp->rep)==0)
405 /* print("lex(%d) %s\n", kp->val-WORD, s_to_c(t)); /**/
407 return yylval->c = kp->val;
415 /*fprint(2, "parse err: %s\n", x);/**/
419 * parse white space and comments
429 escaping = clevel = 0;
430 for(w = 0; yylp < yyend; yylp++){
433 /* dump nulls, they can't be in header */
443 * look for multiline fields
445 if(*(yylp+1)==' ' || *(yylp+1)=='\t')
473 * look for multiline fields
475 if(*(yylp+1)==' ' || *(yylp+1)=='\t')
494 * link two parsed entries together
497 link2(Node *p1, Node *p2)
501 for(p = p1; p->next; p = p->next)
508 * link three parsed entries together
511 link3(Node *p1, Node *p2, Node *p3)
515 for(p = p2; p->next; p = p->next)
519 for(p = p1; p->next; p = p->next)
527 * make a:b, move all white space after both
530 colon(Node *p1, Node *p2)
534 s_append(p1->white, s_to_c(p2->white));
536 p1->white = p2->white;
540 s_append(p1->s, ":");
542 s_append(p1->s, s_to_c(p2->s));
544 if(p1->end < p2->end)
551 * concatenate two fields, move all white space after both
554 concat(Node *p1, Node *p2)
560 s_append(p1->white, s_to_c(p2->white));
562 p1->white = p2->white;
570 s_append(p1->s, buf);
574 s_append(p1->s, s_to_c(p2->s));
578 s_append(p1->s, buf);
581 if(p1->end < p2->end)
588 * look for disallowed chars in the field name
591 badfieldname(Node *p)
593 for(; p; p = p->next){
594 /* field name can't contain white space */
595 if(p->white && p->next)
612 * case independent string compare
615 cistrcmp(char *s1, char *s2)
619 for(; *s1; s1++, s2++){
620 c1 = isupper(*s1) ? tolower(*s1) : *s1;
621 c2 = isupper(*s2) ? tolower(*s2) : *s2;
656 p->s = s_copy("pOsTmAsTeR");
662 * add anything that was dropped because of a parse error
673 if(lastfield != nil){
674 for(np = lastfield->node; np; np = np->next)
683 if(strncmp(start, "From ", 5) == 0)
686 np = malloc(sizeof(Node));
690 s = s_copy("BadHeader: ");
691 np->s = s_nappend(s, start, end-start);
694 f = malloc(sizeof(Field));
709 newfield(Node *p, int source)
715 f = malloc(sizeof(Field));
724 endfield = startfield;
729 * free a list of fields
745 * add some white space to a node
752 for(tp = p; tp->next; tp = tp->next)
755 tp->white = s_copy(" ");
765 for(f = firstfield; f; f = fnext){
766 for(np = f->node; np; np = next){
777 firstfield = lastfield = 0;