Blame


1 5cdb1798 2005-10-29 devnull #include <u.h>
2 5cdb1798 2005-10-29 devnull #include <libc.h>
3 5cdb1798 2005-10-29 devnull #include <bio.h>
4 5cdb1798 2005-10-29 devnull #include <regexp.h>
5 5cdb1798 2005-10-29 devnull #include "spam.h"
6 5cdb1798 2005-10-29 devnull
/*
 * Sizes used while reading a message and scanning its header.
 */
enum {
	Quanta	= 8192,		/* bytes requested per Bread in readmsg's header loop */
	Minbody	= 6000,		/* readmsg tries to have this many body bytes in the buffer */
	HdrMax	= 15		/* size of the keyword buffer in endofhdr */
};
12 5cdb1798 2005-10-29 devnull
/*
 * A literal string paired with its length; tables of these are
 * terminated by an entry whose string is 0 (see isword).
 */
typedef struct word Word;
struct word {
	char	*string;	/* text to compare against */
	int	n;		/* number of characters of string to compare */
};

/*
 * An action name paired with its action code; the table is
 * terminated by an entry whose string is 0 (see findkey).
 */
typedef struct keyword Keyword;
struct keyword {
	char	*string;	/* action name */
	int	value;		/* action code returned by findkey */
};
25 5cdb1798 2005-10-29 devnull
26 5cdb1798 2005-10-29 devnull Word htmlcmds[] =
27 5cdb1798 2005-10-29 devnull {
28 5cdb1798 2005-10-29 devnull "html", 4,
29 5cdb1798 2005-10-29 devnull "!doctype html", 13,
30 5cdb1798 2005-10-29 devnull 0,
31 5cdb1798 2005-10-29 devnull
32 5cdb1798 2005-10-29 devnull };
33 5cdb1798 2005-10-29 devnull
34 5cdb1798 2005-10-29 devnull Word hrefs[] =
35 5cdb1798 2005-10-29 devnull {
36 5cdb1798 2005-10-29 devnull "a href=", 7,
37 5cdb1798 2005-10-29 devnull "a title=", 8,
38 5cdb1798 2005-10-29 devnull "a target=", 9,
39 5cdb1798 2005-10-29 devnull "base href=", 10,
40 5cdb1798 2005-10-29 devnull "img src=", 8,
41 5cdb1798 2005-10-29 devnull "img border=", 11,
42 5cdb1798 2005-10-29 devnull "form action=", 12,
43 5cdb1798 2005-10-29 devnull "!--", 3,
44 5cdb1798 2005-10-29 devnull 0,
45 5cdb1798 2005-10-29 devnull
46 5cdb1798 2005-10-29 devnull };
47 5cdb1798 2005-10-29 devnull
48 5cdb1798 2005-10-29 devnull /*
49 5cdb1798 2005-10-29 devnull * RFC822 header keywords to look for for fractured header.
50 5cdb1798 2005-10-29 devnull * all lengths must be less than HdrMax defined above.
51 5cdb1798 2005-10-29 devnull */
52 5cdb1798 2005-10-29 devnull Word hdrwords[] =
53 5cdb1798 2005-10-29 devnull {
54 5cdb1798 2005-10-29 devnull "cc:", 3,
55 5cdb1798 2005-10-29 devnull "bcc:", 4,
56 5cdb1798 2005-10-29 devnull "to:", 3,
57 5cdb1798 2005-10-29 devnull 0, 0,
58 5cdb1798 2005-10-29 devnull
59 5cdb1798 2005-10-29 devnull };
60 5cdb1798 2005-10-29 devnull
61 5cdb1798 2005-10-29 devnull Keyword keywords[] =
62 5cdb1798 2005-10-29 devnull {
63 5cdb1798 2005-10-29 devnull "header", HoldHeader,
64 5cdb1798 2005-10-29 devnull "line", SaveLine,
65 5cdb1798 2005-10-29 devnull "hold", Hold,
66 5cdb1798 2005-10-29 devnull "dump", Dump,
67 5cdb1798 2005-10-29 devnull "loff", Lineoff,
68 cbeb0b26 2006-04-01 devnull 0, Nactions
69 5cdb1798 2005-10-29 devnull };
70 5cdb1798 2005-10-29 devnull
71 5cdb1798 2005-10-29 devnull Patterns patterns[] = {
72 5cdb1798 2005-10-29 devnull [Dump] { "DUMP:", 0, 0 },
73 5cdb1798 2005-10-29 devnull [HoldHeader] { "HEADER:", 0, 0 },
74 5cdb1798 2005-10-29 devnull [Hold] { "HOLD:", 0, 0 },
75 5cdb1798 2005-10-29 devnull [SaveLine] { "LINE:", 0, 0 },
76 5cdb1798 2005-10-29 devnull [Lineoff] { "LINEOFF:", 0, 0 },
77 cbeb0b26 2006-04-01 devnull [Nactions] { 0, 0, 0 }
78 5cdb1798 2005-10-29 devnull };
79 5cdb1798 2005-10-29 devnull
80 5cdb1798 2005-10-29 devnull static char* endofhdr(char*, char*);
81 5cdb1798 2005-10-29 devnull static int escape(char**);
82 5cdb1798 2005-10-29 devnull static int extract(char*);
83 5cdb1798 2005-10-29 devnull static int findkey(char*);
84 5cdb1798 2005-10-29 devnull static int hash(int);
85 5cdb1798 2005-10-29 devnull static int isword(Word*, char*, int);
86 5cdb1798 2005-10-29 devnull static void parsealt(Biobuf*, char*, Spat**);
87 5cdb1798 2005-10-29 devnull
88 5cdb1798 2005-10-29 devnull /*
89 5cdb1798 2005-10-29 devnull * The canonicalizer: convert input to canonical representation
90 5cdb1798 2005-10-29 devnull */
91 5cdb1798 2005-10-29 devnull char*
92 5cdb1798 2005-10-29 devnull readmsg(Biobuf *bp, int *hsize, int *bufsize)
93 5cdb1798 2005-10-29 devnull {
94 5cdb1798 2005-10-29 devnull char *p, *buf;
95 5cdb1798 2005-10-29 devnull int n, offset, eoh, bsize, delta;
96 5cdb1798 2005-10-29 devnull
97 5cdb1798 2005-10-29 devnull buf = 0;
98 5cdb1798 2005-10-29 devnull offset = 0;
99 5cdb1798 2005-10-29 devnull if(bufsize)
100 5cdb1798 2005-10-29 devnull *bufsize = 0;
101 5cdb1798 2005-10-29 devnull if(hsize)
102 5cdb1798 2005-10-29 devnull *hsize = 0;
103 5cdb1798 2005-10-29 devnull for(;;) {
104 5cdb1798 2005-10-29 devnull buf = Realloc(buf, offset+Quanta+1);
105 5cdb1798 2005-10-29 devnull n = Bread(bp, buf+offset, Quanta);
106 5cdb1798 2005-10-29 devnull if(n < 0){
107 5cdb1798 2005-10-29 devnull free(buf);
108 5cdb1798 2005-10-29 devnull return 0;
109 5cdb1798 2005-10-29 devnull }
110 5cdb1798 2005-10-29 devnull p = buf+offset; /* start of this chunk */
111 5cdb1798 2005-10-29 devnull offset += n; /* end of this chunk */
112 5cdb1798 2005-10-29 devnull buf[offset] = 0;
113 5cdb1798 2005-10-29 devnull if(n == 0){
114 5cdb1798 2005-10-29 devnull if(offset == 0)
115 5cdb1798 2005-10-29 devnull return 0;
116 5cdb1798 2005-10-29 devnull break;
117 5cdb1798 2005-10-29 devnull }
118 5cdb1798 2005-10-29 devnull
119 5cdb1798 2005-10-29 devnull if(hsize == 0) /* don't process header */
120 5cdb1798 2005-10-29 devnull break;
121 5cdb1798 2005-10-29 devnull if(p != buf && p[-1] == '\n') /* check for EOH across buffer split */
122 5cdb1798 2005-10-29 devnull p--;
123 5cdb1798 2005-10-29 devnull p = endofhdr(p, buf+offset);
124 5cdb1798 2005-10-29 devnull if(p)
125 5cdb1798 2005-10-29 devnull break;
126 5cdb1798 2005-10-29 devnull if(offset >= Maxread) /* gargantuan header - just punt*/
127 5cdb1798 2005-10-29 devnull {
128 5cdb1798 2005-10-29 devnull if(hsize)
129 5cdb1798 2005-10-29 devnull *hsize = offset;
130 5cdb1798 2005-10-29 devnull if(bufsize)
131 5cdb1798 2005-10-29 devnull *bufsize = offset;
132 5cdb1798 2005-10-29 devnull return buf;
133 5cdb1798 2005-10-29 devnull }
134 5cdb1798 2005-10-29 devnull }
135 5cdb1798 2005-10-29 devnull eoh = p-buf; /* End of header */
136 5cdb1798 2005-10-29 devnull bsize = offset - eoh; /* amount of body already read */
137 5cdb1798 2005-10-29 devnull
138 5cdb1798 2005-10-29 devnull /* Read at least Minbody bytes of the body */
139 5cdb1798 2005-10-29 devnull if (bsize < Minbody){
140 5cdb1798 2005-10-29 devnull delta = Minbody-bsize;
141 5cdb1798 2005-10-29 devnull buf = Realloc(buf, offset+delta+1);
142 5cdb1798 2005-10-29 devnull n = Bread(bp, buf+offset, delta);
143 5cdb1798 2005-10-29 devnull if(n > 0) {
144 5cdb1798 2005-10-29 devnull offset += n;
145 5cdb1798 2005-10-29 devnull buf[offset] = 0;
146 5cdb1798 2005-10-29 devnull }
147 5cdb1798 2005-10-29 devnull }
148 5cdb1798 2005-10-29 devnull if(hsize)
149 5cdb1798 2005-10-29 devnull *hsize = eoh;
150 5cdb1798 2005-10-29 devnull if(bufsize)
151 5cdb1798 2005-10-29 devnull *bufsize = offset;
152 5cdb1798 2005-10-29 devnull return buf;
153 5cdb1798 2005-10-29 devnull }
154 5cdb1798 2005-10-29 devnull
155 5cdb1798 2005-10-29 devnull static int
156 5cdb1798 2005-10-29 devnull isword(Word *wp, char *text, int len)
157 5cdb1798 2005-10-29 devnull {
158 5cdb1798 2005-10-29 devnull for(;wp->string; wp++)
159 5cdb1798 2005-10-29 devnull if(len >= wp->n && strncmp(text, wp->string, wp->n) == 0)
160 5cdb1798 2005-10-29 devnull return 1;
161 5cdb1798 2005-10-29 devnull return 0;
162 5cdb1798 2005-10-29 devnull }
163 5cdb1798 2005-10-29 devnull
164 5cdb1798 2005-10-29 devnull static char*
165 5cdb1798 2005-10-29 devnull endofhdr(char *raw, char *end)
166 5cdb1798 2005-10-29 devnull {
167 5cdb1798 2005-10-29 devnull int i;
168 5cdb1798 2005-10-29 devnull char *p, *q;
169 5cdb1798 2005-10-29 devnull char buf[HdrMax];
170 5cdb1798 2005-10-29 devnull
171 5cdb1798 2005-10-29 devnull /*
172 5cdb1798 2005-10-29 devnull * can't use strchr to search for newlines because
173 5cdb1798 2005-10-29 devnull * there may be embedded NULL's.
174 5cdb1798 2005-10-29 devnull */
175 5cdb1798 2005-10-29 devnull for(p = raw; p < end; p++){
176 5cdb1798 2005-10-29 devnull if(*p != '\n' || p[1] != '\n')
177 5cdb1798 2005-10-29 devnull continue;
178 5cdb1798 2005-10-29 devnull p++;
179 5cdb1798 2005-10-29 devnull for(i = 0, q = p+1; i < sizeof(buf) && *q; q++){
180 5cdb1798 2005-10-29 devnull buf[i++] = tolower(*q);
181 5cdb1798 2005-10-29 devnull if(*q == ':' || *q == '\n')
182 5cdb1798 2005-10-29 devnull break;
183 5cdb1798 2005-10-29 devnull }
184 5cdb1798 2005-10-29 devnull if(!isword(hdrwords, buf, i))
185 5cdb1798 2005-10-29 devnull return p+1;
186 5cdb1798 2005-10-29 devnull }
187 5cdb1798 2005-10-29 devnull return 0;
188 5cdb1798 2005-10-29 devnull }
189 5cdb1798 2005-10-29 devnull
190 5cdb1798 2005-10-29 devnull static int
191 5cdb1798 2005-10-29 devnull htmlmatch(Word *wp, char *text, char *end, int *n)
192 5cdb1798 2005-10-29 devnull {
193 5cdb1798 2005-10-29 devnull char *cp;
194 5cdb1798 2005-10-29 devnull int i, c, lastc;
195 5cdb1798 2005-10-29 devnull char buf[MaxHtml];
196 5cdb1798 2005-10-29 devnull
197 5cdb1798 2005-10-29 devnull /*
198 5cdb1798 2005-10-29 devnull * extract a string up to '>'
199 5cdb1798 2005-10-29 devnull */
200 5cdb1798 2005-10-29 devnull
201 5cdb1798 2005-10-29 devnull i = lastc = 0;
202 5cdb1798 2005-10-29 devnull cp = text;
203 5cdb1798 2005-10-29 devnull while (cp < end && i < sizeof(buf)-1){
204 5cdb1798 2005-10-29 devnull c = *cp++;
205 5cdb1798 2005-10-29 devnull if(c == '=')
206 5cdb1798 2005-10-29 devnull c = escape(&cp);
207 5cdb1798 2005-10-29 devnull switch(c){
208 5cdb1798 2005-10-29 devnull case 0:
209 5cdb1798 2005-10-29 devnull case '\r':
210 5cdb1798 2005-10-29 devnull continue;
211 5cdb1798 2005-10-29 devnull case '>':
212 5cdb1798 2005-10-29 devnull goto out;
213 5cdb1798 2005-10-29 devnull case '\n':
214 5cdb1798 2005-10-29 devnull case ' ':
215 5cdb1798 2005-10-29 devnull case '\t':
216 5cdb1798 2005-10-29 devnull if(lastc == ' ')
217 5cdb1798 2005-10-29 devnull continue;
218 5cdb1798 2005-10-29 devnull c = ' ';
219 5cdb1798 2005-10-29 devnull break;
220 5cdb1798 2005-10-29 devnull default:
221 5cdb1798 2005-10-29 devnull c = tolower(c);
222 5cdb1798 2005-10-29 devnull break;
223 5cdb1798 2005-10-29 devnull }
224 5cdb1798 2005-10-29 devnull buf[i++] = lastc = c;
225 5cdb1798 2005-10-29 devnull }
226 5cdb1798 2005-10-29 devnull out:
227 5cdb1798 2005-10-29 devnull buf[i] = 0;
228 5cdb1798 2005-10-29 devnull if(n)
229 5cdb1798 2005-10-29 devnull *n = cp-text;
230 5cdb1798 2005-10-29 devnull return isword(wp, buf, i);
231 5cdb1798 2005-10-29 devnull }
232 5cdb1798 2005-10-29 devnull
/*
 * Decode the sequence that follows a '='.  On entry *msg points at
 * the character after the '='; on return it has been advanced past
 * whatever was consumed.
 *
 *	"=\n"		skip the newline; return (and consume) the next character
 *	"=2E", "=2F", "=20"	return '.', '/', ' ' (hex letter in either case)
 *	"=3D"		consume the two digits and return '='
 *	anything else	consume nothing and return '='
 */
static int
escape(char **msg)
{
	char *s;
	int ch;

	s = *msg;
	switch(*s){
	case '\n':
		ch = s[1];
		s += 2;
		break;
	case '2':
		switch(tolower(s[1])){
		case 'e':
			ch = '.';
			s += 2;
			break;
		case 'f':
			ch = '/';
			s += 2;
			break;
		case '0':
			ch = ' ';
			s += 2;
			break;
		default:
			ch = '=';
			break;
		}
		break;
	case '3':
		if(tolower(s[1]) == 'd')
			s += 2;
		ch = '=';
		break;
	default:
		ch = '=';
		break;
	}
	*msg = s;
	return ch;
}
268 5cdb1798 2005-10-29 devnull
269 5cdb1798 2005-10-29 devnull static int
270 5cdb1798 2005-10-29 devnull htmlchk(char **msg, char *end)
271 5cdb1798 2005-10-29 devnull {
272 5cdb1798 2005-10-29 devnull int n;
273 5cdb1798 2005-10-29 devnull char *p;
274 5cdb1798 2005-10-29 devnull
275 5cdb1798 2005-10-29 devnull static int ishtml;
276 5cdb1798 2005-10-29 devnull
277 5cdb1798 2005-10-29 devnull p = *msg;
278 5cdb1798 2005-10-29 devnull if(ishtml == 0){
279 5cdb1798 2005-10-29 devnull ishtml = htmlmatch(htmlcmds, p, end, &n);
280 5cdb1798 2005-10-29 devnull
281 5cdb1798 2005-10-29 devnull /* If not an HTML keyword, check if it's
282 5cdb1798 2005-10-29 devnull * an HTML comment (<!comment>). if so,
283 5cdb1798 2005-10-29 devnull * skip over it; otherwise copy it in.
284 5cdb1798 2005-10-29 devnull */
285 5cdb1798 2005-10-29 devnull if(ishtml == 0 && *p != '!') /* not comment */
286 5cdb1798 2005-10-29 devnull return '<'; /* copy it */
287 5cdb1798 2005-10-29 devnull
288 5cdb1798 2005-10-29 devnull } else if(htmlmatch(hrefs, p, end, &n)) /* if special HTML string */
289 5cdb1798 2005-10-29 devnull return '<'; /* copy it */
290 5cdb1798 2005-10-29 devnull
291 5cdb1798 2005-10-29 devnull /*
292 5cdb1798 2005-10-29 devnull * this is an uninteresting HTML command; skip over it.
293 5cdb1798 2005-10-29 devnull */
294 5cdb1798 2005-10-29 devnull p += n;
295 5cdb1798 2005-10-29 devnull *msg = p+1;
296 5cdb1798 2005-10-29 devnull return *p;
297 5cdb1798 2005-10-29 devnull }
298 5cdb1798 2005-10-29 devnull
299 5cdb1798 2005-10-29 devnull /*
300 5cdb1798 2005-10-29 devnull * decode a base 64 encode body
301 5cdb1798 2005-10-29 devnull */
302 5cdb1798 2005-10-29 devnull void
303 5cdb1798 2005-10-29 devnull conv64(char *msg, char *end, char *buf, int bufsize)
304 5cdb1798 2005-10-29 devnull {
305 5cdb1798 2005-10-29 devnull int len, i;
306 5cdb1798 2005-10-29 devnull char *cp;
307 5cdb1798 2005-10-29 devnull
308 5cdb1798 2005-10-29 devnull len = end - msg;
309 cbeb0b26 2006-04-01 devnull i = (len*3)/4+1; /* room for max chars + null */
310 5cdb1798 2005-10-29 devnull cp = Malloc(i);
311 5cdb1798 2005-10-29 devnull len = dec64((uchar*)cp, i, msg, len);
312 5cdb1798 2005-10-29 devnull convert(cp, cp+len, buf, bufsize, 1);
313 5cdb1798 2005-10-29 devnull free(cp);
314 5cdb1798 2005-10-29 devnull }
315 5cdb1798 2005-10-29 devnull
/*
 * Copy [msg, end) to buf in canonical form: NUL and carriage return
 * are dropped, runs of space, tab and newline become one space, and
 * everything else is lower-cased.  When isbody is set, HTML tags are
 * filtered through htmlchk and '=' sequences decoded through escape
 * before that.  When isbody is zero, the text is also searched for
 * "content-transfer-encoding: base64".
 *
 * At most bufsize characters are stored, followed by a NUL, so buf
 * must have room for bufsize+1 bytes.  Returns 1 if the base64
 * header was seen, 0 otherwise.
 */
int
convert(char *msg, char *end, char *buf, int bufsize, int isbody)
{
	char *mark;
	int ch, prev, isb64;

	prev = 0;
	isb64 = 0;
	while(msg < end && bufsize > 0){
		ch = *msg++;

		/*
		 * In the body only, try to strip most HTML and
		 * replace certain MIME escape sequences with the character.
		 * Repeat for as long as a step consumed input.
		 */
		if(isbody){
			do{
				mark = msg;
				if(ch == '<')
					ch = htmlchk(&msg, end);
				if(ch == '=')
					ch = escape(&msg);
			}while(mark != msg && mark < end);
		}

		if(ch == 0 || ch == '\r')
			continue;
		if(ch == '\t' || ch == ' ' || ch == '\n'){
			if(prev == ' ')
				continue;
			ch = ' ';
		}else if(ch == 'C' || ch == 'c'){
			/* check for MIME base 64 encoding in header */
			if(!isbody && msg < end-32 && msg[0] == 'o' && msg[1] == 'n'
			&& cistrncmp(msg+2, "tent-transfer-encoding: base64", 30) == 0)
				isb64 = 1;
			ch = 'c';
		}else
			ch = tolower(ch);

		*buf++ = ch;
		prev = ch;
		bufsize--;
	}
	*buf = 0;
	return isb64;
}
371 5cdb1798 2005-10-29 devnull
372 5cdb1798 2005-10-29 devnull /*
373 5cdb1798 2005-10-29 devnull * The pattern parser: build data structures from the pattern file
374 5cdb1798 2005-10-29 devnull */
375 5cdb1798 2005-10-29 devnull
/*
 * Bucket index for a character: its low seven bits (0..127).
 */
static int
hash(int c)
{
	return c & 0x7f;
}
381 5cdb1798 2005-10-29 devnull
382 5cdb1798 2005-10-29 devnull static int
383 5cdb1798 2005-10-29 devnull findkey(char *val)
384 5cdb1798 2005-10-29 devnull {
385 5cdb1798 2005-10-29 devnull Keyword *kp;
386 5cdb1798 2005-10-29 devnull
387 5cdb1798 2005-10-29 devnull for(kp = keywords; kp->string; kp++)
388 5cdb1798 2005-10-29 devnull if(strcmp(val, kp->string) == 0)
389 5cdb1798 2005-10-29 devnull break;
390 5cdb1798 2005-10-29 devnull return kp->value;
391 5cdb1798 2005-10-29 devnull }
392 5cdb1798 2005-10-29 devnull
/* true for space and tab only; note that c may be evaluated twice */
#define whitespace(c)	((c) == ' ' || (c) == '\t')
394 5cdb1798 2005-10-29 devnull
395 5cdb1798 2005-10-29 devnull void
396 5cdb1798 2005-10-29 devnull parsepats(Biobuf *bp)
397 5cdb1798 2005-10-29 devnull {
398 5cdb1798 2005-10-29 devnull Pattern *p, *new;
399 5cdb1798 2005-10-29 devnull char *cp, *qp;
400 5cdb1798 2005-10-29 devnull int type, action, n, h;
401 5cdb1798 2005-10-29 devnull Spat *spat;
402 5cdb1798 2005-10-29 devnull
403 5cdb1798 2005-10-29 devnull for(;;){
404 5cdb1798 2005-10-29 devnull cp = Brdline(bp, '\n');
405 5cdb1798 2005-10-29 devnull if(cp == 0)
406 5cdb1798 2005-10-29 devnull break;
407 5cdb1798 2005-10-29 devnull cp[Blinelen(bp)-1] = 0;
408 5cdb1798 2005-10-29 devnull while(*cp == ' ' || *cp == '\t')
409 5cdb1798 2005-10-29 devnull cp++;
410 5cdb1798 2005-10-29 devnull if(*cp == '#' || *cp == 0)
411 5cdb1798 2005-10-29 devnull continue;
412 5cdb1798 2005-10-29 devnull type = regexp;
413 5cdb1798 2005-10-29 devnull if(*cp == '*'){
414 5cdb1798 2005-10-29 devnull type = string;
415 5cdb1798 2005-10-29 devnull cp++;
416 5cdb1798 2005-10-29 devnull }
417 5cdb1798 2005-10-29 devnull qp = strchr(cp, ':');
418 5cdb1798 2005-10-29 devnull if(qp == 0)
419 5cdb1798 2005-10-29 devnull continue;
420 5cdb1798 2005-10-29 devnull *qp = 0;
421 5cdb1798 2005-10-29 devnull if(debug)
422 5cdb1798 2005-10-29 devnull fprint(2, "action = %s\n", cp);
423 5cdb1798 2005-10-29 devnull action = findkey(cp);
424 5cdb1798 2005-10-29 devnull if(action >= Nactions)
425 5cdb1798 2005-10-29 devnull continue;
426 5cdb1798 2005-10-29 devnull cp = qp+1;
427 5cdb1798 2005-10-29 devnull n = extract(cp);
428 5cdb1798 2005-10-29 devnull if(n <= 0 || *cp == 0)
429 5cdb1798 2005-10-29 devnull continue;
430 5cdb1798 2005-10-29 devnull
431 5cdb1798 2005-10-29 devnull qp = strstr(cp, "~~");
432 5cdb1798 2005-10-29 devnull if(qp){
433 5cdb1798 2005-10-29 devnull *qp = 0;
434 5cdb1798 2005-10-29 devnull n = strlen(cp);
435 5cdb1798 2005-10-29 devnull }
436 5cdb1798 2005-10-29 devnull if(debug)
437 5cdb1798 2005-10-29 devnull fprint(2, " Pattern: `%s'\n", cp);
438 5cdb1798 2005-10-29 devnull
439 5cdb1798 2005-10-29 devnull /* Hook regexps into a chain */
440 5cdb1798 2005-10-29 devnull if(type == regexp) {
441 5cdb1798 2005-10-29 devnull new = Malloc(sizeof(Pattern));
442 5cdb1798 2005-10-29 devnull new->action = action;
443 5cdb1798 2005-10-29 devnull new->pat = regcomp(cp);
444 5cdb1798 2005-10-29 devnull if(new->pat == 0){
445 5cdb1798 2005-10-29 devnull free(new);
446 5cdb1798 2005-10-29 devnull continue;
447 5cdb1798 2005-10-29 devnull }
448 5cdb1798 2005-10-29 devnull new->type = regexp;
449 5cdb1798 2005-10-29 devnull new->alt = 0;
450 5cdb1798 2005-10-29 devnull new->next = 0;
451 5cdb1798 2005-10-29 devnull
452 5cdb1798 2005-10-29 devnull if(qp)
453 5cdb1798 2005-10-29 devnull parsealt(bp, qp+2, &new->alt);
454 5cdb1798 2005-10-29 devnull
455 5cdb1798 2005-10-29 devnull new->next = patterns[action].regexps;
456 5cdb1798 2005-10-29 devnull patterns[action].regexps = new;
457 5cdb1798 2005-10-29 devnull continue;
458 5cdb1798 2005-10-29 devnull
459 5cdb1798 2005-10-29 devnull }
460 5cdb1798 2005-10-29 devnull /* not a Regexp - hook strings into Pattern hash chain */
461 5cdb1798 2005-10-29 devnull spat = Malloc(sizeof(*spat));
462 5cdb1798 2005-10-29 devnull spat->next = 0;
463 5cdb1798 2005-10-29 devnull spat->alt = 0;
464 5cdb1798 2005-10-29 devnull spat->len = n;
465 5cdb1798 2005-10-29 devnull spat->string = Malloc(n+1);
466 5cdb1798 2005-10-29 devnull spat->c1 = cp[1];
467 5cdb1798 2005-10-29 devnull strcpy(spat->string, cp);
468 5cdb1798 2005-10-29 devnull
469 5cdb1798 2005-10-29 devnull if(qp)
470 5cdb1798 2005-10-29 devnull parsealt(bp, qp+2, &spat->alt);
471 5cdb1798 2005-10-29 devnull
472 5cdb1798 2005-10-29 devnull p = patterns[action].strings;
473 5cdb1798 2005-10-29 devnull if(p == 0) {
474 5cdb1798 2005-10-29 devnull p = Malloc(sizeof(Pattern));
475 5cdb1798 2005-10-29 devnull memset(p, 0, sizeof(*p));
476 5cdb1798 2005-10-29 devnull p->action = action;
477 5cdb1798 2005-10-29 devnull p->type = string;
478 5cdb1798 2005-10-29 devnull patterns[action].strings = p;
479 5cdb1798 2005-10-29 devnull }
480 5cdb1798 2005-10-29 devnull h = hash(*spat->string);
481 5cdb1798 2005-10-29 devnull spat->next = p->spat[h];
482 5cdb1798 2005-10-29 devnull p->spat[h] = spat;
483 5cdb1798 2005-10-29 devnull }
484 5cdb1798 2005-10-29 devnull }
485 5cdb1798 2005-10-29 devnull
486 5cdb1798 2005-10-29 devnull static void
487 5cdb1798 2005-10-29 devnull parsealt(Biobuf *bp, char *cp, Spat** head)
488 5cdb1798 2005-10-29 devnull {
489 5cdb1798 2005-10-29 devnull char *p;
490 5cdb1798 2005-10-29 devnull Spat *alt;
491 5cdb1798 2005-10-29 devnull
492 5cdb1798 2005-10-29 devnull while(cp){
493 5cdb1798 2005-10-29 devnull if(*cp == 0){ /*escaped newline*/
494 5cdb1798 2005-10-29 devnull do{
495 5cdb1798 2005-10-29 devnull cp = Brdline(bp, '\n');
496 5cdb1798 2005-10-29 devnull if(cp == 0)
497 5cdb1798 2005-10-29 devnull return;
498 5cdb1798 2005-10-29 devnull cp[Blinelen(bp)-1] = 0;
499 5cdb1798 2005-10-29 devnull } while(extract(cp) <= 0 || *cp == 0);
500 5cdb1798 2005-10-29 devnull }
501 5cdb1798 2005-10-29 devnull
502 5cdb1798 2005-10-29 devnull p = cp;
503 5cdb1798 2005-10-29 devnull cp = strstr(p, "~~");
504 5cdb1798 2005-10-29 devnull if(cp){
505 5cdb1798 2005-10-29 devnull *cp = 0;
506 5cdb1798 2005-10-29 devnull cp += 2;
507 5cdb1798 2005-10-29 devnull }
508 5cdb1798 2005-10-29 devnull if(strlen(p)){
509 5cdb1798 2005-10-29 devnull alt = Malloc(sizeof(*alt));
510 5cdb1798 2005-10-29 devnull alt->string = strdup(p);
511 5cdb1798 2005-10-29 devnull alt->next = *head;
512 5cdb1798 2005-10-29 devnull *head = alt;
513 5cdb1798 2005-10-29 devnull }
514 5cdb1798 2005-10-29 devnull }
515 5cdb1798 2005-10-29 devnull }
516 5cdb1798 2005-10-29 devnull
/*
 * Clean up a pattern in place and return its new length.
 *
 * Leading spaces and tabs are skipped, upper-case ASCII letters are
 * folded to lower case, and the text ends at the first '#' outside
 * double quotes.  The contents of a "..." run are copied without
 * the quotes; inside one, \" stands for a literal quote.  Trailing
 * spaces and tabs are removed, but never those that came from
 * inside a quoted run.
 */
static int
extract(char *cp)
{
	char *src, *dst, *keep;
	int ch;

	src = cp;
	while(*src == ' ' || *src == '\t')
		src++;

	dst = keep = cp;
	for(;;){
		ch = *src++;
		if(ch == 0 || ch == '#')
			break;
		if(ch != '"'){
			if(ch >= 'A' && ch <= 'Z')
				ch += 'a'-'A';
			*dst++ = ch;
			continue;
		}

		/* quoted run: copy up to the closing quote or end of text */
		while(*src != 0 && *src != '"'){
			if(src[0] == '\\' && src[1] == '"')
				src++;
			ch = *src++;
			if(ch >= 'A' && ch <= 'Z')
				ch += 'a'-'A';
			*dst++ = ch;
		}
		if(*src != 0)
			src++;
		keep = dst;	/* never back up over a quoted string */
	}

	while(dst > keep && (dst[-1] == ' ' || dst[-1] == '\t'))
		dst--;
	*dst = 0;
	return dst - cp;
}
552 5cdb1798 2005-10-29 devnull
553 5cdb1798 2005-10-29 devnull /*
554 5cdb1798 2005-10-29 devnull * The matching engine: compare canonical input to pattern structures
555 5cdb1798 2005-10-29 devnull */
556 5cdb1798 2005-10-29 devnull
557 5cdb1798 2005-10-29 devnull static Spat*
558 5cdb1798 2005-10-29 devnull isalt(char *message, Spat *alt)
559 5cdb1798 2005-10-29 devnull {
560 5cdb1798 2005-10-29 devnull while(alt) {
561 5cdb1798 2005-10-29 devnull if(*cmd)
562 5cdb1798 2005-10-29 devnull if(message != cmd && strstr(cmd, alt->string))
563 5cdb1798 2005-10-29 devnull break;
564 5cdb1798 2005-10-29 devnull if(message != header+1 && strstr(header+1, alt->string))
565 5cdb1798 2005-10-29 devnull break;
566 5cdb1798 2005-10-29 devnull if(strstr(message, alt->string))
567 5cdb1798 2005-10-29 devnull break;
568 5cdb1798 2005-10-29 devnull alt = alt->next;
569 5cdb1798 2005-10-29 devnull }
570 5cdb1798 2005-10-29 devnull return alt;
571 5cdb1798 2005-10-29 devnull }
572 5cdb1798 2005-10-29 devnull
573 5cdb1798 2005-10-29 devnull int
574 5cdb1798 2005-10-29 devnull matchpat(Pattern *p, char *message, Resub *m)
575 5cdb1798 2005-10-29 devnull {
576 5cdb1798 2005-10-29 devnull Spat *spat;
577 5cdb1798 2005-10-29 devnull char *s;
578 5cdb1798 2005-10-29 devnull int c, c1;
579 5cdb1798 2005-10-29 devnull
580 5cdb1798 2005-10-29 devnull if(p->type == string){
581 5cdb1798 2005-10-29 devnull c1 = *message;
582 5cdb1798 2005-10-29 devnull for(s=message; c=c1; s++){
583 5cdb1798 2005-10-29 devnull c1 = s[1];
584 5cdb1798 2005-10-29 devnull for(spat=p->spat[hash(c)]; spat; spat=spat->next){
585 5cdb1798 2005-10-29 devnull if(c1 == spat->c1)
586 5cdb1798 2005-10-29 devnull if(memcmp(s, spat->string, spat->len) == 0)
587 5cdb1798 2005-10-29 devnull if(!isalt(message, spat->alt)){
588 b5f65921 2006-02-11 devnull m->s.sp = s;
589 b5f65921 2006-02-11 devnull m->e.ep = s + spat->len;
590 5cdb1798 2005-10-29 devnull return 1;
591 5cdb1798 2005-10-29 devnull }
592 5cdb1798 2005-10-29 devnull }
593 5cdb1798 2005-10-29 devnull }
594 5cdb1798 2005-10-29 devnull return 0;
595 5cdb1798 2005-10-29 devnull }
596 b5f65921 2006-02-11 devnull m->s.sp = m->e.ep = 0;
597 5cdb1798 2005-10-29 devnull if(regexec(p->pat, message, m, 1) == 0)
598 5cdb1798 2005-10-29 devnull return 0;
599 5cdb1798 2005-10-29 devnull if(isalt(message, p->alt))
600 5cdb1798 2005-10-29 devnull return 0;
601 5cdb1798 2005-10-29 devnull return 1;
602 5cdb1798 2005-10-29 devnull }
603 5cdb1798 2005-10-29 devnull
604 5cdb1798 2005-10-29 devnull
605 5cdb1798 2005-10-29 devnull void
606 5cdb1798 2005-10-29 devnull xprint(int fd, char *type, Resub *m)
607 5cdb1798 2005-10-29 devnull {
608 5cdb1798 2005-10-29 devnull char *p, *q;
609 5cdb1798 2005-10-29 devnull int i;
610 5cdb1798 2005-10-29 devnull
611 b5f65921 2006-02-11 devnull if(m->s.sp == 0 || m->e.ep == 0)
612 5cdb1798 2005-10-29 devnull return;
613 5cdb1798 2005-10-29 devnull
614 5cdb1798 2005-10-29 devnull /* back up approx 30 characters to whitespace */
615 b5f65921 2006-02-11 devnull for(p = m->s.sp, i = 0; *p && i < 30; i++, p--)
616 5cdb1798 2005-10-29 devnull ;
617 5cdb1798 2005-10-29 devnull while(*p && *p != ' ')
618 5cdb1798 2005-10-29 devnull p--;
619 5cdb1798 2005-10-29 devnull p++;
620 5cdb1798 2005-10-29 devnull
621 5cdb1798 2005-10-29 devnull /* grab about 30 more chars beyond the end of the match */
622 b5f65921 2006-02-11 devnull for(q = m->e.ep, i = 0; *q && i < 30; i++, q++)
623 5cdb1798 2005-10-29 devnull ;
624 5cdb1798 2005-10-29 devnull while(*q && *q != ' ')
625 5cdb1798 2005-10-29 devnull q++;
626 5cdb1798 2005-10-29 devnull
627 b5f65921 2006-02-11 devnull fprint(fd, "%s %.*s~%.*s~%.*s\n", type, (int)(m->s.sp-p), p, (int)(m->e.ep-m->s.sp), m->s.sp, (int)(q-m->e.ep), m->e.ep);
628 5cdb1798 2005-10-29 devnull }
629 5cdb1798 2005-10-29 devnull
enum {
	INVAL	= 255	/* t64d entry for a byte that is not a base64 digit */
};
633 5cdb1798 2005-10-29 devnull
634 5cdb1798 2005-10-29 devnull static uchar t64d[256] = {
635 5cdb1798 2005-10-29 devnull /*00 */ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
636 5cdb1798 2005-10-29 devnull INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
637 5cdb1798 2005-10-29 devnull /*10*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
638 5cdb1798 2005-10-29 devnull INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
639 5cdb1798 2005-10-29 devnull /*20*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
640 5cdb1798 2005-10-29 devnull INVAL, INVAL, INVAL, 62, INVAL, INVAL, INVAL, 63,
641 5cdb1798 2005-10-29 devnull /*30*/ 52, 53, 54, 55, 56, 57, 58, 59,
642 5cdb1798 2005-10-29 devnull 60, 61, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
643 5cdb1798 2005-10-29 devnull /*40*/ INVAL, 0, 1, 2, 3, 4, 5, 6,
644 5cdb1798 2005-10-29 devnull 7, 8, 9, 10, 11, 12, 13, 14,
645 5cdb1798 2005-10-29 devnull /*50*/ 15, 16, 17, 18, 19, 20, 21, 22,
646 5cdb1798 2005-10-29 devnull 23, 24, 25, INVAL, INVAL, INVAL, INVAL, INVAL,
647 5cdb1798 2005-10-29 devnull /*60*/ INVAL, 26, 27, 28, 29, 30, 31, 32,
648 5cdb1798 2005-10-29 devnull 33, 34, 35, 36, 37, 38, 39, 40,
649 5cdb1798 2005-10-29 devnull /*70*/ 41, 42, 43, 44, 45, 46, 47, 48,
650 5cdb1798 2005-10-29 devnull 49, 50, 51, INVAL, INVAL, INVAL, INVAL, INVAL,
651 5cdb1798 2005-10-29 devnull /*80*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
652 5cdb1798 2005-10-29 devnull INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
653 5cdb1798 2005-10-29 devnull /*90*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
654 5cdb1798 2005-10-29 devnull INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
655 5cdb1798 2005-10-29 devnull /*A0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
656 5cdb1798 2005-10-29 devnull INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
657 5cdb1798 2005-10-29 devnull /*B0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
658 5cdb1798 2005-10-29 devnull INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
659 5cdb1798 2005-10-29 devnull /*C0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
660 5cdb1798 2005-10-29 devnull INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
661 5cdb1798 2005-10-29 devnull /*D0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
662 5cdb1798 2005-10-29 devnull INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
663 5cdb1798 2005-10-29 devnull /*E0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
664 5cdb1798 2005-10-29 devnull INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
665 5cdb1798 2005-10-29 devnull /*F0*/ INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL,
666 cbeb0b26 2006-04-01 devnull INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL, INVAL
667 5cdb1798 2005-10-29 devnull };