1 76193d7c 2003-09-30 devnull #include "sam.h"
/* Result of the most recent search: execute and bexecute reset sel.p[0].p1 to -1, newmatch and bnewmatch fill it in. */
3 76193d7c 2003-09-30 devnull Rangeset sel;
/* Text of the last compiled expression: compile compares against it, regerror and regerror_c clear it. */
4 76193d7c 2003-09-30 devnull String lastregexp;
6 76193d7c 2003-09-30 devnull * Machine Information
/*
 * One instruction of the compiled machine.  The macros at the end reach
 * the operands through members named r and l; the declarations of those
 * two aggregates are not visible in this view.
 */
8 76193d7c 2003-09-30 devnull typedef struct Inst Inst;
10 76193d7c 2003-09-30 devnull struct Inst
12 36d9b90c 2010-07-14 rsc long type; /* < OPERATOR ==> literal, otherwise action */
/* operands reached through r */
14 76193d7c 2003-09-30 devnull int rsid;
/* subexpression index stored by evaluntil on LBRA and RBRA instructions */
15 76193d7c 2003-09-30 devnull int rsubid;
/* index into class[] for CCLASS and NCCLASS, set by operand */
16 76193d7c 2003-09-30 devnull int class;
17 76193d7c 2003-09-30 devnull struct Inst *rother;
/* one branch of an OR instruction */
18 76193d7c 2003-09-30 devnull struct Inst *rright;
/* operands reached through l */
/* the other branch of an OR instruction */
21 76193d7c 2003-09-30 devnull struct Inst *lleft;
/* successor instruction */
22 76193d7c 2003-09-30 devnull struct Inst *lnext;
/* short names used by the rest of the file */
25 76193d7c 2003-09-30 devnull #define sid r.rsid
26 76193d7c 2003-09-30 devnull #define subid r.rsubid
27 76193d7c 2003-09-30 devnull #define rclass r.class
28 76193d7c 2003-09-30 devnull #define other r.rother
29 76193d7c 2003-09-30 devnull #define right r.rright
30 76193d7c 2003-09-30 devnull #define left l.lleft
31 76193d7c 2003-09-30 devnull #define next l.lnext
/* Capacity of the instruction array; newinst raises Etoolong once it is used up. */
33 76193d7c 2003-09-30 devnull #define NPROG 1024
/* Storage for both machines: compile builds the forward one first and the backward one after it. */
34 76193d7c 2003-09-30 devnull Inst program[NPROG];
/* Next free slot in program; reset by compile, advanced by newinst. */
35 76193d7c 2003-09-30 devnull Inst *progp;
36 76193d7c 2003-09-30 devnull Inst *startinst; /* First inst. of program; might not be program[0] */
37 76193d7c 2003-09-30 devnull Inst *bstartinst; /* same for backwards machine */
/* One pending thread of the matcher: an instruction plus the ranges recorded so far. */
39 76193d7c 2003-09-30 devnull typedef struct Ilist Ilist;
40 76193d7c 2003-09-30 devnull struct Ilist
42 76193d7c 2003-09-30 devnull Inst *inst; /* Instruction of the thread */
/* ranges of this thread; p[0] is the whole match, other slots are written by LBRA and RBRA */
43 76193d7c 2003-09-30 devnull Rangeset se;
44 76193d7c 2003-09-30 devnull Posn startp; /* first char of match */
/* Maximum number of threads per list; execute and bexecute raise Eoverflow when a count reaches it. */
47 c99ef336 2007-06-09 devnull #define NLIST 127
49 76193d7c 2003-09-30 devnull Ilist *tl, *nl; /* This list, next list */
50 c99ef336 2007-06-09 devnull Ilist list[2][NLIST+1]; /* +1 for trailing null */
/* Rangeset used to seed a new thread; only p[0].p1 is assigned before each use. */
51 76193d7c 2003-09-30 devnull static Rangeset sempty;
54 76193d7c 2003-09-30 devnull * Actions and Tokens
56 36d9b90c 2010-07-14 rsc * 0x10000xx are operators, value == precedence
57 36d9b90c 2010-07-14 rsc * 0x20000xx are tokens, i.e. operands for operators
/* Operators: the numeric order is the precedence compared by evaluntil. */
59 36d9b90c 2010-07-14 rsc #define OPERATOR 0x1000000 /* Bit set in all operators */
60 36d9b90c 2010-07-14 rsc #define START (OPERATOR+0) /* Start, used for marker on stack */
61 36d9b90c 2010-07-14 rsc #define RBRA (OPERATOR+1) /* Right bracket, ) */
62 36d9b90c 2010-07-14 rsc #define LBRA (OPERATOR+2) /* Left bracket, ( */
63 36d9b90c 2010-07-14 rsc #define OR (OPERATOR+3) /* Alternation, | */
64 36d9b90c 2010-07-14 rsc #define CAT (OPERATOR+4) /* Concatenation, implicit operator */
65 36d9b90c 2010-07-14 rsc #define STAR (OPERATOR+5) /* Closure, * */
66 36d9b90c 2010-07-14 rsc #define PLUS (OPERATOR+6) /* a+ == aa* */
67 36d9b90c 2010-07-14 rsc #define QUEST (OPERATOR+7) /* a? == a|nothing, i.e. 0 or 1 a's */
/* Operand tokens. */
68 36d9b90c 2010-07-14 rsc #define ANY 0x2000000 /* Any character but newline, . */
69 36d9b90c 2010-07-14 rsc #define NOP (ANY+1) /* No operation, internal use only */
70 36d9b90c 2010-07-14 rsc #define BOL (ANY+2) /* Beginning of line, ^ */
71 36d9b90c 2010-07-14 rsc #define EOL (ANY+3) /* End of line, $ */
72 36d9b90c 2010-07-14 rsc #define CCLASS (ANY+4) /* Character class, [] */
73 36d9b90c 2010-07-14 rsc #define NCCLASS (ANY+5) /* Negated character class, [^] */
74 36d9b90c 2010-07-14 rsc #define END (ANY+0x77) /* Terminate: match found */
/* Masks: realcompile tests (token&ISATOR) == OPERATOR to recognise operators. */
76 36d9b90c 2010-07-14 rsc #define ISATOR OPERATOR
77 36d9b90c 2010-07-14 rsc #define ISAND ANY
/* nextrec ORs this bit into escaped characters; bldcclass masks it off single members. */
79 36d9b90c 2010-07-14 rsc #define QUOTED 0x4000000 /* Bit set for \-ed lex characters */
82 76193d7c 2003-09-30 devnull * Parser Information
/* A program fragment on the operand stack, as pushed by pushand. */
84 76193d7c 2003-09-30 devnull typedef struct Node Node;
85 76193d7c 2003-09-30 devnull struct Node
/* entry instruction of the fragment */
87 76193d7c 2003-09-30 devnull Inst *first;
/* instruction whose next field evaluntil patches to chain the following fragment */
88 76193d7c 2003-09-30 devnull Inst *last;
/* Depth shared by the operand, operator and subexpression-id stacks. */
91 76193d7c 2003-09-30 devnull #define NSTACK 20
/* Operand stack and its top pointer (pushand, popand). */
92 76193d7c 2003-09-30 devnull Node andstack[NSTACK];
93 76193d7c 2003-09-30 devnull Node *andp;
/* Operator stack and its top pointer (pushator, popator). */
94 76193d7c 2003-09-30 devnull int atorstack[NSTACK];
95 76193d7c 2003-09-30 devnull int *atorp;
96 76193d7c 2003-09-30 devnull int lastwasand; /* Last token was operand */
/* Counter bumped by operator on every LBRA; reset by realcompile. */
97 76193d7c 2003-09-30 devnull int cursubid;
/* Subexpression ids pushed and popped in step with atorstack. */
98 76193d7c 2003-09-30 devnull int subidstack[NSTACK];
99 76193d7c 2003-09-30 devnull int *subidp;
/* TRUE while compile builds the backward machine; read by the CAT case of evaluntil. */
100 76193d7c 2003-09-30 devnull int backwards;
/* Bracket balance: cleared by startlex, decremented by operator on RBRA, checked by realcompile. */
101 76193d7c 2003-09-30 devnull int nbra;
102 76193d7c 2003-09-30 devnull Rune *exprp; /* pointer to next character in source expression */
103 76193d7c 2003-09-30 devnull #define DCLASS 10 /* allocation increment */
104 76193d7c 2003-09-30 devnull int nclass; /* number active */
105 76193d7c 2003-09-30 devnull int Nclass; /* high water mark */
/* Table of compiled character classes, each a 0-terminated Rune array built by bldcclass. */
106 76193d7c 2003-09-30 devnull Rune **class;
/* Set by bldcclass for a class starting with ^; operand then emits NCCLASS instead of CCLASS. */
107 76193d7c 2003-09-30 devnull int negateclass;
/* Forward declarations for the matcher, the parser stacks, the lexer and the code generator defined below. */
109 c99ef336 2007-06-09 devnull int addinst(Ilist *l, Inst *inst, Rangeset *sep);
110 76193d7c 2003-09-30 devnull void newmatch(Rangeset*);
111 76193d7c 2003-09-30 devnull void bnewmatch(Rangeset*);
112 76193d7c 2003-09-30 devnull void pushand(Inst*, Inst*);
113 76193d7c 2003-09-30 devnull void pushator(int);
114 76193d7c 2003-09-30 devnull Node *popand(int);
115 76193d7c 2003-09-30 devnull int popator(void);
116 76193d7c 2003-09-30 devnull void startlex(Rune*);
117 76193d7c 2003-09-30 devnull int lex(void);
118 76193d7c 2003-09-30 devnull void operator(int);
119 76193d7c 2003-09-30 devnull void operand(int);
120 76193d7c 2003-09-30 devnull void evaluntil(int);
121 76193d7c 2003-09-30 devnull void optimize(Inst*);
122 76193d7c 2003-09-30 devnull void bldcclass(void);
/*
 * Report a compile error: clear lastregexp so that compile does not treat
 * the failed expression as already compiled, then hand e to error.
 */
125 76193d7c 2003-09-30 devnull regerror(Err e)
127 76193d7c 2003-09-30 devnull Strzero(&lastregexp);
128 76193d7c 2003-09-30 devnull error(e);
/*
 * Same as regerror, but passes the extra value c on to error_c
 * (popand uses it to name the operator that lacks an operand).
 */
132 76193d7c 2003-09-30 devnull regerror_c(Err e, int c)
134 76193d7c 2003-09-30 devnull Strzero(&lastregexp);
135 76193d7c 2003-09-30 devnull error_c(e, c);
/*
 * Take the next free slot of program, give it type t, clear its left and
 * right links, and return it while advancing progp.
 */
139 76193d7c 2003-09-30 devnull newinst(int t)
/* NOTE(review): relies on regerror not returning, otherwise the stores below write past program[] -- confirm */
141 76193d7c 2003-09-30 devnull if(progp >= &program[NPROG])
142 76193d7c 2003-09-30 devnull regerror(Etoolong);
143 76193d7c 2003-09-30 devnull progp->type = t;
144 76193d7c 2003-09-30 devnull progp->left = 0;
145 76193d7c 2003-09-30 devnull progp->right = 0;
146 76193d7c 2003-09-30 devnull return progp++;
/*
 * Parse the expression s with the operator and operand stacks, appending
 * instructions at progp, and return the first instruction of the result.
 * The direction of concatenation depends on the global backwards flag
 * (see the CAT case of evaluntil).
 */
150 76193d7c 2003-09-30 devnull realcompile(Rune *s)
152 76193d7c 2003-09-30 devnull int token;
/* reset the lexer and every parser stack */
154 76193d7c 2003-09-30 devnull startlex(s);
155 76193d7c 2003-09-30 devnull atorp = atorstack;
156 76193d7c 2003-09-30 devnull andp = andstack;
157 76193d7c 2003-09-30 devnull subidp = subidstack;
158 76193d7c 2003-09-30 devnull cursubid = 0;
159 76193d7c 2003-09-30 devnull lastwasand = FALSE;
160 76193d7c 2003-09-30 devnull /* Start with a low priority operator to prime parser */
161 76193d7c 2003-09-30 devnull pushator(START-1);
/* feed tokens to the parser until the lexer reports END; tokens with the OPERATOR bit are operators */
162 76193d7c 2003-09-30 devnull while((token=lex()) != END){
163 76193d7c 2003-09-30 devnull if((token&ISATOR) == OPERATOR)
164 76193d7c 2003-09-30 devnull operator(token);
166 76193d7c 2003-09-30 devnull operand(token);
168 76193d7c 2003-09-30 devnull /* Close with a low priority operator */
169 76193d7c 2003-09-30 devnull evaluntil(START);
170 76193d7c 2003-09-30 devnull /* Force END */
171 76193d7c 2003-09-30 devnull operand(END);
172 76193d7c 2003-09-30 devnull evaluntil(START);
/* nbra still nonzero here means a left bracket was never closed */
173 76193d7c 2003-09-30 devnull if(nbra)
174 76193d7c 2003-09-30 devnull regerror(Eleftpar);
175 76193d7c 2003-09-30 devnull --andp; /* points to first and only operand */
176 76193d7c 2003-09-30 devnull return andp->first;
/*
 * Compile s into both machines: the forward program starting at
 * startinst and, directly after it in program[], the backward program
 * starting at bstartinst.  The text is remembered in lastregexp.
 */
180 76193d7c 2003-09-30 devnull compile(String *s)
183 76193d7c 2003-09-30 devnull Inst *oprogp;
/*
 * Same text as the cached expression.
 * NOTE(review): the statement controlled by this test is not visible in
 * this view; presumably an early return -- confirm.
 */
185 76193d7c 2003-09-30 devnull if(Strcmp(s, &lastregexp)==0)
/* release the character classes of the previous expression */
187 76193d7c 2003-09-30 devnull for(i=0; i<nclass; i++)
188 76193d7c 2003-09-30 devnull free(class[i]);
189 76193d7c 2003-09-30 devnull nclass = 0;
/* forward machine, built from the start of program[] */
190 76193d7c 2003-09-30 devnull progp = program;
191 76193d7c 2003-09-30 devnull backwards = FALSE;
192 76193d7c 2003-09-30 devnull startinst = realcompile(s->s);
193 76193d7c 2003-09-30 devnull optimize(program);
/* backward machine, built where the forward one stopped */
194 76193d7c 2003-09-30 devnull oprogp = progp;
195 76193d7c 2003-09-30 devnull backwards = TRUE;
196 76193d7c 2003-09-30 devnull bstartinst = realcompile(s->s);
197 76193d7c 2003-09-30 devnull optimize(oprogp);
/* cache the text only after both compilations went through */
198 76193d7c 2003-09-30 devnull Strduplstr(&lastregexp, s);
/*
 * Handle an operand token t: emit one instruction for it and push it on
 * the operand stack as a one-instruction fragment.
 */
202 76193d7c 2003-09-30 devnull operand(int t)
204 76193d7c 2003-09-30 devnull Inst *i;
205 76193d7c 2003-09-30 devnull if(lastwasand)
206 76193d7c 2003-09-30 devnull operator(CAT); /* catenate is implicit */
207 76193d7c 2003-09-30 devnull i = newinst(t);
/* the lexer has just run bldcclass, so the class is the newest entry and negateclass is current */
208 76193d7c 2003-09-30 devnull if(t == CCLASS){
209 76193d7c 2003-09-30 devnull if(negateclass)
210 76193d7c 2003-09-30 devnull i->type = NCCLASS; /* UGH */
211 76193d7c 2003-09-30 devnull i->rclass = nclass-1; /* UGH */
213 76193d7c 2003-09-30 devnull pushand(i, i);
214 76193d7c 2003-09-30 devnull lastwasand = TRUE;
/*
 * Handle an operator token t: reduce what the stacks allow, push t unless
 * it is RBRA, and update lastwasand.  Several lines of this function are
 * not visible in this view.
 */
218 76193d7c 2003-09-30 devnull operator(int t)
/* a right bracket with no open left bracket */
220 76193d7c 2003-09-30 devnull if(t==RBRA && --nbra<0)
221 76193d7c 2003-09-30 devnull regerror(Erightpar);
222 76193d7c 2003-09-30 devnull if(t==LBRA){
224 76193d7c 2003-09-30 devnull * if(++cursubid >= NSUBEXP)
225 76193d7c 2003-09-30 devnull * regerror(Esubexp);
227 76193d7c 2003-09-30 devnull cursubid++; /* silently ignored */
/* an operand directly before the bracket needs the implicit catenation */
229 76193d7c 2003-09-30 devnull if(lastwasand)
230 76193d7c 2003-09-30 devnull operator(CAT);
232 76193d7c 2003-09-30 devnull evaluntil(t);
/* RBRA is never stacked; the LBRA case of evaluntil consumes it */
233 76193d7c 2003-09-30 devnull if(t!=RBRA)
234 76193d7c 2003-09-30 devnull pushator(t);
235 76193d7c 2003-09-30 devnull lastwasand = FALSE;
236 76193d7c 2003-09-30 devnull if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
237 76193d7c 2003-09-30 devnull lastwasand = TRUE; /* these look like operands */
/*
 * Internal consistency failure: format s into a message and panic.
 * NOTE(review): sprint writes into the 100-byte buf without a bound; the
 * callers visible in this file pass short literals, but a bounded
 * formatter would be safer.
 */
241 76193d7c 2003-09-30 devnull cant(char *s)
243 76193d7c 2003-09-30 devnull char buf[100];
245 76193d7c 2003-09-30 devnull sprint(buf, "regexp: can't happen: %s", s);
246 76193d7c 2003-09-30 devnull panic(buf);
/*
 * Push the fragment (f, l) on the operand stack; a full stack is treated
 * as an internal error via cant.
 * NOTE(review): the statement advancing andp is not visible in this view.
 */
250 76193d7c 2003-09-30 devnull pushand(Inst *f, Inst *l)
252 76193d7c 2003-09-30 devnull if(andp >= &andstack[NSTACK])
253 76193d7c 2003-09-30 devnull cant("operand stack overflow");
254 76193d7c 2003-09-30 devnull andp->first = f;
255 76193d7c 2003-09-30 devnull andp->last = l;
/*
 * Push operator t on the operator stack and a subexpression id on the
 * parallel subidstack; a full stack is treated as an internal error.
 */
260 76193d7c 2003-09-30 devnull pushator(int t)
262 76193d7c 2003-09-30 devnull if(atorp >= &atorstack[NSTACK])
263 76193d7c 2003-09-30 devnull cant("operator stack overflow");
264 76193d7c 2003-09-30 devnull *atorp++=t;
/*
 * Ids at or beyond NSUBEXP are recorded as -1, which execute and bexecute
 * skip when storing ranges.  The branch keyword between the two stores
 * below is not visible in this view -- TODO confirm.
 */
265 76193d7c 2003-09-30 devnull if(cursubid >= NSUBEXP)
266 76193d7c 2003-09-30 devnull *subidp++= -1;
268 76193d7c 2003-09-30 devnull *subidp++=cursubid;
/*
 * Pop and return the top fragment of the operand stack.  On an empty
 * stack an error is raised: Emissop carries op, the character of the
 * operator that lacks an operand, and Ebadregexp is the other diagnostic
 * (the test choosing between them is not visible in this view).
 */
272 76193d7c 2003-09-30 devnull popand(int op)
274 76193d7c 2003-09-30 devnull if(andp <= &andstack[0])
276 76193d7c 2003-09-30 devnull regerror_c(Emissop, op);
278 76193d7c 2003-09-30 devnull regerror(Ebadregexp);
279 76193d7c 2003-09-30 devnull return --andp;
/*
 * Pop and return the top operator.  subidp is stepped back as well, so
 * *subidp afterwards is the id that was pushed together with that operator.
 */
283 76193d7c 2003-09-30 devnull popator(void)
285 76193d7c 2003-09-30 devnull if(atorp <= &atorstack[0])
286 76193d7c 2003-09-30 devnull cant("operator stack underflow");
287 76193d7c 2003-09-30 devnull --subidp;
288 76193d7c 2003-09-30 devnull return *--atorp;
/*
 * Reduce the stacks: pop operators while the top one has priority >= pri
 * and emit the instructions for each.  With pri == RBRA the loop keeps
 * going until the matching LBRA is popped, which returns.  The statements
 * that end each case are not visible in this view.
 */
292 76193d7c 2003-09-30 devnull evaluntil(int pri)
294 76193d7c 2003-09-30 devnull Node *op1, *op2, *t;
295 76193d7c 2003-09-30 devnull Inst *inst1, *inst2;
297 76193d7c 2003-09-30 devnull while(pri==RBRA || atorp[-1]>=pri){
298 76193d7c 2003-09-30 devnull switch(popator()){
/* bracketed group: wrap the operand as LBRA, operand, RBRA, both tagged with the popped subexpression id */
299 76193d7c 2003-09-30 devnull case LBRA:
300 76193d7c 2003-09-30 devnull op1 = popand('(');
301 76193d7c 2003-09-30 devnull inst2 = newinst(RBRA);
302 76193d7c 2003-09-30 devnull inst2->subid = *subidp;
303 76193d7c 2003-09-30 devnull op1->last->next = inst2;
304 76193d7c 2003-09-30 devnull inst1 = newinst(LBRA);
305 76193d7c 2003-09-30 devnull inst1->subid = *subidp;
306 76193d7c 2003-09-30 devnull inst1->next = op1->first;
307 76193d7c 2003-09-30 devnull pushand(inst1, inst2);
308 76193d7c 2003-09-30 devnull return; /* must have been RBRA */
309 76193d7c 2003-09-30 devnull default:
310 76193d7c 2003-09-30 devnull panic("unknown regexp operator");
/* alternation: an OR instruction branches to both operands, which join at a shared NOP */
312 76193d7c 2003-09-30 devnull case OR:
313 76193d7c 2003-09-30 devnull op2 = popand('|');
314 76193d7c 2003-09-30 devnull op1 = popand('|');
315 76193d7c 2003-09-30 devnull inst2 = newinst(NOP);
316 76193d7c 2003-09-30 devnull op2->last->next = inst2;
317 76193d7c 2003-09-30 devnull op1->last->next = inst2;
318 76193d7c 2003-09-30 devnull inst1 = newinst(OR);
319 76193d7c 2003-09-30 devnull inst1->right = op1->first;
320 76193d7c 2003-09-30 devnull inst1->left = op2->first;
321 76193d7c 2003-09-30 devnull pushand(inst1, inst2);
/* concatenation: chain the two operands; the backward machine swaps them unless the second is END */
323 76193d7c 2003-09-30 devnull case CAT:
324 76193d7c 2003-09-30 devnull op2 = popand(0);
325 76193d7c 2003-09-30 devnull op1 = popand(0);
326 76193d7c 2003-09-30 devnull if(backwards && op2->first->type!=END)
327 76193d7c 2003-09-30 devnull t = op1, op1 = op2, op2 = t;
328 76193d7c 2003-09-30 devnull op1->last->next = op2->first;
329 76193d7c 2003-09-30 devnull pushand(op1->first, op2->last);
/*
 * closure: the OR is both entry and exit; its right branch enters the
 * operand, which loops back to the OR.  The exit is patched later through
 * next (presumably left and next share storage -- confirm).
 */
331 76193d7c 2003-09-30 devnull case STAR:
332 76193d7c 2003-09-30 devnull op2 = popand('*');
333 76193d7c 2003-09-30 devnull inst1 = newinst(OR);
334 76193d7c 2003-09-30 devnull op2->last->next = inst1;
335 76193d7c 2003-09-30 devnull inst1->right = op2->first;
336 76193d7c 2003-09-30 devnull pushand(inst1, inst1);
/* one or more: same loop as STAR, but the fragment is entered at the operand */
338 76193d7c 2003-09-30 devnull case PLUS:
339 76193d7c 2003-09-30 devnull op2 = popand('+');
340 76193d7c 2003-09-30 devnull inst1 = newinst(OR);
341 76193d7c 2003-09-30 devnull op2->last->next = inst1;
342 76193d7c 2003-09-30 devnull inst1->right = op2->first;
343 76193d7c 2003-09-30 devnull pushand(op2->first, inst1);
/* optional: the OR either skips to the NOP or runs the operand, which then reaches the same NOP */
345 76193d7c 2003-09-30 devnull case QUEST:
346 76193d7c 2003-09-30 devnull op2 = popand('?');
347 76193d7c 2003-09-30 devnull inst1 = newinst(OR);
348 76193d7c 2003-09-30 devnull inst2 = newinst(NOP);
349 76193d7c 2003-09-30 devnull inst1->left = inst2;
350 76193d7c 2003-09-30 devnull inst1->right = op2->first;
351 76193d7c 2003-09-30 devnull op2->last->next = inst2;
352 76193d7c 2003-09-30 devnull pushand(inst1, inst2);
/*
 * Short-circuit NOPs: for every instruction from start up to (not
 * including) the first END, follow its next link through any chain of NOP
 * instructions and point next at the first non-NOP target.
 */
360 76193d7c 2003-09-30 devnull optimize(Inst *start)
362 76193d7c 2003-09-30 devnull Inst *inst, *target;
364 76193d7c 2003-09-30 devnull for(inst=start; inst->type!=END; inst++){
365 76193d7c 2003-09-30 devnull target = inst->next;
366 76193d7c 2003-09-30 devnull while(target->type == NOP)
367 76193d7c 2003-09-30 devnull target = target->next;
368 76193d7c 2003-09-30 devnull inst->next = target;
372 76193d7c 2003-09-30 devnull #ifdef DEBUG
/*
 * Debug aid: print the operator stack bottom to top, then for each operand
 * fragment the types of its first and last instructions, all in octal.
 */
374 76193d7c 2003-09-30 devnull dumpstack(void){
375 76193d7c 2003-09-30 devnull Node *stk;
376 76193d7c 2003-09-30 devnull int *ip;
378 76193d7c 2003-09-30 devnull dprint("operators\n");
379 76193d7c 2003-09-30 devnull for(ip = atorstack; ip<atorp; ip++)
380 76193d7c 2003-09-30 devnull dprint("0%o\n", *ip);
381 76193d7c 2003-09-30 devnull dprint("operands\n");
382 76193d7c 2003-09-30 devnull for(stk = andstack; stk<andp; stk++)
383 76193d7c 2003-09-30 devnull dprint("0%o\t0%o\n", stk->first->type, stk->last->type);
/*
 * Debug aid: print program[] one instruction per line as index, type in
 * octal, and the left and right links as offsets from program.  The loop
 * stops after printing an instruction whose type is 0.
 */
386 76193d7c 2003-09-30 devnull dump(void){
387 76193d7c 2003-09-30 devnull Inst *l;
389 76193d7c 2003-09-30 devnull l = program;
391 76193d7c 2003-09-30 devnull dprint("%d:\t0%o\t%d\t%d\n", l-program, l->type,
392 76193d7c 2003-09-30 devnull l->left-program, l->right-program);
393 76193d7c 2003-09-30 devnull }while(l++->type);
/* Point the lexer at the expression s and reset the bracket balance. */
398 76193d7c 2003-09-30 devnull startlex(Rune *s)
400 76193d7c 2003-09-30 devnull exprp = s;
401 76193d7c 2003-09-30 devnull nbra = 0;
/*
 * Return the next token of the expression at exprp: an operator or operand
 * code for the special characters below, otherwise the character itself.
 * Several lines of this function (case terminators and some assignments)
 * are not visible in this view.
 */
406 76193d7c 2003-09-30 devnull lex(void){
407 76193d7c 2003-09-30 devnull int c= *exprp++;
409 76193d7c 2003-09-30 devnull switch(c){
/* backslash: take the following character if there is one; n is tested specially */
410 76193d7c 2003-09-30 devnull case '\\':
411 76193d7c 2003-09-30 devnull if(*exprp)
412 76193d7c 2003-09-30 devnull if((c= *exprp++)=='n')
/*
 * NOTE(review): lines are elided just above; the END assignment and the
 * pointer step-back presumably belong to the end-of-expression case -- confirm.
 */
416 76193d7c 2003-09-30 devnull c = END;
417 76193d7c 2003-09-30 devnull --exprp; /* In case we come here again */
419 76193d7c 2003-09-30 devnull case '*':
420 76193d7c 2003-09-30 devnull c = STAR;
422 76193d7c 2003-09-30 devnull case '?':
423 76193d7c 2003-09-30 devnull c = QUEST;
425 76193d7c 2003-09-30 devnull case '+':
426 76193d7c 2003-09-30 devnull c = PLUS;
428 76193d7c 2003-09-30 devnull case '|':
431 76193d7c 2003-09-30 devnull case '.':
432 76193d7c 2003-09-30 devnull c = ANY;
434 76193d7c 2003-09-30 devnull case '(':
435 76193d7c 2003-09-30 devnull c = LBRA;
437 76193d7c 2003-09-30 devnull case ')':
438 76193d7c 2003-09-30 devnull c = RBRA;
440 76193d7c 2003-09-30 devnull case '^':
441 76193d7c 2003-09-30 devnull c = BOL;
443 76193d7c 2003-09-30 devnull case '$':
444 76193d7c 2003-09-30 devnull c = EOL;
/* character class: bldcclass consumes the class text and appends it to class[] */
446 76193d7c 2003-09-30 devnull case '[':
447 76193d7c 2003-09-30 devnull c = CCLASS;
448 76193d7c 2003-09-30 devnull bldcclass();
451 76193d7c 2003-09-30 devnull return c;
/*
 * Return the next character of a character class and advance exprp.
 * Backslash-n yields a newline, any other escaped character is returned
 * with the QUOTED bit set, and everything else is returned as is.
 * Raises Ebadclass when the expression ends, or ends in a lone backslash.
 */
455 76193d7c 2003-09-30 devnull nextrec(void){
456 76193d7c 2003-09-30 devnull if(exprp[0]==0 || (exprp[0]=='\\' && exprp[1]==0))
457 76193d7c 2003-09-30 devnull regerror(Ebadclass);
458 76193d7c 2003-09-30 devnull if(exprp[0] == '\\'){
459 76193d7c 2003-09-30 devnull exprp++;
460 76193d7c 2003-09-30 devnull if(*exprp=='n'){
461 76193d7c 2003-09-30 devnull exprp++;
462 76193d7c 2003-09-30 devnull return '\n';
/* QUOTED lets bldcclass tell an escaped ] or - from the syntactic one */
464 36d9b90c 2010-07-14 rsc return *exprp++|QUOTED;
466 76193d7c 2003-09-30 devnull return *exprp++;
/*
 * Compile the character class at exprp (the opening bracket is already
 * consumed) into a freshly allocated, 0-terminated Rune array and append
 * it to class[].  A single member is stored as one Rune; a range is stored
 * as the triple Runemax, low, high.  negateclass is set for a leading ^.
 * Some lines (initialisation and update of n, a label, a branch keyword)
 * are not visible in this view.
 */
470 76193d7c 2003-09-30 devnull bldcclass(void)
/* n: Runes used so far, na: Runes allocated */
472 76193d7c 2003-09-30 devnull long c1, c2, n, na;
473 76193d7c 2003-09-30 devnull Rune *classp;
475 76193d7c 2003-09-30 devnull classp = emalloc(DCLASS*RUNESIZE);
477 76193d7c 2003-09-30 devnull na = DCLASS;
478 76193d7c 2003-09-30 devnull /* we have already seen the '[' */
479 76193d7c 2003-09-30 devnull if(*exprp == '^'){
480 76193d7c 2003-09-30 devnull classp[n++] = '\n'; /* don't match newline in negate case */
481 76193d7c 2003-09-30 devnull negateclass = TRUE;
482 76193d7c 2003-09-30 devnull exprp++;
484 76193d7c 2003-09-30 devnull negateclass = FALSE;
/* an escaped closing bracket carries QUOTED and so does not end the loop */
485 76193d7c 2003-09-30 devnull while((c1 = nextrec()) != ']'){
/* an unescaped - where a member is expected is an error; the partial class is released first */
486 76193d7c 2003-09-30 devnull if(c1 == '-'){
488 76193d7c 2003-09-30 devnull free(classp);
489 76193d7c 2003-09-30 devnull regerror(Ebadclass);
491 76193d7c 2003-09-30 devnull if(n+4 >= na){ /* 3 runes plus NUL */
492 76193d7c 2003-09-30 devnull na += DCLASS;
493 76193d7c 2003-09-30 devnull classp = erealloc(classp, na*RUNESIZE);
/* member followed by -: a range; a closing bracket as its upper end is an error */
495 76193d7c 2003-09-30 devnull if(*exprp == '-'){
496 76193d7c 2003-09-30 devnull exprp++; /* eat '-' */
497 76193d7c 2003-09-30 devnull if((c2 = nextrec()) == ']')
498 76193d7c 2003-09-30 devnull goto Error;
/*
 * NOTE(review): unlike the single-member store further down, c1 and c2
 * are stored without masking QUOTED, so an escaped range endpoint keeps
 * that bit and classmatch compares against it -- confirm intent.
 */
499 0cadb430 2009-09-11 russcox classp[n+0] = Runemax;
500 76193d7c 2003-09-30 devnull classp[n+1] = c1;
501 76193d7c 2003-09-30 devnull classp[n+2] = c2;
/* single member, stored with the QUOTED marker removed */
504 36d9b90c 2010-07-14 rsc classp[n++] = c1 & ~QUOTED;
506 76193d7c 2003-09-30 devnull classp[n] = 0;
/* grow the table of classes by DCLASS entries when it is full */
507 76193d7c 2003-09-30 devnull if(nclass == Nclass){
508 76193d7c 2003-09-30 devnull Nclass += DCLASS;
509 76193d7c 2003-09-30 devnull class = erealloc(class, Nclass*sizeof(Rune*));
511 76193d7c 2003-09-30 devnull class[nclass++] = classp;
/*
 * Test character c against class[classno].  Returns !negate when c is a
 * member (equal to a single entry or inside a range triple) and negate
 * when the 0 terminator is reached without a hit.
 */
515 76193d7c 2003-09-30 devnull classmatch(int classno, int c, int negate)
517 76193d7c 2003-09-30 devnull Rune *p;
519 76193d7c 2003-09-30 devnull p = class[classno];
520 76193d7c 2003-09-30 devnull while(*p){
/* Runemax introduces a range: p[1] is the low end, p[2] the high end */
521 0cadb430 2009-09-11 russcox if(*p == Runemax){
522 76193d7c 2003-09-30 devnull if(p[1]<=c && c<=p[2])
523 76193d7c 2003-09-30 devnull return !negate;
/* NOTE(review): the step past the three-Rune range entry is not visible in this view */
525 76193d7c 2003-09-30 devnull }else if(*p++ == c)
526 76193d7c 2003-09-30 devnull return !negate;
528 76193d7c 2003-09-30 devnull return negate;
532 2deda14e 2007-12-07 rsc * Note optimization in addinst:
533 2deda14e 2007-12-07 rsc * *l must be pending when addinst is called; if *l has been looked
534 2deda14e 2007-12-07 rsc * at already, the optimization is a bug.
/*
 * Add a thread for inst with ranges *sep to the list starting at l, which
 * ends at the first entry whose inst is 0.  If inst is already listed, the
 * existing entry is kept, its ranges are replaced only when *sep starts
 * earlier (smaller p[0].p1), and 0 is returned.  Otherwise the thread is
 * stored at the terminator and a new terminator is written after it.
 * The return statement for that case is not visible in this view; callers
 * count a nonzero result as a thread added.
 */
537 608a0928 2007-12-07 rsc addinst(Ilist *l, Inst *inst, Rangeset *sep)
541 2deda14e 2007-12-07 rsc for(p = l; p->inst; p++){
542 2deda14e 2007-12-07 rsc if(p->inst==inst){
543 2deda14e 2007-12-07 rsc if((sep)->p[0].p1 < p->se.p[0].p1)
544 2deda14e 2007-12-07 rsc p->se= *sep; /* this would be bug */
545 2deda14e 2007-12-07 rsc return 0; /* It's already there */
/* p now points at the terminator: append here and re-terminate */
548 2deda14e 2007-12-07 rsc p->inst = inst;
549 2deda14e 2007-12-07 rsc p->se= *sep;
550 2deda14e 2007-12-07 rsc (p+1)->inst = 0;
/*
 * Run the forward machine over file f from position startp, reading one
 * character per step and advancing every pending thread.  Matches are
 * recorded in sel through newmatch; the result is nonzero when sel holds
 * a match.  When nothing matched and eof is INFINITY the search wraps
 * around once.  Labels, loop terminators and a few assignments of this
 * function are not visible in this view.
 */
555 76193d7c 2003-09-30 devnull execute(File *f, Posn startp, Posn eof)
/* flag selects which half of list[] is the current list */
557 76193d7c 2003-09-30 devnull int flag = 0;
558 76193d7c 2003-09-30 devnull Inst *inst;
559 76193d7c 2003-09-30 devnull Ilist *tlp;
560 76193d7c 2003-09-30 devnull Posn p = startp;
/*
 * nnl and ntl count the threads in nl and tl.
 * NOTE(review): ntl has no initialiser and its assignment is not visible
 * in this view -- confirm it is set before the increments below.
 */
561 2deda14e 2007-12-07 rsc int nnl = 0, ntl;
563 76193d7c 2003-09-30 devnull int wrapped = 0;
/* a literal first instruction enables the first-character skip; 0 disables it */
564 76193d7c 2003-09-30 devnull int startchar = startinst->type<OPERATOR? startinst->type : 0;
/* start with both lists empty and no match recorded */
566 76193d7c 2003-09-30 devnull list[0][0].inst = list[1][0].inst = 0;
567 76193d7c 2003-09-30 devnull sel.p[0].p1 = -1;
568 76193d7c 2003-09-30 devnull /* Execute machine once for each character */
569 76193d7c 2003-09-30 devnull for(;;p++){
571 76193d7c 2003-09-30 devnull c = filereadc(f, p);
/* at or past eof, or nothing to read: wrapped counts how often this has happened */
572 76193d7c 2003-09-30 devnull if(p>=eof || c<0){
573 76193d7c 2003-09-30 devnull switch(wrapped++){
574 76193d7c 2003-09-30 devnull case 0: /* let loop run one more click */
577 76193d7c 2003-09-30 devnull case 1: /* expired; wrap to beginning */
/* stop if something matched or the caller bounded the search; otherwise restart with empty lists */
578 76193d7c 2003-09-30 devnull if(sel.p[0].p1>=0 || eof!=INFINITY)
579 76193d7c 2003-09-30 devnull goto Return;
580 76193d7c 2003-09-30 devnull list[0][0].inst = list[1][0].inst = 0;
582 76193d7c 2003-09-30 devnull goto doloop;
583 76193d7c 2003-09-30 devnull default:
584 76193d7c 2003-09-30 devnull goto Return;
/* NOTE(review): the statement controlled by this test is not visible in this view */
586 2deda14e 2007-12-07 rsc }else if(((wrapped && p>=startp) || sel.p[0].p1>0) && nnl==0)
588 76193d7c 2003-09-30 devnull /* fast check for first char */
589 2deda14e 2007-12-07 rsc if(startchar && nnl==0 && c!=startchar)
590 76193d7c 2003-09-30 devnull continue;
/* swap roles: the list filled during the previous step becomes tl, the other becomes nl and is emptied */
591 76193d7c 2003-09-30 devnull tl = list[flag];
592 76193d7c 2003-09-30 devnull nl = list[flag^=1];
593 76193d7c 2003-09-30 devnull nl->inst = 0;
/* seed a new thread at p only while no match has been found yet */
596 76193d7c 2003-09-30 devnull if(sel.p[0].p1<0 && (!wrapped || p<startp || startp==eof)){
597 76193d7c 2003-09-30 devnull /* Add first instruction to this list */
598 c99ef336 2007-06-09 devnull sempty.p[0].p1 = p;
599 2deda14e 2007-12-07 rsc if(addinst(tl, startinst, &sempty))
600 2deda14e 2007-12-07 rsc if(++ntl >= NLIST)
601 76193d7c 2003-09-30 devnull Overflow:
602 76193d7c 2003-09-30 devnull error(Eoverflow);
604 76193d7c 2003-09-30 devnull /* Execute machine until this list is empty */
605 76193d7c 2003-09-30 devnull for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */
606 76193d7c 2003-09-30 devnull Switchstmt:
607 76193d7c 2003-09-30 devnull switch(inst->type){
608 76193d7c 2003-09-30 devnull default: /* regular character */
609 76193d7c 2003-09-30 devnull if(inst->type==c){
/* the thread consumed c: its successor runs on the next character */
610 76193d7c 2003-09-30 devnull Addinst:
611 2deda14e 2007-12-07 rsc if(addinst(nl, inst->next, &tlp->se))
612 2deda14e 2007-12-07 rsc if(++nnl >= NLIST)
613 76193d7c 2003-09-30 devnull goto Overflow;
/* brackets consume nothing: record p in the thread ranges (ids below 0 are skipped) and continue in place */
616 76193d7c 2003-09-30 devnull case LBRA:
617 76193d7c 2003-09-30 devnull if(inst->subid>=0)
618 76193d7c 2003-09-30 devnull tlp->se.p[inst->subid].p1 = p;
619 76193d7c 2003-09-30 devnull inst = inst->next;
620 76193d7c 2003-09-30 devnull goto Switchstmt;
621 76193d7c 2003-09-30 devnull case RBRA:
622 76193d7c 2003-09-30 devnull if(inst->subid>=0)
623 76193d7c 2003-09-30 devnull tlp->se.p[inst->subid].p2 = p;
624 76193d7c 2003-09-30 devnull inst = inst->next;
625 76193d7c 2003-09-30 devnull goto Switchstmt;
626 76193d7c 2003-09-30 devnull case ANY:
627 76193d7c 2003-09-30 devnull if(c!='\n')
628 76193d7c 2003-09-30 devnull goto Addinst;
/* beginning of line: at position 0 or right after a newline */
630 76193d7c 2003-09-30 devnull case BOL:
631 76193d7c 2003-09-30 devnull if(p==0 || filereadc(f, p - 1)=='\n'){
633 76193d7c 2003-09-30 devnull inst = inst->next;
634 76193d7c 2003-09-30 devnull goto Switchstmt;
/* end of line: the current character is a newline */
637 76193d7c 2003-09-30 devnull case EOL:
638 76193d7c 2003-09-30 devnull if(c == '\n')
639 76193d7c 2003-09-30 devnull goto Step;
641 76193d7c 2003-09-30 devnull case CCLASS:
642 76193d7c 2003-09-30 devnull if(c>=0 && classmatch(inst->rclass, c, 0))
643 76193d7c 2003-09-30 devnull goto Addinst;
645 76193d7c 2003-09-30 devnull case NCCLASS:
646 76193d7c 2003-09-30 devnull if(c>=0 && classmatch(inst->rclass, c, 1))
647 76193d7c 2003-09-30 devnull goto Addinst;
/* NOTE(review): this call passes tl, whereas the same case in bexecute passes tlp -- confirm which is intended */
649 76193d7c 2003-09-30 devnull case OR:
650 2deda14e 2007-12-07 rsc /* evaluate right choice later */
651 2deda14e 2007-12-07 rsc if(addinst(tl, inst->right, &tlp->se))
652 2deda14e 2007-12-07 rsc if(++ntl >= NLIST)
653 2deda14e 2007-12-07 rsc goto Overflow;
654 2deda14e 2007-12-07 rsc /* efficiency: advance and re-evaluate */
655 2deda14e 2007-12-07 rsc inst = inst->left;
656 2deda14e 2007-12-07 rsc goto Switchstmt;
657 76193d7c 2003-09-30 devnull case END: /* Match! */
658 76193d7c 2003-09-30 devnull tlp->se.p[0].p2 = p;
659 76193d7c 2003-09-30 devnull newmatch(&tlp->se);
665 76193d7c 2003-09-30 devnull return sel.p[0].p1>=0;
/*
 * Offer the match *sp as the forward result.  It replaces sel when sel is
 * still empty, when it starts earlier, or when it starts at the same
 * position and ends later; all NSUBEXP ranges are copied.
 */
669 76193d7c 2003-09-30 devnull newmatch(Rangeset *sp)
673 76193d7c 2003-09-30 devnull if(sel.p[0].p1<0 || sp->p[0].p1<sel.p[0].p1 ||
674 76193d7c 2003-09-30 devnull (sp->p[0].p1==sel.p[0].p1 && sp->p[0].p2>sel.p[0].p2))
675 76193d7c 2003-09-30 devnull for(i = 0; i<NSUBEXP; i++)
676 76193d7c 2003-09-30 devnull sel.p[i] = sp->p[i];
/*
 * Run the backward machine over file f from position startp towards the
 * beginning: each step reads the character before p and then decrements p.
 * Matches are recorded in sel through bnewmatch; the result is nonzero
 * when sel holds a match.  When nothing matched the search wraps once to
 * the end of the file.  Labels, loop terminators and a few assignments of
 * this function are not visible in this view.
 */
680 76193d7c 2003-09-30 devnull bexecute(File *f, Posn startp)
/* flag selects which half of list[] is the current list */
682 76193d7c 2003-09-30 devnull int flag = 0;
683 76193d7c 2003-09-30 devnull Inst *inst;
684 76193d7c 2003-09-30 devnull Ilist *tlp;
685 76193d7c 2003-09-30 devnull Posn p = startp;
/*
 * nnl and ntl count the threads in nl and tl.
 * NOTE(review): ntl has no initialiser and its assignment is not visible
 * in this view -- confirm it is set before the increments below.
 */
686 2deda14e 2007-12-07 rsc int nnl = 0, ntl;
688 76193d7c 2003-09-30 devnull int wrapped = 0;
/* a literal first instruction enables the first-character skip; 0 disables it */
689 76193d7c 2003-09-30 devnull int startchar = bstartinst->type<OPERATOR? bstartinst->type : 0;
/* start with both lists empty and no match recorded */
691 76193d7c 2003-09-30 devnull list[0][0].inst = list[1][0].inst = 0;
692 76193d7c 2003-09-30 devnull sel.p[0].p1= -1;
693 76193d7c 2003-09-30 devnull /* Execute machine once for each character, including terminal NUL */
694 76193d7c 2003-09-30 devnull for(;;--p){
/* no character before p: wrapped counts how often this has happened */
696 76193d7c 2003-09-30 devnull if((c = filereadc(f, p - 1))==-1){
697 76193d7c 2003-09-30 devnull switch(wrapped++){
698 76193d7c 2003-09-30 devnull case 0: /* let loop run one more click */
701 76193d7c 2003-09-30 devnull case 1: /* expired; wrap to end */
702 76193d7c 2003-09-30 devnull if(sel.p[0].p1>=0)
704 76193d7c 2003-09-30 devnull goto Return;
/* nothing matched: restart with empty lists from the end of the buffer */
705 76193d7c 2003-09-30 devnull list[0][0].inst = list[1][0].inst = 0;
706 522b0689 2003-09-30 devnull p = f->b.nc;
707 76193d7c 2003-09-30 devnull goto doloop;
708 76193d7c 2003-09-30 devnull default:
709 76193d7c 2003-09-30 devnull goto Return;
/* NOTE(review): the statement controlled by this test is not visible in this view */
711 2deda14e 2007-12-07 rsc }else if(((wrapped && p<=startp) || sel.p[0].p1>0) && nnl==0)
713 76193d7c 2003-09-30 devnull /* fast check for first char */
714 2deda14e 2007-12-07 rsc if(startchar && nnl==0 && c!=startchar)
715 76193d7c 2003-09-30 devnull continue;
/* swap roles: the list filled during the previous step becomes tl, the other becomes nl and is emptied */
716 76193d7c 2003-09-30 devnull tl = list[flag];
717 76193d7c 2003-09-30 devnull nl = list[flag^=1];
718 76193d7c 2003-09-30 devnull nl->inst = 0;
/* seed a new thread at p only while no match has been found yet */
721 76193d7c 2003-09-30 devnull if(sel.p[0].p1<0 && (!wrapped || p>startp)){
722 76193d7c 2003-09-30 devnull /* Add first instruction to this list */
723 2deda14e 2007-12-07 rsc /* the minus is so the optimizations in addinst work */
724 2deda14e 2007-12-07 rsc sempty.p[0].p1 = -p;
725 2deda14e 2007-12-07 rsc if(addinst(tl, bstartinst, &sempty))
726 2deda14e 2007-12-07 rsc if(++ntl >= NLIST)
727 76193d7c 2003-09-30 devnull Overflow:
728 76193d7c 2003-09-30 devnull error(Eoverflow);
730 76193d7c 2003-09-30 devnull /* Execute machine until this list is empty */
731 76193d7c 2003-09-30 devnull for(tlp = tl; inst = tlp->inst; tlp++){ /* assignment = */
732 76193d7c 2003-09-30 devnull Switchstmt:
733 76193d7c 2003-09-30 devnull switch(inst->type){
734 76193d7c 2003-09-30 devnull default: /* regular character */
735 76193d7c 2003-09-30 devnull if(inst->type == c){
/* the thread consumed c: its successor runs on the next character */
736 76193d7c 2003-09-30 devnull Addinst:
737 2deda14e 2007-12-07 rsc if(addinst(nl, inst->next, &tlp->se))
738 2deda14e 2007-12-07 rsc if(++nnl >= NLIST)
739 76193d7c 2003-09-30 devnull goto Overflow;
/* brackets consume nothing: record p in the thread ranges (ids below 0 are skipped) and continue in place */
742 76193d7c 2003-09-30 devnull case LBRA:
743 76193d7c 2003-09-30 devnull if(inst->subid>=0)
744 76193d7c 2003-09-30 devnull tlp->se.p[inst->subid].p1 = p;
745 76193d7c 2003-09-30 devnull inst = inst->next;
746 76193d7c 2003-09-30 devnull goto Switchstmt;
747 76193d7c 2003-09-30 devnull case RBRA:
748 76193d7c 2003-09-30 devnull if(inst->subid >= 0)
749 76193d7c 2003-09-30 devnull tlp->se.p[inst->subid].p2 = p;
750 76193d7c 2003-09-30 devnull inst = inst->next;
751 76193d7c 2003-09-30 devnull goto Switchstmt;
752 76193d7c 2003-09-30 devnull case ANY:
753 76193d7c 2003-09-30 devnull if(c != '\n')
754 76193d7c 2003-09-30 devnull goto Addinst;
/* beginning of line: the character before p is a newline, or p is 0 */
756 76193d7c 2003-09-30 devnull case BOL:
757 76193d7c 2003-09-30 devnull if(c=='\n' || p==0){
759 76193d7c 2003-09-30 devnull inst = inst->next;
760 76193d7c 2003-09-30 devnull goto Switchstmt;
/* end of line: p is the end of the buffer or the character at p is a newline */
763 76193d7c 2003-09-30 devnull case EOL:
764 522b0689 2003-09-30 devnull if(p==f->b.nc || filereadc(f, p)=='\n')
765 76193d7c 2003-09-30 devnull goto Step;
767 76193d7c 2003-09-30 devnull case CCLASS:
768 76193d7c 2003-09-30 devnull if(c>=0 && classmatch(inst->rclass, c, 0))
769 76193d7c 2003-09-30 devnull goto Addinst;
771 76193d7c 2003-09-30 devnull case NCCLASS:
772 76193d7c 2003-09-30 devnull if(c>=0 && classmatch(inst->rclass, c, 1))
773 76193d7c 2003-09-30 devnull goto Addinst;
/* NOTE(review): this call passes tlp, whereas the same case in execute passes tl -- confirm which is intended */
775 76193d7c 2003-09-30 devnull case OR:
776 2deda14e 2007-12-07 rsc /* evaluate right choice later */
777 3a9401ae 2008-01-30 rsc if(addinst(tlp, inst->right, &tlp->se))
778 2deda14e 2007-12-07 rsc if(++ntl >= NLIST)
779 2deda14e 2007-12-07 rsc goto Overflow;
780 2deda14e 2007-12-07 rsc /* efficiency: advance and re-evaluate */
781 2deda14e 2007-12-07 rsc inst = inst->left;
782 2deda14e 2007-12-07 rsc goto Switchstmt;
783 76193d7c 2003-09-30 devnull case END: /* Match! */
784 2deda14e 2007-12-07 rsc tlp->se.p[0].p1 = -tlp->se.p[0].p1; /* minus sign */
785 76193d7c 2003-09-30 devnull tlp->se.p[0].p2 = p;
786 76193d7c 2003-09-30 devnull bnewmatch(&tlp->se);
792 76193d7c 2003-09-30 devnull return sel.p[0].p1>=0;
/*
 * Offer the backward match *sp as the result.  In *sp the p1 field holds
 * the position where the backward scan began and p2 the position where it
 * ended; sel stores each range with the two fields swapped.  The test
 * therefore compares sp p1 with sel p2 and sp p2 with sel p1: *sp is taken
 * when sel is empty, when its p1 is greater, or on equal p1 when its p2 is
 * smaller.
 */
796 76193d7c 2003-09-30 devnull bnewmatch(Rangeset *sp)
799 76193d7c 2003-09-30 devnull if(sel.p[0].p1<0 || sp->p[0].p1>sel.p[0].p2 || (sp->p[0].p1==sel.p[0].p2 && sp->p[0].p2<sel.p[0].p1))
800 76193d7c 2003-09-30 devnull for(i = 0; i<NSUBEXP; i++){ /* note the reversal; p1<=p2 */
801 76193d7c 2003-09-30 devnull sel.p[i].p1 = sp->p[i].p2;
802 76193d7c 2003-09-30 devnull sel.p[i].p2 = sp->p[i].p1;