19 static Node andstack[NSTACK];
21 static int atorstack[NSTACK];
23 static int cursubid; /* id of current subexpression */
24 static int subidstack[NSTACK]; /* parallel to atorstack */
26 static int lastwasand; /* Last token was operand */
28 static char* exprp; /* pointer to next character in source expression */
31 static Reclass*classp;
34 static Rune yyrune; /* last lex'd rune */
35 static Reclass*yyclassp; /* last lex'd class */
37 /* predeclared crap */
38 static void operator(int);
39 static void pushand(Reinst*, Reinst*);
40 static void pushator(int);
41 static void evaluntil(int);
42 static int bldcclass(void);
44 static jmp_buf regkaboom;
51 longjmp(regkaboom, 1);
69 operator(CAT); /* catenate is implicit */
72 if(t == CCLASS || t == NCCLASS)
84 if(t==RBRA && --nbra<0)
85 rcerror("unmatched right paren");
87 if(++cursubid >= NSUBEXP)
88 rcerror ("too many subexpressions");
97 if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
98 lastwasand = TRUE; /* these look like operands */
102 regerr2(char *s, int c)
117 strcpy(buf, "can't happen: ");
123 pushand(Reinst *f, Reinst *l)
125 if(andp >= &andstack[NSTACK])
126 cant("operand stack overflow");
135 if(atorp >= &atorstack[NSTACK])
136 cant("operator stack overflow");
138 *subidp++ = cursubid;
146 if(andp <= &andstack[0]){
147 regerr2("missing operand for ", op);
157 if(atorp <= &atorstack[0])
158 cant("operator stack underflow");
167 Reinst *inst1, *inst2;
169 while(pri==RBRA || atorp[-1]>=pri){
172 rcerror("unknown operator in evaluntil");
174 case LBRA: /* must have been RBRA */
176 inst2 = newinst(RBRA);
177 inst2->u1.subid = *subidp;
178 op1->last->u2.next = inst2;
179 inst1 = newinst(LBRA);
180 inst1->u1.subid = *subidp;
181 inst1->u2.next = op1->first;
182 pushand(inst1, inst2);
187 inst2 = newinst(NOP);
188 op2->last->u2.next = inst2;
189 op1->last->u2.next = inst2;
191 inst1->u1.right = op1->first;
192 inst1->u2.left = op2->first;
193 pushand(inst1, inst2);
198 op1->last->u2.next = op2->first;
199 pushand(op1->first, op2->last);
204 op2->last->u2.next = inst1;
205 inst1->u1.right = op2->first;
206 pushand(inst1, inst1);
211 op2->last->u2.next = inst1;
212 inst1->u1.right = op2->first;
213 pushand(op2->first, inst1);
218 inst2 = newinst(NOP);
219 inst1->u2.left = inst2;
220 inst1->u1.right = op2->first;
221 op2->last->u2.next = inst2;
222 pushand(inst1, inst2);
231 Reinst *inst, *target;
238 * get rid of NOOP chains
240 for(inst=pp->firstinst; inst->type!=END; inst++){
241 target = inst->u2.next;
242 while(target->type == NOP)
243 target = target->u2.next;
244 inst->u2.next = target;
248 * The original allocation is for an area larger than
249 * necessary. Reallocate to the actual space used
250 * and then relocate the code.
252 size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
253 npp = realloc(pp, size);
254 if(npp==0 || npp==pp)
256 diff = (char *)npp - (char *)pp;
257 freep = (Reinst *)((char *)freep + diff);
258 for(inst=npp->firstinst; inst<freep; inst++){
264 inst->u1.right = (void*)((char*)inst->u1.right + diff);
268 inst->u1.right = (void*)((char*)inst->u1.right + diff);
270 cl->end = (void*)((char*)cl->end + diff);
273 inst->u2.left = (void*)((char*)inst->u2.left + diff);
275 npp->startinst = (void*)((char*)npp->startinst + diff);
285 print("operators\n");
286 for(ip=atorstack; ip<atorp; ip++)
289 for(stk=andstack; stk<andp; stk++)
290 print("0%o\t0%o\n", stk->first->type, stk->last->type);
301 print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
302 l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
304 print("\t%C\n", l->u1.r);
305 else if(l->type == CCLASS || l->type == NCCLASS){
307 if(l->type == NCCLASS)
309 for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
313 print("%C-%C", p[0], p[1]);
325 regerr2("too many character classes; limit", NCLASS+'0');
326 return &(classp[nclass++]);
336 exprp += chartorune(rp, exprp);
338 exprp += chartorune(rp, exprp);
347 lex(int literal, int dot_type)
351 quoted = nextc(&yyrune);
352 if(literal || quoted){
394 /* we have already seen the '[' */
396 yyclassp = newclass();
398 /* look ahead for negation */
399 /* SPECIAL CASE!!! negated classes don't match \n */
401 quoted = nextc(&rune);
402 if(!quoted && rune == '^'){
404 quoted = nextc(&rune);
409 /* parse class into a set of spans */
410 for(; ep<&r[NCCRUNE];){
412 rcerror("malformed '[]'");
415 if(!quoted && rune == ']')
417 if(!quoted && rune == '-'){
419 rcerror("malformed '[]'");
422 quoted = nextc(&rune);
423 if((!quoted && rune == ']') || rune == 0){
424 rcerror("malformed '[]'");
432 quoted = nextc(&rune);
435 /* sort on span start */
436 for(p = r; p < ep; p += 2){
437 for(np = p; np < ep; np += 2)
449 np = yyclassp->spans;
456 for(; p < ep; p += 2)
465 yyclassp->end = np+2;
472 regcomp1(char *s, int literal, int dot_type)
477 /* get memory for the program */
478 pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
480 regerror("out of memory");
483 freep = pp->firstinst;
487 if(setjmp(regkaboom))
490 /* go compile the sucker */
501 /* Start with a low priority operator to prime parser */
503 while((token = lex(literal, dot_type)) != END){
504 if((token&0300) == OPERATOR)
510 /* Close with a low priority operator */
520 rcerror("unmatched left paren");
521 --andp; /* points to first and only operand */
522 pp->startinst = andp->first;
528 print("start: %d\n", andp->first-pp->firstinst);
542 return regcomp1(s, 0, ANY);
548 return regcomp1(s, 1, ANY);
554 return regcomp1(s, 0, ANYNL);