18 /* max rune ranges per character class is nelem(classp->spans)/2 */
19 #define NCCRUNE nelem(classp->spans)
22 static Node andstack[NSTACK];
24 static int atorstack[NSTACK];
26 static int cursubid; /* id of current subexpression */
27 static int subidstack[NSTACK]; /* parallel to atorstack */
29 static int lastwasand; /* Last token was operand */
31 static char* exprp; /* pointer to next character in source expression */
34 static Reclass*classp;
37 static Rune yyrune; /* last lex'd rune */
38 static Reclass*yyclassp; /* last lex'd class */
40 /* predeclared crap */
41 static void operator(int);
42 static void pushand(Reinst*, Reinst*);
43 static void pushator(int);
44 static void evaluntil(int);
45 static int bldcclass(void);
47 static jmp_buf regkaboom;
54 longjmp(regkaboom, 1);
72 operator(CAT); /* catenate is implicit */
75 if(t == CCLASS || t == NCCLASS)
87 if(t==RBRA && --nbra<0)
88 rcerror("unmatched right paren");
90 if(++cursubid >= NSUBEXP)
91 rcerror ("too many subexpressions");
100 if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
101 lastwasand = TRUE; /* these look like operands */
105 regerr2(char *s, int c)
120 strcpy(buf, "can't happen: ");
126 pushand(Reinst *f, Reinst *l)
128 if(andp >= &andstack[NSTACK])
129 cant("operand stack overflow");
138 if(atorp >= &atorstack[NSTACK])
139 cant("operator stack overflow");
141 *subidp++ = cursubid;
149 if(andp <= &andstack[0]){
150 regerr2("missing operand for ", op);
160 if(atorp <= &atorstack[0])
161 cant("operator stack underflow");
170 Reinst *inst1, *inst2;
172 while(pri==RBRA || atorp[-1]>=pri){
175 rcerror("unknown operator in evaluntil");
177 case LBRA: /* must have been RBRA */
179 inst2 = newinst(RBRA);
180 inst2->u1.subid = *subidp;
181 op1->last->u2.next = inst2;
182 inst1 = newinst(LBRA);
183 inst1->u1.subid = *subidp;
184 inst1->u2.next = op1->first;
185 pushand(inst1, inst2);
190 inst2 = newinst(NOP);
191 op2->last->u2.next = inst2;
192 op1->last->u2.next = inst2;
194 inst1->u1.right = op1->first;
195 inst1->u2.left = op2->first;
196 pushand(inst1, inst2);
201 op1->last->u2.next = op2->first;
202 pushand(op1->first, op2->last);
207 op2->last->u2.next = inst1;
208 inst1->u1.right = op2->first;
209 pushand(inst1, inst1);
214 op2->last->u2.next = inst1;
215 inst1->u1.right = op2->first;
216 pushand(op2->first, inst1);
221 inst2 = newinst(NOP);
222 inst1->u2.left = inst2;
223 inst1->u1.right = op2->first;
224 op2->last->u2.next = inst2;
225 pushand(inst1, inst2);
234 Reinst *inst, *target;
241 * get rid of NOOP chains
243 for(inst=pp->firstinst; inst->type!=END; inst++){
244 target = inst->u2.next;
245 while(target->type == NOP)
246 target = target->u2.next;
247 inst->u2.next = target;
251 * The original allocation is for an area larger than
252 * necessary. Reallocate to the actual space used
253 * and then relocate the code.
255 size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
256 npp = realloc(pp, size);
257 if(npp==0 || npp==pp)
259 diff = (char *)npp - (char *)pp;
260 freep = (Reinst *)((char *)freep + diff);
261 for(inst=npp->firstinst; inst<freep; inst++){
267 inst->u1.right = (void*)((char*)inst->u1.right + diff);
271 inst->u1.right = (void*)((char*)inst->u1.right + diff);
273 cl->end = (void*)((char*)cl->end + diff);
276 inst->u2.left = (void*)((char*)inst->u2.left + diff);
278 npp->startinst = (void*)((char*)npp->startinst + diff);
288 print("operators\n");
289 for(ip=atorstack; ip<atorp; ip++)
292 for(stk=andstack; stk<andp; stk++)
293 print("0%o\t0%o\n", stk->first->type, stk->last->type);
304 print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
305 l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
307 print("\t%C\n", l->u1.r);
308 else if(l->type == CCLASS || l->type == NCCLASS){
310 if(l->type == NCCLASS)
312 for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
316 print("%C-%C", p[0], p[1]);
327 if(nclass >= nelem(((Reprog*)0)->class))
328 rcerror("too many character classes; increase Reprog.class size");
329 return &(classp[nclass++]);
339 exprp += chartorune(rp, exprp);
341 exprp += chartorune(rp, exprp);
350 lex(int literal, int dot_type)
354 quoted = nextc(&yyrune);
355 if(literal || quoted){
397 /* we have already seen the '[' */
399 yyclassp = newclass();
401 /* look ahead for negation */
402 /* SPECIAL CASE!!! negated classes don't match \n */
404 quoted = nextc(&rune);
405 if(!quoted && rune == '^'){
407 quoted = nextc(&rune);
412 /* parse class into a set of spans */
413 while(ep < &r[NCCRUNE-1]){
415 rcerror("malformed '[]'");
418 if(!quoted && rune == ']')
420 if(!quoted && rune == '-'){
422 rcerror("malformed '[]'");
425 quoted = nextc(&rune);
426 if((!quoted && rune == ']') || rune == 0){
427 rcerror("malformed '[]'");
435 quoted = nextc(&rune);
437 if(ep >= &r[NCCRUNE-1]) {
438 rcerror("char class too large; increase Reclass.spans size");
442 /* sort on span start */
443 for(p = r; p < ep; p += 2){
444 for(np = p; np < ep; np += 2)
456 np = yyclassp->spans;
463 for(; p < ep; p += 2)
464 /* overlapping or adjacent ranges? */
465 if(p[0] <= np[1] + 1){
467 np[1] = p[1]; /* coalesce */
473 yyclassp->end = np+2;
480 regcomp1(char *s, int literal, int dot_type)
485 /* get memory for the program */
486 pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
488 regerror("out of memory");
491 freep = pp->firstinst;
495 if(setjmp(regkaboom))
498 /* go compile the sucker */
509 /* Start with a low priority operator to prime parser */
511 while((token = lex(literal, dot_type)) != END){
512 if((token&0300) == OPERATOR)
518 /* Close with a low priority operator */
528 rcerror("unmatched left paren");
529 --andp; /* points to first and only operand */
530 pp->startinst = andp->first;
536 print("start: %d\n", andp->first-pp->firstinst);
550 return regcomp1(s, 0, ANY);
556 return regcomp1(s, 1, ANY);
562 return regcomp1(s, 0, ANYNL);