18 /* max character classes per program is nelem(reprog->class) */
19 static Reprog *reprog;
21 /* max rune ranges per character class is nelem(classp->spans)/2 */
22 #define NCCRUNE nelem(classp->spans)
25 static Node andstack[NSTACK];
27 static int atorstack[NSTACK];
29 static int cursubid; /* id of current subexpression */
30 static int subidstack[NSTACK]; /* parallel to atorstack */
32 static int lastwasand; /* Last token was operand */
34 static char* exprp; /* pointer to next character in source expression */
37 static Reclass*classp;
40 static Rune yyrune; /* last lex'd rune */
41 static Reclass*yyclassp; /* last lex'd class */
43 /* predeclared crap */
44 static void operator(int);
45 static void pushand(Reinst*, Reinst*);
46 static void pushator(int);
47 static void evaluntil(int);
48 static int bldcclass(void);
50 static jmp_buf regkaboom;
57 longjmp(regkaboom, 1);
75 operator(CAT); /* catenate is implicit */
78 if(t == CCLASS || t == NCCLASS)
90 if(t==RBRA && --nbra<0)
91 rcerror("unmatched right paren");
93 if(++cursubid >= NSUBEXP)
94 rcerror ("too many subexpressions");
103 if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
104 lastwasand = TRUE; /* these look like operands */
108 regerr2(char *s, int c)
123 strcpy(buf, "can't happen: ");
129 pushand(Reinst *f, Reinst *l)
131 if(andp >= &andstack[NSTACK])
132 cant("operand stack overflow");
141 if(atorp >= &atorstack[NSTACK])
142 cant("operator stack overflow");
144 *subidp++ = cursubid;
152 if(andp <= &andstack[0]){
153 regerr2("missing operand for ", op);
163 if(atorp <= &atorstack[0])
164 cant("operator stack underflow");
173 Reinst *inst1, *inst2;
175 while(pri==RBRA || atorp[-1]>=pri){
178 rcerror("unknown operator in evaluntil");
180 case LBRA: /* must have been RBRA */
182 inst2 = newinst(RBRA);
183 inst2->u1.subid = *subidp;
184 op1->last->u2.next = inst2;
185 inst1 = newinst(LBRA);
186 inst1->u1.subid = *subidp;
187 inst1->u2.next = op1->first;
188 pushand(inst1, inst2);
193 inst2 = newinst(NOP);
194 op2->last->u2.next = inst2;
195 op1->last->u2.next = inst2;
197 inst1->u1.right = op1->first;
198 inst1->u2.left = op2->first;
199 pushand(inst1, inst2);
204 op1->last->u2.next = op2->first;
205 pushand(op1->first, op2->last);
210 op2->last->u2.next = inst1;
211 inst1->u1.right = op2->first;
212 pushand(inst1, inst1);
217 op2->last->u2.next = inst1;
218 inst1->u1.right = op2->first;
219 pushand(op2->first, inst1);
224 inst2 = newinst(NOP);
225 inst1->u2.left = inst2;
226 inst1->u1.right = op2->first;
227 op2->last->u2.next = inst2;
228 pushand(inst1, inst2);
237 Reinst *inst, *target;
244 * get rid of NOOP chains
246 for(inst=pp->firstinst; inst->type!=END; inst++){
247 target = inst->u2.next;
248 while(target->type == NOP)
249 target = target->u2.next;
250 inst->u2.next = target;
254 * The original allocation is for an area larger than
255 * necessary. Reallocate to the actual space used
256 * and then relocate the code.
258 size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
259 npp = realloc(pp, size);
260 if(npp==0 || npp==pp)
262 diff = (char *)npp - (char *)pp;
263 freep = (Reinst *)((char *)freep + diff);
264 for(inst=npp->firstinst; inst<freep; inst++){
270 inst->u1.right = (void*)((char*)inst->u1.right + diff);
274 inst->u1.right = (void*)((char*)inst->u1.right + diff);
276 cl->end = (void*)((char*)cl->end + diff);
279 inst->u2.left = (void*)((char*)inst->u2.left + diff);
281 npp->startinst = (void*)((char*)npp->startinst + diff);
291 print("operators\n");
292 for(ip=atorstack; ip<atorp; ip++)
295 for(stk=andstack; stk<andp; stk++)
296 print("0%o\t0%o\n", stk->first->type, stk->last->type);
307 print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
308 l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
310 print("\t%C\n", l->u1.r);
311 else if(l->type == CCLASS || l->type == NCCLASS){
313 if(l->type == NCCLASS)
315 for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
319 print("%C-%C", p[0], p[1]);
330 if(nclass >= nelem(reprog->class))
331 rcerror("too many character classes; increase Reprog.class size");
332 return &(classp[nclass++]);
342 exprp += chartorune(rp, exprp);
344 exprp += chartorune(rp, exprp);
353 lex(int literal, int dot_type)
357 quoted = nextc(&yyrune);
358 if(literal || quoted){
400 /* we have already seen the '[' */
402 yyclassp = newclass();
404 /* look ahead for negation */
405 /* SPECIAL CASE!!! negated classes don't match \n */
407 quoted = nextc(&rune);
408 if(!quoted && rune == '^'){
410 quoted = nextc(&rune);
415 /* parse class into a set of spans */
416 while(ep < &r[NCCRUNE-1]){
418 rcerror("malformed '[]'");
421 if(!quoted && rune == ']')
423 if(!quoted && rune == '-'){
425 rcerror("malformed '[]'");
428 quoted = nextc(&rune);
429 if((!quoted && rune == ']') || rune == 0){
430 rcerror("malformed '[]'");
438 quoted = nextc(&rune);
440 if(ep >= &r[NCCRUNE-1]) {
441 rcerror("char class too large; increase Reclass.spans size");
445 /* sort on span start */
446 for(p = r; p < ep; p += 2){
447 for(np = p; np < ep; np += 2)
459 np = yyclassp->spans;
466 for(; p < ep; p += 2)
467 /* overlapping or adjacent ranges? */
468 if(p[0] <= np[1] + 1){
470 np[1] = p[1]; /* coalesce */
476 yyclassp->end = np+2;
483 regcomp1(char *s, int literal, int dot_type)
488 /* get memory for the program */
489 pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
491 regerror("out of memory");
494 freep = pp->firstinst;
498 if(setjmp(regkaboom))
501 /* go compile the sucker */
512 /* Start with a low priority operator to prime parser */
514 while((token = lex(literal, dot_type)) != END){
515 if((token&0300) == OPERATOR)
521 /* Close with a low priority operator */
531 rcerror("unmatched left paren");
532 --andp; /* points to first and only operand */
533 pp->startinst = andp->first;
539 print("start: %d\n", andp->first-pp->firstinst);
553 return regcomp1(s, 0, ANY);
559 return regcomp1(s, 1, ANY);
565 return regcomp1(s, 0, ANYNL);