commit 6f16e7fc1b35c2e99c9dc069e6ec81a9ac07ca21 from: Russ Cox date: Fri Dec 07 20:33:38 2007 UTC acme: regexp fix (see libregexp change) commit - a7511dd43d9afe8025a6d7bd2fcccf8f594a6f2b commit + 6f16e7fc1b35c2e99c9dc069e6ec81a9ac07ca21 blob - 3a3e78c536a2927abd48b1d0df3721300d388715 blob + a76f617dd41e8e296b68e138770d5cb172387af0 --- src/cmd/acme/regx.c +++ src/cmd/acme/regx.c @@ -521,26 +521,56 @@ classmatch(int classno, int c, int negate) } /* - * Note optimization in addinst: - * *l must be pending when addinst called; if *l has been looked - * at already, the optimization is a bug. + * Add inst to the list [l, l+NLIST), but only if it is not there already. + * These work lists are stored and processed in increasing + * order of sep->p[0], so if the inst is there already, the one + * there already is a more left match and takes priority. + * (This works for backward searches too, because there + * is no explicit comparison.) */ -int -addinst(Ilist *l, Inst *inst, Rangeset *sep) +Ilist* +addinst1(Ilist *l, Inst *inst, Rangeset *sep) { Ilist *p; - for(p = l; p->inst; p++){ - if(p->inst==inst){ - if((sep)->r[0].q0 < p->se.r[0].q0) - p->se= *sep; /* this would be bug */ - return 0; /* It's already there */ + for(p = l; p->inst; p++) + if(p->inst==inst) + return 0; + + if(p == l+NLIST) + return l+NLIST; + + p->inst = inst; + p->se = *sep; + (p+1)->inst = 0; + return p; +} + +int +addinst(Ilist *l, Inst *inst, Rangeset *sep) +{ + Ilist *ap; + + ap = addinst1(l, inst, sep); + if(ap == 0) + return 0; + if(ap == l+NLIST) + return -1; + + /* + * Added inst to list at ap. + * Expand any ORs right now, so that entire + * work list ends up being sorted by increasing sep->p[0]. + */ + for(; ap->inst; ap++){ + if(ap->inst->type == OR){ + if(addinst1(l, ap->inst->u1.left, &ap->se) == l+NLIST) + return -1; + if(addinst1(l, ap->inst->u.right, &ap->se) == l+NLIST) + return -1; } } - p->inst = inst; - p->se= *sep; - (p+1)->inst = nil; - return 1; + return 0; } int @@ -557,7 +587,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Ran Inst *inst; Ilist *tlp; uint p; - int nnl, ntl; int nc, c; int wrapped; int startchar; @@ -566,7 +595,6 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Ran p = startp; startchar = 0; wrapped = 0; - nnl = 0; if(startinst->typetype; list[0][0].inst = list[1][0].inst = nil; @@ -576,6 +604,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Ran else nc = runestrlen(r); /* Execute machine once for each character */ + nl = list[0]; for(;;p++){ doloop: if(p>=eof || p>=nc){ @@ -594,7 +623,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Ran } c = 0; }else{ - if(((wrapped && p>=startp) || sel.r[0].q0>0) && nnl==0) + if(((wrapped && p>=startp) || sel.r[0].q0>0) && nl->inst==0) break; if(t != nil) c = textreadc(t, p); @@ -602,18 +631,15 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Ran c = r[p]; } /* fast check for first char */ - if(startchar && nnl==0 && c!=startchar) + if(startchar && nl->inst==0 && c!=startchar) continue; tl = list[flag]; nl = list[flag^=1]; nl->inst = nil; - ntl = nnl; - nnl = 0; if(sel.r[0].q0<0 && (!wrapped || p= NLIST){ + if(addinst(tl, startinst, &sempty) < 0){ Overflow: warning(nil, "regexp list overflow\n"); sel.r[0].q0 = -1; @@ -627,8 +653,7 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Ran default: /* regular character */ if(inst->type==c){ Addinst: - if(addinst(nl, inst->u1.next, &tlp->se)) - if(++nnl >= NLIST) + if(addinst(nl, inst->u1.next, &tlp->se) < 0) goto Overflow; } break; @@ -666,13 +691,8 @@ rxexecute(Text *t, Rune *r, uint startp, uint eof, Ran goto Addinst; break; case OR: - /* evaluate right choice later */ - if(addinst(tl, inst->u.right, &tlp->se)) - if(++ntl >= NLIST) - goto Overflow; - /* efficiency: advance and re-evaluate */ - inst = inst->u1.left; - goto Switchstmt; + /* already expanded; just a placeholder */ + break; case END: /* Match! */ tlp->se.r[0].q1 = p; newmatch(&tlp->se); @@ -700,13 +720,11 @@ rxbexecute(Text *t, uint startp, Rangeset *rp) Inst *inst; Ilist *tlp; int p; - int nnl, ntl; int c; int wrapped; int startchar; flag = 0; - nnl = 0; wrapped = 0; p = startp; startchar = 0; @@ -715,6 +733,7 @@ rxbexecute(Text *t, uint startp, Rangeset *rp) list[0][0].inst = list[1][0].inst = nil; sel.r[0].q0= -1; /* Execute machine once for each character, including terminal NUL */ + nl = list[0]; for(;;--p){ doloop: if(p <= 0){ @@ -734,24 +753,20 @@ rxbexecute(Text *t, uint startp, Rangeset *rp) } c = 0; }else{ - if(((wrapped && p<=startp) || sel.r[0].q0>0) && nnl==0) + if(((wrapped && p<=startp) || sel.r[0].q0>0) && nl->inst==0) break; c = textreadc(t, p-1); } /* fast check for first char */ - if(startchar && nnl==0 && c!=startchar) + if(startchar && nl->inst==0 && c!=startchar) continue; tl = list[flag]; nl = list[flag^=1]; nl->inst = nil; - ntl = nnl; - nnl = 0; if(sel.r[0].q0<0 && (!wrapped || p>startp)){ /* Add first instruction to this list */ - /* the minus is so the optimizations in addinst work */ - sempty.r[0].q0 = -p; - if(addinst(tl, bstartinst, &sempty)) - if(++ntl >= NLIST){ + sempty.r[0].q0 = p; + if(addinst(tl, bstartinst, &sempty) < 0){ Overflow: warning(nil, "regexp list overflow\n"); sel.r[0].q0 = -1; @@ -765,8 +780,7 @@ rxbexecute(Text *t, uint startp, Rangeset *rp) default: /* regular character */ if(inst->type == c){ Addinst: - if(addinst(nl, inst->u1.next, &tlp->se)) - if(++nnl >= NLIST) + if(addinst(nl, inst->u1.next, &tlp->se) < 0) goto Overflow; } break; @@ -804,15 +818,9 @@ rxbexecute(Text *t, uint startp, Rangeset *rp) goto Addinst; break; case OR: - /* evaluate right choice later */ - if(addinst(tl, inst->u.right, &tlp->se)) - if(++ntl >= NLIST) - goto Overflow; - /* efficiency: advance and re-evaluate */ - inst = inst->u1.left; - goto Switchstmt; + /* already expanded; just a placeholder */ + break; case END: /* Match! */ - tlp->se.r[0].q0 = -tlp->se.r[0].q0; /* minus sign */ tlp->se.r[0].q1 = p; bnewmatch(&tlp->se); break;