Commit Diff


commit - 6c6117397fb186253bd8754ecfd4786d1d1371f6
commit + a7511dd43d9afe8025a6d7bd2fcccf8f594a6f2b
blob - b854b5ac072274c9ad44ba8c4fc669fab65468fc
blob + 39e67725d95d63b11ce6e74564779824cefde071
--- src/libregexp/regaux.c
+++ src/libregexp/regaux.c
@@ -23,90 +23,89 @@ _renewmatch(Resub *mp, int ms, Resublist *sp)
 }
 
 /*
- * Note optimization in _renewthread:
- * 	*lp must be pending when _renewthread called; if *l has been looked
- *		at already, the optimization is a bug.
+ * Add ip to the list [lp, elp], but only if it is not there already.
+ * These work lists are stored and processed in increasing
+ * order of sp[0], so if the ip is there already, the one that's
+ * there already is a more left match and takes priority.
  */
-extern Relist*
-_renewthread(Relist *lp,	/* _relist to add to */
+static Relist*
+_renewthread1(Relist *lp,	/* Relist to add to */
+	Relist *elp,		/* limit pointer for Relist */
 	Reinst *ip,		/* instruction to add */
 	int ms,
 	Resublist *sep)		/* pointers to subexpressions */
 {
 	Relist *p;
 
-	for(p=lp; p->inst; p++){
-		if(p->inst == ip){
-			if(sep->m[0].s.sp < p->se.m[0].s.sp){
-				if(ms > 1)
-					p->se = *sep;
-				else
-					p->se.m[0] = sep->m[0];
-			}
+	for(p=lp; p->inst; p++)
+		if(p->inst == ip)
 			return 0;
-		}
-	}
+	
+	if(p == elp)	/* refuse to overflow buffer */
+		return elp;
+
 	p->inst = ip;
 	if(ms > 1)
 		p->se = *sep;
 	else
 		p->se.m[0] = sep->m[0];
-	(++p)->inst = 0;
+	(p+1)->inst = 0;
 	return p;
 }
 
+extern int
+_renewthread(Relist *lp, Relist *elp, Reinst *ip, int ms, Resublist *sep)
+{
+	Relist *ap;
+
+	ap = _renewthread1(lp, elp, ip, ms, sep);
+	if(ap == 0)
+		return 0;
+	if(ap == elp)
+		return -1;
+
+	/*
+	 * Added ip to list at ap.  
+	 * Expand any ORs right now, so that entire
+	 * work list ends up being sorted by increasing m[0].sp.
+	 */
+	for(; ap->inst; ap++){
+		if(ap->inst->type == OR){
+			if(_renewthread1(lp, elp, ap->inst->u1.right, ms, &ap->se) == elp)
+				return -1;
+			if(_renewthread1(lp, elp, ap->inst->u2.next, ms, &ap->se) == elp)
+				return -1;
+		}
+	}
+	return 0;
+}
+
 /*
  * same as renewthread, but called with
  * initial empty start pointer.
  */
-extern Relist*
+extern int
 _renewemptythread(Relist *lp,	/* _relist to add to */
+	Relist *elp,
 	Reinst *ip,		/* instruction to add */
 	int ms,
 	char *sp)		/* pointers to subexpressions */
 {
-	Relist *p;
-
-	for(p=lp; p->inst; p++){
-		if(p->inst == ip){
-			if(sp < p->se.m[0].s.sp) {
-				if(ms > 1)
-					memset(&p->se, 0, sizeof(p->se));
-				p->se.m[0].s.sp = sp;
-			}
-			return 0;
-		}
-	}
-	p->inst = ip;
+	Resublist sep;
+	
 	if(ms > 1)
-		memset(&p->se, 0, sizeof(p->se));
-	p->se.m[0].s.sp = sp;
-	(++p)->inst = 0;
-	return p;
+		memset(&sep, 0, sizeof sep);
+	sep.m[0].s.sp = sp;
+	sep.m[0].e.ep = 0;
+	return _renewthread(lp, elp, ip, ms, &sep);
 }
 
-extern Relist*
+extern int
 _rrenewemptythread(Relist *lp,	/* _relist to add to */
+	Relist *elp,
 	Reinst *ip,		/* instruction to add */
 	int ms,
 	Rune *rsp)		/* pointers to subexpressions */
 {
-	Relist *p;
-
-	for(p=lp; p->inst; p++){
-		if(p->inst == ip){
-			if(rsp < p->se.m[0].s.rsp) {
-				if(ms > 1)
-					memset(&p->se, 0, sizeof(p->se));
-				p->se.m[0].s.rsp = rsp;
-			}
-			return 0;
-		}
-	}
-	p->inst = ip;
-	if(ms > 1)
-		memset(&p->se, 0, sizeof(p->se));
-	p->se.m[0].s.rsp = rsp;
-	(++p)->inst = 0;
-	return p;
+	return _renewemptythread(lp, elp, ip, ms, (char*)rsp);
 }
blob - b8286dc7298046dd811a10505174a443eb489544
blob + ba0175ff2041fd007d97a1cfd08b881e09add618
--- src/libregexp/regcomp.c
+++ src/libregexp/regcomp.c
@@ -232,7 +232,7 @@ optimize(Reprog *pp)
 	int size;
 	Reprog *npp;
 	Reclass *cl;
-	int diff;
+	int diff, proglen;
 
 	/*
 	 *  get rid of NOOP chains
@@ -249,10 +249,13 @@ optimize(Reprog *pp)
 	 *  necessary.  Reallocate to the actual space used
 	 *  and then relocate the code.
 	 */
-	size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
-	npp = realloc(pp, size);
-	if(npp==0 || npp==pp)
+	proglen = freep - pp->firstinst;
+	size = sizeof(Reprog) + proglen*sizeof(Reinst);
+	npp = realloc(pp, size);
+	if(npp==0 || npp==pp){
+		pp->proglen = proglen;
 		return pp;
+	}
 	diff = (char *)npp - (char *)pp;
 	freep = (Reinst *)((char *)freep + diff);
 	for(inst=npp->firstinst; inst<freep; inst++){
@@ -273,6 +276,7 @@ optimize(Reprog *pp)
 		*(char**)(void*)&inst->u2.left += diff;
 	}
 	*(char**)(void*)&npp->startinst += diff;
+	npp->proglen = proglen;
 	return npp;
 }
 
blob - 6c88cd09768dc39ab0966a3f9f28d0ab9c3288c3
blob + fde99f54a25f85851a076e01a81d19260a382645
--- src/libregexp/regcomp.h
+++ src/libregexp/regcomp.h
@@ -68,7 +68,7 @@ struct	Reljunk
 	Rune*	reol;
 };
 
-extern Relist*	_renewthread(Relist*, Reinst*, int, Resublist*);
+extern int	_renewthread(Relist*, Relist*, Reinst*, int, Resublist*);
 extern void	_renewmatch(Resub*, int, Resublist*);
-extern Relist*	_renewemptythread(Relist*, Reinst*, int, char*);
-extern Relist*	_rrenewemptythread(Relist*, Reinst*, int, Rune*);
+extern int	_renewemptythread(Relist*, Relist*, Reinst*, int, char*);
+extern int	_rrenewemptythread(Relist*, Relist*, Reinst*, int, Rune*);
blob - c04182a1aee0a8c97ce770af7b71b8879038d84a
blob + 10d93f0cbbf2cfc4825246460d55cab333c31082
--- src/libregexp/regexec.c
+++ src/libregexp/regexec.c
@@ -2,7 +2,6 @@
 #include "regexp9.h"
 #include "regcomp.h"
 
-
 /*
  *  return	0 if no match
  *		>0 if a match
@@ -13,16 +12,14 @@ regexec1(Reprog *progp,	/* program to run */
 	char *bol,	/* string to run machine on */
 	Resub *mp,	/* subexpression elements */
 	int ms,		/* number of elements at mp */
-	Reljunk *j
-)
+	Reljunk *j)
 {
 	int flag=0;
 	Reinst *inst;
 	Relist *tlp;
 	char *s;
-	int i, checkstart;
+	int i, checkstart, n;
 	Rune r, *rp, *ep;
-	int n;
 	Relist* tl;		/* This list, next list */
 	Relist* nl;
 	Relist* tle;		/* ends of this and next list */
@@ -48,7 +45,7 @@ regexec1(Reprog *progp,	/* program to run */
 			switch(j->starttype) {
 			case RUNE:
 				p = utfrune(s, j->startchar);
-				if(p == 0 || s == j->eol)
+				if(p == 0 || (j->eol && p >= j->eol))
 					return match;
 				s = p;
 				break;
@@ -56,7 +53,7 @@ regexec1(Reprog *progp,	/* program to run */
 				if(s == bol)
 					break;
 				p = utfrune(s, '\n');
-				if(p == 0 || s == j->eol)
+				if(p == 0 || (j->eol && p+1 >= j->eol))
 					return match;
 				s = p+1;
 				break;
@@ -77,17 +74,16 @@ regexec1(Reprog *progp,	/* program to run */
 
 		/* Add first instruction to current list */
 		if(match == 0)
-			_renewemptythread(tl, progp->startinst, ms, s);
+			_renewemptythread(tl, tle, progp->startinst, ms, s);
 
 		/* Execute machine until current list is empty */
-		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
+		for(tlp=tl; tlp->inst; tlp++){
 			for(inst = tlp->inst; ; inst = inst->u2.next){
 				switch(inst->type){
 				case RUNE:	/* regular character */
-					if(inst->u1.r == r){
-						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
+					if(inst->u1.r == r)
+						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
 							return -1;
-					}
 					break;
 				case LBRA:
 					tlp->se.m[inst->u1.subid].s.sp = s;
@@ -97,11 +93,11 @@ regexec1(Reprog *progp,	/* program to run */
 					continue;
 				case ANY:
 					if(r != '\n')
-						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
+						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
 							return -1;
 					break;
 				case ANYNL:
-					if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
+					if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
 							return -1;
 					break;
 				case BOL:
@@ -116,7 +112,7 @@ regexec1(Reprog *progp,	/* program to run */
 					ep = inst->u1.cp->end;
 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
 						if(r >= rp[0] && r <= rp[1]){
-							if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
+							if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
 								return -1;
 							break;
 						}
@@ -127,15 +123,12 @@ regexec1(Reprog *progp,	/* program to run */
 						if(r >= rp[0] && r <= rp[1])
 							break;
 					if(rp == ep)
-						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
+						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
 							return -1;
 					break;
 				case OR:
-					/* evaluate right choice later */
-					if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
-						return -1;
-					/* efficiency: advance and re-evaluate */
-					continue;
+					/* expanded during renewthread; just a place holder */
+					break;
 				case END:	/* Match! */
 					match = 1;
 					tlp->se.m[0].e.ep = s;
@@ -165,19 +158,18 @@ regexec2(Reprog *progp,	/* program to run */
 	int rv;
 	Relist *relist0, *relist1;
 
-	/* mark space */
-	relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
+	relist0 = malloc((progp->proglen+1)*sizeof(Relist));
 	if(relist0 == nil)
 		return -1;
-	relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
+	relist1 = malloc((progp->proglen+1)*sizeof(Relist));
 	if(relist1 == nil){
 		free(relist1);
 		return -1;
 	}
 	j->relist[0] = relist0;
 	j->relist[1] = relist1;
-	j->reliste[0] = relist0 + BIGLISTSIZE - 2;
-	j->reliste[1] = relist1 + BIGLISTSIZE - 2;
+	j->reliste[0] = relist0 + progp->proglen;
+	j->reliste[1] = relist1 + progp->proglen;
 
 	rv = regexec1(progp, bol, mp, ms, j);
 	free(relist0);
@@ -218,8 +210,8 @@ regexec(Reprog *progp,	/* program to run */
 	/* mark space */
 	j.relist[0] = relist0;
 	j.relist[1] = relist1;
-	j.reliste[0] = relist0 + nelem(relist0) - 2;
-	j.reliste[1] = relist1 + nelem(relist1) - 2;
+	j.reliste[0] = relist0 + nelem(relist0) - 1;
+	j.reliste[1] = relist1 + nelem(relist1) - 1;
 
 	rv = regexec1(progp, bol, mp, ms, &j);
 	if(rv >= 0)
blob - ec7907da547207d235b7838534cbd7080b1a775f
blob + 907ddef3215a5899ca1380dcd71b00b40a6b3dc5
--- src/libregexp/rregexec.c
+++ src/libregexp/rregexec.c
@@ -9,9 +9,9 @@
  */
 static int
 rregexec1(Reprog *progp,	/* program to run */
-	Rune *bol,		/* string to run machine on */
-	Resub *mp,		/* subexpression elements */
-	int ms,			/* number of elements at mp */
+	Rune *bol,	/* string to run machine on */
+	Resub *mp,	/* subexpression elements */
+	int ms,		/* number of elements at mp */
 	Reljunk *j)
 {
 	int flag=0;
@@ -28,7 +28,7 @@ rregexec1(Reprog *progp,	/* program to run */
 	Rune *p;
 
 	match = 0;
-	checkstart = j->startchar;
+	checkstart = j->starttype;
 	if(mp)
 		for(i=0; i<ms; i++) {
 			mp[i].s.rsp = 0;
@@ -46,7 +46,7 @@ rregexec1(Reprog *progp,	/* program to run */
 			switch(j->starttype) {
 			case RUNE:
 				p = runestrchr(s, j->startchar);
-				if(p == 0 || p == j->reol)
+				if(p == 0 || (j->reol && p >= j->reol))
 					return match;
 				s = p;
 				break;
@@ -54,7 +54,7 @@ rregexec1(Reprog *progp,	/* program to run */
 				if(s == bol)
 					break;
 				p = runestrchr(s, '\n');
-				if(p == 0 || s == j->reol)
+				if(p == 0 || (j->reol && p+1 >= j->reol))
 					return match;
 				s = p+1;
 				break;
@@ -71,15 +71,16 @@ rregexec1(Reprog *progp,	/* program to run */
 		nl->inst = 0;
 
 		/* Add first instruction to current list */
-		_rrenewemptythread(tl, progp->startinst, ms, s);
+		if(match == 0)
+			_rrenewemptythread(tl, tle, progp->startinst, ms, s);
 
 		/* Execute machine until current list is empty */
 		for(tlp=tl; tlp->inst; tlp++){
-			for(inst=tlp->inst; ; inst = inst->u2.next){
+			for(inst = tlp->inst; ; inst = inst->u2.next){
 				switch(inst->type){
 				case RUNE:	/* regular character */
 					if(inst->u1.r == r)
-						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
+						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
 							return -1;
 					break;
 				case LBRA:
@@ -90,11 +91,11 @@ rregexec1(Reprog *progp,	/* program to run */
 					continue;
 				case ANY:
 					if(r != '\n')
-						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
+						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
 							return -1;
 					break;
 				case ANYNL:
-					if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
+					if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
 							return -1;
 					break;
 				case BOL:
@@ -109,7 +110,7 @@ rregexec1(Reprog *progp,	/* program to run */
 					ep = inst->u1.cp->end;
 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
 						if(r >= rp[0] && r <= rp[1]){
-							if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
+							if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
 								return -1;
 							break;
 						}
@@ -120,15 +121,12 @@ rregexec1(Reprog *progp,	/* program to run */
 						if(r >= rp[0] && r <= rp[1])
 							break;
 					if(rp == ep)
-						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
+						if(_renewthread(nl, nle, inst->u2.next, ms, &tlp->se) < 0)
 							return -1;
 					break;
 				case OR:
-					/* evaluate right choice later */
-					if(_renewthread(tl, inst->u1.right, ms, &tlp->se) == tle)
-						return -1;
-					/* efficiency: advance and re-evaluate */
-					continue;
+					/* expanded during renewthread; just a place holder */
+					break;
 				case END:	/* Match! */
 					match = 1;
 					tlp->se.m[0].e.rep = s;
@@ -141,7 +139,7 @@ rregexec1(Reprog *progp,	/* program to run */
 		}
 		if(s == j->reol)
 			break;
-		checkstart = j->startchar && nl->inst==0;
+		checkstart = j->starttype && nl->inst==0;
 		s++;
 	}while(r);
 	return match;
@@ -155,15 +153,26 @@ rregexec2(Reprog *progp,	/* program to run */
 	Reljunk *j
 )
 {
-	Relist relist0[5*LISTSIZE], relist1[5*LISTSIZE];
+	int rv;
+	Relist *relist0, *relist1;
 
-	/* mark space */
+	relist0 = malloc((progp->proglen+1)*sizeof(Relist));
+	if(relist0 == nil)
+		return -1;
+	relist1 = malloc((progp->proglen+1)*sizeof(Relist));
+	if(relist1 == nil){
+		free(relist1);
+		return -1;
+	}
 	j->relist[0] = relist0;
 	j->relist[1] = relist1;
-	j->reliste[0] = relist0 + nelem(relist0) - 2;
-	j->reliste[1] = relist1 + nelem(relist1) - 2;
+	j->reliste[0] = relist0 + progp->proglen;
+	j->reliste[1] = relist1 + progp->proglen;
 
-	return rregexec1(progp, bol, mp, ms, j);
+	rv = rregexec1(progp, bol, mp, ms, j);
+	free(relist0);
+	free(relist1);
+	return rv;
 }
 
 extern int
@@ -199,8 +208,8 @@ rregexec(Reprog *progp,	/* program to run */
 	/* mark space */
 	j.relist[0] = relist0;
 	j.relist[1] = relist1;
-	j.reliste[0] = relist0 + nelem(relist0) - 2;
-	j.reliste[1] = relist1 + nelem(relist1) - 2;
+	j.reliste[0] = relist0 + nelem(relist0) - 1;
+	j.reliste[1] = relist1 + nelem(relist1) - 1;
 
 	rv = rregexec1(progp, bol, mp, ms, &j);
 	if(rv >= 0)