Blame


1 b2cfc4e2 2003-09-30 devnull #include "lib9.h"
2 b2cfc4e2 2003-09-30 devnull #include "regexp9.h"
3 b2cfc4e2 2003-09-30 devnull #include "regcomp.h"
4 b2cfc4e2 2003-09-30 devnull
5 6d08a0f5 2007-12-07 rsc
6 b2cfc4e2 2003-09-30 devnull /*
7 b2cfc4e2 2003-09-30 devnull * return 0 if no match
8 b2cfc4e2 2003-09-30 devnull * >0 if a match
9 b2cfc4e2 2003-09-30 devnull * <0 if we ran out of _relist space
10 b2cfc4e2 2003-09-30 devnull */
11 b2cfc4e2 2003-09-30 devnull static int
12 b2cfc4e2 2003-09-30 devnull regexec1(Reprog *progp, /* program to run */
13 b2cfc4e2 2003-09-30 devnull char *bol, /* string to run machine on */
14 b2cfc4e2 2003-09-30 devnull Resub *mp, /* subexpression elements */
15 b2cfc4e2 2003-09-30 devnull int ms, /* number of elements at mp */
16 6d08a0f5 2007-12-07 rsc Reljunk *j
17 6d08a0f5 2007-12-07 rsc )
18 b2cfc4e2 2003-09-30 devnull {
19 b2cfc4e2 2003-09-30 devnull int flag=0;
20 b2cfc4e2 2003-09-30 devnull Reinst *inst;
21 b2cfc4e2 2003-09-30 devnull Relist *tlp;
22 b2cfc4e2 2003-09-30 devnull char *s;
23 6d08a0f5 2007-12-07 rsc int i, checkstart;
24 b2cfc4e2 2003-09-30 devnull Rune r, *rp, *ep;
25 6d08a0f5 2007-12-07 rsc int n;
26 b2cfc4e2 2003-09-30 devnull Relist* tl; /* This list, next list */
27 b2cfc4e2 2003-09-30 devnull Relist* nl;
28 b2cfc4e2 2003-09-30 devnull Relist* tle; /* ends of this and next list */
29 b2cfc4e2 2003-09-30 devnull Relist* nle;
30 b2cfc4e2 2003-09-30 devnull int match;
31 b2cfc4e2 2003-09-30 devnull char *p;
32 b2cfc4e2 2003-09-30 devnull
33 b2cfc4e2 2003-09-30 devnull match = 0;
34 b2cfc4e2 2003-09-30 devnull checkstart = j->starttype;
35 b2cfc4e2 2003-09-30 devnull if(mp)
36 b2cfc4e2 2003-09-30 devnull for(i=0; i<ms; i++) {
37 b2cfc4e2 2003-09-30 devnull mp[i].s.sp = 0;
38 b2cfc4e2 2003-09-30 devnull mp[i].e.ep = 0;
39 b2cfc4e2 2003-09-30 devnull }
40 b2cfc4e2 2003-09-30 devnull j->relist[0][0].inst = 0;
41 b2cfc4e2 2003-09-30 devnull j->relist[1][0].inst = 0;
42 b2cfc4e2 2003-09-30 devnull
43 b2cfc4e2 2003-09-30 devnull /* Execute machine once for each character, including terminal NUL */
44 b2cfc4e2 2003-09-30 devnull s = j->starts;
45 b2cfc4e2 2003-09-30 devnull do{
46 b2cfc4e2 2003-09-30 devnull /* fast check for first char */
47 b2cfc4e2 2003-09-30 devnull if(checkstart) {
48 b2cfc4e2 2003-09-30 devnull switch(j->starttype) {
49 b2cfc4e2 2003-09-30 devnull case RUNE:
50 b2cfc4e2 2003-09-30 devnull p = utfrune(s, j->startchar);
51 6d08a0f5 2007-12-07 rsc if(p == 0 || s == j->eol)
52 b2cfc4e2 2003-09-30 devnull return match;
53 b2cfc4e2 2003-09-30 devnull s = p;
54 b2cfc4e2 2003-09-30 devnull break;
55 b2cfc4e2 2003-09-30 devnull case BOL:
56 b2cfc4e2 2003-09-30 devnull if(s == bol)
57 b2cfc4e2 2003-09-30 devnull break;
58 b2cfc4e2 2003-09-30 devnull p = utfrune(s, '\n');
59 6d08a0f5 2007-12-07 rsc if(p == 0 || s == j->eol)
60 b2cfc4e2 2003-09-30 devnull return match;
61 da7f7882 2007-05-18 devnull s = p+1;
62 b2cfc4e2 2003-09-30 devnull break;
63 b2cfc4e2 2003-09-30 devnull }
64 b2cfc4e2 2003-09-30 devnull }
65 b2cfc4e2 2003-09-30 devnull r = *(uchar*)s;
66 62390091 2004-03-05 devnull if(r < Runeself)
67 b2cfc4e2 2003-09-30 devnull n = 1;
68 b2cfc4e2 2003-09-30 devnull else
69 b2cfc4e2 2003-09-30 devnull n = chartorune(&r, s);
70 b2cfc4e2 2003-09-30 devnull
71 b2cfc4e2 2003-09-30 devnull /* switch run lists */
72 b2cfc4e2 2003-09-30 devnull tl = j->relist[flag];
73 b2cfc4e2 2003-09-30 devnull tle = j->reliste[flag];
74 b2cfc4e2 2003-09-30 devnull nl = j->relist[flag^=1];
75 b2cfc4e2 2003-09-30 devnull nle = j->reliste[flag];
76 b2cfc4e2 2003-09-30 devnull nl->inst = 0;
77 b2cfc4e2 2003-09-30 devnull
78 b2cfc4e2 2003-09-30 devnull /* Add first instruction to current list */
79 b2cfc4e2 2003-09-30 devnull if(match == 0)
80 6d08a0f5 2007-12-07 rsc _renewemptythread(tl, progp->startinst, ms, s);
81 b2cfc4e2 2003-09-30 devnull
82 b2cfc4e2 2003-09-30 devnull /* Execute machine until current list is empty */
83 6d08a0f5 2007-12-07 rsc for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
84 b2cfc4e2 2003-09-30 devnull for(inst = tlp->inst; ; inst = inst->u2.next){
85 b2cfc4e2 2003-09-30 devnull switch(inst->type){
86 b2cfc4e2 2003-09-30 devnull case RUNE: /* regular character */
87 6d08a0f5 2007-12-07 rsc if(inst->u1.r == r){
88 6d08a0f5 2007-12-07 rsc if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
89 b2cfc4e2 2003-09-30 devnull return -1;
90 6d08a0f5 2007-12-07 rsc }
91 b2cfc4e2 2003-09-30 devnull break;
92 b2cfc4e2 2003-09-30 devnull case LBRA:
93 b2cfc4e2 2003-09-30 devnull tlp->se.m[inst->u1.subid].s.sp = s;
94 b2cfc4e2 2003-09-30 devnull continue;
95 b2cfc4e2 2003-09-30 devnull case RBRA:
96 b2cfc4e2 2003-09-30 devnull tlp->se.m[inst->u1.subid].e.ep = s;
97 b2cfc4e2 2003-09-30 devnull continue;
98 b2cfc4e2 2003-09-30 devnull case ANY:
99 b2cfc4e2 2003-09-30 devnull if(r != '\n')
100 6d08a0f5 2007-12-07 rsc if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
101 b2cfc4e2 2003-09-30 devnull return -1;
102 b2cfc4e2 2003-09-30 devnull break;
103 b2cfc4e2 2003-09-30 devnull case ANYNL:
104 6d08a0f5 2007-12-07 rsc if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
105 b2cfc4e2 2003-09-30 devnull return -1;
106 b2cfc4e2 2003-09-30 devnull break;
107 b2cfc4e2 2003-09-30 devnull case BOL:
108 b2cfc4e2 2003-09-30 devnull if(s == bol || *(s-1) == '\n')
109 b2cfc4e2 2003-09-30 devnull continue;
110 b2cfc4e2 2003-09-30 devnull break;
111 b2cfc4e2 2003-09-30 devnull case EOL:
112 b2cfc4e2 2003-09-30 devnull if(s == j->eol || r == 0 || r == '\n')
113 b2cfc4e2 2003-09-30 devnull continue;
114 b2cfc4e2 2003-09-30 devnull break;
115 b2cfc4e2 2003-09-30 devnull case CCLASS:
116 b2cfc4e2 2003-09-30 devnull ep = inst->u1.cp->end;
117 b2cfc4e2 2003-09-30 devnull for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
118 b2cfc4e2 2003-09-30 devnull if(r >= rp[0] && r <= rp[1]){
119 6d08a0f5 2007-12-07 rsc if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
120 b2cfc4e2 2003-09-30 devnull return -1;
121 b2cfc4e2 2003-09-30 devnull break;
122 b2cfc4e2 2003-09-30 devnull }
123 b2cfc4e2 2003-09-30 devnull break;
124 b2cfc4e2 2003-09-30 devnull case NCCLASS:
125 b2cfc4e2 2003-09-30 devnull ep = inst->u1.cp->end;
126 b2cfc4e2 2003-09-30 devnull for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
127 b2cfc4e2 2003-09-30 devnull if(r >= rp[0] && r <= rp[1])
128 b2cfc4e2 2003-09-30 devnull break;
129 b2cfc4e2 2003-09-30 devnull if(rp == ep)
130 6d08a0f5 2007-12-07 rsc if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
131 b2cfc4e2 2003-09-30 devnull return -1;
132 b2cfc4e2 2003-09-30 devnull break;
133 b2cfc4e2 2003-09-30 devnull case OR:
134 6d08a0f5 2007-12-07 rsc /* evaluate right choice later */
135 27589754 2008-01-10 rsc if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
136 6d08a0f5 2007-12-07 rsc return -1;
137 6d08a0f5 2007-12-07 rsc /* efficiency: advance and re-evaluate */
138 6d08a0f5 2007-12-07 rsc continue;
139 b2cfc4e2 2003-09-30 devnull case END: /* Match! */
140 b2cfc4e2 2003-09-30 devnull match = 1;
141 b2cfc4e2 2003-09-30 devnull tlp->se.m[0].e.ep = s;
142 b2cfc4e2 2003-09-30 devnull if(mp != 0)
143 b2cfc4e2 2003-09-30 devnull _renewmatch(mp, ms, &tlp->se);
144 b2cfc4e2 2003-09-30 devnull break;
145 b2cfc4e2 2003-09-30 devnull }
146 b2cfc4e2 2003-09-30 devnull break;
147 b2cfc4e2 2003-09-30 devnull }
148 b2cfc4e2 2003-09-30 devnull }
149 b2cfc4e2 2003-09-30 devnull if(s == j->eol)
150 b2cfc4e2 2003-09-30 devnull break;
151 b2cfc4e2 2003-09-30 devnull checkstart = j->starttype && nl->inst==0;
152 b2cfc4e2 2003-09-30 devnull s += n;
153 b2cfc4e2 2003-09-30 devnull }while(r);
154 b2cfc4e2 2003-09-30 devnull return match;
155 b2cfc4e2 2003-09-30 devnull }
156 b2cfc4e2 2003-09-30 devnull
157 b2cfc4e2 2003-09-30 devnull static int
158 b2cfc4e2 2003-09-30 devnull regexec2(Reprog *progp, /* program to run */
159 b2cfc4e2 2003-09-30 devnull char *bol, /* string to run machine on */
160 b2cfc4e2 2003-09-30 devnull Resub *mp, /* subexpression elements */
161 b2cfc4e2 2003-09-30 devnull int ms, /* number of elements at mp */
162 b2cfc4e2 2003-09-30 devnull Reljunk *j
163 b2cfc4e2 2003-09-30 devnull )
164 b2cfc4e2 2003-09-30 devnull {
165 62390091 2004-03-05 devnull int rv;
166 62390091 2004-03-05 devnull Relist *relist0, *relist1;
167 b2cfc4e2 2003-09-30 devnull
168 6d08a0f5 2007-12-07 rsc /* mark space */
169 6d08a0f5 2007-12-07 rsc relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
170 62390091 2004-03-05 devnull if(relist0 == nil)
171 62390091 2004-03-05 devnull return -1;
172 6d08a0f5 2007-12-07 rsc relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
173 62390091 2004-03-05 devnull if(relist1 == nil){
174 1b68dbef 2016-11-02 rsc free(relist0);
175 62390091 2004-03-05 devnull return -1;
176 62390091 2004-03-05 devnull }
177 b2cfc4e2 2003-09-30 devnull j->relist[0] = relist0;
178 b2cfc4e2 2003-09-30 devnull j->relist[1] = relist1;
179 6d08a0f5 2007-12-07 rsc j->reliste[0] = relist0 + BIGLISTSIZE - 2;
180 6d08a0f5 2007-12-07 rsc j->reliste[1] = relist1 + BIGLISTSIZE - 2;
181 b2cfc4e2 2003-09-30 devnull
182 62390091 2004-03-05 devnull rv = regexec1(progp, bol, mp, ms, j);
183 62390091 2004-03-05 devnull free(relist0);
184 62390091 2004-03-05 devnull free(relist1);
185 62390091 2004-03-05 devnull return rv;
186 b2cfc4e2 2003-09-30 devnull }
187 b2cfc4e2 2003-09-30 devnull
188 b2cfc4e2 2003-09-30 devnull extern int
189 b2cfc4e2 2003-09-30 devnull regexec(Reprog *progp, /* program to run */
190 b2cfc4e2 2003-09-30 devnull char *bol, /* string to run machine on */
191 b2cfc4e2 2003-09-30 devnull Resub *mp, /* subexpression elements */
192 b2cfc4e2 2003-09-30 devnull int ms) /* number of elements at mp */
193 b2cfc4e2 2003-09-30 devnull {
194 b2cfc4e2 2003-09-30 devnull Reljunk j;
195 b2cfc4e2 2003-09-30 devnull Relist relist0[LISTSIZE], relist1[LISTSIZE];
196 b2cfc4e2 2003-09-30 devnull int rv;
197 b2cfc4e2 2003-09-30 devnull
198 b2cfc4e2 2003-09-30 devnull /*
199 b2cfc4e2 2003-09-30 devnull * use user-specified starting/ending location if specified
200 b2cfc4e2 2003-09-30 devnull */
201 b2cfc4e2 2003-09-30 devnull j.starts = bol;
202 b2cfc4e2 2003-09-30 devnull j.eol = 0;
203 b2cfc4e2 2003-09-30 devnull if(mp && ms>0){
204 b2cfc4e2 2003-09-30 devnull if(mp->s.sp)
205 b2cfc4e2 2003-09-30 devnull j.starts = mp->s.sp;
206 b2cfc4e2 2003-09-30 devnull if(mp->e.ep)
207 b2cfc4e2 2003-09-30 devnull j.eol = mp->e.ep;
208 b2cfc4e2 2003-09-30 devnull }
209 b2cfc4e2 2003-09-30 devnull j.starttype = 0;
210 b2cfc4e2 2003-09-30 devnull j.startchar = 0;
211 62390091 2004-03-05 devnull if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
212 b2cfc4e2 2003-09-30 devnull j.starttype = RUNE;
213 b2cfc4e2 2003-09-30 devnull j.startchar = progp->startinst->u1.r;
214 b2cfc4e2 2003-09-30 devnull }
215 b2cfc4e2 2003-09-30 devnull if(progp->startinst->type == BOL)
216 b2cfc4e2 2003-09-30 devnull j.starttype = BOL;
217 b2cfc4e2 2003-09-30 devnull
218 b2cfc4e2 2003-09-30 devnull /* mark space */
219 b2cfc4e2 2003-09-30 devnull j.relist[0] = relist0;
220 b2cfc4e2 2003-09-30 devnull j.relist[1] = relist1;
221 6d08a0f5 2007-12-07 rsc j.reliste[0] = relist0 + nelem(relist0) - 2;
222 6d08a0f5 2007-12-07 rsc j.reliste[1] = relist1 + nelem(relist1) - 2;
223 b2cfc4e2 2003-09-30 devnull
224 b2cfc4e2 2003-09-30 devnull rv = regexec1(progp, bol, mp, ms, &j);
225 b2cfc4e2 2003-09-30 devnull if(rv >= 0)
226 b2cfc4e2 2003-09-30 devnull return rv;
227 b2cfc4e2 2003-09-30 devnull rv = regexec2(progp, bol, mp, ms, &j);
228 b2cfc4e2 2003-09-30 devnull if(rv >= 0)
229 b2cfc4e2 2003-09-30 devnull return rv;
230 b2cfc4e2 2003-09-30 devnull return -1;
231 b2cfc4e2 2003-09-30 devnull }