Blob


1 #include "lib9.h"
2 #include "regexp9.h"
3 #include "regcomp.h"
6 /*
7 * return 0 if no match
8 * >0 if a match
9 * <0 if we ran out of _relist space
10 */
11 static int
12 regexec1(Reprog *progp, /* program to run */
13 char *bol, /* string to run machine on */
14 Resub *mp, /* subexpression elements */
15 int ms, /* number of elements at mp */
16 Reljunk *j
17 )
18 {
19 int flag=0;
20 Reinst *inst;
21 Relist *tlp;
22 char *s;
23 int i, checkstart;
24 Rune r, *rp, *ep;
25 int n;
26 Relist* tl; /* This list, next list */
27 Relist* nl;
28 Relist* tle; /* ends of this and next list */
29 Relist* nle;
30 int match;
31 char *p;
33 match = 0;
34 checkstart = j->starttype;
35 if(mp)
36 for(i=0; i<ms; i++) {
37 mp[i].s.sp = 0;
38 mp[i].e.ep = 0;
39 }
40 j->relist[0][0].inst = 0;
41 j->relist[1][0].inst = 0;
43 /* Execute machine once for each character, including terminal NUL */
44 s = j->starts;
45 do{
46 /* fast check for first char */
47 if(checkstart) {
48 switch(j->starttype) {
49 case RUNE:
50 p = utfrune(s, j->startchar);
51 if(p == 0 || s == j->eol)
52 return match;
53 s = p;
54 break;
55 case BOL:
56 if(s == bol)
57 break;
58 p = utfrune(s, '\n');
59 if(p == 0 || s == j->eol)
60 return match;
61 s = p+1;
62 break;
63 }
64 }
65 r = *(uchar*)s;
66 if(r < Runeself)
67 n = 1;
68 else
69 n = chartorune(&r, s);
71 /* switch run lists */
72 tl = j->relist[flag];
73 tle = j->reliste[flag];
74 nl = j->relist[flag^=1];
75 nle = j->reliste[flag];
76 nl->inst = 0;
78 /* Add first instruction to current list */
79 if(match == 0)
80 _renewemptythread(tl, progp->startinst, ms, s);
82 /* Execute machine until current list is empty */
83 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
84 for(inst = tlp->inst; ; inst = inst->u2.next){
85 switch(inst->type){
86 case RUNE: /* regular character */
87 if(inst->u1.r == r){
88 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
89 return -1;
90 }
91 break;
92 case LBRA:
93 tlp->se.m[inst->u1.subid].s.sp = s;
94 continue;
95 case RBRA:
96 tlp->se.m[inst->u1.subid].e.ep = s;
97 continue;
98 case ANY:
99 if(r != '\n')
100 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
101 return -1;
102 break;
103 case ANYNL:
104 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
105 return -1;
106 break;
107 case BOL:
108 if(s == bol || *(s-1) == '\n')
109 continue;
110 break;
111 case EOL:
112 if(s == j->eol || r == 0 || r == '\n')
113 continue;
114 break;
115 case CCLASS:
116 ep = inst->u1.cp->end;
117 for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
118 if(r >= rp[0] && r <= rp[1]){
119 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
120 return -1;
121 break;
123 break;
124 case NCCLASS:
125 ep = inst->u1.cp->end;
126 for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
127 if(r >= rp[0] && r <= rp[1])
128 break;
129 if(rp == ep)
130 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
131 return -1;
132 break;
133 case OR:
134 /* evaluate right choice later */
135 if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
136 return -1;
137 /* efficiency: advance and re-evaluate */
138 continue;
139 case END: /* Match! */
140 match = 1;
141 tlp->se.m[0].e.ep = s;
142 if(mp != 0)
143 _renewmatch(mp, ms, &tlp->se);
144 break;
146 break;
149 if(s == j->eol)
150 break;
151 checkstart = j->starttype && nl->inst==0;
152 s += n;
153 }while(r);
154 return match;
157 static int
158 regexec2(Reprog *progp, /* program to run */
159 char *bol, /* string to run machine on */
160 Resub *mp, /* subexpression elements */
161 int ms, /* number of elements at mp */
162 Reljunk *j
165 int rv;
166 Relist *relist0, *relist1;
168 /* mark space */
169 relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
170 if(relist0 == nil)
171 return -1;
172 relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
173 if(relist1 == nil){
174 free(relist0);
175 return -1;
177 j->relist[0] = relist0;
178 j->relist[1] = relist1;
179 j->reliste[0] = relist0 + BIGLISTSIZE - 2;
180 j->reliste[1] = relist1 + BIGLISTSIZE - 2;
182 rv = regexec1(progp, bol, mp, ms, j);
183 free(relist0);
184 free(relist1);
185 return rv;
188 extern int
189 regexec(Reprog *progp, /* program to run */
190 char *bol, /* string to run machine on */
191 Resub *mp, /* subexpression elements */
192 int ms) /* number of elements at mp */
194 Reljunk j;
195 Relist relist0[LISTSIZE], relist1[LISTSIZE];
196 int rv;
198 /*
199 * use user-specified starting/ending location if specified
200 */
201 j.starts = bol;
202 j.eol = 0;
203 if(mp && ms>0){
204 if(mp->s.sp)
205 j.starts = mp->s.sp;
206 if(mp->e.ep)
207 j.eol = mp->e.ep;
209 j.starttype = 0;
210 j.startchar = 0;
211 if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
212 j.starttype = RUNE;
213 j.startchar = progp->startinst->u1.r;
215 if(progp->startinst->type == BOL)
216 j.starttype = BOL;
218 /* mark space */
219 j.relist[0] = relist0;
220 j.relist[1] = relist1;
221 j.reliste[0] = relist0 + nelem(relist0) - 2;
222 j.reliste[1] = relist1 + nelem(relist1) - 2;
224 rv = regexec1(progp, bol, mp, ms, &j);
225 if(rv >= 0)
226 return rv;
227 rv = regexec2(progp, bol, mp, ms, &j);
228 if(rv >= 0)
229 return rv;
230 return -1;