Blame


1 8ad51794 2004-03-25 devnull #include "grep.h"
2 8ad51794 2004-03-25 devnull
3 8ad51794 2004-03-25 devnull /*
4 8ad51794 2004-03-25 devnull * incremental compiler.
5 8ad51794 2004-03-25 devnull * add the branch c to the
6 8ad51794 2004-03-25 devnull * state s.
7 8ad51794 2004-03-25 devnull */
8 8ad51794 2004-03-25 devnull void
9 8ad51794 2004-03-25 devnull increment(State *s, int c)
10 8ad51794 2004-03-25 devnull {
11 8ad51794 2004-03-25 devnull int i;
12 8ad51794 2004-03-25 devnull State *t, **tt;
13 8ad51794 2004-03-25 devnull Re *re1, *re2;
14 8ad51794 2004-03-25 devnull
15 8ad51794 2004-03-25 devnull nfollow = 0;
16 8ad51794 2004-03-25 devnull gen++;
17 8ad51794 2004-03-25 devnull matched = 0;
18 8ad51794 2004-03-25 devnull for(i=0; i<s->count; i++)
19 8ad51794 2004-03-25 devnull fol1(s->re[i], c);
20 8ad51794 2004-03-25 devnull qsort(follow, nfollow, sizeof(*follow), fcmp);
21 8ad51794 2004-03-25 devnull for(tt=&state0; t = *tt;) {
22 8ad51794 2004-03-25 devnull if(t->count > nfollow) {
23 8ad51794 2004-03-25 devnull tt = &t->linkleft;
24 8ad51794 2004-03-25 devnull goto cont;
25 8ad51794 2004-03-25 devnull }
26 8ad51794 2004-03-25 devnull if(t->count < nfollow) {
27 8ad51794 2004-03-25 devnull tt = &t->linkright;
28 8ad51794 2004-03-25 devnull goto cont;
29 8ad51794 2004-03-25 devnull }
30 8ad51794 2004-03-25 devnull for(i=0; i<nfollow; i++) {
31 8ad51794 2004-03-25 devnull re1 = t->re[i];
32 8ad51794 2004-03-25 devnull re2 = follow[i];
33 8ad51794 2004-03-25 devnull if(re1 > re2) {
34 8ad51794 2004-03-25 devnull tt = &t->linkleft;
35 8ad51794 2004-03-25 devnull goto cont;
36 8ad51794 2004-03-25 devnull }
37 8ad51794 2004-03-25 devnull if(re1 < re2) {
38 8ad51794 2004-03-25 devnull tt = &t->linkright;
39 8ad51794 2004-03-25 devnull goto cont;
40 8ad51794 2004-03-25 devnull }
41 8ad51794 2004-03-25 devnull }
42 8ad51794 2004-03-25 devnull if(!!matched && !t->match) {
43 8ad51794 2004-03-25 devnull tt = &t->linkleft;
44 8ad51794 2004-03-25 devnull goto cont;
45 8ad51794 2004-03-25 devnull }
46 8ad51794 2004-03-25 devnull if(!matched && !!t->match) {
47 8ad51794 2004-03-25 devnull tt = &t->linkright;
48 8ad51794 2004-03-25 devnull goto cont;
49 8ad51794 2004-03-25 devnull }
50 8ad51794 2004-03-25 devnull s->next[c] = t;
51 8ad51794 2004-03-25 devnull return;
52 8ad51794 2004-03-25 devnull cont:;
53 8ad51794 2004-03-25 devnull }
54 8ad51794 2004-03-25 devnull
55 8ad51794 2004-03-25 devnull t = sal(nfollow);
56 8ad51794 2004-03-25 devnull *tt = t;
57 8ad51794 2004-03-25 devnull for(i=0; i<nfollow; i++) {
58 8ad51794 2004-03-25 devnull re1 = follow[i];
59 8ad51794 2004-03-25 devnull t->re[i] = re1;
60 8ad51794 2004-03-25 devnull }
61 8ad51794 2004-03-25 devnull s->next[c] = t;
62 8ad51794 2004-03-25 devnull t->match = matched;
63 8ad51794 2004-03-25 devnull }
64 8ad51794 2004-03-25 devnull
65 8ad51794 2004-03-25 devnull int
66 8ad51794 2004-03-25 devnull fcmp(const void *va, const void *vb)
67 8ad51794 2004-03-25 devnull {
68 8ad51794 2004-03-25 devnull Re **aa, **bb;
69 8ad51794 2004-03-25 devnull Re *a, *b;
70 8ad51794 2004-03-25 devnull
71 8ad51794 2004-03-25 devnull aa = (Re**)va;
72 8ad51794 2004-03-25 devnull bb = (Re**)vb;
73 8ad51794 2004-03-25 devnull a = *aa;
74 8ad51794 2004-03-25 devnull b = *bb;
75 8ad51794 2004-03-25 devnull if(a > b)
76 8ad51794 2004-03-25 devnull return 1;
77 8ad51794 2004-03-25 devnull if(a < b)
78 8ad51794 2004-03-25 devnull return -1;
79 8ad51794 2004-03-25 devnull return 0;
80 8ad51794 2004-03-25 devnull }
81 8ad51794 2004-03-25 devnull
82 8ad51794 2004-03-25 devnull void
83 8ad51794 2004-03-25 devnull fol1(Re *r, int c)
84 8ad51794 2004-03-25 devnull {
85 8ad51794 2004-03-25 devnull Re *r1;
86 8ad51794 2004-03-25 devnull
87 8ad51794 2004-03-25 devnull loop:
88 8ad51794 2004-03-25 devnull if(r->gen == gen)
89 8ad51794 2004-03-25 devnull return;
90 8ad51794 2004-03-25 devnull if(nfollow >= maxfollow)
91 8ad51794 2004-03-25 devnull error("nfollow");
92 8ad51794 2004-03-25 devnull r->gen = gen;
93 8ad51794 2004-03-25 devnull switch(r->type) {
94 8ad51794 2004-03-25 devnull default:
95 8ad51794 2004-03-25 devnull error("fol1");
96 8ad51794 2004-03-25 devnull
97 8ad51794 2004-03-25 devnull case Tcase:
98 8ad51794 2004-03-25 devnull if(c >= 0 && c < 256)
99 285b4f85 2004-03-25 devnull if(r1 = r->u.cases[c])
100 8ad51794 2004-03-25 devnull follow[nfollow++] = r1;
101 8ad51794 2004-03-25 devnull if(r = r->next)
102 8ad51794 2004-03-25 devnull goto loop;
103 8ad51794 2004-03-25 devnull break;
104 8ad51794 2004-03-25 devnull
105 8ad51794 2004-03-25 devnull case Talt:
106 8ad51794 2004-03-25 devnull case Tor:
107 285b4f85 2004-03-25 devnull fol1(r->u.alt, c);
108 8ad51794 2004-03-25 devnull r = r->next;
109 8ad51794 2004-03-25 devnull goto loop;
110 8ad51794 2004-03-25 devnull
111 8ad51794 2004-03-25 devnull case Tbegin:
112 8ad51794 2004-03-25 devnull if(c == '\n' || c == Cbegin)
113 8ad51794 2004-03-25 devnull follow[nfollow++] = r->next;
114 8ad51794 2004-03-25 devnull break;
115 8ad51794 2004-03-25 devnull
116 8ad51794 2004-03-25 devnull case Tend:
117 8ad51794 2004-03-25 devnull if(c == '\n')
118 8ad51794 2004-03-25 devnull matched = 1;
119 8ad51794 2004-03-25 devnull break;
120 8ad51794 2004-03-25 devnull
121 8ad51794 2004-03-25 devnull case Tclass:
122 285b4f85 2004-03-25 devnull if(c >= r->u.x.lo && c <= r->u.x.hi)
123 8ad51794 2004-03-25 devnull follow[nfollow++] = r->next;
124 8ad51794 2004-03-25 devnull break;
125 8ad51794 2004-03-25 devnull }
126 8ad51794 2004-03-25 devnull }
127 8ad51794 2004-03-25 devnull
128 8ad51794 2004-03-25 devnull Rune tab1[] =
129 8ad51794 2004-03-25 devnull {
130 8ad51794 2004-03-25 devnull 0x007f,
131 cbeb0b26 2006-04-01 devnull 0x07ff
132 8ad51794 2004-03-25 devnull };
133 8ad51794 2004-03-25 devnull Rune tab2[] =
134 8ad51794 2004-03-25 devnull {
135 8ad51794 2004-03-25 devnull 0x003f,
136 cbeb0b26 2006-04-01 devnull 0x0fff
137 8ad51794 2004-03-25 devnull };
138 8ad51794 2004-03-25 devnull
139 8ad51794 2004-03-25 devnull Re2
140 8ad51794 2004-03-25 devnull rclass(Rune p0, Rune p1)
141 8ad51794 2004-03-25 devnull {
142 8ad51794 2004-03-25 devnull char xc0[6], xc1[6];
143 8ad51794 2004-03-25 devnull int i, n, m;
144 8ad51794 2004-03-25 devnull Re2 x;
145 8ad51794 2004-03-25 devnull
146 8ad51794 2004-03-25 devnull if(p0 > p1)
147 cbeb0b26 2006-04-01 devnull return re2char(0xff, 0xff); /* no match */
148 8ad51794 2004-03-25 devnull
149 8ad51794 2004-03-25 devnull /*
150 8ad51794 2004-03-25 devnull * bust range into same length
151 8ad51794 2004-03-25 devnull * character sequences
152 8ad51794 2004-03-25 devnull */
153 8ad51794 2004-03-25 devnull for(i=0; i<nelem(tab1); i++) {
154 8ad51794 2004-03-25 devnull m = tab1[i];
155 8ad51794 2004-03-25 devnull if(p0 <= m && p1 > m)
156 8ad51794 2004-03-25 devnull return re2or(rclass(p0, m), rclass(m+1, p1));
157 8ad51794 2004-03-25 devnull }
158 8ad51794 2004-03-25 devnull
159 8ad51794 2004-03-25 devnull /*
160 8ad51794 2004-03-25 devnull * bust range into part of a single page
161 8ad51794 2004-03-25 devnull * or into full pages
162 8ad51794 2004-03-25 devnull */
163 8ad51794 2004-03-25 devnull for(i=0; i<nelem(tab2); i++) {
164 8ad51794 2004-03-25 devnull m = tab2[i];
165 8ad51794 2004-03-25 devnull if((p0 & ~m) != (p1 & ~m)) {
166 8ad51794 2004-03-25 devnull if((p0 & m) != 0)
167 8ad51794 2004-03-25 devnull return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1));
168 8ad51794 2004-03-25 devnull if((p1 & m) != m)
169 8ad51794 2004-03-25 devnull return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1));
170 8ad51794 2004-03-25 devnull }
171 8ad51794 2004-03-25 devnull }
172 8ad51794 2004-03-25 devnull
173 8ad51794 2004-03-25 devnull n = runetochar(xc0, &p0);
174 8ad51794 2004-03-25 devnull i = runetochar(xc1, &p1);
175 8ad51794 2004-03-25 devnull if(i != n)
176 8ad51794 2004-03-25 devnull error("length");
177 8ad51794 2004-03-25 devnull
178 8ad51794 2004-03-25 devnull x = re2char(xc0[0], xc1[0]);
179 8ad51794 2004-03-25 devnull for(i=1; i<n; i++)
180 8ad51794 2004-03-25 devnull x = re2cat(x, re2char(xc0[i], xc1[i]));
181 8ad51794 2004-03-25 devnull return x;
182 8ad51794 2004-03-25 devnull }
183 8ad51794 2004-03-25 devnull
184 8ad51794 2004-03-25 devnull int
185 8ad51794 2004-03-25 devnull pcmp(const void *va, const void *vb)
186 8ad51794 2004-03-25 devnull {
187 8ad51794 2004-03-25 devnull int n;
188 8ad51794 2004-03-25 devnull Rune *a, *b;
189 8ad51794 2004-03-25 devnull
190 8ad51794 2004-03-25 devnull a = (Rune*)va;
191 8ad51794 2004-03-25 devnull b = (Rune*)vb;
192 8ad51794 2004-03-25 devnull
193 8ad51794 2004-03-25 devnull n = a[0] - b[0];
194 8ad51794 2004-03-25 devnull if(n)
195 8ad51794 2004-03-25 devnull return n;
196 8ad51794 2004-03-25 devnull return a[1] - b[1];
197 8ad51794 2004-03-25 devnull }
198 8ad51794 2004-03-25 devnull
199 8ad51794 2004-03-25 devnull /*
200 8ad51794 2004-03-25 devnull * convert character chass into
201 8ad51794 2004-03-25 devnull * run-pair ranges of matches.
202 8ad51794 2004-03-25 devnull * this is 10646/utf specific and
203 8ad51794 2004-03-25 devnull * needs to be changed for some
204 8ad51794 2004-03-25 devnull * other input character set.
205 8ad51794 2004-03-25 devnull * this is the key to a fast
206 8ad51794 2004-03-25 devnull * regular search of characters
207 8ad51794 2004-03-25 devnull * by looking at sequential bytes.
208 8ad51794 2004-03-25 devnull */
209 8ad51794 2004-03-25 devnull Re2
210 8ad51794 2004-03-25 devnull re2class(char *s)
211 8ad51794 2004-03-25 devnull {
212 8ad51794 2004-03-25 devnull Rune pairs[200], *p, *q, ov;
213 8ad51794 2004-03-25 devnull int nc;
214 8ad51794 2004-03-25 devnull Re2 x;
215 8ad51794 2004-03-25 devnull
216 8ad51794 2004-03-25 devnull nc = 0;
217 8ad51794 2004-03-25 devnull if(*s == '^') {
218 8ad51794 2004-03-25 devnull nc = 1;
219 8ad51794 2004-03-25 devnull s++;
220 8ad51794 2004-03-25 devnull }
221 8ad51794 2004-03-25 devnull
222 8ad51794 2004-03-25 devnull p = pairs;
223 8ad51794 2004-03-25 devnull s += chartorune(p, s);
224 8ad51794 2004-03-25 devnull for(;;) {
225 8ad51794 2004-03-25 devnull if(*p == '\\')
226 8ad51794 2004-03-25 devnull s += chartorune(p, s);
227 8ad51794 2004-03-25 devnull if(*p == 0)
228 8ad51794 2004-03-25 devnull break;
229 8ad51794 2004-03-25 devnull p[1] = *p;
230 8ad51794 2004-03-25 devnull p += 2;
231 8ad51794 2004-03-25 devnull s += chartorune(p, s);
232 8ad51794 2004-03-25 devnull if(*p != '-')
233 8ad51794 2004-03-25 devnull continue;
234 8ad51794 2004-03-25 devnull s += chartorune(p, s);
235 8ad51794 2004-03-25 devnull if(*p == '\\')
236 8ad51794 2004-03-25 devnull s += chartorune(p, s);
237 8ad51794 2004-03-25 devnull if(*p == 0)
238 8ad51794 2004-03-25 devnull break;
239 8ad51794 2004-03-25 devnull p[-1] = *p;
240 8ad51794 2004-03-25 devnull s += chartorune(p, s);
241 8ad51794 2004-03-25 devnull }
242 8ad51794 2004-03-25 devnull *p = 0;
243 8ad51794 2004-03-25 devnull qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp);
244 8ad51794 2004-03-25 devnull
245 8ad51794 2004-03-25 devnull q = pairs;
246 8ad51794 2004-03-25 devnull for(p=pairs+2; *p; p+=2) {
247 8ad51794 2004-03-25 devnull if(p[0] > p[1])
248 8ad51794 2004-03-25 devnull continue;
249 8ad51794 2004-03-25 devnull if(p[0] > q[1] || p[1] < q[0]) {
250 8ad51794 2004-03-25 devnull q[2] = p[0];
251 8ad51794 2004-03-25 devnull q[3] = p[1];
252 8ad51794 2004-03-25 devnull q += 2;
253 8ad51794 2004-03-25 devnull continue;
254 8ad51794 2004-03-25 devnull }
255 8ad51794 2004-03-25 devnull if(p[0] < q[0])
256 8ad51794 2004-03-25 devnull q[0] = p[0];
257 8ad51794 2004-03-25 devnull if(p[1] > q[1])
258 8ad51794 2004-03-25 devnull q[1] = p[1];
259 8ad51794 2004-03-25 devnull }
260 8ad51794 2004-03-25 devnull q[2] = 0;
261 8ad51794 2004-03-25 devnull
262 8ad51794 2004-03-25 devnull p = pairs;
263 8ad51794 2004-03-25 devnull if(nc) {
264 8ad51794 2004-03-25 devnull x = rclass(0, p[0]-1);
265 8ad51794 2004-03-25 devnull ov = p[1]+1;
266 8ad51794 2004-03-25 devnull for(p+=2; *p; p+=2) {
267 8ad51794 2004-03-25 devnull x = re2or(x, rclass(ov, p[0]-1));
268 8ad51794 2004-03-25 devnull ov = p[1]+1;
269 8ad51794 2004-03-25 devnull }
270 8ad51794 2004-03-25 devnull x = re2or(x, rclass(ov, 0xffff));
271 8ad51794 2004-03-25 devnull } else {
272 8ad51794 2004-03-25 devnull x = rclass(p[0], p[1]);
273 8ad51794 2004-03-25 devnull for(p+=2; *p; p+=2)
274 8ad51794 2004-03-25 devnull x = re2or(x, rclass(p[0], p[1]));
275 8ad51794 2004-03-25 devnull }
276 8ad51794 2004-03-25 devnull return x;
277 8ad51794 2004-03-25 devnull }