Blob
1 #include "grep.h"3 /*4 * incremental compiler.5 * add the branch c to the6 * state s.7 */8 void9 increment(State *s, int c)10 {11 int i;12 State *t, **tt;13 Re *re1, *re2;15 nfollow = 0;16 gen++;17 matched = 0;18 for(i=0; i<s->count; i++)19 fol1(s->re[i], c);20 qsort(follow, nfollow, sizeof(*follow), fcmp);21 for(tt=&state0; t = *tt;) {22 if(t->count > nfollow) {23 tt = &t->linkleft;24 goto cont;25 }26 if(t->count < nfollow) {27 tt = &t->linkright;28 goto cont;29 }30 for(i=0; i<nfollow; i++) {31 re1 = t->re[i];32 re2 = follow[i];33 if(re1 > re2) {34 tt = &t->linkleft;35 goto cont;36 }37 if(re1 < re2) {38 tt = &t->linkright;39 goto cont;40 }41 }42 if(!!matched && !t->match) {43 tt = &t->linkleft;44 goto cont;45 }46 if(!matched && !!t->match) {47 tt = &t->linkright;48 goto cont;49 }50 s->next[c] = t;51 return;52 cont:;53 }55 t = sal(nfollow);56 *tt = t;57 for(i=0; i<nfollow; i++) {58 re1 = follow[i];59 t->re[i] = re1;60 }61 s->next[c] = t;62 t->match = matched;63 }65 int66 fcmp(const void *va, const void *vb)67 {68 Re **aa, **bb;69 Re *a, *b;71 aa = (Re**)va;72 bb = (Re**)vb;73 a = *aa;74 b = *bb;75 if(a > b)76 return 1;77 if(a < b)78 return -1;79 return 0;80 }82 void83 fol1(Re *r, int c)84 {85 Re *r1;87 loop:88 if(r->gen == gen)89 return;90 if(nfollow >= maxfollow)91 error("nfollow");92 r->gen = gen;93 switch(r->type) {94 default:95 error("fol1");97 case Tcase:98 if(c >= 0 && c < 256)99 if(r1 = r->u.cases[c])100 follow[nfollow++] = r1;101 if(r = r->next)102 goto loop;103 break;105 case Talt:106 case Tor:107 fol1(r->u.alt, c);108 r = r->next;109 goto loop;111 case Tbegin:112 if(c == '\n' || c == Cbegin)113 follow[nfollow++] = r->next;114 break;116 case Tend:117 if(c == '\n')118 matched = 1;119 break;121 case Tclass:122 if(c >= r->u.x.lo && c <= r->u.x.hi)123 follow[nfollow++] = r->next;124 break;125 }126 }128 Rune tab1[] =129 {130 0x007f,131 0x07ff132 };133 Rune tab2[] =134 {135 0x003f,136 0x0fff137 };139 Re2140 rclass(Rune p0, Rune p1)141 {142 char xc0[6], xc1[6];143 int i, n, m;144 Re2 x;146 if(p0 > p1)147 return re2char(0xff, 0xff); /* no match */149 /*150 * bust range into same length151 * character sequences152 */153 for(i=0; i<nelem(tab1); i++) {154 m = tab1[i];155 if(p0 <= m && p1 > m)156 return re2or(rclass(p0, m), rclass(m+1, p1));157 }159 /*160 * bust range into part of a single page161 * or into full pages162 */163 for(i=0; i<nelem(tab2); i++) {164 m = tab2[i];165 if((p0 & ~m) != (p1 & ~m)) {166 if((p0 & m) != 0)167 return re2or(rclass(p0, p0|m), rclass((p0|m)+1, p1));168 if((p1 & m) != m)169 return re2or(rclass(p0, (p1&~m)-1), rclass(p1&~m, p1));170 }171 }173 n = runetochar(xc0, &p0);174 i = runetochar(xc1, &p1);175 if(i != n)176 error("length");178 x = re2char(xc0[0], xc1[0]);179 for(i=1; i<n; i++)180 x = re2cat(x, re2char(xc0[i], xc1[i]));181 return x;182 }184 int185 pcmp(const void *va, const void *vb)186 {187 int n;188 Rune *a, *b;190 a = (Rune*)va;191 b = (Rune*)vb;193 n = a[0] - b[0];194 if(n)195 return n;196 return a[1] - b[1];197 }199 /*200 * convert character chass into201 * run-pair ranges of matches.202 * this is 10646/utf specific and203 * needs to be changed for some204 * other input character set.205 * this is the key to a fast206 * regular search of characters207 * by looking at sequential bytes.208 */209 Re2210 re2class(char *s)211 {212 Rune pairs[200], *p, *q, ov;213 int nc;214 Re2 x;216 nc = 0;217 if(*s == '^') {218 nc = 1;219 s++;220 }222 p = pairs;223 s += chartorune(p, s);224 for(;;) {225 if(*p == '\\')226 s += chartorune(p, s);227 if(*p == 0)228 break;229 p[1] = *p;230 p += 2;231 s += chartorune(p, s);232 if(*p != '-')233 continue;234 s += chartorune(p, s);235 if(*p == '\\')236 s += chartorune(p, s);237 if(*p == 0)238 break;239 p[-1] = *p;240 s += chartorune(p, s);241 }242 *p = 0;243 qsort(pairs, (p-pairs)/2, 2*sizeof(*pairs), pcmp);245 q = pairs;246 for(p=pairs+2; *p; p+=2) {247 if(p[0] > p[1])248 continue;249 if(p[0] > q[1] || p[1] < q[0]) {250 q[2] = p[0];251 q[3] = p[1];252 q += 2;253 continue;254 }255 if(p[0] < q[0])256 q[0] = p[0];257 if(p[1] > q[1])258 q[1] = p[1];259 }260 q[2] = 0;262 p = pairs;263 if(nc) {264 x = rclass(0, p[0]-1);265 ov = p[1]+1;266 for(p+=2; *p; p+=2) {267 x = re2or(x, rclass(ov, p[0]-1));268 ov = p[1]+1;269 }270 x = re2or(x, rclass(ov, 0xffff));271 } else {272 x = rclass(p[0], p[1]);273 for(p+=2; *p; p+=2)274 x = re2or(x, rclass(p[0], p[1]));275 }276 return x;277 }