Blame


1 d1f529f4 2005-10-29 devnull /* From libregexp but leaks extra classes when it runs out */
2 d1f529f4 2005-10-29 devnull
3 d1f529f4 2005-10-29 devnull
4 d1f529f4 2005-10-29 devnull #include <u.h>
5 d1f529f4 2005-10-29 devnull #include <libc.h>
6 d1f529f4 2005-10-29 devnull #include "regexp.h"
7 d1f529f4 2005-10-29 devnull #include "/sys/src/libregexp/regcomp.h"
8 d1f529f4 2005-10-29 devnull
9 d1f529f4 2005-10-29 devnull #define TRUE 1
10 d1f529f4 2005-10-29 devnull #define FALSE 0
11 d1f529f4 2005-10-29 devnull
12 d1f529f4 2005-10-29 devnull /*
13 d1f529f4 2005-10-29 devnull * Parser Information
14 d1f529f4 2005-10-29 devnull */
15 d1f529f4 2005-10-29 devnull typedef
16 d1f529f4 2005-10-29 devnull struct Node
17 d1f529f4 2005-10-29 devnull {
18 d1f529f4 2005-10-29 devnull Reinst* first;
19 d1f529f4 2005-10-29 devnull Reinst* last;
20 d1f529f4 2005-10-29 devnull }Node;
21 d1f529f4 2005-10-29 devnull
22 d1f529f4 2005-10-29 devnull #define NSTACK 20
23 d1f529f4 2005-10-29 devnull static Node andstack[NSTACK];
24 d1f529f4 2005-10-29 devnull static Node *andp;
25 d1f529f4 2005-10-29 devnull static int atorstack[NSTACK];
26 d1f529f4 2005-10-29 devnull static int* atorp;
27 d1f529f4 2005-10-29 devnull static int cursubid; /* id of current subexpression */
28 d1f529f4 2005-10-29 devnull static int subidstack[NSTACK]; /* parallel to atorstack */
29 d1f529f4 2005-10-29 devnull static int* subidp;
30 d1f529f4 2005-10-29 devnull static int lastwasand; /* Last token was operand */
31 d1f529f4 2005-10-29 devnull static int nbra;
32 d1f529f4 2005-10-29 devnull static char* exprp; /* pointer to next character in source expression */
33 d1f529f4 2005-10-29 devnull static int lexdone;
34 d1f529f4 2005-10-29 devnull static int nclass;
35 d1f529f4 2005-10-29 devnull static Reclass*classp;
36 d1f529f4 2005-10-29 devnull static Reinst* freep;
37 d1f529f4 2005-10-29 devnull static int errors;
38 d1f529f4 2005-10-29 devnull static Rune yyrune; /* last lex'd rune */
39 d1f529f4 2005-10-29 devnull static Reclass*yyclassp; /* last lex'd class */
40 d1f529f4 2005-10-29 devnull
41 d1f529f4 2005-10-29 devnull /* predeclared crap */
42 d1f529f4 2005-10-29 devnull static void operator(int);
43 d1f529f4 2005-10-29 devnull static void pushand(Reinst*, Reinst*);
44 d1f529f4 2005-10-29 devnull static void pushator(int);
45 d1f529f4 2005-10-29 devnull static void evaluntil(int);
46 d1f529f4 2005-10-29 devnull static int bldcclass(void);
47 d1f529f4 2005-10-29 devnull
48 d1f529f4 2005-10-29 devnull static jmp_buf regkaboom;
49 d1f529f4 2005-10-29 devnull
50 d1f529f4 2005-10-29 devnull static void
51 d1f529f4 2005-10-29 devnull rcerror(char *s)
52 d1f529f4 2005-10-29 devnull {
53 d1f529f4 2005-10-29 devnull errors++;
54 d1f529f4 2005-10-29 devnull regerror(s);
55 d1f529f4 2005-10-29 devnull longjmp(regkaboom, 1);
56 d1f529f4 2005-10-29 devnull }
57 d1f529f4 2005-10-29 devnull
58 d1f529f4 2005-10-29 devnull static Reinst*
59 d1f529f4 2005-10-29 devnull newinst(int t)
60 d1f529f4 2005-10-29 devnull {
61 d1f529f4 2005-10-29 devnull freep->type = t;
62 d1f529f4 2005-10-29 devnull freep->left = 0;
63 d1f529f4 2005-10-29 devnull freep->right = 0;
64 d1f529f4 2005-10-29 devnull return freep++;
65 d1f529f4 2005-10-29 devnull }
66 d1f529f4 2005-10-29 devnull
67 d1f529f4 2005-10-29 devnull static void
68 d1f529f4 2005-10-29 devnull operand(int t)
69 d1f529f4 2005-10-29 devnull {
70 d1f529f4 2005-10-29 devnull Reinst *i;
71 d1f529f4 2005-10-29 devnull
72 d1f529f4 2005-10-29 devnull if(lastwasand)
73 d1f529f4 2005-10-29 devnull operator(CAT); /* catenate is implicit */
74 d1f529f4 2005-10-29 devnull i = newinst(t);
75 d1f529f4 2005-10-29 devnull
76 d1f529f4 2005-10-29 devnull if(t == CCLASS || t == NCCLASS)
77 d1f529f4 2005-10-29 devnull i->cp = yyclassp;
78 d1f529f4 2005-10-29 devnull if(t == RUNE)
79 d1f529f4 2005-10-29 devnull i->r = yyrune;
80 d1f529f4 2005-10-29 devnull
81 d1f529f4 2005-10-29 devnull pushand(i, i);
82 d1f529f4 2005-10-29 devnull lastwasand = TRUE;
83 d1f529f4 2005-10-29 devnull }
84 d1f529f4 2005-10-29 devnull
85 d1f529f4 2005-10-29 devnull static void
86 d1f529f4 2005-10-29 devnull operator(int t)
87 d1f529f4 2005-10-29 devnull {
88 d1f529f4 2005-10-29 devnull if(t==RBRA && --nbra<0)
89 d1f529f4 2005-10-29 devnull rcerror("unmatched right paren");
90 d1f529f4 2005-10-29 devnull if(t==LBRA){
91 d1f529f4 2005-10-29 devnull if(++cursubid >= NSUBEXP)
92 d1f529f4 2005-10-29 devnull rcerror ("too many subexpressions");
93 d1f529f4 2005-10-29 devnull nbra++;
94 d1f529f4 2005-10-29 devnull if(lastwasand)
95 d1f529f4 2005-10-29 devnull operator(CAT);
96 d1f529f4 2005-10-29 devnull } else
97 d1f529f4 2005-10-29 devnull evaluntil(t);
98 d1f529f4 2005-10-29 devnull if(t != RBRA)
99 d1f529f4 2005-10-29 devnull pushator(t);
100 d1f529f4 2005-10-29 devnull lastwasand = FALSE;
101 d1f529f4 2005-10-29 devnull if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
102 d1f529f4 2005-10-29 devnull lastwasand = TRUE; /* these look like operands */
103 d1f529f4 2005-10-29 devnull }
104 d1f529f4 2005-10-29 devnull
105 d1f529f4 2005-10-29 devnull static void
106 d1f529f4 2005-10-29 devnull regerr2(char *s, int c)
107 d1f529f4 2005-10-29 devnull {
108 d1f529f4 2005-10-29 devnull char buf[100];
109 d1f529f4 2005-10-29 devnull char *cp = buf;
110 d1f529f4 2005-10-29 devnull while(*s)
111 d1f529f4 2005-10-29 devnull *cp++ = *s++;
112 d1f529f4 2005-10-29 devnull *cp++ = c;
113 d1f529f4 2005-10-29 devnull *cp = '\0';
114 d1f529f4 2005-10-29 devnull rcerror(buf);
115 d1f529f4 2005-10-29 devnull }
116 d1f529f4 2005-10-29 devnull
117 d1f529f4 2005-10-29 devnull static void
118 d1f529f4 2005-10-29 devnull cant(char *s)
119 d1f529f4 2005-10-29 devnull {
120 d1f529f4 2005-10-29 devnull char buf[100];
121 d1f529f4 2005-10-29 devnull strcpy(buf, "can't happen: ");
122 d1f529f4 2005-10-29 devnull strcat(buf, s);
123 d1f529f4 2005-10-29 devnull rcerror(buf);
124 d1f529f4 2005-10-29 devnull }
125 d1f529f4 2005-10-29 devnull
126 d1f529f4 2005-10-29 devnull static void
127 d1f529f4 2005-10-29 devnull pushand(Reinst *f, Reinst *l)
128 d1f529f4 2005-10-29 devnull {
129 d1f529f4 2005-10-29 devnull if(andp >= &andstack[NSTACK])
130 d1f529f4 2005-10-29 devnull cant("operand stack overflow");
131 d1f529f4 2005-10-29 devnull andp->first = f;
132 d1f529f4 2005-10-29 devnull andp->last = l;
133 d1f529f4 2005-10-29 devnull andp++;
134 d1f529f4 2005-10-29 devnull }
135 d1f529f4 2005-10-29 devnull
136 d1f529f4 2005-10-29 devnull static void
137 d1f529f4 2005-10-29 devnull pushator(int t)
138 d1f529f4 2005-10-29 devnull {
139 d1f529f4 2005-10-29 devnull if(atorp >= &atorstack[NSTACK])
140 d1f529f4 2005-10-29 devnull cant("operator stack overflow");
141 d1f529f4 2005-10-29 devnull *atorp++ = t;
142 d1f529f4 2005-10-29 devnull *subidp++ = cursubid;
143 d1f529f4 2005-10-29 devnull }
144 d1f529f4 2005-10-29 devnull
145 d1f529f4 2005-10-29 devnull static Node*
146 d1f529f4 2005-10-29 devnull popand(int op)
147 d1f529f4 2005-10-29 devnull {
148 d1f529f4 2005-10-29 devnull Reinst *inst;
149 d1f529f4 2005-10-29 devnull
150 d1f529f4 2005-10-29 devnull if(andp <= &andstack[0]){
151 d1f529f4 2005-10-29 devnull regerr2("missing operand for ", op);
152 d1f529f4 2005-10-29 devnull inst = newinst(NOP);
153 d1f529f4 2005-10-29 devnull pushand(inst,inst);
154 d1f529f4 2005-10-29 devnull }
155 d1f529f4 2005-10-29 devnull return --andp;
156 d1f529f4 2005-10-29 devnull }
157 d1f529f4 2005-10-29 devnull
158 d1f529f4 2005-10-29 devnull static int
159 d1f529f4 2005-10-29 devnull popator(void)
160 d1f529f4 2005-10-29 devnull {
161 d1f529f4 2005-10-29 devnull if(atorp <= &atorstack[0])
162 d1f529f4 2005-10-29 devnull cant("operator stack underflow");
163 d1f529f4 2005-10-29 devnull --subidp;
164 d1f529f4 2005-10-29 devnull return *--atorp;
165 d1f529f4 2005-10-29 devnull }
166 d1f529f4 2005-10-29 devnull
167 d1f529f4 2005-10-29 devnull static void
168 d1f529f4 2005-10-29 devnull evaluntil(int pri)
169 d1f529f4 2005-10-29 devnull {
170 d1f529f4 2005-10-29 devnull Node *op1, *op2;
171 d1f529f4 2005-10-29 devnull Reinst *inst1, *inst2;
172 d1f529f4 2005-10-29 devnull
173 d1f529f4 2005-10-29 devnull while(pri==RBRA || atorp[-1]>=pri){
174 d1f529f4 2005-10-29 devnull switch(popator()){
175 d1f529f4 2005-10-29 devnull default:
176 d1f529f4 2005-10-29 devnull rcerror("unknown operator in evaluntil");
177 d1f529f4 2005-10-29 devnull break;
178 d1f529f4 2005-10-29 devnull case LBRA: /* must have been RBRA */
179 d1f529f4 2005-10-29 devnull op1 = popand('(');
180 d1f529f4 2005-10-29 devnull inst2 = newinst(RBRA);
181 d1f529f4 2005-10-29 devnull inst2->subid = *subidp;
182 d1f529f4 2005-10-29 devnull op1->last->next = inst2;
183 d1f529f4 2005-10-29 devnull inst1 = newinst(LBRA);
184 d1f529f4 2005-10-29 devnull inst1->subid = *subidp;
185 d1f529f4 2005-10-29 devnull inst1->next = op1->first;
186 d1f529f4 2005-10-29 devnull pushand(inst1, inst2);
187 d1f529f4 2005-10-29 devnull return;
188 d1f529f4 2005-10-29 devnull case OR:
189 d1f529f4 2005-10-29 devnull op2 = popand('|');
190 d1f529f4 2005-10-29 devnull op1 = popand('|');
191 d1f529f4 2005-10-29 devnull inst2 = newinst(NOP);
192 d1f529f4 2005-10-29 devnull op2->last->next = inst2;
193 d1f529f4 2005-10-29 devnull op1->last->next = inst2;
194 d1f529f4 2005-10-29 devnull inst1 = newinst(OR);
195 d1f529f4 2005-10-29 devnull inst1->right = op1->first;
196 d1f529f4 2005-10-29 devnull inst1->left = op2->first;
197 d1f529f4 2005-10-29 devnull pushand(inst1, inst2);
198 d1f529f4 2005-10-29 devnull break;
199 d1f529f4 2005-10-29 devnull case CAT:
200 d1f529f4 2005-10-29 devnull op2 = popand(0);
201 d1f529f4 2005-10-29 devnull op1 = popand(0);
202 d1f529f4 2005-10-29 devnull op1->last->next = op2->first;
203 d1f529f4 2005-10-29 devnull pushand(op1->first, op2->last);
204 d1f529f4 2005-10-29 devnull break;
205 d1f529f4 2005-10-29 devnull case STAR:
206 d1f529f4 2005-10-29 devnull op2 = popand('*');
207 d1f529f4 2005-10-29 devnull inst1 = newinst(OR);
208 d1f529f4 2005-10-29 devnull op2->last->next = inst1;
209 d1f529f4 2005-10-29 devnull inst1->right = op2->first;
210 d1f529f4 2005-10-29 devnull pushand(inst1, inst1);
211 d1f529f4 2005-10-29 devnull break;
212 d1f529f4 2005-10-29 devnull case PLUS:
213 d1f529f4 2005-10-29 devnull op2 = popand('+');
214 d1f529f4 2005-10-29 devnull inst1 = newinst(OR);
215 d1f529f4 2005-10-29 devnull op2->last->next = inst1;
216 d1f529f4 2005-10-29 devnull inst1->right = op2->first;
217 d1f529f4 2005-10-29 devnull pushand(op2->first, inst1);
218 d1f529f4 2005-10-29 devnull break;
219 d1f529f4 2005-10-29 devnull case QUEST:
220 d1f529f4 2005-10-29 devnull op2 = popand('?');
221 d1f529f4 2005-10-29 devnull inst1 = newinst(OR);
222 d1f529f4 2005-10-29 devnull inst2 = newinst(NOP);
223 d1f529f4 2005-10-29 devnull inst1->left = inst2;
224 d1f529f4 2005-10-29 devnull inst1->right = op2->first;
225 d1f529f4 2005-10-29 devnull op2->last->next = inst2;
226 d1f529f4 2005-10-29 devnull pushand(inst1, inst2);
227 d1f529f4 2005-10-29 devnull break;
228 d1f529f4 2005-10-29 devnull }
229 d1f529f4 2005-10-29 devnull }
230 d1f529f4 2005-10-29 devnull }
231 d1f529f4 2005-10-29 devnull
232 d1f529f4 2005-10-29 devnull static Reprog*
233 d1f529f4 2005-10-29 devnull optimize(Reprog *pp)
234 d1f529f4 2005-10-29 devnull {
235 d1f529f4 2005-10-29 devnull Reinst *inst, *target;
236 d1f529f4 2005-10-29 devnull int size;
237 d1f529f4 2005-10-29 devnull Reprog *npp;
238 d1f529f4 2005-10-29 devnull Reclass *cl;
239 d1f529f4 2005-10-29 devnull int diff;
240 d1f529f4 2005-10-29 devnull
241 d1f529f4 2005-10-29 devnull /*
242 d1f529f4 2005-10-29 devnull * get rid of NOOP chains
243 d1f529f4 2005-10-29 devnull */
244 d1f529f4 2005-10-29 devnull for(inst=pp->firstinst; inst->type!=END; inst++){
245 d1f529f4 2005-10-29 devnull target = inst->next;
246 d1f529f4 2005-10-29 devnull while(target->type == NOP)
247 d1f529f4 2005-10-29 devnull target = target->next;
248 d1f529f4 2005-10-29 devnull inst->next = target;
249 d1f529f4 2005-10-29 devnull }
250 d1f529f4 2005-10-29 devnull
251 d1f529f4 2005-10-29 devnull /*
252 d1f529f4 2005-10-29 devnull * The original allocation is for an area larger than
253 d1f529f4 2005-10-29 devnull * necessary. Reallocate to the actual space used
254 d1f529f4 2005-10-29 devnull * and then relocate the code.
255 d1f529f4 2005-10-29 devnull */
256 d1f529f4 2005-10-29 devnull size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
257 d1f529f4 2005-10-29 devnull npp = realloc(pp, size);
258 d1f529f4 2005-10-29 devnull if(npp==0 || npp==pp)
259 d1f529f4 2005-10-29 devnull return pp;
260 d1f529f4 2005-10-29 devnull diff = (char *)npp - (char *)pp;
261 d1f529f4 2005-10-29 devnull freep = (Reinst *)((char *)freep + diff);
262 d1f529f4 2005-10-29 devnull for(inst=npp->firstinst; inst<freep; inst++){
263 d1f529f4 2005-10-29 devnull switch(inst->type){
264 d1f529f4 2005-10-29 devnull case OR:
265 d1f529f4 2005-10-29 devnull case STAR:
266 d1f529f4 2005-10-29 devnull case PLUS:
267 d1f529f4 2005-10-29 devnull case QUEST:
268 d1f529f4 2005-10-29 devnull *(char **)&inst->right += diff;
269 d1f529f4 2005-10-29 devnull break;
270 d1f529f4 2005-10-29 devnull case CCLASS:
271 d1f529f4 2005-10-29 devnull case NCCLASS:
272 d1f529f4 2005-10-29 devnull *(char **)&inst->right += diff;
273 d1f529f4 2005-10-29 devnull cl = inst->cp;
274 d1f529f4 2005-10-29 devnull *(char **)&cl->end += diff;
275 d1f529f4 2005-10-29 devnull break;
276 d1f529f4 2005-10-29 devnull }
277 d1f529f4 2005-10-29 devnull *(char **)&inst->left += diff;
278 d1f529f4 2005-10-29 devnull }
279 d1f529f4 2005-10-29 devnull *(char **)&npp->startinst += diff;
280 d1f529f4 2005-10-29 devnull return npp;
281 d1f529f4 2005-10-29 devnull }
282 d1f529f4 2005-10-29 devnull
283 d1f529f4 2005-10-29 devnull #ifdef DEBUG
284 d1f529f4 2005-10-29 devnull static void
285 d1f529f4 2005-10-29 devnull dumpstack(void){
286 d1f529f4 2005-10-29 devnull Node *stk;
287 d1f529f4 2005-10-29 devnull int *ip;
288 d1f529f4 2005-10-29 devnull
289 d1f529f4 2005-10-29 devnull print("operators\n");
290 d1f529f4 2005-10-29 devnull for(ip=atorstack; ip<atorp; ip++)
291 d1f529f4 2005-10-29 devnull print("0%o\n", *ip);
292 d1f529f4 2005-10-29 devnull print("operands\n");
293 d1f529f4 2005-10-29 devnull for(stk=andstack; stk<andp; stk++)
294 d1f529f4 2005-10-29 devnull print("0%o\t0%o\n", stk->first->type, stk->last->type);
295 d1f529f4 2005-10-29 devnull }
296 d1f529f4 2005-10-29 devnull
297 d1f529f4 2005-10-29 devnull static void
298 d1f529f4 2005-10-29 devnull dump(Reprog *pp)
299 d1f529f4 2005-10-29 devnull {
300 d1f529f4 2005-10-29 devnull Reinst *l;
301 d1f529f4 2005-10-29 devnull Rune *p;
302 d1f529f4 2005-10-29 devnull
303 d1f529f4 2005-10-29 devnull l = pp->firstinst;
304 d1f529f4 2005-10-29 devnull do{
305 d1f529f4 2005-10-29 devnull print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
306 d1f529f4 2005-10-29 devnull l->left-pp->firstinst, l->right-pp->firstinst);
307 d1f529f4 2005-10-29 devnull if(l->type == RUNE)
308 d1f529f4 2005-10-29 devnull print("\t%C\n", l->r);
309 d1f529f4 2005-10-29 devnull else if(l->type == CCLASS || l->type == NCCLASS){
310 d1f529f4 2005-10-29 devnull print("\t[");
311 d1f529f4 2005-10-29 devnull if(l->type == NCCLASS)
312 d1f529f4 2005-10-29 devnull print("^");
313 d1f529f4 2005-10-29 devnull for(p = l->cp->spans; p < l->cp->end; p += 2)
314 d1f529f4 2005-10-29 devnull if(p[0] == p[1])
315 d1f529f4 2005-10-29 devnull print("%C", p[0]);
316 d1f529f4 2005-10-29 devnull else
317 d1f529f4 2005-10-29 devnull print("%C-%C", p[0], p[1]);
318 d1f529f4 2005-10-29 devnull print("]\n");
319 d1f529f4 2005-10-29 devnull } else
320 d1f529f4 2005-10-29 devnull print("\n");
321 d1f529f4 2005-10-29 devnull }while(l++->type);
322 d1f529f4 2005-10-29 devnull }
323 d1f529f4 2005-10-29 devnull #endif
324 d1f529f4 2005-10-29 devnull
325 d1f529f4 2005-10-29 devnull static Reclass*
326 d1f529f4 2005-10-29 devnull newclass(void)
327 d1f529f4 2005-10-29 devnull {
328 d1f529f4 2005-10-29 devnull if(nclass <= 0){
329 d1f529f4 2005-10-29 devnull classp = mallocz(128*sizeof(Reclass), 1);
330 d1f529f4 2005-10-29 devnull if(classp == nil)
331 d1f529f4 2005-10-29 devnull regerror("out of memory");
332 d1f529f4 2005-10-29 devnull nclass = 128;
333 d1f529f4 2005-10-29 devnull }
334 d1f529f4 2005-10-29 devnull return &classp[--nclass];
335 d1f529f4 2005-10-29 devnull }
336 d1f529f4 2005-10-29 devnull
337 d1f529f4 2005-10-29 devnull static int
338 d1f529f4 2005-10-29 devnull nextc(Rune *rp)
339 d1f529f4 2005-10-29 devnull {
340 d1f529f4 2005-10-29 devnull if(lexdone){
341 d1f529f4 2005-10-29 devnull *rp = 0;
342 d1f529f4 2005-10-29 devnull return 1;
343 d1f529f4 2005-10-29 devnull }
344 d1f529f4 2005-10-29 devnull exprp += chartorune(rp, exprp);
345 d1f529f4 2005-10-29 devnull if(*rp == L'\\'){
346 d1f529f4 2005-10-29 devnull exprp += chartorune(rp, exprp);
347 d1f529f4 2005-10-29 devnull return 1;
348 d1f529f4 2005-10-29 devnull }
349 d1f529f4 2005-10-29 devnull if(*rp == 0)
350 d1f529f4 2005-10-29 devnull lexdone = 1;
351 d1f529f4 2005-10-29 devnull return 0;
352 d1f529f4 2005-10-29 devnull }
353 d1f529f4 2005-10-29 devnull
354 d1f529f4 2005-10-29 devnull static int
355 d1f529f4 2005-10-29 devnull lex(int literal, int dot_type)
356 d1f529f4 2005-10-29 devnull {
357 d1f529f4 2005-10-29 devnull int quoted;
358 d1f529f4 2005-10-29 devnull
359 d1f529f4 2005-10-29 devnull quoted = nextc(&yyrune);
360 d1f529f4 2005-10-29 devnull if(literal || quoted){
361 d1f529f4 2005-10-29 devnull if(yyrune == 0)
362 d1f529f4 2005-10-29 devnull return END;
363 d1f529f4 2005-10-29 devnull return RUNE;
364 d1f529f4 2005-10-29 devnull }
365 d1f529f4 2005-10-29 devnull
366 d1f529f4 2005-10-29 devnull switch(yyrune){
367 d1f529f4 2005-10-29 devnull case 0:
368 d1f529f4 2005-10-29 devnull return END;
369 d1f529f4 2005-10-29 devnull case L'*':
370 d1f529f4 2005-10-29 devnull return STAR;
371 d1f529f4 2005-10-29 devnull case L'?':
372 d1f529f4 2005-10-29 devnull return QUEST;
373 d1f529f4 2005-10-29 devnull case L'+':
374 d1f529f4 2005-10-29 devnull return PLUS;
375 d1f529f4 2005-10-29 devnull case L'|':
376 d1f529f4 2005-10-29 devnull return OR;
377 d1f529f4 2005-10-29 devnull case L'.':
378 d1f529f4 2005-10-29 devnull return dot_type;
379 d1f529f4 2005-10-29 devnull case L'(':
380 d1f529f4 2005-10-29 devnull return LBRA;
381 d1f529f4 2005-10-29 devnull case L')':
382 d1f529f4 2005-10-29 devnull return RBRA;
383 d1f529f4 2005-10-29 devnull case L'^':
384 d1f529f4 2005-10-29 devnull return BOL;
385 d1f529f4 2005-10-29 devnull case L'$':
386 d1f529f4 2005-10-29 devnull return EOL;
387 d1f529f4 2005-10-29 devnull case L'[':
388 d1f529f4 2005-10-29 devnull return bldcclass();
389 d1f529f4 2005-10-29 devnull }
390 d1f529f4 2005-10-29 devnull return RUNE;
391 d1f529f4 2005-10-29 devnull }
392 d1f529f4 2005-10-29 devnull
393 d1f529f4 2005-10-29 devnull static int
394 d1f529f4 2005-10-29 devnull bldcclass(void)
395 d1f529f4 2005-10-29 devnull {
396 d1f529f4 2005-10-29 devnull int type;
397 d1f529f4 2005-10-29 devnull Rune r[NCCRUNE];
398 d1f529f4 2005-10-29 devnull Rune *p, *ep, *np;
399 d1f529f4 2005-10-29 devnull Rune rune;
400 d1f529f4 2005-10-29 devnull int quoted;
401 d1f529f4 2005-10-29 devnull
402 d1f529f4 2005-10-29 devnull /* we have already seen the '[' */
403 d1f529f4 2005-10-29 devnull type = CCLASS;
404 d1f529f4 2005-10-29 devnull yyclassp = newclass();
405 d1f529f4 2005-10-29 devnull
406 d1f529f4 2005-10-29 devnull /* look ahead for negation */
407 d1f529f4 2005-10-29 devnull /* SPECIAL CASE!!! negated classes don't match \n */
408 d1f529f4 2005-10-29 devnull ep = r;
409 d1f529f4 2005-10-29 devnull quoted = nextc(&rune);
410 d1f529f4 2005-10-29 devnull if(!quoted && rune == L'^'){
411 d1f529f4 2005-10-29 devnull type = NCCLASS;
412 d1f529f4 2005-10-29 devnull quoted = nextc(&rune);
413 d1f529f4 2005-10-29 devnull *ep++ = L'\n';
414 d1f529f4 2005-10-29 devnull *ep++ = L'\n';
415 d1f529f4 2005-10-29 devnull }
416 d1f529f4 2005-10-29 devnull
417 d1f529f4 2005-10-29 devnull /* parse class into a set of spans */
418 d1f529f4 2005-10-29 devnull for(; ep<&r[NCCRUNE];){
419 d1f529f4 2005-10-29 devnull if(rune == 0){
420 d1f529f4 2005-10-29 devnull rcerror("malformed '[]'");
421 d1f529f4 2005-10-29 devnull return 0;
422 d1f529f4 2005-10-29 devnull }
423 d1f529f4 2005-10-29 devnull if(!quoted && rune == L']')
424 d1f529f4 2005-10-29 devnull break;
425 d1f529f4 2005-10-29 devnull if(!quoted && rune == L'-'){
426 d1f529f4 2005-10-29 devnull if(ep == r){
427 d1f529f4 2005-10-29 devnull rcerror("malformed '[]'");
428 d1f529f4 2005-10-29 devnull return 0;
429 d1f529f4 2005-10-29 devnull }
430 d1f529f4 2005-10-29 devnull quoted = nextc(&rune);
431 d1f529f4 2005-10-29 devnull if((!quoted && rune == L']') || rune == 0){
432 d1f529f4 2005-10-29 devnull rcerror("malformed '[]'");
433 d1f529f4 2005-10-29 devnull return 0;
434 d1f529f4 2005-10-29 devnull }
435 d1f529f4 2005-10-29 devnull *(ep-1) = rune;
436 d1f529f4 2005-10-29 devnull } else {
437 d1f529f4 2005-10-29 devnull *ep++ = rune;
438 d1f529f4 2005-10-29 devnull *ep++ = rune;
439 d1f529f4 2005-10-29 devnull }
440 d1f529f4 2005-10-29 devnull quoted = nextc(&rune);
441 d1f529f4 2005-10-29 devnull }
442 d1f529f4 2005-10-29 devnull
443 d1f529f4 2005-10-29 devnull /* sort on span start */
444 d1f529f4 2005-10-29 devnull for(p = r; p < ep; p += 2){
445 d1f529f4 2005-10-29 devnull for(np = p; np < ep; np += 2)
446 d1f529f4 2005-10-29 devnull if(*np < *p){
447 d1f529f4 2005-10-29 devnull rune = np[0];
448 d1f529f4 2005-10-29 devnull np[0] = p[0];
449 d1f529f4 2005-10-29 devnull p[0] = rune;
450 d1f529f4 2005-10-29 devnull rune = np[1];
451 d1f529f4 2005-10-29 devnull np[1] = p[1];
452 d1f529f4 2005-10-29 devnull p[1] = rune;
453 d1f529f4 2005-10-29 devnull }
454 d1f529f4 2005-10-29 devnull }
455 d1f529f4 2005-10-29 devnull
456 d1f529f4 2005-10-29 devnull /* merge spans */
457 d1f529f4 2005-10-29 devnull np = yyclassp->spans;
458 d1f529f4 2005-10-29 devnull p = r;
459 d1f529f4 2005-10-29 devnull if(r == ep)
460 d1f529f4 2005-10-29 devnull yyclassp->end = np;
461 d1f529f4 2005-10-29 devnull else {
462 d1f529f4 2005-10-29 devnull np[0] = *p++;
463 d1f529f4 2005-10-29 devnull np[1] = *p++;
464 d1f529f4 2005-10-29 devnull for(; p < ep; p += 2)
465 d1f529f4 2005-10-29 devnull if(p[0] <= np[1]){
466 d1f529f4 2005-10-29 devnull if(p[1] > np[1])
467 d1f529f4 2005-10-29 devnull np[1] = p[1];
468 d1f529f4 2005-10-29 devnull } else {
469 d1f529f4 2005-10-29 devnull np += 2;
470 d1f529f4 2005-10-29 devnull np[0] = p[0];
471 d1f529f4 2005-10-29 devnull np[1] = p[1];
472 d1f529f4 2005-10-29 devnull }
473 d1f529f4 2005-10-29 devnull yyclassp->end = np+2;
474 d1f529f4 2005-10-29 devnull }
475 d1f529f4 2005-10-29 devnull
476 d1f529f4 2005-10-29 devnull return type;
477 d1f529f4 2005-10-29 devnull }
478 d1f529f4 2005-10-29 devnull
479 d1f529f4 2005-10-29 devnull static Reprog*
480 d1f529f4 2005-10-29 devnull regcomp1(char *s, int literal, int dot_type)
481 d1f529f4 2005-10-29 devnull {
482 d1f529f4 2005-10-29 devnull int token;
483 d1f529f4 2005-10-29 devnull Reprog *pp;
484 d1f529f4 2005-10-29 devnull
485 d1f529f4 2005-10-29 devnull /* get memory for the program */
486 d1f529f4 2005-10-29 devnull pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
487 d1f529f4 2005-10-29 devnull if(pp == 0){
488 d1f529f4 2005-10-29 devnull regerror("out of memory");
489 d1f529f4 2005-10-29 devnull return 0;
490 d1f529f4 2005-10-29 devnull }
491 d1f529f4 2005-10-29 devnull freep = pp->firstinst;
492 d1f529f4 2005-10-29 devnull classp = pp->class;
493 d1f529f4 2005-10-29 devnull errors = 0;
494 d1f529f4 2005-10-29 devnull
495 d1f529f4 2005-10-29 devnull if(setjmp(regkaboom))
496 d1f529f4 2005-10-29 devnull goto out;
497 d1f529f4 2005-10-29 devnull
498 d1f529f4 2005-10-29 devnull /* go compile the sucker */
499 d1f529f4 2005-10-29 devnull lexdone = 0;
500 d1f529f4 2005-10-29 devnull exprp = s;
501 d1f529f4 2005-10-29 devnull nclass = NCLASS;
502 d1f529f4 2005-10-29 devnull nbra = 0;
503 d1f529f4 2005-10-29 devnull atorp = atorstack;
504 d1f529f4 2005-10-29 devnull andp = andstack;
505 d1f529f4 2005-10-29 devnull subidp = subidstack;
506 d1f529f4 2005-10-29 devnull lastwasand = FALSE;
507 d1f529f4 2005-10-29 devnull cursubid = 0;
508 d1f529f4 2005-10-29 devnull
509 d1f529f4 2005-10-29 devnull /* Start with a low priority operator to prime parser */
510 d1f529f4 2005-10-29 devnull pushator(START-1);
511 d1f529f4 2005-10-29 devnull while((token = lex(literal, dot_type)) != END){
512 d1f529f4 2005-10-29 devnull if((token&0300) == OPERATOR)
513 d1f529f4 2005-10-29 devnull operator(token);
514 d1f529f4 2005-10-29 devnull else
515 d1f529f4 2005-10-29 devnull operand(token);
516 d1f529f4 2005-10-29 devnull }
517 d1f529f4 2005-10-29 devnull
518 d1f529f4 2005-10-29 devnull /* Close with a low priority operator */
519 d1f529f4 2005-10-29 devnull evaluntil(START);
520 d1f529f4 2005-10-29 devnull
521 d1f529f4 2005-10-29 devnull /* Force END */
522 d1f529f4 2005-10-29 devnull operand(END);
523 d1f529f4 2005-10-29 devnull evaluntil(START);
524 d1f529f4 2005-10-29 devnull #ifdef DEBUG
525 d1f529f4 2005-10-29 devnull dumpstack();
526 d1f529f4 2005-10-29 devnull #endif
527 d1f529f4 2005-10-29 devnull if(nbra)
528 d1f529f4 2005-10-29 devnull rcerror("unmatched left paren");
529 d1f529f4 2005-10-29 devnull --andp; /* points to first and only operand */
530 d1f529f4 2005-10-29 devnull pp->startinst = andp->first;
531 d1f529f4 2005-10-29 devnull #ifdef DEBUG
532 d1f529f4 2005-10-29 devnull dump(pp);
533 d1f529f4 2005-10-29 devnull #endif
534 d1f529f4 2005-10-29 devnull pp = optimize(pp);
535 d1f529f4 2005-10-29 devnull #ifdef DEBUG
536 d1f529f4 2005-10-29 devnull print("start: %d\n", andp->first-pp->firstinst);
537 d1f529f4 2005-10-29 devnull dump(pp);
538 d1f529f4 2005-10-29 devnull #endif
539 d1f529f4 2005-10-29 devnull out:
540 d1f529f4 2005-10-29 devnull if(errors){
541 d1f529f4 2005-10-29 devnull free(pp);
542 d1f529f4 2005-10-29 devnull pp = 0;
543 d1f529f4 2005-10-29 devnull }
544 d1f529f4 2005-10-29 devnull return pp;
545 d1f529f4 2005-10-29 devnull }
546 d1f529f4 2005-10-29 devnull
547 d1f529f4 2005-10-29 devnull extern Reprog*
548 d1f529f4 2005-10-29 devnull regcomp(char *s)
549 d1f529f4 2005-10-29 devnull {
550 d1f529f4 2005-10-29 devnull return regcomp1(s, 0, ANY);
551 d1f529f4 2005-10-29 devnull }
552 d1f529f4 2005-10-29 devnull
553 d1f529f4 2005-10-29 devnull extern Reprog*
554 d1f529f4 2005-10-29 devnull regcomplit(char *s)
555 d1f529f4 2005-10-29 devnull {
556 d1f529f4 2005-10-29 devnull return regcomp1(s, 1, ANY);
557 d1f529f4 2005-10-29 devnull }
558 d1f529f4 2005-10-29 devnull
559 d1f529f4 2005-10-29 devnull extern Reprog*
560 d1f529f4 2005-10-29 devnull regcompnl(char *s)
561 d1f529f4 2005-10-29 devnull {
562 d1f529f4 2005-10-29 devnull return regcomp1(s, 0, ANYNL);
563 d1f529f4 2005-10-29 devnull }