Blame


1 b2cfc4e2 2003-09-30 devnull #include "lib9.h"
2 b2cfc4e2 2003-09-30 devnull #include "regexp9.h"
3 b2cfc4e2 2003-09-30 devnull #include "regcomp.h"
4 b2cfc4e2 2003-09-30 devnull
5 b2cfc4e2 2003-09-30 devnull #define TRUE 1
6 b2cfc4e2 2003-09-30 devnull #define FALSE 0
7 b2cfc4e2 2003-09-30 devnull
8 b2cfc4e2 2003-09-30 devnull /*
9 b2cfc4e2 2003-09-30 devnull * Parser Information
10 b2cfc4e2 2003-09-30 devnull */
11 b2cfc4e2 2003-09-30 devnull typedef
12 b2cfc4e2 2003-09-30 devnull struct Node
13 b2cfc4e2 2003-09-30 devnull {
14 b2cfc4e2 2003-09-30 devnull Reinst* first;
15 b2cfc4e2 2003-09-30 devnull Reinst* last;
16 b2cfc4e2 2003-09-30 devnull }Node;
17 b2cfc4e2 2003-09-30 devnull
18 b2cfc4e2 2003-09-30 devnull Reprog RePrOg;
19 b2cfc4e2 2003-09-30 devnull
20 b2cfc4e2 2003-09-30 devnull #define NSTACK 20
21 b2cfc4e2 2003-09-30 devnull static Node andstack[NSTACK];
22 b2cfc4e2 2003-09-30 devnull static Node *andp;
23 b2cfc4e2 2003-09-30 devnull static int atorstack[NSTACK];
24 b2cfc4e2 2003-09-30 devnull static int* atorp;
25 b2cfc4e2 2003-09-30 devnull static int cursubid; /* id of current subexpression */
26 b2cfc4e2 2003-09-30 devnull static int subidstack[NSTACK]; /* parallel to atorstack */
27 b2cfc4e2 2003-09-30 devnull static int* subidp;
28 b2cfc4e2 2003-09-30 devnull static int lastwasand; /* Last token was operand */
29 b2cfc4e2 2003-09-30 devnull static int nbra;
30 b2cfc4e2 2003-09-30 devnull static char* exprp; /* pointer to next character in source expression */
31 b2cfc4e2 2003-09-30 devnull static int lexdone;
32 b2cfc4e2 2003-09-30 devnull static int nclass;
33 b2cfc4e2 2003-09-30 devnull static Reclass*classp;
34 b2cfc4e2 2003-09-30 devnull static Reinst* freep;
35 b2cfc4e2 2003-09-30 devnull static int errors;
36 b2cfc4e2 2003-09-30 devnull static Rune yyrune; /* last lex'd rune */
37 b2cfc4e2 2003-09-30 devnull static Reclass*yyclassp; /* last lex'd class */
38 b2cfc4e2 2003-09-30 devnull
39 b2cfc4e2 2003-09-30 devnull /* predeclared crap */
40 b2cfc4e2 2003-09-30 devnull static void operator(int);
41 b2cfc4e2 2003-09-30 devnull static void pushand(Reinst*, Reinst*);
42 b2cfc4e2 2003-09-30 devnull static void pushator(int);
43 b2cfc4e2 2003-09-30 devnull static void evaluntil(int);
44 b2cfc4e2 2003-09-30 devnull static int bldcclass(void);
45 b2cfc4e2 2003-09-30 devnull
46 b2cfc4e2 2003-09-30 devnull static jmp_buf regkaboom;
47 b2cfc4e2 2003-09-30 devnull
48 b2cfc4e2 2003-09-30 devnull static void
49 b2cfc4e2 2003-09-30 devnull rcerror(char *s)
50 b2cfc4e2 2003-09-30 devnull {
51 b2cfc4e2 2003-09-30 devnull errors++;
52 b2cfc4e2 2003-09-30 devnull regerror(s);
53 b2cfc4e2 2003-09-30 devnull longjmp(regkaboom, 1);
54 b2cfc4e2 2003-09-30 devnull }
55 b2cfc4e2 2003-09-30 devnull
56 b2cfc4e2 2003-09-30 devnull static Reinst*
57 b2cfc4e2 2003-09-30 devnull newinst(int t)
58 b2cfc4e2 2003-09-30 devnull {
59 b2cfc4e2 2003-09-30 devnull freep->type = t;
60 b2cfc4e2 2003-09-30 devnull freep->u2.left = 0;
61 b2cfc4e2 2003-09-30 devnull freep->u1.right = 0;
62 b2cfc4e2 2003-09-30 devnull return freep++;
63 b2cfc4e2 2003-09-30 devnull }
64 b2cfc4e2 2003-09-30 devnull
65 b2cfc4e2 2003-09-30 devnull static void
66 b2cfc4e2 2003-09-30 devnull operand(int t)
67 b2cfc4e2 2003-09-30 devnull {
68 b2cfc4e2 2003-09-30 devnull Reinst *i;
69 b2cfc4e2 2003-09-30 devnull
70 b2cfc4e2 2003-09-30 devnull if(lastwasand)
71 b2cfc4e2 2003-09-30 devnull operator(CAT); /* catenate is implicit */
72 b2cfc4e2 2003-09-30 devnull i = newinst(t);
73 b2cfc4e2 2003-09-30 devnull
74 b2cfc4e2 2003-09-30 devnull if(t == CCLASS || t == NCCLASS)
75 b2cfc4e2 2003-09-30 devnull i->u1.cp = yyclassp;
76 b2cfc4e2 2003-09-30 devnull if(t == RUNE)
77 b2cfc4e2 2003-09-30 devnull i->u1.r = yyrune;
78 b2cfc4e2 2003-09-30 devnull
79 b2cfc4e2 2003-09-30 devnull pushand(i, i);
80 b2cfc4e2 2003-09-30 devnull lastwasand = TRUE;
81 b2cfc4e2 2003-09-30 devnull }
82 b2cfc4e2 2003-09-30 devnull
83 b2cfc4e2 2003-09-30 devnull static void
84 b2cfc4e2 2003-09-30 devnull operator(int t)
85 b2cfc4e2 2003-09-30 devnull {
86 b2cfc4e2 2003-09-30 devnull if(t==RBRA && --nbra<0)
87 b2cfc4e2 2003-09-30 devnull rcerror("unmatched right paren");
88 b2cfc4e2 2003-09-30 devnull if(t==LBRA){
89 b2cfc4e2 2003-09-30 devnull if(++cursubid >= NSUBEXP)
90 b2cfc4e2 2003-09-30 devnull rcerror ("too many subexpressions");
91 b2cfc4e2 2003-09-30 devnull nbra++;
92 b2cfc4e2 2003-09-30 devnull if(lastwasand)
93 b2cfc4e2 2003-09-30 devnull operator(CAT);
94 b2cfc4e2 2003-09-30 devnull } else
95 b2cfc4e2 2003-09-30 devnull evaluntil(t);
96 b2cfc4e2 2003-09-30 devnull if(t != RBRA)
97 b2cfc4e2 2003-09-30 devnull pushator(t);
98 b2cfc4e2 2003-09-30 devnull lastwasand = FALSE;
99 b2cfc4e2 2003-09-30 devnull if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
100 b2cfc4e2 2003-09-30 devnull lastwasand = TRUE; /* these look like operands */
101 b2cfc4e2 2003-09-30 devnull }
102 b2cfc4e2 2003-09-30 devnull
103 b2cfc4e2 2003-09-30 devnull static void
104 b2cfc4e2 2003-09-30 devnull regerr2(char *s, int c)
105 b2cfc4e2 2003-09-30 devnull {
106 b2cfc4e2 2003-09-30 devnull char buf[100];
107 b2cfc4e2 2003-09-30 devnull char *cp = buf;
108 b2cfc4e2 2003-09-30 devnull while(*s)
109 b2cfc4e2 2003-09-30 devnull *cp++ = *s++;
110 b2cfc4e2 2003-09-30 devnull *cp++ = c;
111 b2cfc4e2 2003-09-30 devnull *cp = '\0';
112 b2cfc4e2 2003-09-30 devnull rcerror(buf);
113 b2cfc4e2 2003-09-30 devnull }
114 b2cfc4e2 2003-09-30 devnull
115 b2cfc4e2 2003-09-30 devnull static void
116 b2cfc4e2 2003-09-30 devnull cant(char *s)
117 b2cfc4e2 2003-09-30 devnull {
118 b2cfc4e2 2003-09-30 devnull char buf[100];
119 b2cfc4e2 2003-09-30 devnull strcpy(buf, "can't happen: ");
120 b2cfc4e2 2003-09-30 devnull strcat(buf, s);
121 b2cfc4e2 2003-09-30 devnull rcerror(buf);
122 b2cfc4e2 2003-09-30 devnull }
123 b2cfc4e2 2003-09-30 devnull
124 b2cfc4e2 2003-09-30 devnull static void
125 b2cfc4e2 2003-09-30 devnull pushand(Reinst *f, Reinst *l)
126 b2cfc4e2 2003-09-30 devnull {
127 b2cfc4e2 2003-09-30 devnull if(andp >= &andstack[NSTACK])
128 b2cfc4e2 2003-09-30 devnull cant("operand stack overflow");
129 b2cfc4e2 2003-09-30 devnull andp->first = f;
130 b2cfc4e2 2003-09-30 devnull andp->last = l;
131 b2cfc4e2 2003-09-30 devnull andp++;
132 b2cfc4e2 2003-09-30 devnull }
133 b2cfc4e2 2003-09-30 devnull
134 b2cfc4e2 2003-09-30 devnull static void
135 b2cfc4e2 2003-09-30 devnull pushator(int t)
136 b2cfc4e2 2003-09-30 devnull {
137 b2cfc4e2 2003-09-30 devnull if(atorp >= &atorstack[NSTACK])
138 b2cfc4e2 2003-09-30 devnull cant("operator stack overflow");
139 b2cfc4e2 2003-09-30 devnull *atorp++ = t;
140 b2cfc4e2 2003-09-30 devnull *subidp++ = cursubid;
141 b2cfc4e2 2003-09-30 devnull }
142 b2cfc4e2 2003-09-30 devnull
143 b2cfc4e2 2003-09-30 devnull static Node*
144 b2cfc4e2 2003-09-30 devnull popand(int op)
145 b2cfc4e2 2003-09-30 devnull {
146 b2cfc4e2 2003-09-30 devnull Reinst *inst;
147 b2cfc4e2 2003-09-30 devnull
148 b2cfc4e2 2003-09-30 devnull if(andp <= &andstack[0]){
149 b2cfc4e2 2003-09-30 devnull regerr2("missing operand for ", op);
150 b2cfc4e2 2003-09-30 devnull inst = newinst(NOP);
151 b2cfc4e2 2003-09-30 devnull pushand(inst,inst);
152 b2cfc4e2 2003-09-30 devnull }
153 b2cfc4e2 2003-09-30 devnull return --andp;
154 b2cfc4e2 2003-09-30 devnull }
155 b2cfc4e2 2003-09-30 devnull
156 b2cfc4e2 2003-09-30 devnull static int
157 b2cfc4e2 2003-09-30 devnull popator(void)
158 b2cfc4e2 2003-09-30 devnull {
159 b2cfc4e2 2003-09-30 devnull if(atorp <= &atorstack[0])
160 b2cfc4e2 2003-09-30 devnull cant("operator stack underflow");
161 b2cfc4e2 2003-09-30 devnull --subidp;
162 b2cfc4e2 2003-09-30 devnull return *--atorp;
163 b2cfc4e2 2003-09-30 devnull }
164 b2cfc4e2 2003-09-30 devnull
165 b2cfc4e2 2003-09-30 devnull static void
166 b2cfc4e2 2003-09-30 devnull evaluntil(int pri)
167 b2cfc4e2 2003-09-30 devnull {
168 b2cfc4e2 2003-09-30 devnull Node *op1, *op2;
169 b2cfc4e2 2003-09-30 devnull Reinst *inst1, *inst2;
170 b2cfc4e2 2003-09-30 devnull
171 b2cfc4e2 2003-09-30 devnull while(pri==RBRA || atorp[-1]>=pri){
172 b2cfc4e2 2003-09-30 devnull switch(popator()){
173 b2cfc4e2 2003-09-30 devnull default:
174 b2cfc4e2 2003-09-30 devnull rcerror("unknown operator in evaluntil");
175 b2cfc4e2 2003-09-30 devnull break;
176 b2cfc4e2 2003-09-30 devnull case LBRA: /* must have been RBRA */
177 b2cfc4e2 2003-09-30 devnull op1 = popand('(');
178 b2cfc4e2 2003-09-30 devnull inst2 = newinst(RBRA);
179 b2cfc4e2 2003-09-30 devnull inst2->u1.subid = *subidp;
180 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = inst2;
181 b2cfc4e2 2003-09-30 devnull inst1 = newinst(LBRA);
182 b2cfc4e2 2003-09-30 devnull inst1->u1.subid = *subidp;
183 b2cfc4e2 2003-09-30 devnull inst1->u2.next = op1->first;
184 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
185 b2cfc4e2 2003-09-30 devnull return;
186 b2cfc4e2 2003-09-30 devnull case OR:
187 b2cfc4e2 2003-09-30 devnull op2 = popand('|');
188 b2cfc4e2 2003-09-30 devnull op1 = popand('|');
189 b2cfc4e2 2003-09-30 devnull inst2 = newinst(NOP);
190 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst2;
191 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = inst2;
192 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
193 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op1->first;
194 b2cfc4e2 2003-09-30 devnull inst1->u2.left = op2->first;
195 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
196 b2cfc4e2 2003-09-30 devnull break;
197 b2cfc4e2 2003-09-30 devnull case CAT:
198 b2cfc4e2 2003-09-30 devnull op2 = popand(0);
199 b2cfc4e2 2003-09-30 devnull op1 = popand(0);
200 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = op2->first;
201 b2cfc4e2 2003-09-30 devnull pushand(op1->first, op2->last);
202 b2cfc4e2 2003-09-30 devnull break;
203 b2cfc4e2 2003-09-30 devnull case STAR:
204 b2cfc4e2 2003-09-30 devnull op2 = popand('*');
205 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
206 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst1;
207 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
208 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst1);
209 b2cfc4e2 2003-09-30 devnull break;
210 b2cfc4e2 2003-09-30 devnull case PLUS:
211 b2cfc4e2 2003-09-30 devnull op2 = popand('+');
212 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
213 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst1;
214 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
215 b2cfc4e2 2003-09-30 devnull pushand(op2->first, inst1);
216 b2cfc4e2 2003-09-30 devnull break;
217 b2cfc4e2 2003-09-30 devnull case QUEST:
218 b2cfc4e2 2003-09-30 devnull op2 = popand('?');
219 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
220 b2cfc4e2 2003-09-30 devnull inst2 = newinst(NOP);
221 b2cfc4e2 2003-09-30 devnull inst1->u2.left = inst2;
222 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
223 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst2;
224 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
225 b2cfc4e2 2003-09-30 devnull break;
226 b2cfc4e2 2003-09-30 devnull }
227 b2cfc4e2 2003-09-30 devnull }
228 b2cfc4e2 2003-09-30 devnull }
229 b2cfc4e2 2003-09-30 devnull
230 b2cfc4e2 2003-09-30 devnull static Reprog*
231 b2cfc4e2 2003-09-30 devnull optimize(Reprog *pp)
232 b2cfc4e2 2003-09-30 devnull {
233 b2cfc4e2 2003-09-30 devnull Reinst *inst, *target;
234 b2cfc4e2 2003-09-30 devnull int size;
235 b2cfc4e2 2003-09-30 devnull Reprog *npp;
236 b2cfc4e2 2003-09-30 devnull Reclass *cl;
237 b2cfc4e2 2003-09-30 devnull int diff;
238 b2cfc4e2 2003-09-30 devnull
239 b2cfc4e2 2003-09-30 devnull /*
240 b2cfc4e2 2003-09-30 devnull * get rid of NOOP chains
241 b2cfc4e2 2003-09-30 devnull */
242 b2cfc4e2 2003-09-30 devnull for(inst=pp->firstinst; inst->type!=END; inst++){
243 b2cfc4e2 2003-09-30 devnull target = inst->u2.next;
244 b2cfc4e2 2003-09-30 devnull while(target->type == NOP)
245 b2cfc4e2 2003-09-30 devnull target = target->u2.next;
246 b2cfc4e2 2003-09-30 devnull inst->u2.next = target;
247 b2cfc4e2 2003-09-30 devnull }
248 b2cfc4e2 2003-09-30 devnull
249 b2cfc4e2 2003-09-30 devnull /*
250 b2cfc4e2 2003-09-30 devnull * The original allocation is for an area larger than
251 b2cfc4e2 2003-09-30 devnull * necessary. Reallocate to the actual space used
252 b2cfc4e2 2003-09-30 devnull * and then relocate the code.
253 b2cfc4e2 2003-09-30 devnull */
254 b2cfc4e2 2003-09-30 devnull size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
255 b2cfc4e2 2003-09-30 devnull npp = (Reprog *)realloc(pp, size);
256 b2cfc4e2 2003-09-30 devnull if(npp==0 || npp==pp)
257 b2cfc4e2 2003-09-30 devnull return pp;
258 b2cfc4e2 2003-09-30 devnull diff = (char *)npp - (char *)pp;
259 b2cfc4e2 2003-09-30 devnull freep = (Reinst *)((char *)freep + diff);
260 b2cfc4e2 2003-09-30 devnull for(inst=npp->firstinst; inst<freep; inst++){
261 b2cfc4e2 2003-09-30 devnull switch(inst->type){
262 b2cfc4e2 2003-09-30 devnull case OR:
263 b2cfc4e2 2003-09-30 devnull case STAR:
264 b2cfc4e2 2003-09-30 devnull case PLUS:
265 b2cfc4e2 2003-09-30 devnull case QUEST:
266 b2cfc4e2 2003-09-30 devnull *(char **)&inst->u1.right += diff;
267 b2cfc4e2 2003-09-30 devnull break;
268 b2cfc4e2 2003-09-30 devnull case CCLASS:
269 b2cfc4e2 2003-09-30 devnull case NCCLASS:
270 b2cfc4e2 2003-09-30 devnull *(char **)&inst->u1.right += diff;
271 b2cfc4e2 2003-09-30 devnull cl = inst->u1.cp;
272 b2cfc4e2 2003-09-30 devnull *(char **)&cl->end += diff;
273 b2cfc4e2 2003-09-30 devnull break;
274 b2cfc4e2 2003-09-30 devnull }
275 b2cfc4e2 2003-09-30 devnull *(char **)&inst->u2.left += diff;
276 b2cfc4e2 2003-09-30 devnull }
277 b2cfc4e2 2003-09-30 devnull *(char **)&npp->startinst += diff;
278 b2cfc4e2 2003-09-30 devnull return npp;
279 b2cfc4e2 2003-09-30 devnull }
280 b2cfc4e2 2003-09-30 devnull
281 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
282 b2cfc4e2 2003-09-30 devnull static void
283 b2cfc4e2 2003-09-30 devnull dumpstack(void){
284 b2cfc4e2 2003-09-30 devnull Node *stk;
285 b2cfc4e2 2003-09-30 devnull int *ip;
286 b2cfc4e2 2003-09-30 devnull
287 b2cfc4e2 2003-09-30 devnull print("operators\n");
288 b2cfc4e2 2003-09-30 devnull for(ip=atorstack; ip<atorp; ip++)
289 b2cfc4e2 2003-09-30 devnull print("0%o\n", *ip);
290 b2cfc4e2 2003-09-30 devnull print("operands\n");
291 b2cfc4e2 2003-09-30 devnull for(stk=andstack; stk<andp; stk++)
292 b2cfc4e2 2003-09-30 devnull print("0%o\t0%o\n", stk->first->type, stk->last->type);
293 b2cfc4e2 2003-09-30 devnull }
294 b2cfc4e2 2003-09-30 devnull
295 b2cfc4e2 2003-09-30 devnull static void
296 b2cfc4e2 2003-09-30 devnull dump(Reprog *pp)
297 b2cfc4e2 2003-09-30 devnull {
298 b2cfc4e2 2003-09-30 devnull Reinst *l;
299 b2cfc4e2 2003-09-30 devnull Rune *p;
300 b2cfc4e2 2003-09-30 devnull
301 b2cfc4e2 2003-09-30 devnull l = pp->firstinst;
302 b2cfc4e2 2003-09-30 devnull do{
303 b2cfc4e2 2003-09-30 devnull print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
304 b2cfc4e2 2003-09-30 devnull l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
305 b2cfc4e2 2003-09-30 devnull if(l->type == RUNE)
306 b2cfc4e2 2003-09-30 devnull print("\t%C\n", l->r);
307 b2cfc4e2 2003-09-30 devnull else if(l->type == CCLASS || l->type == NCCLASS){
308 b2cfc4e2 2003-09-30 devnull print("\t[");
309 b2cfc4e2 2003-09-30 devnull if(l->type == NCCLASS)
310 b2cfc4e2 2003-09-30 devnull print("^");
311 b2cfc4e2 2003-09-30 devnull for(p = l->cp->spans; p < l->cp->end; p += 2)
312 b2cfc4e2 2003-09-30 devnull if(p[0] == p[1])
313 b2cfc4e2 2003-09-30 devnull print("%C", p[0]);
314 b2cfc4e2 2003-09-30 devnull else
315 b2cfc4e2 2003-09-30 devnull print("%C-%C", p[0], p[1]);
316 b2cfc4e2 2003-09-30 devnull print("]\n");
317 b2cfc4e2 2003-09-30 devnull } else
318 b2cfc4e2 2003-09-30 devnull print("\n");
319 b2cfc4e2 2003-09-30 devnull }while(l++->type);
320 b2cfc4e2 2003-09-30 devnull }
321 b2cfc4e2 2003-09-30 devnull #endif
322 b2cfc4e2 2003-09-30 devnull
323 b2cfc4e2 2003-09-30 devnull static Reclass*
324 b2cfc4e2 2003-09-30 devnull newclass(void)
325 b2cfc4e2 2003-09-30 devnull {
326 b2cfc4e2 2003-09-30 devnull if(nclass >= NCLASS)
327 b2cfc4e2 2003-09-30 devnull regerr2("too many character classes; limit", NCLASS+'0');
328 b2cfc4e2 2003-09-30 devnull return &(classp[nclass++]);
329 b2cfc4e2 2003-09-30 devnull }
330 b2cfc4e2 2003-09-30 devnull
331 b2cfc4e2 2003-09-30 devnull static int
332 b2cfc4e2 2003-09-30 devnull nextc(Rune *rp)
333 b2cfc4e2 2003-09-30 devnull {
334 b2cfc4e2 2003-09-30 devnull if(lexdone){
335 b2cfc4e2 2003-09-30 devnull *rp = 0;
336 b2cfc4e2 2003-09-30 devnull return 1;
337 b2cfc4e2 2003-09-30 devnull }
338 b2cfc4e2 2003-09-30 devnull exprp += chartorune(rp, exprp);
339 b2cfc4e2 2003-09-30 devnull if(*rp == L'\\'){
340 b2cfc4e2 2003-09-30 devnull exprp += chartorune(rp, exprp);
341 b2cfc4e2 2003-09-30 devnull return 1;
342 b2cfc4e2 2003-09-30 devnull }
343 b2cfc4e2 2003-09-30 devnull if(*rp == 0)
344 b2cfc4e2 2003-09-30 devnull lexdone = 1;
345 b2cfc4e2 2003-09-30 devnull return 0;
346 b2cfc4e2 2003-09-30 devnull }
347 b2cfc4e2 2003-09-30 devnull
348 b2cfc4e2 2003-09-30 devnull static int
349 b2cfc4e2 2003-09-30 devnull lex(int literal, int dot_type)
350 b2cfc4e2 2003-09-30 devnull {
351 b2cfc4e2 2003-09-30 devnull int quoted;
352 b2cfc4e2 2003-09-30 devnull
353 b2cfc4e2 2003-09-30 devnull quoted = nextc(&yyrune);
354 b2cfc4e2 2003-09-30 devnull if(literal || quoted){
355 b2cfc4e2 2003-09-30 devnull if(yyrune == 0)
356 b2cfc4e2 2003-09-30 devnull return END;
357 b2cfc4e2 2003-09-30 devnull return RUNE;
358 b2cfc4e2 2003-09-30 devnull }
359 b2cfc4e2 2003-09-30 devnull
360 b2cfc4e2 2003-09-30 devnull switch(yyrune){
361 b2cfc4e2 2003-09-30 devnull case 0:
362 b2cfc4e2 2003-09-30 devnull return END;
363 b2cfc4e2 2003-09-30 devnull case L'*':
364 b2cfc4e2 2003-09-30 devnull return STAR;
365 b2cfc4e2 2003-09-30 devnull case L'?':
366 b2cfc4e2 2003-09-30 devnull return QUEST;
367 b2cfc4e2 2003-09-30 devnull case L'+':
368 b2cfc4e2 2003-09-30 devnull return PLUS;
369 b2cfc4e2 2003-09-30 devnull case L'|':
370 b2cfc4e2 2003-09-30 devnull return OR;
371 b2cfc4e2 2003-09-30 devnull case L'.':
372 b2cfc4e2 2003-09-30 devnull return dot_type;
373 b2cfc4e2 2003-09-30 devnull case L'(':
374 b2cfc4e2 2003-09-30 devnull return LBRA;
375 b2cfc4e2 2003-09-30 devnull case L')':
376 b2cfc4e2 2003-09-30 devnull return RBRA;
377 b2cfc4e2 2003-09-30 devnull case L'^':
378 b2cfc4e2 2003-09-30 devnull return BOL;
379 b2cfc4e2 2003-09-30 devnull case L'$':
380 b2cfc4e2 2003-09-30 devnull return EOL;
381 b2cfc4e2 2003-09-30 devnull case L'[':
382 b2cfc4e2 2003-09-30 devnull return bldcclass();
383 b2cfc4e2 2003-09-30 devnull }
384 b2cfc4e2 2003-09-30 devnull return RUNE;
385 b2cfc4e2 2003-09-30 devnull }
386 b2cfc4e2 2003-09-30 devnull
387 b2cfc4e2 2003-09-30 devnull static int
388 b2cfc4e2 2003-09-30 devnull bldcclass(void)
389 b2cfc4e2 2003-09-30 devnull {
390 b2cfc4e2 2003-09-30 devnull int type;
391 b2cfc4e2 2003-09-30 devnull Rune r[NCCRUNE];
392 b2cfc4e2 2003-09-30 devnull Rune *p, *ep, *np;
393 b2cfc4e2 2003-09-30 devnull Rune rune;
394 b2cfc4e2 2003-09-30 devnull int quoted;
395 b2cfc4e2 2003-09-30 devnull
396 b2cfc4e2 2003-09-30 devnull /* we have already seen the '[' */
397 b2cfc4e2 2003-09-30 devnull type = CCLASS;
398 b2cfc4e2 2003-09-30 devnull yyclassp = newclass();
399 b2cfc4e2 2003-09-30 devnull
400 b2cfc4e2 2003-09-30 devnull /* look ahead for negation */
401 b2cfc4e2 2003-09-30 devnull /* SPECIAL CASE!!! negated classes don't match \n */
402 b2cfc4e2 2003-09-30 devnull ep = r;
403 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
404 b2cfc4e2 2003-09-30 devnull if(!quoted && rune == L'^'){
405 b2cfc4e2 2003-09-30 devnull type = NCCLASS;
406 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
407 b2cfc4e2 2003-09-30 devnull *ep++ = L'\n';
408 b2cfc4e2 2003-09-30 devnull *ep++ = L'\n';
409 b2cfc4e2 2003-09-30 devnull }
410 b2cfc4e2 2003-09-30 devnull
411 b2cfc4e2 2003-09-30 devnull /* parse class into a set of spans */
412 b2cfc4e2 2003-09-30 devnull for(; ep<&r[NCCRUNE];){
413 b2cfc4e2 2003-09-30 devnull if(rune == 0){
414 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
415 b2cfc4e2 2003-09-30 devnull return 0;
416 b2cfc4e2 2003-09-30 devnull }
417 b2cfc4e2 2003-09-30 devnull if(!quoted && rune == L']')
418 b2cfc4e2 2003-09-30 devnull break;
419 b2cfc4e2 2003-09-30 devnull if(!quoted && rune == L'-'){
420 b2cfc4e2 2003-09-30 devnull if(ep == r){
421 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
422 b2cfc4e2 2003-09-30 devnull return 0;
423 b2cfc4e2 2003-09-30 devnull }
424 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
425 b2cfc4e2 2003-09-30 devnull if((!quoted && rune == L']') || rune == 0){
426 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
427 b2cfc4e2 2003-09-30 devnull return 0;
428 b2cfc4e2 2003-09-30 devnull }
429 b2cfc4e2 2003-09-30 devnull *(ep-1) = rune;
430 b2cfc4e2 2003-09-30 devnull } else {
431 b2cfc4e2 2003-09-30 devnull *ep++ = rune;
432 b2cfc4e2 2003-09-30 devnull *ep++ = rune;
433 b2cfc4e2 2003-09-30 devnull }
434 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
435 b2cfc4e2 2003-09-30 devnull }
436 b2cfc4e2 2003-09-30 devnull
437 b2cfc4e2 2003-09-30 devnull /* sort on span start */
438 b2cfc4e2 2003-09-30 devnull for(p = r; p < ep; p += 2){
439 b2cfc4e2 2003-09-30 devnull for(np = p; np < ep; np += 2)
440 b2cfc4e2 2003-09-30 devnull if(*np < *p){
441 b2cfc4e2 2003-09-30 devnull rune = np[0];
442 b2cfc4e2 2003-09-30 devnull np[0] = p[0];
443 b2cfc4e2 2003-09-30 devnull p[0] = rune;
444 b2cfc4e2 2003-09-30 devnull rune = np[1];
445 b2cfc4e2 2003-09-30 devnull np[1] = p[1];
446 b2cfc4e2 2003-09-30 devnull p[1] = rune;
447 b2cfc4e2 2003-09-30 devnull }
448 b2cfc4e2 2003-09-30 devnull }
449 b2cfc4e2 2003-09-30 devnull
450 b2cfc4e2 2003-09-30 devnull /* merge spans */
451 b2cfc4e2 2003-09-30 devnull np = yyclassp->spans;
452 b2cfc4e2 2003-09-30 devnull p = r;
453 b2cfc4e2 2003-09-30 devnull if(r == ep)
454 b2cfc4e2 2003-09-30 devnull yyclassp->end = np;
455 b2cfc4e2 2003-09-30 devnull else {
456 b2cfc4e2 2003-09-30 devnull np[0] = *p++;
457 b2cfc4e2 2003-09-30 devnull np[1] = *p++;
458 b2cfc4e2 2003-09-30 devnull for(; p < ep; p += 2)
459 b2cfc4e2 2003-09-30 devnull if(p[0] <= np[1]){
460 b2cfc4e2 2003-09-30 devnull if(p[1] > np[1])
461 b2cfc4e2 2003-09-30 devnull np[1] = p[1];
462 b2cfc4e2 2003-09-30 devnull } else {
463 b2cfc4e2 2003-09-30 devnull np += 2;
464 b2cfc4e2 2003-09-30 devnull np[0] = p[0];
465 b2cfc4e2 2003-09-30 devnull np[1] = p[1];
466 b2cfc4e2 2003-09-30 devnull }
467 b2cfc4e2 2003-09-30 devnull yyclassp->end = np+2;
468 b2cfc4e2 2003-09-30 devnull }
469 b2cfc4e2 2003-09-30 devnull
470 b2cfc4e2 2003-09-30 devnull return type;
471 b2cfc4e2 2003-09-30 devnull }
472 b2cfc4e2 2003-09-30 devnull
473 b2cfc4e2 2003-09-30 devnull static Reprog*
474 b2cfc4e2 2003-09-30 devnull regcomp1(char *s, int literal, int dot_type)
475 b2cfc4e2 2003-09-30 devnull {
476 b2cfc4e2 2003-09-30 devnull int token;
477 b2cfc4e2 2003-09-30 devnull Reprog *pp;
478 b2cfc4e2 2003-09-30 devnull
479 b2cfc4e2 2003-09-30 devnull /* get memory for the program */
480 b2cfc4e2 2003-09-30 devnull pp = (Reprog *)malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
481 b2cfc4e2 2003-09-30 devnull if(pp == 0){
482 b2cfc4e2 2003-09-30 devnull regerror("out of memory");
483 b2cfc4e2 2003-09-30 devnull return 0;
484 b2cfc4e2 2003-09-30 devnull }
485 b2cfc4e2 2003-09-30 devnull freep = pp->firstinst;
486 b2cfc4e2 2003-09-30 devnull classp = pp->class;
487 b2cfc4e2 2003-09-30 devnull errors = 0;
488 b2cfc4e2 2003-09-30 devnull
489 b2cfc4e2 2003-09-30 devnull if(setjmp(regkaboom))
490 b2cfc4e2 2003-09-30 devnull goto out;
491 b2cfc4e2 2003-09-30 devnull
492 b2cfc4e2 2003-09-30 devnull /* go compile the sucker */
493 b2cfc4e2 2003-09-30 devnull lexdone = 0;
494 b2cfc4e2 2003-09-30 devnull exprp = s;
495 b2cfc4e2 2003-09-30 devnull nclass = 0;
496 b2cfc4e2 2003-09-30 devnull nbra = 0;
497 b2cfc4e2 2003-09-30 devnull atorp = atorstack;
498 b2cfc4e2 2003-09-30 devnull andp = andstack;
499 b2cfc4e2 2003-09-30 devnull subidp = subidstack;
500 b2cfc4e2 2003-09-30 devnull lastwasand = FALSE;
501 b2cfc4e2 2003-09-30 devnull cursubid = 0;
502 b2cfc4e2 2003-09-30 devnull
503 b2cfc4e2 2003-09-30 devnull /* Start with a low priority operator to prime parser */
504 b2cfc4e2 2003-09-30 devnull pushator(START-1);
505 b2cfc4e2 2003-09-30 devnull while((token = lex(literal, dot_type)) != END){
506 b2cfc4e2 2003-09-30 devnull if((token&0300) == OPERATOR)
507 b2cfc4e2 2003-09-30 devnull operator(token);
508 b2cfc4e2 2003-09-30 devnull else
509 b2cfc4e2 2003-09-30 devnull operand(token);
510 b2cfc4e2 2003-09-30 devnull }
511 b2cfc4e2 2003-09-30 devnull
512 b2cfc4e2 2003-09-30 devnull /* Close with a low priority operator */
513 b2cfc4e2 2003-09-30 devnull evaluntil(START);
514 b2cfc4e2 2003-09-30 devnull
515 b2cfc4e2 2003-09-30 devnull /* Force END */
516 b2cfc4e2 2003-09-30 devnull operand(END);
517 b2cfc4e2 2003-09-30 devnull evaluntil(START);
518 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
519 b2cfc4e2 2003-09-30 devnull dumpstack();
520 b2cfc4e2 2003-09-30 devnull #endif
521 b2cfc4e2 2003-09-30 devnull if(nbra)
522 b2cfc4e2 2003-09-30 devnull rcerror("unmatched left paren");
523 b2cfc4e2 2003-09-30 devnull --andp; /* points to first and only operand */
524 b2cfc4e2 2003-09-30 devnull pp->startinst = andp->first;
525 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
526 b2cfc4e2 2003-09-30 devnull dump(pp);
527 b2cfc4e2 2003-09-30 devnull #endif
528 b2cfc4e2 2003-09-30 devnull pp = optimize(pp);
529 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
530 b2cfc4e2 2003-09-30 devnull print("start: %d\n", andp->first-pp->firstinst);
531 b2cfc4e2 2003-09-30 devnull dump(pp);
532 b2cfc4e2 2003-09-30 devnull #endif
533 b2cfc4e2 2003-09-30 devnull out:
534 b2cfc4e2 2003-09-30 devnull if(errors){
535 b2cfc4e2 2003-09-30 devnull free(pp);
536 b2cfc4e2 2003-09-30 devnull pp = 0;
537 b2cfc4e2 2003-09-30 devnull }
538 b2cfc4e2 2003-09-30 devnull return pp;
539 b2cfc4e2 2003-09-30 devnull }
540 b2cfc4e2 2003-09-30 devnull
541 b2cfc4e2 2003-09-30 devnull extern Reprog*
542 b2cfc4e2 2003-09-30 devnull regcomp(char *s)
543 b2cfc4e2 2003-09-30 devnull {
544 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 0, ANY);
545 b2cfc4e2 2003-09-30 devnull }
546 b2cfc4e2 2003-09-30 devnull
547 b2cfc4e2 2003-09-30 devnull extern Reprog*
548 b2cfc4e2 2003-09-30 devnull regcomplit(char *s)
549 b2cfc4e2 2003-09-30 devnull {
550 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 1, ANY);
551 b2cfc4e2 2003-09-30 devnull }
552 b2cfc4e2 2003-09-30 devnull
553 b2cfc4e2 2003-09-30 devnull extern Reprog*
554 b2cfc4e2 2003-09-30 devnull regcompnl(char *s)
555 b2cfc4e2 2003-09-30 devnull {
556 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 0, ANYNL);
557 b2cfc4e2 2003-09-30 devnull }