Blame


1 b2cfc4e2 2003-09-30 devnull #include "lib9.h"
2 b2cfc4e2 2003-09-30 devnull #include "regexp9.h"
3 b2cfc4e2 2003-09-30 devnull #include "regcomp.h"
4 b2cfc4e2 2003-09-30 devnull
5 b2cfc4e2 2003-09-30 devnull #define TRUE 1
6 b2cfc4e2 2003-09-30 devnull #define FALSE 0
7 b2cfc4e2 2003-09-30 devnull
8 b2cfc4e2 2003-09-30 devnull /*
9 b2cfc4e2 2003-09-30 devnull * Parser Information
10 b2cfc4e2 2003-09-30 devnull */
11 b2cfc4e2 2003-09-30 devnull typedef
12 b2cfc4e2 2003-09-30 devnull struct Node
13 b2cfc4e2 2003-09-30 devnull {
14 b2cfc4e2 2003-09-30 devnull Reinst* first;
15 b2cfc4e2 2003-09-30 devnull Reinst* last;
16 b2cfc4e2 2003-09-30 devnull }Node;
17 b2cfc4e2 2003-09-30 devnull
18 b2cfc4e2 2003-09-30 devnull #define NSTACK 20
19 b2cfc4e2 2003-09-30 devnull static Node andstack[NSTACK];
20 b2cfc4e2 2003-09-30 devnull static Node *andp;
21 b2cfc4e2 2003-09-30 devnull static int atorstack[NSTACK];
22 b2cfc4e2 2003-09-30 devnull static int* atorp;
23 b2cfc4e2 2003-09-30 devnull static int cursubid; /* id of current subexpression */
24 b2cfc4e2 2003-09-30 devnull static int subidstack[NSTACK]; /* parallel to atorstack */
25 b2cfc4e2 2003-09-30 devnull static int* subidp;
26 b2cfc4e2 2003-09-30 devnull static int lastwasand; /* Last token was operand */
27 b2cfc4e2 2003-09-30 devnull static int nbra;
28 b2cfc4e2 2003-09-30 devnull static char* exprp; /* pointer to next character in source expression */
29 b2cfc4e2 2003-09-30 devnull static int lexdone;
30 b2cfc4e2 2003-09-30 devnull static int nclass;
31 b2cfc4e2 2003-09-30 devnull static Reclass*classp;
32 b2cfc4e2 2003-09-30 devnull static Reinst* freep;
33 b2cfc4e2 2003-09-30 devnull static int errors;
34 b2cfc4e2 2003-09-30 devnull static Rune yyrune; /* last lex'd rune */
35 b2cfc4e2 2003-09-30 devnull static Reclass*yyclassp; /* last lex'd class */
36 b2cfc4e2 2003-09-30 devnull
37 b2cfc4e2 2003-09-30 devnull /* predeclared crap */
38 b2cfc4e2 2003-09-30 devnull static void operator(int);
39 b2cfc4e2 2003-09-30 devnull static void pushand(Reinst*, Reinst*);
40 b2cfc4e2 2003-09-30 devnull static void pushator(int);
41 b2cfc4e2 2003-09-30 devnull static void evaluntil(int);
42 b2cfc4e2 2003-09-30 devnull static int bldcclass(void);
43 b2cfc4e2 2003-09-30 devnull
44 b2cfc4e2 2003-09-30 devnull static jmp_buf regkaboom;
45 b2cfc4e2 2003-09-30 devnull
46 b2cfc4e2 2003-09-30 devnull static void
47 b2cfc4e2 2003-09-30 devnull rcerror(char *s)
48 b2cfc4e2 2003-09-30 devnull {
49 b2cfc4e2 2003-09-30 devnull errors++;
50 b2cfc4e2 2003-09-30 devnull regerror(s);
51 b2cfc4e2 2003-09-30 devnull longjmp(regkaboom, 1);
52 b2cfc4e2 2003-09-30 devnull }
53 b2cfc4e2 2003-09-30 devnull
54 b2cfc4e2 2003-09-30 devnull static Reinst*
55 b2cfc4e2 2003-09-30 devnull newinst(int t)
56 b2cfc4e2 2003-09-30 devnull {
57 b2cfc4e2 2003-09-30 devnull freep->type = t;
58 b2cfc4e2 2003-09-30 devnull freep->u2.left = 0;
59 b2cfc4e2 2003-09-30 devnull freep->u1.right = 0;
60 b2cfc4e2 2003-09-30 devnull return freep++;
61 b2cfc4e2 2003-09-30 devnull }
62 b2cfc4e2 2003-09-30 devnull
63 b2cfc4e2 2003-09-30 devnull static void
64 b2cfc4e2 2003-09-30 devnull operand(int t)
65 b2cfc4e2 2003-09-30 devnull {
66 b2cfc4e2 2003-09-30 devnull Reinst *i;
67 b2cfc4e2 2003-09-30 devnull
68 b2cfc4e2 2003-09-30 devnull if(lastwasand)
69 b2cfc4e2 2003-09-30 devnull operator(CAT); /* catenate is implicit */
70 b2cfc4e2 2003-09-30 devnull i = newinst(t);
71 b2cfc4e2 2003-09-30 devnull
72 b2cfc4e2 2003-09-30 devnull if(t == CCLASS || t == NCCLASS)
73 b2cfc4e2 2003-09-30 devnull i->u1.cp = yyclassp;
74 b2cfc4e2 2003-09-30 devnull if(t == RUNE)
75 b2cfc4e2 2003-09-30 devnull i->u1.r = yyrune;
76 b2cfc4e2 2003-09-30 devnull
77 b2cfc4e2 2003-09-30 devnull pushand(i, i);
78 b2cfc4e2 2003-09-30 devnull lastwasand = TRUE;
79 b2cfc4e2 2003-09-30 devnull }
80 b2cfc4e2 2003-09-30 devnull
81 b2cfc4e2 2003-09-30 devnull static void
82 b2cfc4e2 2003-09-30 devnull operator(int t)
83 b2cfc4e2 2003-09-30 devnull {
84 b2cfc4e2 2003-09-30 devnull if(t==RBRA && --nbra<0)
85 b2cfc4e2 2003-09-30 devnull rcerror("unmatched right paren");
86 b2cfc4e2 2003-09-30 devnull if(t==LBRA){
87 b2cfc4e2 2003-09-30 devnull if(++cursubid >= NSUBEXP)
88 b2cfc4e2 2003-09-30 devnull rcerror ("too many subexpressions");
89 b2cfc4e2 2003-09-30 devnull nbra++;
90 b2cfc4e2 2003-09-30 devnull if(lastwasand)
91 b2cfc4e2 2003-09-30 devnull operator(CAT);
92 b2cfc4e2 2003-09-30 devnull } else
93 b2cfc4e2 2003-09-30 devnull evaluntil(t);
94 b2cfc4e2 2003-09-30 devnull if(t != RBRA)
95 b2cfc4e2 2003-09-30 devnull pushator(t);
96 b2cfc4e2 2003-09-30 devnull lastwasand = FALSE;
97 b2cfc4e2 2003-09-30 devnull if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
98 b2cfc4e2 2003-09-30 devnull lastwasand = TRUE; /* these look like operands */
99 b2cfc4e2 2003-09-30 devnull }
100 b2cfc4e2 2003-09-30 devnull
101 b2cfc4e2 2003-09-30 devnull static void
102 b2cfc4e2 2003-09-30 devnull regerr2(char *s, int c)
103 b2cfc4e2 2003-09-30 devnull {
104 b2cfc4e2 2003-09-30 devnull char buf[100];
105 b2cfc4e2 2003-09-30 devnull char *cp = buf;
106 b2cfc4e2 2003-09-30 devnull while(*s)
107 b2cfc4e2 2003-09-30 devnull *cp++ = *s++;
108 b2cfc4e2 2003-09-30 devnull *cp++ = c;
109 b2cfc4e2 2003-09-30 devnull *cp = '\0';
110 b2cfc4e2 2003-09-30 devnull rcerror(buf);
111 b2cfc4e2 2003-09-30 devnull }
112 b2cfc4e2 2003-09-30 devnull
113 b2cfc4e2 2003-09-30 devnull static void
114 b2cfc4e2 2003-09-30 devnull cant(char *s)
115 b2cfc4e2 2003-09-30 devnull {
116 b2cfc4e2 2003-09-30 devnull char buf[100];
117 b2cfc4e2 2003-09-30 devnull strcpy(buf, "can't happen: ");
118 b2cfc4e2 2003-09-30 devnull strcat(buf, s);
119 b2cfc4e2 2003-09-30 devnull rcerror(buf);
120 b2cfc4e2 2003-09-30 devnull }
121 b2cfc4e2 2003-09-30 devnull
122 b2cfc4e2 2003-09-30 devnull static void
123 b2cfc4e2 2003-09-30 devnull pushand(Reinst *f, Reinst *l)
124 b2cfc4e2 2003-09-30 devnull {
125 b2cfc4e2 2003-09-30 devnull if(andp >= &andstack[NSTACK])
126 b2cfc4e2 2003-09-30 devnull cant("operand stack overflow");
127 b2cfc4e2 2003-09-30 devnull andp->first = f;
128 b2cfc4e2 2003-09-30 devnull andp->last = l;
129 b2cfc4e2 2003-09-30 devnull andp++;
130 b2cfc4e2 2003-09-30 devnull }
131 b2cfc4e2 2003-09-30 devnull
132 b2cfc4e2 2003-09-30 devnull static void
133 b2cfc4e2 2003-09-30 devnull pushator(int t)
134 b2cfc4e2 2003-09-30 devnull {
135 b2cfc4e2 2003-09-30 devnull if(atorp >= &atorstack[NSTACK])
136 b2cfc4e2 2003-09-30 devnull cant("operator stack overflow");
137 b2cfc4e2 2003-09-30 devnull *atorp++ = t;
138 b2cfc4e2 2003-09-30 devnull *subidp++ = cursubid;
139 b2cfc4e2 2003-09-30 devnull }
140 b2cfc4e2 2003-09-30 devnull
141 b2cfc4e2 2003-09-30 devnull static Node*
142 b2cfc4e2 2003-09-30 devnull popand(int op)
143 b2cfc4e2 2003-09-30 devnull {
144 b2cfc4e2 2003-09-30 devnull Reinst *inst;
145 b2cfc4e2 2003-09-30 devnull
146 b2cfc4e2 2003-09-30 devnull if(andp <= &andstack[0]){
147 b2cfc4e2 2003-09-30 devnull regerr2("missing operand for ", op);
148 b2cfc4e2 2003-09-30 devnull inst = newinst(NOP);
149 b2cfc4e2 2003-09-30 devnull pushand(inst,inst);
150 b2cfc4e2 2003-09-30 devnull }
151 b2cfc4e2 2003-09-30 devnull return --andp;
152 b2cfc4e2 2003-09-30 devnull }
153 b2cfc4e2 2003-09-30 devnull
154 b2cfc4e2 2003-09-30 devnull static int
155 b2cfc4e2 2003-09-30 devnull popator(void)
156 b2cfc4e2 2003-09-30 devnull {
157 b2cfc4e2 2003-09-30 devnull if(atorp <= &atorstack[0])
158 b2cfc4e2 2003-09-30 devnull cant("operator stack underflow");
159 b2cfc4e2 2003-09-30 devnull --subidp;
160 b2cfc4e2 2003-09-30 devnull return *--atorp;
161 b2cfc4e2 2003-09-30 devnull }
162 b2cfc4e2 2003-09-30 devnull
163 b2cfc4e2 2003-09-30 devnull static void
164 b2cfc4e2 2003-09-30 devnull evaluntil(int pri)
165 b2cfc4e2 2003-09-30 devnull {
166 b2cfc4e2 2003-09-30 devnull Node *op1, *op2;
167 b2cfc4e2 2003-09-30 devnull Reinst *inst1, *inst2;
168 b2cfc4e2 2003-09-30 devnull
169 b2cfc4e2 2003-09-30 devnull while(pri==RBRA || atorp[-1]>=pri){
170 b2cfc4e2 2003-09-30 devnull switch(popator()){
171 b2cfc4e2 2003-09-30 devnull default:
172 b2cfc4e2 2003-09-30 devnull rcerror("unknown operator in evaluntil");
173 b2cfc4e2 2003-09-30 devnull break;
174 b2cfc4e2 2003-09-30 devnull case LBRA: /* must have been RBRA */
175 b2cfc4e2 2003-09-30 devnull op1 = popand('(');
176 b2cfc4e2 2003-09-30 devnull inst2 = newinst(RBRA);
177 b2cfc4e2 2003-09-30 devnull inst2->u1.subid = *subidp;
178 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = inst2;
179 b2cfc4e2 2003-09-30 devnull inst1 = newinst(LBRA);
180 b2cfc4e2 2003-09-30 devnull inst1->u1.subid = *subidp;
181 b2cfc4e2 2003-09-30 devnull inst1->u2.next = op1->first;
182 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
183 b2cfc4e2 2003-09-30 devnull return;
184 b2cfc4e2 2003-09-30 devnull case OR:
185 b2cfc4e2 2003-09-30 devnull op2 = popand('|');
186 b2cfc4e2 2003-09-30 devnull op1 = popand('|');
187 b2cfc4e2 2003-09-30 devnull inst2 = newinst(NOP);
188 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst2;
189 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = inst2;
190 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
191 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op1->first;
192 b2cfc4e2 2003-09-30 devnull inst1->u2.left = op2->first;
193 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
194 b2cfc4e2 2003-09-30 devnull break;
195 b2cfc4e2 2003-09-30 devnull case CAT:
196 b2cfc4e2 2003-09-30 devnull op2 = popand(0);
197 b2cfc4e2 2003-09-30 devnull op1 = popand(0);
198 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = op2->first;
199 b2cfc4e2 2003-09-30 devnull pushand(op1->first, op2->last);
200 b2cfc4e2 2003-09-30 devnull break;
201 b2cfc4e2 2003-09-30 devnull case STAR:
202 b2cfc4e2 2003-09-30 devnull op2 = popand('*');
203 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
204 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst1;
205 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
206 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst1);
207 b2cfc4e2 2003-09-30 devnull break;
208 b2cfc4e2 2003-09-30 devnull case PLUS:
209 b2cfc4e2 2003-09-30 devnull op2 = popand('+');
210 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
211 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst1;
212 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
213 b2cfc4e2 2003-09-30 devnull pushand(op2->first, inst1);
214 b2cfc4e2 2003-09-30 devnull break;
215 b2cfc4e2 2003-09-30 devnull case QUEST:
216 b2cfc4e2 2003-09-30 devnull op2 = popand('?');
217 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
218 b2cfc4e2 2003-09-30 devnull inst2 = newinst(NOP);
219 b2cfc4e2 2003-09-30 devnull inst1->u2.left = inst2;
220 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
221 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst2;
222 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
223 b2cfc4e2 2003-09-30 devnull break;
224 b2cfc4e2 2003-09-30 devnull }
225 b2cfc4e2 2003-09-30 devnull }
226 b2cfc4e2 2003-09-30 devnull }
227 b2cfc4e2 2003-09-30 devnull
228 b2cfc4e2 2003-09-30 devnull static Reprog*
229 b2cfc4e2 2003-09-30 devnull optimize(Reprog *pp)
230 b2cfc4e2 2003-09-30 devnull {
231 b2cfc4e2 2003-09-30 devnull Reinst *inst, *target;
232 b2cfc4e2 2003-09-30 devnull int size;
233 b2cfc4e2 2003-09-30 devnull Reprog *npp;
234 b2cfc4e2 2003-09-30 devnull Reclass *cl;
235 b2cfc4e2 2003-09-30 devnull int diff;
236 b2cfc4e2 2003-09-30 devnull
237 b2cfc4e2 2003-09-30 devnull /*
238 b2cfc4e2 2003-09-30 devnull * get rid of NOOP chains
239 b2cfc4e2 2003-09-30 devnull */
240 b2cfc4e2 2003-09-30 devnull for(inst=pp->firstinst; inst->type!=END; inst++){
241 b2cfc4e2 2003-09-30 devnull target = inst->u2.next;
242 b2cfc4e2 2003-09-30 devnull while(target->type == NOP)
243 b2cfc4e2 2003-09-30 devnull target = target->u2.next;
244 b2cfc4e2 2003-09-30 devnull inst->u2.next = target;
245 b2cfc4e2 2003-09-30 devnull }
246 b2cfc4e2 2003-09-30 devnull
247 b2cfc4e2 2003-09-30 devnull /*
248 b2cfc4e2 2003-09-30 devnull * The original allocation is for an area larger than
249 b2cfc4e2 2003-09-30 devnull * necessary. Reallocate to the actual space used
250 b2cfc4e2 2003-09-30 devnull * and then relocate the code.
251 b2cfc4e2 2003-09-30 devnull */
252 b2cfc4e2 2003-09-30 devnull size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
253 62390091 2004-03-05 devnull npp = realloc(pp, size);
254 b2cfc4e2 2003-09-30 devnull if(npp==0 || npp==pp)
255 b2cfc4e2 2003-09-30 devnull return pp;
256 b2cfc4e2 2003-09-30 devnull diff = (char *)npp - (char *)pp;
257 b2cfc4e2 2003-09-30 devnull freep = (Reinst *)((char *)freep + diff);
258 b2cfc4e2 2003-09-30 devnull for(inst=npp->firstinst; inst<freep; inst++){
259 b2cfc4e2 2003-09-30 devnull switch(inst->type){
260 b2cfc4e2 2003-09-30 devnull case OR:
261 b2cfc4e2 2003-09-30 devnull case STAR:
262 b2cfc4e2 2003-09-30 devnull case PLUS:
263 b2cfc4e2 2003-09-30 devnull case QUEST:
264 b2cfc4e2 2003-09-30 devnull *(char **)&inst->u1.right += diff;
265 b2cfc4e2 2003-09-30 devnull break;
266 b2cfc4e2 2003-09-30 devnull case CCLASS:
267 b2cfc4e2 2003-09-30 devnull case NCCLASS:
268 b2cfc4e2 2003-09-30 devnull *(char **)&inst->u1.right += diff;
269 b2cfc4e2 2003-09-30 devnull cl = inst->u1.cp;
270 b2cfc4e2 2003-09-30 devnull *(char **)&cl->end += diff;
271 b2cfc4e2 2003-09-30 devnull break;
272 b2cfc4e2 2003-09-30 devnull }
273 b2cfc4e2 2003-09-30 devnull *(char **)&inst->u2.left += diff;
274 b2cfc4e2 2003-09-30 devnull }
275 b2cfc4e2 2003-09-30 devnull *(char **)&npp->startinst += diff;
276 b2cfc4e2 2003-09-30 devnull return npp;
277 b2cfc4e2 2003-09-30 devnull }
278 b2cfc4e2 2003-09-30 devnull
279 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
280 b2cfc4e2 2003-09-30 devnull static void
281 b2cfc4e2 2003-09-30 devnull dumpstack(void){
282 b2cfc4e2 2003-09-30 devnull Node *stk;
283 b2cfc4e2 2003-09-30 devnull int *ip;
284 b2cfc4e2 2003-09-30 devnull
285 b2cfc4e2 2003-09-30 devnull print("operators\n");
286 b2cfc4e2 2003-09-30 devnull for(ip=atorstack; ip<atorp; ip++)
287 b2cfc4e2 2003-09-30 devnull print("0%o\n", *ip);
288 b2cfc4e2 2003-09-30 devnull print("operands\n");
289 b2cfc4e2 2003-09-30 devnull for(stk=andstack; stk<andp; stk++)
290 b2cfc4e2 2003-09-30 devnull print("0%o\t0%o\n", stk->first->type, stk->last->type);
291 b2cfc4e2 2003-09-30 devnull }
292 b2cfc4e2 2003-09-30 devnull
293 b2cfc4e2 2003-09-30 devnull static void
294 b2cfc4e2 2003-09-30 devnull dump(Reprog *pp)
295 b2cfc4e2 2003-09-30 devnull {
296 b2cfc4e2 2003-09-30 devnull Reinst *l;
297 b2cfc4e2 2003-09-30 devnull Rune *p;
298 b2cfc4e2 2003-09-30 devnull
299 b2cfc4e2 2003-09-30 devnull l = pp->firstinst;
300 b2cfc4e2 2003-09-30 devnull do{
301 b2cfc4e2 2003-09-30 devnull print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
302 b2cfc4e2 2003-09-30 devnull l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
303 b2cfc4e2 2003-09-30 devnull if(l->type == RUNE)
304 62390091 2004-03-05 devnull print("\t%C\n", l->u1.r);
305 b2cfc4e2 2003-09-30 devnull else if(l->type == CCLASS || l->type == NCCLASS){
306 b2cfc4e2 2003-09-30 devnull print("\t[");
307 b2cfc4e2 2003-09-30 devnull if(l->type == NCCLASS)
308 b2cfc4e2 2003-09-30 devnull print("^");
309 62390091 2004-03-05 devnull for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
310 b2cfc4e2 2003-09-30 devnull if(p[0] == p[1])
311 b2cfc4e2 2003-09-30 devnull print("%C", p[0]);
312 b2cfc4e2 2003-09-30 devnull else
313 b2cfc4e2 2003-09-30 devnull print("%C-%C", p[0], p[1]);
314 b2cfc4e2 2003-09-30 devnull print("]\n");
315 b2cfc4e2 2003-09-30 devnull } else
316 b2cfc4e2 2003-09-30 devnull print("\n");
317 b2cfc4e2 2003-09-30 devnull }while(l++->type);
318 b2cfc4e2 2003-09-30 devnull }
319 b2cfc4e2 2003-09-30 devnull #endif
320 b2cfc4e2 2003-09-30 devnull
321 b2cfc4e2 2003-09-30 devnull static Reclass*
322 b2cfc4e2 2003-09-30 devnull newclass(void)
323 b2cfc4e2 2003-09-30 devnull {
324 b2cfc4e2 2003-09-30 devnull if(nclass >= NCLASS)
325 b2cfc4e2 2003-09-30 devnull regerr2("too many character classes; limit", NCLASS+'0');
326 b2cfc4e2 2003-09-30 devnull return &(classp[nclass++]);
327 b2cfc4e2 2003-09-30 devnull }
328 b2cfc4e2 2003-09-30 devnull
329 b2cfc4e2 2003-09-30 devnull static int
330 b2cfc4e2 2003-09-30 devnull nextc(Rune *rp)
331 b2cfc4e2 2003-09-30 devnull {
332 b2cfc4e2 2003-09-30 devnull if(lexdone){
333 b2cfc4e2 2003-09-30 devnull *rp = 0;
334 b2cfc4e2 2003-09-30 devnull return 1;
335 b2cfc4e2 2003-09-30 devnull }
336 b2cfc4e2 2003-09-30 devnull exprp += chartorune(rp, exprp);
337 b2cfc4e2 2003-09-30 devnull if(*rp == L'\\'){
338 b2cfc4e2 2003-09-30 devnull exprp += chartorune(rp, exprp);
339 b2cfc4e2 2003-09-30 devnull return 1;
340 b2cfc4e2 2003-09-30 devnull }
341 b2cfc4e2 2003-09-30 devnull if(*rp == 0)
342 b2cfc4e2 2003-09-30 devnull lexdone = 1;
343 b2cfc4e2 2003-09-30 devnull return 0;
344 b2cfc4e2 2003-09-30 devnull }
345 b2cfc4e2 2003-09-30 devnull
346 b2cfc4e2 2003-09-30 devnull static int
347 b2cfc4e2 2003-09-30 devnull lex(int literal, int dot_type)
348 b2cfc4e2 2003-09-30 devnull {
349 b2cfc4e2 2003-09-30 devnull int quoted;
350 b2cfc4e2 2003-09-30 devnull
351 b2cfc4e2 2003-09-30 devnull quoted = nextc(&yyrune);
352 b2cfc4e2 2003-09-30 devnull if(literal || quoted){
353 b2cfc4e2 2003-09-30 devnull if(yyrune == 0)
354 b2cfc4e2 2003-09-30 devnull return END;
355 b2cfc4e2 2003-09-30 devnull return RUNE;
356 b2cfc4e2 2003-09-30 devnull }
357 b2cfc4e2 2003-09-30 devnull
358 b2cfc4e2 2003-09-30 devnull switch(yyrune){
359 b2cfc4e2 2003-09-30 devnull case 0:
360 b2cfc4e2 2003-09-30 devnull return END;
361 b2cfc4e2 2003-09-30 devnull case L'*':
362 b2cfc4e2 2003-09-30 devnull return STAR;
363 b2cfc4e2 2003-09-30 devnull case L'?':
364 b2cfc4e2 2003-09-30 devnull return QUEST;
365 b2cfc4e2 2003-09-30 devnull case L'+':
366 b2cfc4e2 2003-09-30 devnull return PLUS;
367 b2cfc4e2 2003-09-30 devnull case L'|':
368 b2cfc4e2 2003-09-30 devnull return OR;
369 b2cfc4e2 2003-09-30 devnull case L'.':
370 b2cfc4e2 2003-09-30 devnull return dot_type;
371 b2cfc4e2 2003-09-30 devnull case L'(':
372 b2cfc4e2 2003-09-30 devnull return LBRA;
373 b2cfc4e2 2003-09-30 devnull case L')':
374 b2cfc4e2 2003-09-30 devnull return RBRA;
375 b2cfc4e2 2003-09-30 devnull case L'^':
376 b2cfc4e2 2003-09-30 devnull return BOL;
377 b2cfc4e2 2003-09-30 devnull case L'$':
378 b2cfc4e2 2003-09-30 devnull return EOL;
379 b2cfc4e2 2003-09-30 devnull case L'[':
380 b2cfc4e2 2003-09-30 devnull return bldcclass();
381 b2cfc4e2 2003-09-30 devnull }
382 b2cfc4e2 2003-09-30 devnull return RUNE;
383 b2cfc4e2 2003-09-30 devnull }
384 b2cfc4e2 2003-09-30 devnull
385 b2cfc4e2 2003-09-30 devnull static int
386 b2cfc4e2 2003-09-30 devnull bldcclass(void)
387 b2cfc4e2 2003-09-30 devnull {
388 b2cfc4e2 2003-09-30 devnull int type;
389 b2cfc4e2 2003-09-30 devnull Rune r[NCCRUNE];
390 b2cfc4e2 2003-09-30 devnull Rune *p, *ep, *np;
391 b2cfc4e2 2003-09-30 devnull Rune rune;
392 b2cfc4e2 2003-09-30 devnull int quoted;
393 b2cfc4e2 2003-09-30 devnull
394 b2cfc4e2 2003-09-30 devnull /* we have already seen the '[' */
395 b2cfc4e2 2003-09-30 devnull type = CCLASS;
396 b2cfc4e2 2003-09-30 devnull yyclassp = newclass();
397 b2cfc4e2 2003-09-30 devnull
398 b2cfc4e2 2003-09-30 devnull /* look ahead for negation */
399 b2cfc4e2 2003-09-30 devnull /* SPECIAL CASE!!! negated classes don't match \n */
400 b2cfc4e2 2003-09-30 devnull ep = r;
401 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
402 b2cfc4e2 2003-09-30 devnull if(!quoted && rune == L'^'){
403 b2cfc4e2 2003-09-30 devnull type = NCCLASS;
404 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
405 b2cfc4e2 2003-09-30 devnull *ep++ = L'\n';
406 b2cfc4e2 2003-09-30 devnull *ep++ = L'\n';
407 b2cfc4e2 2003-09-30 devnull }
408 b2cfc4e2 2003-09-30 devnull
409 b2cfc4e2 2003-09-30 devnull /* parse class into a set of spans */
410 b2cfc4e2 2003-09-30 devnull for(; ep<&r[NCCRUNE];){
411 b2cfc4e2 2003-09-30 devnull if(rune == 0){
412 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
413 b2cfc4e2 2003-09-30 devnull return 0;
414 b2cfc4e2 2003-09-30 devnull }
415 b2cfc4e2 2003-09-30 devnull if(!quoted && rune == L']')
416 b2cfc4e2 2003-09-30 devnull break;
417 b2cfc4e2 2003-09-30 devnull if(!quoted && rune == L'-'){
418 b2cfc4e2 2003-09-30 devnull if(ep == r){
419 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
420 b2cfc4e2 2003-09-30 devnull return 0;
421 b2cfc4e2 2003-09-30 devnull }
422 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
423 b2cfc4e2 2003-09-30 devnull if((!quoted && rune == L']') || rune == 0){
424 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
425 b2cfc4e2 2003-09-30 devnull return 0;
426 b2cfc4e2 2003-09-30 devnull }
427 b2cfc4e2 2003-09-30 devnull *(ep-1) = rune;
428 b2cfc4e2 2003-09-30 devnull } else {
429 b2cfc4e2 2003-09-30 devnull *ep++ = rune;
430 b2cfc4e2 2003-09-30 devnull *ep++ = rune;
431 b2cfc4e2 2003-09-30 devnull }
432 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
433 b2cfc4e2 2003-09-30 devnull }
434 b2cfc4e2 2003-09-30 devnull
435 b2cfc4e2 2003-09-30 devnull /* sort on span start */
436 b2cfc4e2 2003-09-30 devnull for(p = r; p < ep; p += 2){
437 b2cfc4e2 2003-09-30 devnull for(np = p; np < ep; np += 2)
438 b2cfc4e2 2003-09-30 devnull if(*np < *p){
439 b2cfc4e2 2003-09-30 devnull rune = np[0];
440 b2cfc4e2 2003-09-30 devnull np[0] = p[0];
441 b2cfc4e2 2003-09-30 devnull p[0] = rune;
442 b2cfc4e2 2003-09-30 devnull rune = np[1];
443 b2cfc4e2 2003-09-30 devnull np[1] = p[1];
444 b2cfc4e2 2003-09-30 devnull p[1] = rune;
445 b2cfc4e2 2003-09-30 devnull }
446 b2cfc4e2 2003-09-30 devnull }
447 b2cfc4e2 2003-09-30 devnull
448 b2cfc4e2 2003-09-30 devnull /* merge spans */
449 b2cfc4e2 2003-09-30 devnull np = yyclassp->spans;
450 b2cfc4e2 2003-09-30 devnull p = r;
451 b2cfc4e2 2003-09-30 devnull if(r == ep)
452 b2cfc4e2 2003-09-30 devnull yyclassp->end = np;
453 b2cfc4e2 2003-09-30 devnull else {
454 b2cfc4e2 2003-09-30 devnull np[0] = *p++;
455 b2cfc4e2 2003-09-30 devnull np[1] = *p++;
456 b2cfc4e2 2003-09-30 devnull for(; p < ep; p += 2)
457 b2cfc4e2 2003-09-30 devnull if(p[0] <= np[1]){
458 b2cfc4e2 2003-09-30 devnull if(p[1] > np[1])
459 b2cfc4e2 2003-09-30 devnull np[1] = p[1];
460 b2cfc4e2 2003-09-30 devnull } else {
461 b2cfc4e2 2003-09-30 devnull np += 2;
462 b2cfc4e2 2003-09-30 devnull np[0] = p[0];
463 b2cfc4e2 2003-09-30 devnull np[1] = p[1];
464 b2cfc4e2 2003-09-30 devnull }
465 b2cfc4e2 2003-09-30 devnull yyclassp->end = np+2;
466 b2cfc4e2 2003-09-30 devnull }
467 b2cfc4e2 2003-09-30 devnull
468 b2cfc4e2 2003-09-30 devnull return type;
469 b2cfc4e2 2003-09-30 devnull }
470 b2cfc4e2 2003-09-30 devnull
471 b2cfc4e2 2003-09-30 devnull static Reprog*
472 b2cfc4e2 2003-09-30 devnull regcomp1(char *s, int literal, int dot_type)
473 b2cfc4e2 2003-09-30 devnull {
474 b2cfc4e2 2003-09-30 devnull int token;
475 b2cfc4e2 2003-09-30 devnull Reprog *pp;
476 b2cfc4e2 2003-09-30 devnull
477 b2cfc4e2 2003-09-30 devnull /* get memory for the program */
478 62390091 2004-03-05 devnull pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
479 b2cfc4e2 2003-09-30 devnull if(pp == 0){
480 b2cfc4e2 2003-09-30 devnull regerror("out of memory");
481 b2cfc4e2 2003-09-30 devnull return 0;
482 b2cfc4e2 2003-09-30 devnull }
483 b2cfc4e2 2003-09-30 devnull freep = pp->firstinst;
484 b2cfc4e2 2003-09-30 devnull classp = pp->class;
485 b2cfc4e2 2003-09-30 devnull errors = 0;
486 b2cfc4e2 2003-09-30 devnull
487 b2cfc4e2 2003-09-30 devnull if(setjmp(regkaboom))
488 b2cfc4e2 2003-09-30 devnull goto out;
489 b2cfc4e2 2003-09-30 devnull
490 b2cfc4e2 2003-09-30 devnull /* go compile the sucker */
491 b2cfc4e2 2003-09-30 devnull lexdone = 0;
492 b2cfc4e2 2003-09-30 devnull exprp = s;
493 b2cfc4e2 2003-09-30 devnull nclass = 0;
494 b2cfc4e2 2003-09-30 devnull nbra = 0;
495 b2cfc4e2 2003-09-30 devnull atorp = atorstack;
496 b2cfc4e2 2003-09-30 devnull andp = andstack;
497 b2cfc4e2 2003-09-30 devnull subidp = subidstack;
498 b2cfc4e2 2003-09-30 devnull lastwasand = FALSE;
499 b2cfc4e2 2003-09-30 devnull cursubid = 0;
500 b2cfc4e2 2003-09-30 devnull
501 b2cfc4e2 2003-09-30 devnull /* Start with a low priority operator to prime parser */
502 b2cfc4e2 2003-09-30 devnull pushator(START-1);
503 b2cfc4e2 2003-09-30 devnull while((token = lex(literal, dot_type)) != END){
504 b2cfc4e2 2003-09-30 devnull if((token&0300) == OPERATOR)
505 b2cfc4e2 2003-09-30 devnull operator(token);
506 b2cfc4e2 2003-09-30 devnull else
507 b2cfc4e2 2003-09-30 devnull operand(token);
508 b2cfc4e2 2003-09-30 devnull }
509 b2cfc4e2 2003-09-30 devnull
510 b2cfc4e2 2003-09-30 devnull /* Close with a low priority operator */
511 b2cfc4e2 2003-09-30 devnull evaluntil(START);
512 b2cfc4e2 2003-09-30 devnull
513 b2cfc4e2 2003-09-30 devnull /* Force END */
514 b2cfc4e2 2003-09-30 devnull operand(END);
515 b2cfc4e2 2003-09-30 devnull evaluntil(START);
516 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
517 b2cfc4e2 2003-09-30 devnull dumpstack();
518 b2cfc4e2 2003-09-30 devnull #endif
519 b2cfc4e2 2003-09-30 devnull if(nbra)
520 b2cfc4e2 2003-09-30 devnull rcerror("unmatched left paren");
521 b2cfc4e2 2003-09-30 devnull --andp; /* points to first and only operand */
522 b2cfc4e2 2003-09-30 devnull pp->startinst = andp->first;
523 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
524 b2cfc4e2 2003-09-30 devnull dump(pp);
525 b2cfc4e2 2003-09-30 devnull #endif
526 b2cfc4e2 2003-09-30 devnull pp = optimize(pp);
527 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
528 b2cfc4e2 2003-09-30 devnull print("start: %d\n", andp->first-pp->firstinst);
529 b2cfc4e2 2003-09-30 devnull dump(pp);
530 b2cfc4e2 2003-09-30 devnull #endif
531 b2cfc4e2 2003-09-30 devnull out:
532 b2cfc4e2 2003-09-30 devnull if(errors){
533 b2cfc4e2 2003-09-30 devnull free(pp);
534 b2cfc4e2 2003-09-30 devnull pp = 0;
535 b2cfc4e2 2003-09-30 devnull }
536 b2cfc4e2 2003-09-30 devnull return pp;
537 b2cfc4e2 2003-09-30 devnull }
538 b2cfc4e2 2003-09-30 devnull
539 b2cfc4e2 2003-09-30 devnull extern Reprog*
540 b2cfc4e2 2003-09-30 devnull regcomp(char *s)
541 b2cfc4e2 2003-09-30 devnull {
542 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 0, ANY);
543 b2cfc4e2 2003-09-30 devnull }
544 b2cfc4e2 2003-09-30 devnull
545 b2cfc4e2 2003-09-30 devnull extern Reprog*
546 b2cfc4e2 2003-09-30 devnull regcomplit(char *s)
547 b2cfc4e2 2003-09-30 devnull {
548 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 1, ANY);
549 b2cfc4e2 2003-09-30 devnull }
550 b2cfc4e2 2003-09-30 devnull
551 b2cfc4e2 2003-09-30 devnull extern Reprog*
552 b2cfc4e2 2003-09-30 devnull regcompnl(char *s)
553 b2cfc4e2 2003-09-30 devnull {
554 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 0, ANYNL);
555 b2cfc4e2 2003-09-30 devnull }