Blame


1 b2cfc4e2 2003-09-30 devnull #include "lib9.h"
2 b2cfc4e2 2003-09-30 devnull #include "regexp9.h"
3 b2cfc4e2 2003-09-30 devnull #include "regcomp.h"
4 b2cfc4e2 2003-09-30 devnull
5 b2cfc4e2 2003-09-30 devnull #define TRUE 1
6 b2cfc4e2 2003-09-30 devnull #define FALSE 0
7 b2cfc4e2 2003-09-30 devnull
8 b2cfc4e2 2003-09-30 devnull /*
9 b2cfc4e2 2003-09-30 devnull * Parser Information
10 b2cfc4e2 2003-09-30 devnull */
11 b2cfc4e2 2003-09-30 devnull typedef
12 b2cfc4e2 2003-09-30 devnull struct Node
13 b2cfc4e2 2003-09-30 devnull {
14 b2cfc4e2 2003-09-30 devnull Reinst* first;
15 b2cfc4e2 2003-09-30 devnull Reinst* last;
16 b2cfc4e2 2003-09-30 devnull }Node;
17 3d31240b 2013-07-02 0intro
18 3d31240b 2013-07-02 0intro /* max character classes per program is nelem(reprog->class) */
19 3d31240b 2013-07-02 0intro static Reprog *reprog;
20 3d31240b 2013-07-02 0intro
21 3d31240b 2013-07-02 0intro /* max rune ranges per character class is nelem(classp->spans)/2 */
22 3d31240b 2013-07-02 0intro #define NCCRUNE nelem(classp->spans)
23 b2cfc4e2 2003-09-30 devnull
24 b2cfc4e2 2003-09-30 devnull #define NSTACK 20
25 b2cfc4e2 2003-09-30 devnull static Node andstack[NSTACK];
26 b2cfc4e2 2003-09-30 devnull static Node *andp;
27 b2cfc4e2 2003-09-30 devnull static int atorstack[NSTACK];
28 b2cfc4e2 2003-09-30 devnull static int* atorp;
29 b2cfc4e2 2003-09-30 devnull static int cursubid; /* id of current subexpression */
30 b2cfc4e2 2003-09-30 devnull static int subidstack[NSTACK]; /* parallel to atorstack */
31 b2cfc4e2 2003-09-30 devnull static int* subidp;
32 b2cfc4e2 2003-09-30 devnull static int lastwasand; /* Last token was operand */
33 b2cfc4e2 2003-09-30 devnull static int nbra;
34 b2cfc4e2 2003-09-30 devnull static char* exprp; /* pointer to next character in source expression */
35 b2cfc4e2 2003-09-30 devnull static int lexdone;
36 b2cfc4e2 2003-09-30 devnull static int nclass;
37 b2cfc4e2 2003-09-30 devnull static Reclass*classp;
38 b2cfc4e2 2003-09-30 devnull static Reinst* freep;
39 b2cfc4e2 2003-09-30 devnull static int errors;
40 b2cfc4e2 2003-09-30 devnull static Rune yyrune; /* last lex'd rune */
41 b2cfc4e2 2003-09-30 devnull static Reclass*yyclassp; /* last lex'd class */
42 b2cfc4e2 2003-09-30 devnull
43 b2cfc4e2 2003-09-30 devnull /* predeclared crap */
44 b2cfc4e2 2003-09-30 devnull static void operator(int);
45 b2cfc4e2 2003-09-30 devnull static void pushand(Reinst*, Reinst*);
46 b2cfc4e2 2003-09-30 devnull static void pushator(int);
47 b2cfc4e2 2003-09-30 devnull static void evaluntil(int);
48 b2cfc4e2 2003-09-30 devnull static int bldcclass(void);
49 b2cfc4e2 2003-09-30 devnull
50 b2cfc4e2 2003-09-30 devnull static jmp_buf regkaboom;
51 b2cfc4e2 2003-09-30 devnull
52 b2cfc4e2 2003-09-30 devnull static void
53 b2cfc4e2 2003-09-30 devnull rcerror(char *s)
54 b2cfc4e2 2003-09-30 devnull {
55 b2cfc4e2 2003-09-30 devnull errors++;
56 b2cfc4e2 2003-09-30 devnull regerror(s);
57 b2cfc4e2 2003-09-30 devnull longjmp(regkaboom, 1);
58 b2cfc4e2 2003-09-30 devnull }
59 b2cfc4e2 2003-09-30 devnull
60 b2cfc4e2 2003-09-30 devnull static Reinst*
61 b2cfc4e2 2003-09-30 devnull newinst(int t)
62 b2cfc4e2 2003-09-30 devnull {
63 b2cfc4e2 2003-09-30 devnull freep->type = t;
64 b2cfc4e2 2003-09-30 devnull freep->u2.left = 0;
65 b2cfc4e2 2003-09-30 devnull freep->u1.right = 0;
66 b2cfc4e2 2003-09-30 devnull return freep++;
67 b2cfc4e2 2003-09-30 devnull }
68 b2cfc4e2 2003-09-30 devnull
69 b2cfc4e2 2003-09-30 devnull static void
70 b2cfc4e2 2003-09-30 devnull operand(int t)
71 b2cfc4e2 2003-09-30 devnull {
72 b2cfc4e2 2003-09-30 devnull Reinst *i;
73 b2cfc4e2 2003-09-30 devnull
74 b2cfc4e2 2003-09-30 devnull if(lastwasand)
75 b2cfc4e2 2003-09-30 devnull operator(CAT); /* catenate is implicit */
76 b2cfc4e2 2003-09-30 devnull i = newinst(t);
77 b2cfc4e2 2003-09-30 devnull
78 b2cfc4e2 2003-09-30 devnull if(t == CCLASS || t == NCCLASS)
79 b2cfc4e2 2003-09-30 devnull i->u1.cp = yyclassp;
80 b2cfc4e2 2003-09-30 devnull if(t == RUNE)
81 b2cfc4e2 2003-09-30 devnull i->u1.r = yyrune;
82 b2cfc4e2 2003-09-30 devnull
83 b2cfc4e2 2003-09-30 devnull pushand(i, i);
84 b2cfc4e2 2003-09-30 devnull lastwasand = TRUE;
85 b2cfc4e2 2003-09-30 devnull }
86 b2cfc4e2 2003-09-30 devnull
87 b2cfc4e2 2003-09-30 devnull static void
88 b2cfc4e2 2003-09-30 devnull operator(int t)
89 b2cfc4e2 2003-09-30 devnull {
90 b2cfc4e2 2003-09-30 devnull if(t==RBRA && --nbra<0)
91 b2cfc4e2 2003-09-30 devnull rcerror("unmatched right paren");
92 b2cfc4e2 2003-09-30 devnull if(t==LBRA){
93 b2cfc4e2 2003-09-30 devnull if(++cursubid >= NSUBEXP)
94 b2cfc4e2 2003-09-30 devnull rcerror ("too many subexpressions");
95 b2cfc4e2 2003-09-30 devnull nbra++;
96 b2cfc4e2 2003-09-30 devnull if(lastwasand)
97 b2cfc4e2 2003-09-30 devnull operator(CAT);
98 b2cfc4e2 2003-09-30 devnull } else
99 b2cfc4e2 2003-09-30 devnull evaluntil(t);
100 b2cfc4e2 2003-09-30 devnull if(t != RBRA)
101 b2cfc4e2 2003-09-30 devnull pushator(t);
102 b2cfc4e2 2003-09-30 devnull lastwasand = FALSE;
103 b2cfc4e2 2003-09-30 devnull if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
104 b2cfc4e2 2003-09-30 devnull lastwasand = TRUE; /* these look like operands */
105 b2cfc4e2 2003-09-30 devnull }
106 b2cfc4e2 2003-09-30 devnull
107 b2cfc4e2 2003-09-30 devnull static void
108 b2cfc4e2 2003-09-30 devnull regerr2(char *s, int c)
109 b2cfc4e2 2003-09-30 devnull {
110 b2cfc4e2 2003-09-30 devnull char buf[100];
111 b2cfc4e2 2003-09-30 devnull char *cp = buf;
112 b2cfc4e2 2003-09-30 devnull while(*s)
113 b2cfc4e2 2003-09-30 devnull *cp++ = *s++;
114 b2cfc4e2 2003-09-30 devnull *cp++ = c;
115 b2cfc4e2 2003-09-30 devnull *cp = '\0';
116 b2cfc4e2 2003-09-30 devnull rcerror(buf);
117 b2cfc4e2 2003-09-30 devnull }
118 b2cfc4e2 2003-09-30 devnull
119 b2cfc4e2 2003-09-30 devnull static void
120 b2cfc4e2 2003-09-30 devnull cant(char *s)
121 b2cfc4e2 2003-09-30 devnull {
122 b2cfc4e2 2003-09-30 devnull char buf[100];
123 b2cfc4e2 2003-09-30 devnull strcpy(buf, "can't happen: ");
124 b2cfc4e2 2003-09-30 devnull strcat(buf, s);
125 b2cfc4e2 2003-09-30 devnull rcerror(buf);
126 b2cfc4e2 2003-09-30 devnull }
127 b2cfc4e2 2003-09-30 devnull
128 b2cfc4e2 2003-09-30 devnull static void
129 b2cfc4e2 2003-09-30 devnull pushand(Reinst *f, Reinst *l)
130 b2cfc4e2 2003-09-30 devnull {
131 b2cfc4e2 2003-09-30 devnull if(andp >= &andstack[NSTACK])
132 b2cfc4e2 2003-09-30 devnull cant("operand stack overflow");
133 b2cfc4e2 2003-09-30 devnull andp->first = f;
134 b2cfc4e2 2003-09-30 devnull andp->last = l;
135 b2cfc4e2 2003-09-30 devnull andp++;
136 b2cfc4e2 2003-09-30 devnull }
137 b2cfc4e2 2003-09-30 devnull
138 b2cfc4e2 2003-09-30 devnull static void
139 b2cfc4e2 2003-09-30 devnull pushator(int t)
140 b2cfc4e2 2003-09-30 devnull {
141 b2cfc4e2 2003-09-30 devnull if(atorp >= &atorstack[NSTACK])
142 b2cfc4e2 2003-09-30 devnull cant("operator stack overflow");
143 b2cfc4e2 2003-09-30 devnull *atorp++ = t;
144 b2cfc4e2 2003-09-30 devnull *subidp++ = cursubid;
145 b2cfc4e2 2003-09-30 devnull }
146 b2cfc4e2 2003-09-30 devnull
147 b2cfc4e2 2003-09-30 devnull static Node*
148 b2cfc4e2 2003-09-30 devnull popand(int op)
149 b2cfc4e2 2003-09-30 devnull {
150 b2cfc4e2 2003-09-30 devnull Reinst *inst;
151 b2cfc4e2 2003-09-30 devnull
152 b2cfc4e2 2003-09-30 devnull if(andp <= &andstack[0]){
153 b2cfc4e2 2003-09-30 devnull regerr2("missing operand for ", op);
154 b2cfc4e2 2003-09-30 devnull inst = newinst(NOP);
155 b2cfc4e2 2003-09-30 devnull pushand(inst,inst);
156 b2cfc4e2 2003-09-30 devnull }
157 b2cfc4e2 2003-09-30 devnull return --andp;
158 b2cfc4e2 2003-09-30 devnull }
159 b2cfc4e2 2003-09-30 devnull
160 b2cfc4e2 2003-09-30 devnull static int
161 b2cfc4e2 2003-09-30 devnull popator(void)
162 b2cfc4e2 2003-09-30 devnull {
163 b2cfc4e2 2003-09-30 devnull if(atorp <= &atorstack[0])
164 b2cfc4e2 2003-09-30 devnull cant("operator stack underflow");
165 b2cfc4e2 2003-09-30 devnull --subidp;
166 b2cfc4e2 2003-09-30 devnull return *--atorp;
167 b2cfc4e2 2003-09-30 devnull }
168 b2cfc4e2 2003-09-30 devnull
169 b2cfc4e2 2003-09-30 devnull static void
170 b2cfc4e2 2003-09-30 devnull evaluntil(int pri)
171 b2cfc4e2 2003-09-30 devnull {
172 b2cfc4e2 2003-09-30 devnull Node *op1, *op2;
173 b2cfc4e2 2003-09-30 devnull Reinst *inst1, *inst2;
174 b2cfc4e2 2003-09-30 devnull
175 b2cfc4e2 2003-09-30 devnull while(pri==RBRA || atorp[-1]>=pri){
176 b2cfc4e2 2003-09-30 devnull switch(popator()){
177 b2cfc4e2 2003-09-30 devnull default:
178 b2cfc4e2 2003-09-30 devnull rcerror("unknown operator in evaluntil");
179 b2cfc4e2 2003-09-30 devnull break;
180 b2cfc4e2 2003-09-30 devnull case LBRA: /* must have been RBRA */
181 b2cfc4e2 2003-09-30 devnull op1 = popand('(');
182 b2cfc4e2 2003-09-30 devnull inst2 = newinst(RBRA);
183 b2cfc4e2 2003-09-30 devnull inst2->u1.subid = *subidp;
184 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = inst2;
185 b2cfc4e2 2003-09-30 devnull inst1 = newinst(LBRA);
186 b2cfc4e2 2003-09-30 devnull inst1->u1.subid = *subidp;
187 b2cfc4e2 2003-09-30 devnull inst1->u2.next = op1->first;
188 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
189 b2cfc4e2 2003-09-30 devnull return;
190 b2cfc4e2 2003-09-30 devnull case OR:
191 b2cfc4e2 2003-09-30 devnull op2 = popand('|');
192 b2cfc4e2 2003-09-30 devnull op1 = popand('|');
193 b2cfc4e2 2003-09-30 devnull inst2 = newinst(NOP);
194 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst2;
195 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = inst2;
196 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
197 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op1->first;
198 b2cfc4e2 2003-09-30 devnull inst1->u2.left = op2->first;
199 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
200 b2cfc4e2 2003-09-30 devnull break;
201 b2cfc4e2 2003-09-30 devnull case CAT:
202 b2cfc4e2 2003-09-30 devnull op2 = popand(0);
203 b2cfc4e2 2003-09-30 devnull op1 = popand(0);
204 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = op2->first;
205 b2cfc4e2 2003-09-30 devnull pushand(op1->first, op2->last);
206 b2cfc4e2 2003-09-30 devnull break;
207 b2cfc4e2 2003-09-30 devnull case STAR:
208 b2cfc4e2 2003-09-30 devnull op2 = popand('*');
209 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
210 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst1;
211 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
212 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst1);
213 b2cfc4e2 2003-09-30 devnull break;
214 b2cfc4e2 2003-09-30 devnull case PLUS:
215 b2cfc4e2 2003-09-30 devnull op2 = popand('+');
216 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
217 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst1;
218 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
219 b2cfc4e2 2003-09-30 devnull pushand(op2->first, inst1);
220 b2cfc4e2 2003-09-30 devnull break;
221 b2cfc4e2 2003-09-30 devnull case QUEST:
222 b2cfc4e2 2003-09-30 devnull op2 = popand('?');
223 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
224 b2cfc4e2 2003-09-30 devnull inst2 = newinst(NOP);
225 b2cfc4e2 2003-09-30 devnull inst1->u2.left = inst2;
226 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
227 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst2;
228 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
229 b2cfc4e2 2003-09-30 devnull break;
230 b2cfc4e2 2003-09-30 devnull }
231 b2cfc4e2 2003-09-30 devnull }
232 b2cfc4e2 2003-09-30 devnull }
233 b2cfc4e2 2003-09-30 devnull
234 b2cfc4e2 2003-09-30 devnull static Reprog*
235 b2cfc4e2 2003-09-30 devnull optimize(Reprog *pp)
236 b2cfc4e2 2003-09-30 devnull {
237 b2cfc4e2 2003-09-30 devnull Reinst *inst, *target;
238 b2cfc4e2 2003-09-30 devnull int size;
239 b2cfc4e2 2003-09-30 devnull Reprog *npp;
240 b2cfc4e2 2003-09-30 devnull Reclass *cl;
241 6d08a0f5 2007-12-07 rsc int diff;
242 b2cfc4e2 2003-09-30 devnull
243 b2cfc4e2 2003-09-30 devnull /*
244 b2cfc4e2 2003-09-30 devnull * get rid of NOOP chains
245 b2cfc4e2 2003-09-30 devnull */
246 b2cfc4e2 2003-09-30 devnull for(inst=pp->firstinst; inst->type!=END; inst++){
247 b2cfc4e2 2003-09-30 devnull target = inst->u2.next;
248 b2cfc4e2 2003-09-30 devnull while(target->type == NOP)
249 b2cfc4e2 2003-09-30 devnull target = target->u2.next;
250 b2cfc4e2 2003-09-30 devnull inst->u2.next = target;
251 b2cfc4e2 2003-09-30 devnull }
252 b2cfc4e2 2003-09-30 devnull
253 b2cfc4e2 2003-09-30 devnull /*
254 b2cfc4e2 2003-09-30 devnull * The original allocation is for an area larger than
255 b2cfc4e2 2003-09-30 devnull * necessary. Reallocate to the actual space used
256 b2cfc4e2 2003-09-30 devnull * and then relocate the code.
257 b2cfc4e2 2003-09-30 devnull */
258 6d08a0f5 2007-12-07 rsc size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
259 a7511dd4 2007-12-07 rsc npp = realloc(pp, size);
260 6d08a0f5 2007-12-07 rsc if(npp==0 || npp==pp)
261 b2cfc4e2 2003-09-30 devnull return pp;
262 b2cfc4e2 2003-09-30 devnull diff = (char *)npp - (char *)pp;
263 b2cfc4e2 2003-09-30 devnull freep = (Reinst *)((char *)freep + diff);
264 b2cfc4e2 2003-09-30 devnull for(inst=npp->firstinst; inst<freep; inst++){
265 b2cfc4e2 2003-09-30 devnull switch(inst->type){
266 b2cfc4e2 2003-09-30 devnull case OR:
267 b2cfc4e2 2003-09-30 devnull case STAR:
268 b2cfc4e2 2003-09-30 devnull case PLUS:
269 b2cfc4e2 2003-09-30 devnull case QUEST:
270 002d2f26 2008-05-10 rsc inst->u1.right = (void*)((char*)inst->u1.right + diff);
271 b2cfc4e2 2003-09-30 devnull break;
272 b2cfc4e2 2003-09-30 devnull case CCLASS:
273 b2cfc4e2 2003-09-30 devnull case NCCLASS:
274 002d2f26 2008-05-10 rsc inst->u1.right = (void*)((char*)inst->u1.right + diff);
275 b2cfc4e2 2003-09-30 devnull cl = inst->u1.cp;
276 002d2f26 2008-05-10 rsc cl->end = (void*)((char*)cl->end + diff);
277 b2cfc4e2 2003-09-30 devnull break;
278 b2cfc4e2 2003-09-30 devnull }
279 002d2f26 2008-05-10 rsc inst->u2.left = (void*)((char*)inst->u2.left + diff);
280 b2cfc4e2 2003-09-30 devnull }
281 002d2f26 2008-05-10 rsc npp->startinst = (void*)((char*)npp->startinst + diff);
282 b2cfc4e2 2003-09-30 devnull return npp;
283 b2cfc4e2 2003-09-30 devnull }
284 b2cfc4e2 2003-09-30 devnull
285 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
286 b2cfc4e2 2003-09-30 devnull static void
287 b2cfc4e2 2003-09-30 devnull dumpstack(void){
288 b2cfc4e2 2003-09-30 devnull Node *stk;
289 b2cfc4e2 2003-09-30 devnull int *ip;
290 b2cfc4e2 2003-09-30 devnull
291 b2cfc4e2 2003-09-30 devnull print("operators\n");
292 b2cfc4e2 2003-09-30 devnull for(ip=atorstack; ip<atorp; ip++)
293 b2cfc4e2 2003-09-30 devnull print("0%o\n", *ip);
294 b2cfc4e2 2003-09-30 devnull print("operands\n");
295 b2cfc4e2 2003-09-30 devnull for(stk=andstack; stk<andp; stk++)
296 b2cfc4e2 2003-09-30 devnull print("0%o\t0%o\n", stk->first->type, stk->last->type);
297 b2cfc4e2 2003-09-30 devnull }
298 b2cfc4e2 2003-09-30 devnull
299 b2cfc4e2 2003-09-30 devnull static void
300 b2cfc4e2 2003-09-30 devnull dump(Reprog *pp)
301 b2cfc4e2 2003-09-30 devnull {
302 b2cfc4e2 2003-09-30 devnull Reinst *l;
303 b2cfc4e2 2003-09-30 devnull Rune *p;
304 b2cfc4e2 2003-09-30 devnull
305 b2cfc4e2 2003-09-30 devnull l = pp->firstinst;
306 b2cfc4e2 2003-09-30 devnull do{
307 b2cfc4e2 2003-09-30 devnull print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
308 b2cfc4e2 2003-09-30 devnull l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
309 b2cfc4e2 2003-09-30 devnull if(l->type == RUNE)
310 62390091 2004-03-05 devnull print("\t%C\n", l->u1.r);
311 b2cfc4e2 2003-09-30 devnull else if(l->type == CCLASS || l->type == NCCLASS){
312 b2cfc4e2 2003-09-30 devnull print("\t[");
313 b2cfc4e2 2003-09-30 devnull if(l->type == NCCLASS)
314 b2cfc4e2 2003-09-30 devnull print("^");
315 62390091 2004-03-05 devnull for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
316 b2cfc4e2 2003-09-30 devnull if(p[0] == p[1])
317 b2cfc4e2 2003-09-30 devnull print("%C", p[0]);
318 b2cfc4e2 2003-09-30 devnull else
319 b2cfc4e2 2003-09-30 devnull print("%C-%C", p[0], p[1]);
320 b2cfc4e2 2003-09-30 devnull print("]\n");
321 b2cfc4e2 2003-09-30 devnull } else
322 b2cfc4e2 2003-09-30 devnull print("\n");
323 b2cfc4e2 2003-09-30 devnull }while(l++->type);
324 b2cfc4e2 2003-09-30 devnull }
325 b2cfc4e2 2003-09-30 devnull #endif
326 b2cfc4e2 2003-09-30 devnull
327 b2cfc4e2 2003-09-30 devnull static Reclass*
328 b2cfc4e2 2003-09-30 devnull newclass(void)
329 b2cfc4e2 2003-09-30 devnull {
330 3d31240b 2013-07-02 0intro if(nclass >= nelem(reprog->class))
331 3d31240b 2013-07-02 0intro rcerror("too many character classes; increase Reprog.class size");
332 b2cfc4e2 2003-09-30 devnull return &(classp[nclass++]);
333 b2cfc4e2 2003-09-30 devnull }
334 b2cfc4e2 2003-09-30 devnull
335 b2cfc4e2 2003-09-30 devnull static int
336 b2cfc4e2 2003-09-30 devnull nextc(Rune *rp)
337 b2cfc4e2 2003-09-30 devnull {
338 b2cfc4e2 2003-09-30 devnull if(lexdone){
339 b2cfc4e2 2003-09-30 devnull *rp = 0;
340 b2cfc4e2 2003-09-30 devnull return 1;
341 b2cfc4e2 2003-09-30 devnull }
342 b2cfc4e2 2003-09-30 devnull exprp += chartorune(rp, exprp);
343 bb0266fe 2005-05-07 devnull if(*rp == '\\'){
344 b2cfc4e2 2003-09-30 devnull exprp += chartorune(rp, exprp);
345 b2cfc4e2 2003-09-30 devnull return 1;
346 b2cfc4e2 2003-09-30 devnull }
347 b2cfc4e2 2003-09-30 devnull if(*rp == 0)
348 b2cfc4e2 2003-09-30 devnull lexdone = 1;
349 b2cfc4e2 2003-09-30 devnull return 0;
350 b2cfc4e2 2003-09-30 devnull }
351 b2cfc4e2 2003-09-30 devnull
352 b2cfc4e2 2003-09-30 devnull static int
353 b2cfc4e2 2003-09-30 devnull lex(int literal, int dot_type)
354 b2cfc4e2 2003-09-30 devnull {
355 b2cfc4e2 2003-09-30 devnull int quoted;
356 b2cfc4e2 2003-09-30 devnull
357 b2cfc4e2 2003-09-30 devnull quoted = nextc(&yyrune);
358 b2cfc4e2 2003-09-30 devnull if(literal || quoted){
359 b2cfc4e2 2003-09-30 devnull if(yyrune == 0)
360 b2cfc4e2 2003-09-30 devnull return END;
361 b2cfc4e2 2003-09-30 devnull return RUNE;
362 b2cfc4e2 2003-09-30 devnull }
363 b2cfc4e2 2003-09-30 devnull
364 b2cfc4e2 2003-09-30 devnull switch(yyrune){
365 b2cfc4e2 2003-09-30 devnull case 0:
366 b2cfc4e2 2003-09-30 devnull return END;
367 bb0266fe 2005-05-07 devnull case '*':
368 b2cfc4e2 2003-09-30 devnull return STAR;
369 bb0266fe 2005-05-07 devnull case '?':
370 b2cfc4e2 2003-09-30 devnull return QUEST;
371 bb0266fe 2005-05-07 devnull case '+':
372 b2cfc4e2 2003-09-30 devnull return PLUS;
373 bb0266fe 2005-05-07 devnull case '|':
374 b2cfc4e2 2003-09-30 devnull return OR;
375 bb0266fe 2005-05-07 devnull case '.':
376 b2cfc4e2 2003-09-30 devnull return dot_type;
377 bb0266fe 2005-05-07 devnull case '(':
378 b2cfc4e2 2003-09-30 devnull return LBRA;
379 bb0266fe 2005-05-07 devnull case ')':
380 b2cfc4e2 2003-09-30 devnull return RBRA;
381 bb0266fe 2005-05-07 devnull case '^':
382 bb0266fe 2005-05-07 devnull return BOL;
383 bb0266fe 2005-05-07 devnull case '$':
384 b2cfc4e2 2003-09-30 devnull return EOL;
385 bb0266fe 2005-05-07 devnull case '[':
386 b2cfc4e2 2003-09-30 devnull return bldcclass();
387 b2cfc4e2 2003-09-30 devnull }
388 b2cfc4e2 2003-09-30 devnull return RUNE;
389 b2cfc4e2 2003-09-30 devnull }
390 b2cfc4e2 2003-09-30 devnull
391 b2cfc4e2 2003-09-30 devnull static int
392 b2cfc4e2 2003-09-30 devnull bldcclass(void)
393 b2cfc4e2 2003-09-30 devnull {
394 b2cfc4e2 2003-09-30 devnull int type;
395 b2cfc4e2 2003-09-30 devnull Rune r[NCCRUNE];
396 b2cfc4e2 2003-09-30 devnull Rune *p, *ep, *np;
397 b2cfc4e2 2003-09-30 devnull Rune rune;
398 b2cfc4e2 2003-09-30 devnull int quoted;
399 b2cfc4e2 2003-09-30 devnull
400 b2cfc4e2 2003-09-30 devnull /* we have already seen the '[' */
401 b2cfc4e2 2003-09-30 devnull type = CCLASS;
402 b2cfc4e2 2003-09-30 devnull yyclassp = newclass();
403 b2cfc4e2 2003-09-30 devnull
404 b2cfc4e2 2003-09-30 devnull /* look ahead for negation */
405 b2cfc4e2 2003-09-30 devnull /* SPECIAL CASE!!! negated classes don't match \n */
406 b2cfc4e2 2003-09-30 devnull ep = r;
407 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
408 bb0266fe 2005-05-07 devnull if(!quoted && rune == '^'){
409 b2cfc4e2 2003-09-30 devnull type = NCCLASS;
410 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
411 bb0266fe 2005-05-07 devnull *ep++ = '\n';
412 bb0266fe 2005-05-07 devnull *ep++ = '\n';
413 b2cfc4e2 2003-09-30 devnull }
414 b2cfc4e2 2003-09-30 devnull
415 b2cfc4e2 2003-09-30 devnull /* parse class into a set of spans */
416 3d31240b 2013-07-02 0intro while(ep < &r[NCCRUNE-1]){
417 b2cfc4e2 2003-09-30 devnull if(rune == 0){
418 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
419 b2cfc4e2 2003-09-30 devnull return 0;
420 b2cfc4e2 2003-09-30 devnull }
421 bb0266fe 2005-05-07 devnull if(!quoted && rune == ']')
422 b2cfc4e2 2003-09-30 devnull break;
423 bb0266fe 2005-05-07 devnull if(!quoted && rune == '-'){
424 b2cfc4e2 2003-09-30 devnull if(ep == r){
425 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
426 b2cfc4e2 2003-09-30 devnull return 0;
427 b2cfc4e2 2003-09-30 devnull }
428 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
429 bb0266fe 2005-05-07 devnull if((!quoted && rune == ']') || rune == 0){
430 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
431 b2cfc4e2 2003-09-30 devnull return 0;
432 b2cfc4e2 2003-09-30 devnull }
433 b2cfc4e2 2003-09-30 devnull *(ep-1) = rune;
434 b2cfc4e2 2003-09-30 devnull } else {
435 b2cfc4e2 2003-09-30 devnull *ep++ = rune;
436 b2cfc4e2 2003-09-30 devnull *ep++ = rune;
437 b2cfc4e2 2003-09-30 devnull }
438 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
439 b2cfc4e2 2003-09-30 devnull }
440 3d31240b 2013-07-02 0intro if(ep >= &r[NCCRUNE-1]) {
441 3d31240b 2013-07-02 0intro rcerror("char class too large; increase Reclass.spans size");
442 3d31240b 2013-07-02 0intro return 0;
443 3d31240b 2013-07-02 0intro }
444 b2cfc4e2 2003-09-30 devnull
445 b2cfc4e2 2003-09-30 devnull /* sort on span start */
446 b2cfc4e2 2003-09-30 devnull for(p = r; p < ep; p += 2){
447 b2cfc4e2 2003-09-30 devnull for(np = p; np < ep; np += 2)
448 b2cfc4e2 2003-09-30 devnull if(*np < *p){
449 b2cfc4e2 2003-09-30 devnull rune = np[0];
450 b2cfc4e2 2003-09-30 devnull np[0] = p[0];
451 b2cfc4e2 2003-09-30 devnull p[0] = rune;
452 b2cfc4e2 2003-09-30 devnull rune = np[1];
453 b2cfc4e2 2003-09-30 devnull np[1] = p[1];
454 b2cfc4e2 2003-09-30 devnull p[1] = rune;
455 b2cfc4e2 2003-09-30 devnull }
456 b2cfc4e2 2003-09-30 devnull }
457 b2cfc4e2 2003-09-30 devnull
458 b2cfc4e2 2003-09-30 devnull /* merge spans */
459 b2cfc4e2 2003-09-30 devnull np = yyclassp->spans;
460 b2cfc4e2 2003-09-30 devnull p = r;
461 b2cfc4e2 2003-09-30 devnull if(r == ep)
462 b2cfc4e2 2003-09-30 devnull yyclassp->end = np;
463 b2cfc4e2 2003-09-30 devnull else {
464 b2cfc4e2 2003-09-30 devnull np[0] = *p++;
465 b2cfc4e2 2003-09-30 devnull np[1] = *p++;
466 b2cfc4e2 2003-09-30 devnull for(; p < ep; p += 2)
467 3d31240b 2013-07-02 0intro /* overlapping or adjacent ranges? */
468 3d31240b 2013-07-02 0intro if(p[0] <= np[1] + 1){
469 3d31240b 2013-07-02 0intro if(p[1] >= np[1])
470 3d31240b 2013-07-02 0intro np[1] = p[1]; /* coalesce */
471 b2cfc4e2 2003-09-30 devnull } else {
472 b2cfc4e2 2003-09-30 devnull np += 2;
473 b2cfc4e2 2003-09-30 devnull np[0] = p[0];
474 b2cfc4e2 2003-09-30 devnull np[1] = p[1];
475 b2cfc4e2 2003-09-30 devnull }
476 b2cfc4e2 2003-09-30 devnull yyclassp->end = np+2;
477 b2cfc4e2 2003-09-30 devnull }
478 b2cfc4e2 2003-09-30 devnull
479 b2cfc4e2 2003-09-30 devnull return type;
480 b2cfc4e2 2003-09-30 devnull }
481 b2cfc4e2 2003-09-30 devnull
482 b2cfc4e2 2003-09-30 devnull static Reprog*
483 b2cfc4e2 2003-09-30 devnull regcomp1(char *s, int literal, int dot_type)
484 b2cfc4e2 2003-09-30 devnull {
485 b2cfc4e2 2003-09-30 devnull int token;
486 5ba56e91 2005-01-14 devnull Reprog *volatile pp;
487 b2cfc4e2 2003-09-30 devnull
488 b2cfc4e2 2003-09-30 devnull /* get memory for the program */
489 62390091 2004-03-05 devnull pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
490 b2cfc4e2 2003-09-30 devnull if(pp == 0){
491 b2cfc4e2 2003-09-30 devnull regerror("out of memory");
492 b2cfc4e2 2003-09-30 devnull return 0;
493 b2cfc4e2 2003-09-30 devnull }
494 b2cfc4e2 2003-09-30 devnull freep = pp->firstinst;
495 b2cfc4e2 2003-09-30 devnull classp = pp->class;
496 b2cfc4e2 2003-09-30 devnull errors = 0;
497 b2cfc4e2 2003-09-30 devnull
498 b2cfc4e2 2003-09-30 devnull if(setjmp(regkaboom))
499 b2cfc4e2 2003-09-30 devnull goto out;
500 b2cfc4e2 2003-09-30 devnull
501 b2cfc4e2 2003-09-30 devnull /* go compile the sucker */
502 b2cfc4e2 2003-09-30 devnull lexdone = 0;
503 b2cfc4e2 2003-09-30 devnull exprp = s;
504 b2cfc4e2 2003-09-30 devnull nclass = 0;
505 b2cfc4e2 2003-09-30 devnull nbra = 0;
506 b2cfc4e2 2003-09-30 devnull atorp = atorstack;
507 b2cfc4e2 2003-09-30 devnull andp = andstack;
508 b2cfc4e2 2003-09-30 devnull subidp = subidstack;
509 b2cfc4e2 2003-09-30 devnull lastwasand = FALSE;
510 b2cfc4e2 2003-09-30 devnull cursubid = 0;
511 b2cfc4e2 2003-09-30 devnull
512 b2cfc4e2 2003-09-30 devnull /* Start with a low priority operator to prime parser */
513 b2cfc4e2 2003-09-30 devnull pushator(START-1);
514 b2cfc4e2 2003-09-30 devnull while((token = lex(literal, dot_type)) != END){
515 b2cfc4e2 2003-09-30 devnull if((token&0300) == OPERATOR)
516 b2cfc4e2 2003-09-30 devnull operator(token);
517 b2cfc4e2 2003-09-30 devnull else
518 b2cfc4e2 2003-09-30 devnull operand(token);
519 b2cfc4e2 2003-09-30 devnull }
520 b2cfc4e2 2003-09-30 devnull
521 b2cfc4e2 2003-09-30 devnull /* Close with a low priority operator */
522 b2cfc4e2 2003-09-30 devnull evaluntil(START);
523 b2cfc4e2 2003-09-30 devnull
524 b2cfc4e2 2003-09-30 devnull /* Force END */
525 b2cfc4e2 2003-09-30 devnull operand(END);
526 b2cfc4e2 2003-09-30 devnull evaluntil(START);
527 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
528 b2cfc4e2 2003-09-30 devnull dumpstack();
529 b2cfc4e2 2003-09-30 devnull #endif
530 b2cfc4e2 2003-09-30 devnull if(nbra)
531 b2cfc4e2 2003-09-30 devnull rcerror("unmatched left paren");
532 b2cfc4e2 2003-09-30 devnull --andp; /* points to first and only operand */
533 b2cfc4e2 2003-09-30 devnull pp->startinst = andp->first;
534 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
535 b2cfc4e2 2003-09-30 devnull dump(pp);
536 b2cfc4e2 2003-09-30 devnull #endif
537 b2cfc4e2 2003-09-30 devnull pp = optimize(pp);
538 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
539 b2cfc4e2 2003-09-30 devnull print("start: %d\n", andp->first-pp->firstinst);
540 b2cfc4e2 2003-09-30 devnull dump(pp);
541 b2cfc4e2 2003-09-30 devnull #endif
542 b2cfc4e2 2003-09-30 devnull out:
543 b2cfc4e2 2003-09-30 devnull if(errors){
544 b2cfc4e2 2003-09-30 devnull free(pp);
545 b2cfc4e2 2003-09-30 devnull pp = 0;
546 b2cfc4e2 2003-09-30 devnull }
547 b2cfc4e2 2003-09-30 devnull return pp;
548 b2cfc4e2 2003-09-30 devnull }
549 b2cfc4e2 2003-09-30 devnull
550 b2cfc4e2 2003-09-30 devnull extern Reprog*
551 b2cfc4e2 2003-09-30 devnull regcomp(char *s)
552 b2cfc4e2 2003-09-30 devnull {
553 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 0, ANY);
554 b2cfc4e2 2003-09-30 devnull }
555 b2cfc4e2 2003-09-30 devnull
556 b2cfc4e2 2003-09-30 devnull extern Reprog*
557 b2cfc4e2 2003-09-30 devnull regcomplit(char *s)
558 b2cfc4e2 2003-09-30 devnull {
559 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 1, ANY);
560 b2cfc4e2 2003-09-30 devnull }
561 b2cfc4e2 2003-09-30 devnull
562 b2cfc4e2 2003-09-30 devnull extern Reprog*
563 b2cfc4e2 2003-09-30 devnull regcompnl(char *s)
564 b2cfc4e2 2003-09-30 devnull {
565 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 0, ANYNL);
566 b2cfc4e2 2003-09-30 devnull }