Blame


1 b2cfc4e2 2003-09-30 devnull #include "lib9.h"
2 b2cfc4e2 2003-09-30 devnull #include "regexp9.h"
3 b2cfc4e2 2003-09-30 devnull #include "regcomp.h"
4 b2cfc4e2 2003-09-30 devnull
5 b2cfc4e2 2003-09-30 devnull #define TRUE 1
6 b2cfc4e2 2003-09-30 devnull #define FALSE 0
7 b2cfc4e2 2003-09-30 devnull
8 b2cfc4e2 2003-09-30 devnull /*
9 b2cfc4e2 2003-09-30 devnull * Parser Information
10 b2cfc4e2 2003-09-30 devnull */
11 b2cfc4e2 2003-09-30 devnull typedef
12 b2cfc4e2 2003-09-30 devnull struct Node
13 b2cfc4e2 2003-09-30 devnull {
14 b2cfc4e2 2003-09-30 devnull Reinst* first;
15 b2cfc4e2 2003-09-30 devnull Reinst* last;
16 b2cfc4e2 2003-09-30 devnull }Node;
17 3d31240b 2013-07-02 0intro
18 3d31240b 2013-07-02 0intro /* max rune ranges per character class is nelem(classp->spans)/2 */
19 3d31240b 2013-07-02 0intro #define NCCRUNE nelem(classp->spans)
20 b2cfc4e2 2003-09-30 devnull
21 b2cfc4e2 2003-09-30 devnull #define NSTACK 20
22 b2cfc4e2 2003-09-30 devnull static Node andstack[NSTACK];
23 b2cfc4e2 2003-09-30 devnull static Node *andp;
24 b2cfc4e2 2003-09-30 devnull static int atorstack[NSTACK];
25 b2cfc4e2 2003-09-30 devnull static int* atorp;
26 b2cfc4e2 2003-09-30 devnull static int cursubid; /* id of current subexpression */
27 b2cfc4e2 2003-09-30 devnull static int subidstack[NSTACK]; /* parallel to atorstack */
28 b2cfc4e2 2003-09-30 devnull static int* subidp;
29 b2cfc4e2 2003-09-30 devnull static int lastwasand; /* Last token was operand */
30 b2cfc4e2 2003-09-30 devnull static int nbra;
31 b2cfc4e2 2003-09-30 devnull static char* exprp; /* pointer to next character in source expression */
32 b2cfc4e2 2003-09-30 devnull static int lexdone;
33 b2cfc4e2 2003-09-30 devnull static int nclass;
34 b2cfc4e2 2003-09-30 devnull static Reclass*classp;
35 b2cfc4e2 2003-09-30 devnull static Reinst* freep;
36 b2cfc4e2 2003-09-30 devnull static int errors;
37 b2cfc4e2 2003-09-30 devnull static Rune yyrune; /* last lex'd rune */
38 b2cfc4e2 2003-09-30 devnull static Reclass*yyclassp; /* last lex'd class */
39 b2cfc4e2 2003-09-30 devnull
40 b2cfc4e2 2003-09-30 devnull /* predeclared crap */
41 b2cfc4e2 2003-09-30 devnull static void operator(int);
42 b2cfc4e2 2003-09-30 devnull static void pushand(Reinst*, Reinst*);
43 b2cfc4e2 2003-09-30 devnull static void pushator(int);
44 b2cfc4e2 2003-09-30 devnull static void evaluntil(int);
45 b2cfc4e2 2003-09-30 devnull static int bldcclass(void);
46 b2cfc4e2 2003-09-30 devnull
47 b2cfc4e2 2003-09-30 devnull static jmp_buf regkaboom;
48 b2cfc4e2 2003-09-30 devnull
49 b2cfc4e2 2003-09-30 devnull static void
50 b2cfc4e2 2003-09-30 devnull rcerror(char *s)
51 b2cfc4e2 2003-09-30 devnull {
52 b2cfc4e2 2003-09-30 devnull errors++;
53 b2cfc4e2 2003-09-30 devnull regerror(s);
54 b2cfc4e2 2003-09-30 devnull longjmp(regkaboom, 1);
55 b2cfc4e2 2003-09-30 devnull }
56 b2cfc4e2 2003-09-30 devnull
57 b2cfc4e2 2003-09-30 devnull static Reinst*
58 b2cfc4e2 2003-09-30 devnull newinst(int t)
59 b2cfc4e2 2003-09-30 devnull {
60 b2cfc4e2 2003-09-30 devnull freep->type = t;
61 b2cfc4e2 2003-09-30 devnull freep->u2.left = 0;
62 b2cfc4e2 2003-09-30 devnull freep->u1.right = 0;
63 b2cfc4e2 2003-09-30 devnull return freep++;
64 b2cfc4e2 2003-09-30 devnull }
65 b2cfc4e2 2003-09-30 devnull
66 b2cfc4e2 2003-09-30 devnull static void
67 b2cfc4e2 2003-09-30 devnull operand(int t)
68 b2cfc4e2 2003-09-30 devnull {
69 b2cfc4e2 2003-09-30 devnull Reinst *i;
70 b2cfc4e2 2003-09-30 devnull
71 b2cfc4e2 2003-09-30 devnull if(lastwasand)
72 b2cfc4e2 2003-09-30 devnull operator(CAT); /* catenate is implicit */
73 b2cfc4e2 2003-09-30 devnull i = newinst(t);
74 b2cfc4e2 2003-09-30 devnull
75 b2cfc4e2 2003-09-30 devnull if(t == CCLASS || t == NCCLASS)
76 b2cfc4e2 2003-09-30 devnull i->u1.cp = yyclassp;
77 b2cfc4e2 2003-09-30 devnull if(t == RUNE)
78 b2cfc4e2 2003-09-30 devnull i->u1.r = yyrune;
79 b2cfc4e2 2003-09-30 devnull
80 b2cfc4e2 2003-09-30 devnull pushand(i, i);
81 b2cfc4e2 2003-09-30 devnull lastwasand = TRUE;
82 b2cfc4e2 2003-09-30 devnull }
83 b2cfc4e2 2003-09-30 devnull
84 b2cfc4e2 2003-09-30 devnull static void
85 b2cfc4e2 2003-09-30 devnull operator(int t)
86 b2cfc4e2 2003-09-30 devnull {
87 b2cfc4e2 2003-09-30 devnull if(t==RBRA && --nbra<0)
88 b2cfc4e2 2003-09-30 devnull rcerror("unmatched right paren");
89 b2cfc4e2 2003-09-30 devnull if(t==LBRA){
90 b2cfc4e2 2003-09-30 devnull if(++cursubid >= NSUBEXP)
91 b2cfc4e2 2003-09-30 devnull rcerror ("too many subexpressions");
92 b2cfc4e2 2003-09-30 devnull nbra++;
93 b2cfc4e2 2003-09-30 devnull if(lastwasand)
94 b2cfc4e2 2003-09-30 devnull operator(CAT);
95 b2cfc4e2 2003-09-30 devnull } else
96 b2cfc4e2 2003-09-30 devnull evaluntil(t);
97 b2cfc4e2 2003-09-30 devnull if(t != RBRA)
98 b2cfc4e2 2003-09-30 devnull pushator(t);
99 b2cfc4e2 2003-09-30 devnull lastwasand = FALSE;
100 b2cfc4e2 2003-09-30 devnull if(t==STAR || t==QUEST || t==PLUS || t==RBRA)
101 b2cfc4e2 2003-09-30 devnull lastwasand = TRUE; /* these look like operands */
102 b2cfc4e2 2003-09-30 devnull }
103 b2cfc4e2 2003-09-30 devnull
104 b2cfc4e2 2003-09-30 devnull static void
105 b2cfc4e2 2003-09-30 devnull regerr2(char *s, int c)
106 b2cfc4e2 2003-09-30 devnull {
107 b2cfc4e2 2003-09-30 devnull char buf[100];
108 b2cfc4e2 2003-09-30 devnull char *cp = buf;
109 b2cfc4e2 2003-09-30 devnull while(*s)
110 b2cfc4e2 2003-09-30 devnull *cp++ = *s++;
111 b2cfc4e2 2003-09-30 devnull *cp++ = c;
112 fa325e9b 2020-01-10 cross *cp = '\0';
113 b2cfc4e2 2003-09-30 devnull rcerror(buf);
114 b2cfc4e2 2003-09-30 devnull }
115 b2cfc4e2 2003-09-30 devnull
116 b2cfc4e2 2003-09-30 devnull static void
117 b2cfc4e2 2003-09-30 devnull cant(char *s)
118 b2cfc4e2 2003-09-30 devnull {
119 b2cfc4e2 2003-09-30 devnull char buf[100];
120 b2cfc4e2 2003-09-30 devnull strcpy(buf, "can't happen: ");
121 b2cfc4e2 2003-09-30 devnull strcat(buf, s);
122 b2cfc4e2 2003-09-30 devnull rcerror(buf);
123 b2cfc4e2 2003-09-30 devnull }
124 b2cfc4e2 2003-09-30 devnull
125 b2cfc4e2 2003-09-30 devnull static void
126 b2cfc4e2 2003-09-30 devnull pushand(Reinst *f, Reinst *l)
127 b2cfc4e2 2003-09-30 devnull {
128 b2cfc4e2 2003-09-30 devnull if(andp >= &andstack[NSTACK])
129 b2cfc4e2 2003-09-30 devnull cant("operand stack overflow");
130 b2cfc4e2 2003-09-30 devnull andp->first = f;
131 b2cfc4e2 2003-09-30 devnull andp->last = l;
132 b2cfc4e2 2003-09-30 devnull andp++;
133 b2cfc4e2 2003-09-30 devnull }
134 b2cfc4e2 2003-09-30 devnull
135 b2cfc4e2 2003-09-30 devnull static void
136 b2cfc4e2 2003-09-30 devnull pushator(int t)
137 b2cfc4e2 2003-09-30 devnull {
138 b2cfc4e2 2003-09-30 devnull if(atorp >= &atorstack[NSTACK])
139 b2cfc4e2 2003-09-30 devnull cant("operator stack overflow");
140 b2cfc4e2 2003-09-30 devnull *atorp++ = t;
141 b2cfc4e2 2003-09-30 devnull *subidp++ = cursubid;
142 b2cfc4e2 2003-09-30 devnull }
143 b2cfc4e2 2003-09-30 devnull
144 b2cfc4e2 2003-09-30 devnull static Node*
145 b2cfc4e2 2003-09-30 devnull popand(int op)
146 b2cfc4e2 2003-09-30 devnull {
147 b2cfc4e2 2003-09-30 devnull Reinst *inst;
148 b2cfc4e2 2003-09-30 devnull
149 b2cfc4e2 2003-09-30 devnull if(andp <= &andstack[0]){
150 b2cfc4e2 2003-09-30 devnull regerr2("missing operand for ", op);
151 b2cfc4e2 2003-09-30 devnull inst = newinst(NOP);
152 b2cfc4e2 2003-09-30 devnull pushand(inst,inst);
153 b2cfc4e2 2003-09-30 devnull }
154 b2cfc4e2 2003-09-30 devnull return --andp;
155 b2cfc4e2 2003-09-30 devnull }
156 b2cfc4e2 2003-09-30 devnull
157 b2cfc4e2 2003-09-30 devnull static int
158 b2cfc4e2 2003-09-30 devnull popator(void)
159 b2cfc4e2 2003-09-30 devnull {
160 b2cfc4e2 2003-09-30 devnull if(atorp <= &atorstack[0])
161 b2cfc4e2 2003-09-30 devnull cant("operator stack underflow");
162 b2cfc4e2 2003-09-30 devnull --subidp;
163 b2cfc4e2 2003-09-30 devnull return *--atorp;
164 b2cfc4e2 2003-09-30 devnull }
165 b2cfc4e2 2003-09-30 devnull
166 b2cfc4e2 2003-09-30 devnull static void
167 b2cfc4e2 2003-09-30 devnull evaluntil(int pri)
168 b2cfc4e2 2003-09-30 devnull {
169 b2cfc4e2 2003-09-30 devnull Node *op1, *op2;
170 b2cfc4e2 2003-09-30 devnull Reinst *inst1, *inst2;
171 b2cfc4e2 2003-09-30 devnull
172 b2cfc4e2 2003-09-30 devnull while(pri==RBRA || atorp[-1]>=pri){
173 b2cfc4e2 2003-09-30 devnull switch(popator()){
174 b2cfc4e2 2003-09-30 devnull default:
175 b2cfc4e2 2003-09-30 devnull rcerror("unknown operator in evaluntil");
176 b2cfc4e2 2003-09-30 devnull break;
177 b2cfc4e2 2003-09-30 devnull case LBRA: /* must have been RBRA */
178 b2cfc4e2 2003-09-30 devnull op1 = popand('(');
179 b2cfc4e2 2003-09-30 devnull inst2 = newinst(RBRA);
180 b2cfc4e2 2003-09-30 devnull inst2->u1.subid = *subidp;
181 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = inst2;
182 b2cfc4e2 2003-09-30 devnull inst1 = newinst(LBRA);
183 b2cfc4e2 2003-09-30 devnull inst1->u1.subid = *subidp;
184 b2cfc4e2 2003-09-30 devnull inst1->u2.next = op1->first;
185 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
186 b2cfc4e2 2003-09-30 devnull return;
187 b2cfc4e2 2003-09-30 devnull case OR:
188 b2cfc4e2 2003-09-30 devnull op2 = popand('|');
189 b2cfc4e2 2003-09-30 devnull op1 = popand('|');
190 b2cfc4e2 2003-09-30 devnull inst2 = newinst(NOP);
191 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst2;
192 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = inst2;
193 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
194 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op1->first;
195 b2cfc4e2 2003-09-30 devnull inst1->u2.left = op2->first;
196 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
197 b2cfc4e2 2003-09-30 devnull break;
198 b2cfc4e2 2003-09-30 devnull case CAT:
199 b2cfc4e2 2003-09-30 devnull op2 = popand(0);
200 b2cfc4e2 2003-09-30 devnull op1 = popand(0);
201 b2cfc4e2 2003-09-30 devnull op1->last->u2.next = op2->first;
202 b2cfc4e2 2003-09-30 devnull pushand(op1->first, op2->last);
203 b2cfc4e2 2003-09-30 devnull break;
204 b2cfc4e2 2003-09-30 devnull case STAR:
205 b2cfc4e2 2003-09-30 devnull op2 = popand('*');
206 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
207 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst1;
208 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
209 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst1);
210 b2cfc4e2 2003-09-30 devnull break;
211 b2cfc4e2 2003-09-30 devnull case PLUS:
212 b2cfc4e2 2003-09-30 devnull op2 = popand('+');
213 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
214 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst1;
215 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
216 b2cfc4e2 2003-09-30 devnull pushand(op2->first, inst1);
217 b2cfc4e2 2003-09-30 devnull break;
218 b2cfc4e2 2003-09-30 devnull case QUEST:
219 b2cfc4e2 2003-09-30 devnull op2 = popand('?');
220 b2cfc4e2 2003-09-30 devnull inst1 = newinst(OR);
221 b2cfc4e2 2003-09-30 devnull inst2 = newinst(NOP);
222 b2cfc4e2 2003-09-30 devnull inst1->u2.left = inst2;
223 b2cfc4e2 2003-09-30 devnull inst1->u1.right = op2->first;
224 b2cfc4e2 2003-09-30 devnull op2->last->u2.next = inst2;
225 b2cfc4e2 2003-09-30 devnull pushand(inst1, inst2);
226 b2cfc4e2 2003-09-30 devnull break;
227 b2cfc4e2 2003-09-30 devnull }
228 b2cfc4e2 2003-09-30 devnull }
229 b2cfc4e2 2003-09-30 devnull }
230 b2cfc4e2 2003-09-30 devnull
231 b2cfc4e2 2003-09-30 devnull static Reprog*
232 b2cfc4e2 2003-09-30 devnull optimize(Reprog *pp)
233 b2cfc4e2 2003-09-30 devnull {
234 b2cfc4e2 2003-09-30 devnull Reinst *inst, *target;
235 b2cfc4e2 2003-09-30 devnull int size;
236 b2cfc4e2 2003-09-30 devnull Reprog *npp;
237 b2cfc4e2 2003-09-30 devnull Reclass *cl;
238 2d82ef9d 2016-11-02 rsc ptrdiff_t diff;
239 b2cfc4e2 2003-09-30 devnull
240 b2cfc4e2 2003-09-30 devnull /*
241 b2cfc4e2 2003-09-30 devnull * get rid of NOOP chains
242 b2cfc4e2 2003-09-30 devnull */
243 b2cfc4e2 2003-09-30 devnull for(inst=pp->firstinst; inst->type!=END; inst++){
244 b2cfc4e2 2003-09-30 devnull target = inst->u2.next;
245 b2cfc4e2 2003-09-30 devnull while(target->type == NOP)
246 b2cfc4e2 2003-09-30 devnull target = target->u2.next;
247 b2cfc4e2 2003-09-30 devnull inst->u2.next = target;
248 b2cfc4e2 2003-09-30 devnull }
249 b2cfc4e2 2003-09-30 devnull
250 b2cfc4e2 2003-09-30 devnull /*
251 b2cfc4e2 2003-09-30 devnull * The original allocation is for an area larger than
252 b2cfc4e2 2003-09-30 devnull * necessary. Reallocate to the actual space used
253 b2cfc4e2 2003-09-30 devnull * and then relocate the code.
254 b2cfc4e2 2003-09-30 devnull */
255 6d08a0f5 2007-12-07 rsc size = sizeof(Reprog) + (freep - pp->firstinst)*sizeof(Reinst);
256 a7511dd4 2007-12-07 rsc npp = realloc(pp, size);
257 6d08a0f5 2007-12-07 rsc if(npp==0 || npp==pp)
258 b2cfc4e2 2003-09-30 devnull return pp;
259 b2cfc4e2 2003-09-30 devnull diff = (char *)npp - (char *)pp;
260 b2cfc4e2 2003-09-30 devnull freep = (Reinst *)((char *)freep + diff);
261 b2cfc4e2 2003-09-30 devnull for(inst=npp->firstinst; inst<freep; inst++){
262 b2cfc4e2 2003-09-30 devnull switch(inst->type){
263 b2cfc4e2 2003-09-30 devnull case OR:
264 b2cfc4e2 2003-09-30 devnull case STAR:
265 b2cfc4e2 2003-09-30 devnull case PLUS:
266 b2cfc4e2 2003-09-30 devnull case QUEST:
267 002d2f26 2008-05-10 rsc inst->u1.right = (void*)((char*)inst->u1.right + diff);
268 b2cfc4e2 2003-09-30 devnull break;
269 b2cfc4e2 2003-09-30 devnull case CCLASS:
270 b2cfc4e2 2003-09-30 devnull case NCCLASS:
271 002d2f26 2008-05-10 rsc inst->u1.right = (void*)((char*)inst->u1.right + diff);
272 b2cfc4e2 2003-09-30 devnull cl = inst->u1.cp;
273 002d2f26 2008-05-10 rsc cl->end = (void*)((char*)cl->end + diff);
274 b2cfc4e2 2003-09-30 devnull break;
275 b2cfc4e2 2003-09-30 devnull }
276 002d2f26 2008-05-10 rsc inst->u2.left = (void*)((char*)inst->u2.left + diff);
277 b2cfc4e2 2003-09-30 devnull }
278 002d2f26 2008-05-10 rsc npp->startinst = (void*)((char*)npp->startinst + diff);
279 b2cfc4e2 2003-09-30 devnull return npp;
280 b2cfc4e2 2003-09-30 devnull }
281 b2cfc4e2 2003-09-30 devnull
282 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
283 b2cfc4e2 2003-09-30 devnull static void
284 b2cfc4e2 2003-09-30 devnull dumpstack(void){
285 b2cfc4e2 2003-09-30 devnull Node *stk;
286 b2cfc4e2 2003-09-30 devnull int *ip;
287 b2cfc4e2 2003-09-30 devnull
288 b2cfc4e2 2003-09-30 devnull print("operators\n");
289 b2cfc4e2 2003-09-30 devnull for(ip=atorstack; ip<atorp; ip++)
290 b2cfc4e2 2003-09-30 devnull print("0%o\n", *ip);
291 b2cfc4e2 2003-09-30 devnull print("operands\n");
292 b2cfc4e2 2003-09-30 devnull for(stk=andstack; stk<andp; stk++)
293 b2cfc4e2 2003-09-30 devnull print("0%o\t0%o\n", stk->first->type, stk->last->type);
294 b2cfc4e2 2003-09-30 devnull }
295 b2cfc4e2 2003-09-30 devnull
296 b2cfc4e2 2003-09-30 devnull static void
297 b2cfc4e2 2003-09-30 devnull dump(Reprog *pp)
298 b2cfc4e2 2003-09-30 devnull {
299 b2cfc4e2 2003-09-30 devnull Reinst *l;
300 b2cfc4e2 2003-09-30 devnull Rune *p;
301 b2cfc4e2 2003-09-30 devnull
302 b2cfc4e2 2003-09-30 devnull l = pp->firstinst;
303 b2cfc4e2 2003-09-30 devnull do{
304 b2cfc4e2 2003-09-30 devnull print("%d:\t0%o\t%d\t%d", l-pp->firstinst, l->type,
305 b2cfc4e2 2003-09-30 devnull l->u2.left-pp->firstinst, l->u1.right-pp->firstinst);
306 b2cfc4e2 2003-09-30 devnull if(l->type == RUNE)
307 62390091 2004-03-05 devnull print("\t%C\n", l->u1.r);
308 b2cfc4e2 2003-09-30 devnull else if(l->type == CCLASS || l->type == NCCLASS){
309 b2cfc4e2 2003-09-30 devnull print("\t[");
310 b2cfc4e2 2003-09-30 devnull if(l->type == NCCLASS)
311 b2cfc4e2 2003-09-30 devnull print("^");
312 62390091 2004-03-05 devnull for(p = l->u1.cp->spans; p < l->u1.cp->end; p += 2)
313 b2cfc4e2 2003-09-30 devnull if(p[0] == p[1])
314 b2cfc4e2 2003-09-30 devnull print("%C", p[0]);
315 b2cfc4e2 2003-09-30 devnull else
316 b2cfc4e2 2003-09-30 devnull print("%C-%C", p[0], p[1]);
317 b2cfc4e2 2003-09-30 devnull print("]\n");
318 b2cfc4e2 2003-09-30 devnull } else
319 b2cfc4e2 2003-09-30 devnull print("\n");
320 b2cfc4e2 2003-09-30 devnull }while(l++->type);
321 b2cfc4e2 2003-09-30 devnull }
322 b2cfc4e2 2003-09-30 devnull #endif
323 b2cfc4e2 2003-09-30 devnull
324 b2cfc4e2 2003-09-30 devnull static Reclass*
325 b2cfc4e2 2003-09-30 devnull newclass(void)
326 b2cfc4e2 2003-09-30 devnull {
327 310ae033 2017-01-06 rsc if(nclass >= nelem(((Reprog*)0)->class))
328 3d31240b 2013-07-02 0intro rcerror("too many character classes; increase Reprog.class size");
329 b2cfc4e2 2003-09-30 devnull return &(classp[nclass++]);
330 b2cfc4e2 2003-09-30 devnull }
331 b2cfc4e2 2003-09-30 devnull
332 b2cfc4e2 2003-09-30 devnull static int
333 b2cfc4e2 2003-09-30 devnull nextc(Rune *rp)
334 b2cfc4e2 2003-09-30 devnull {
335 b2cfc4e2 2003-09-30 devnull if(lexdone){
336 b2cfc4e2 2003-09-30 devnull *rp = 0;
337 b2cfc4e2 2003-09-30 devnull return 1;
338 b2cfc4e2 2003-09-30 devnull }
339 b2cfc4e2 2003-09-30 devnull exprp += chartorune(rp, exprp);
340 bb0266fe 2005-05-07 devnull if(*rp == '\\'){
341 b2cfc4e2 2003-09-30 devnull exprp += chartorune(rp, exprp);
342 b2cfc4e2 2003-09-30 devnull return 1;
343 b2cfc4e2 2003-09-30 devnull }
344 b2cfc4e2 2003-09-30 devnull if(*rp == 0)
345 b2cfc4e2 2003-09-30 devnull lexdone = 1;
346 b2cfc4e2 2003-09-30 devnull return 0;
347 b2cfc4e2 2003-09-30 devnull }
348 b2cfc4e2 2003-09-30 devnull
349 b2cfc4e2 2003-09-30 devnull static int
350 b2cfc4e2 2003-09-30 devnull lex(int literal, int dot_type)
351 b2cfc4e2 2003-09-30 devnull {
352 b2cfc4e2 2003-09-30 devnull int quoted;
353 b2cfc4e2 2003-09-30 devnull
354 b2cfc4e2 2003-09-30 devnull quoted = nextc(&yyrune);
355 b2cfc4e2 2003-09-30 devnull if(literal || quoted){
356 b2cfc4e2 2003-09-30 devnull if(yyrune == 0)
357 b2cfc4e2 2003-09-30 devnull return END;
358 b2cfc4e2 2003-09-30 devnull return RUNE;
359 b2cfc4e2 2003-09-30 devnull }
360 b2cfc4e2 2003-09-30 devnull
361 b2cfc4e2 2003-09-30 devnull switch(yyrune){
362 b2cfc4e2 2003-09-30 devnull case 0:
363 b2cfc4e2 2003-09-30 devnull return END;
364 bb0266fe 2005-05-07 devnull case '*':
365 b2cfc4e2 2003-09-30 devnull return STAR;
366 bb0266fe 2005-05-07 devnull case '?':
367 b2cfc4e2 2003-09-30 devnull return QUEST;
368 bb0266fe 2005-05-07 devnull case '+':
369 b2cfc4e2 2003-09-30 devnull return PLUS;
370 bb0266fe 2005-05-07 devnull case '|':
371 b2cfc4e2 2003-09-30 devnull return OR;
372 bb0266fe 2005-05-07 devnull case '.':
373 b2cfc4e2 2003-09-30 devnull return dot_type;
374 bb0266fe 2005-05-07 devnull case '(':
375 b2cfc4e2 2003-09-30 devnull return LBRA;
376 bb0266fe 2005-05-07 devnull case ')':
377 b2cfc4e2 2003-09-30 devnull return RBRA;
378 bb0266fe 2005-05-07 devnull case '^':
379 bb0266fe 2005-05-07 devnull return BOL;
380 bb0266fe 2005-05-07 devnull case '$':
381 b2cfc4e2 2003-09-30 devnull return EOL;
382 bb0266fe 2005-05-07 devnull case '[':
383 b2cfc4e2 2003-09-30 devnull return bldcclass();
384 b2cfc4e2 2003-09-30 devnull }
385 b2cfc4e2 2003-09-30 devnull return RUNE;
386 b2cfc4e2 2003-09-30 devnull }
387 b2cfc4e2 2003-09-30 devnull
388 b2cfc4e2 2003-09-30 devnull static int
389 b2cfc4e2 2003-09-30 devnull bldcclass(void)
390 b2cfc4e2 2003-09-30 devnull {
391 b2cfc4e2 2003-09-30 devnull int type;
392 b2cfc4e2 2003-09-30 devnull Rune r[NCCRUNE];
393 b2cfc4e2 2003-09-30 devnull Rune *p, *ep, *np;
394 b2cfc4e2 2003-09-30 devnull Rune rune;
395 b2cfc4e2 2003-09-30 devnull int quoted;
396 b2cfc4e2 2003-09-30 devnull
397 b2cfc4e2 2003-09-30 devnull /* we have already seen the '[' */
398 b2cfc4e2 2003-09-30 devnull type = CCLASS;
399 b2cfc4e2 2003-09-30 devnull yyclassp = newclass();
400 b2cfc4e2 2003-09-30 devnull
401 b2cfc4e2 2003-09-30 devnull /* look ahead for negation */
402 b2cfc4e2 2003-09-30 devnull /* SPECIAL CASE!!! negated classes don't match \n */
403 b2cfc4e2 2003-09-30 devnull ep = r;
404 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
405 bb0266fe 2005-05-07 devnull if(!quoted && rune == '^'){
406 b2cfc4e2 2003-09-30 devnull type = NCCLASS;
407 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
408 bb0266fe 2005-05-07 devnull *ep++ = '\n';
409 bb0266fe 2005-05-07 devnull *ep++ = '\n';
410 b2cfc4e2 2003-09-30 devnull }
411 b2cfc4e2 2003-09-30 devnull
412 b2cfc4e2 2003-09-30 devnull /* parse class into a set of spans */
413 3d31240b 2013-07-02 0intro while(ep < &r[NCCRUNE-1]){
414 b2cfc4e2 2003-09-30 devnull if(rune == 0){
415 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
416 b2cfc4e2 2003-09-30 devnull return 0;
417 b2cfc4e2 2003-09-30 devnull }
418 bb0266fe 2005-05-07 devnull if(!quoted && rune == ']')
419 b2cfc4e2 2003-09-30 devnull break;
420 bb0266fe 2005-05-07 devnull if(!quoted && rune == '-'){
421 b2cfc4e2 2003-09-30 devnull if(ep == r){
422 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
423 b2cfc4e2 2003-09-30 devnull return 0;
424 b2cfc4e2 2003-09-30 devnull }
425 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
426 bb0266fe 2005-05-07 devnull if((!quoted && rune == ']') || rune == 0){
427 b2cfc4e2 2003-09-30 devnull rcerror("malformed '[]'");
428 b2cfc4e2 2003-09-30 devnull return 0;
429 b2cfc4e2 2003-09-30 devnull }
430 b2cfc4e2 2003-09-30 devnull *(ep-1) = rune;
431 b2cfc4e2 2003-09-30 devnull } else {
432 b2cfc4e2 2003-09-30 devnull *ep++ = rune;
433 b2cfc4e2 2003-09-30 devnull *ep++ = rune;
434 b2cfc4e2 2003-09-30 devnull }
435 b2cfc4e2 2003-09-30 devnull quoted = nextc(&rune);
436 b2cfc4e2 2003-09-30 devnull }
437 3d31240b 2013-07-02 0intro if(ep >= &r[NCCRUNE-1]) {
438 3d31240b 2013-07-02 0intro rcerror("char class too large; increase Reclass.spans size");
439 3d31240b 2013-07-02 0intro return 0;
440 3d31240b 2013-07-02 0intro }
441 b2cfc4e2 2003-09-30 devnull
442 b2cfc4e2 2003-09-30 devnull /* sort on span start */
443 b2cfc4e2 2003-09-30 devnull for(p = r; p < ep; p += 2){
444 b2cfc4e2 2003-09-30 devnull for(np = p; np < ep; np += 2)
445 b2cfc4e2 2003-09-30 devnull if(*np < *p){
446 b2cfc4e2 2003-09-30 devnull rune = np[0];
447 b2cfc4e2 2003-09-30 devnull np[0] = p[0];
448 b2cfc4e2 2003-09-30 devnull p[0] = rune;
449 b2cfc4e2 2003-09-30 devnull rune = np[1];
450 b2cfc4e2 2003-09-30 devnull np[1] = p[1];
451 b2cfc4e2 2003-09-30 devnull p[1] = rune;
452 b2cfc4e2 2003-09-30 devnull }
453 b2cfc4e2 2003-09-30 devnull }
454 b2cfc4e2 2003-09-30 devnull
455 b2cfc4e2 2003-09-30 devnull /* merge spans */
456 b2cfc4e2 2003-09-30 devnull np = yyclassp->spans;
457 b2cfc4e2 2003-09-30 devnull p = r;
458 b2cfc4e2 2003-09-30 devnull if(r == ep)
459 b2cfc4e2 2003-09-30 devnull yyclassp->end = np;
460 b2cfc4e2 2003-09-30 devnull else {
461 b2cfc4e2 2003-09-30 devnull np[0] = *p++;
462 b2cfc4e2 2003-09-30 devnull np[1] = *p++;
463 b2cfc4e2 2003-09-30 devnull for(; p < ep; p += 2)
464 3d31240b 2013-07-02 0intro /* overlapping or adjacent ranges? */
465 3d31240b 2013-07-02 0intro if(p[0] <= np[1] + 1){
466 3d31240b 2013-07-02 0intro if(p[1] >= np[1])
467 3d31240b 2013-07-02 0intro np[1] = p[1]; /* coalesce */
468 b2cfc4e2 2003-09-30 devnull } else {
469 b2cfc4e2 2003-09-30 devnull np += 2;
470 b2cfc4e2 2003-09-30 devnull np[0] = p[0];
471 b2cfc4e2 2003-09-30 devnull np[1] = p[1];
472 b2cfc4e2 2003-09-30 devnull }
473 b2cfc4e2 2003-09-30 devnull yyclassp->end = np+2;
474 b2cfc4e2 2003-09-30 devnull }
475 b2cfc4e2 2003-09-30 devnull
476 b2cfc4e2 2003-09-30 devnull return type;
477 b2cfc4e2 2003-09-30 devnull }
478 b2cfc4e2 2003-09-30 devnull
479 b2cfc4e2 2003-09-30 devnull static Reprog*
480 b2cfc4e2 2003-09-30 devnull regcomp1(char *s, int literal, int dot_type)
481 b2cfc4e2 2003-09-30 devnull {
482 b2cfc4e2 2003-09-30 devnull int token;
483 5ba56e91 2005-01-14 devnull Reprog *volatile pp;
484 b2cfc4e2 2003-09-30 devnull
485 b2cfc4e2 2003-09-30 devnull /* get memory for the program */
486 62390091 2004-03-05 devnull pp = malloc(sizeof(Reprog) + 6*sizeof(Reinst)*strlen(s));
487 b2cfc4e2 2003-09-30 devnull if(pp == 0){
488 b2cfc4e2 2003-09-30 devnull regerror("out of memory");
489 b2cfc4e2 2003-09-30 devnull return 0;
490 b2cfc4e2 2003-09-30 devnull }
491 b2cfc4e2 2003-09-30 devnull freep = pp->firstinst;
492 b2cfc4e2 2003-09-30 devnull classp = pp->class;
493 b2cfc4e2 2003-09-30 devnull errors = 0;
494 b2cfc4e2 2003-09-30 devnull
495 b2cfc4e2 2003-09-30 devnull if(setjmp(regkaboom))
496 b2cfc4e2 2003-09-30 devnull goto out;
497 b2cfc4e2 2003-09-30 devnull
498 b2cfc4e2 2003-09-30 devnull /* go compile the sucker */
499 b2cfc4e2 2003-09-30 devnull lexdone = 0;
500 b2cfc4e2 2003-09-30 devnull exprp = s;
501 b2cfc4e2 2003-09-30 devnull nclass = 0;
502 b2cfc4e2 2003-09-30 devnull nbra = 0;
503 b2cfc4e2 2003-09-30 devnull atorp = atorstack;
504 b2cfc4e2 2003-09-30 devnull andp = andstack;
505 b2cfc4e2 2003-09-30 devnull subidp = subidstack;
506 b2cfc4e2 2003-09-30 devnull lastwasand = FALSE;
507 b2cfc4e2 2003-09-30 devnull cursubid = 0;
508 b2cfc4e2 2003-09-30 devnull
509 b2cfc4e2 2003-09-30 devnull /* Start with a low priority operator to prime parser */
510 b2cfc4e2 2003-09-30 devnull pushator(START-1);
511 b2cfc4e2 2003-09-30 devnull while((token = lex(literal, dot_type)) != END){
512 b2cfc4e2 2003-09-30 devnull if((token&0300) == OPERATOR)
513 b2cfc4e2 2003-09-30 devnull operator(token);
514 b2cfc4e2 2003-09-30 devnull else
515 b2cfc4e2 2003-09-30 devnull operand(token);
516 b2cfc4e2 2003-09-30 devnull }
517 b2cfc4e2 2003-09-30 devnull
518 b2cfc4e2 2003-09-30 devnull /* Close with a low priority operator */
519 b2cfc4e2 2003-09-30 devnull evaluntil(START);
520 b2cfc4e2 2003-09-30 devnull
521 b2cfc4e2 2003-09-30 devnull /* Force END */
522 b2cfc4e2 2003-09-30 devnull operand(END);
523 b2cfc4e2 2003-09-30 devnull evaluntil(START);
524 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
525 b2cfc4e2 2003-09-30 devnull dumpstack();
526 b2cfc4e2 2003-09-30 devnull #endif
527 b2cfc4e2 2003-09-30 devnull if(nbra)
528 b2cfc4e2 2003-09-30 devnull rcerror("unmatched left paren");
529 b2cfc4e2 2003-09-30 devnull --andp; /* points to first and only operand */
530 b2cfc4e2 2003-09-30 devnull pp->startinst = andp->first;
531 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
532 b2cfc4e2 2003-09-30 devnull dump(pp);
533 b2cfc4e2 2003-09-30 devnull #endif
534 b2cfc4e2 2003-09-30 devnull pp = optimize(pp);
535 b2cfc4e2 2003-09-30 devnull #ifdef DEBUG
536 b2cfc4e2 2003-09-30 devnull print("start: %d\n", andp->first-pp->firstinst);
537 b2cfc4e2 2003-09-30 devnull dump(pp);
538 b2cfc4e2 2003-09-30 devnull #endif
539 b2cfc4e2 2003-09-30 devnull out:
540 b2cfc4e2 2003-09-30 devnull if(errors){
541 b2cfc4e2 2003-09-30 devnull free(pp);
542 b2cfc4e2 2003-09-30 devnull pp = 0;
543 b2cfc4e2 2003-09-30 devnull }
544 b2cfc4e2 2003-09-30 devnull return pp;
545 b2cfc4e2 2003-09-30 devnull }
546 b2cfc4e2 2003-09-30 devnull
547 b2cfc4e2 2003-09-30 devnull extern Reprog*
548 b2cfc4e2 2003-09-30 devnull regcomp(char *s)
549 b2cfc4e2 2003-09-30 devnull {
550 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 0, ANY);
551 b2cfc4e2 2003-09-30 devnull }
552 b2cfc4e2 2003-09-30 devnull
553 b2cfc4e2 2003-09-30 devnull extern Reprog*
554 b2cfc4e2 2003-09-30 devnull regcomplit(char *s)
555 b2cfc4e2 2003-09-30 devnull {
556 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 1, ANY);
557 b2cfc4e2 2003-09-30 devnull }
558 b2cfc4e2 2003-09-30 devnull
559 b2cfc4e2 2003-09-30 devnull extern Reprog*
560 b2cfc4e2 2003-09-30 devnull regcompnl(char *s)
561 b2cfc4e2 2003-09-30 devnull {
562 b2cfc4e2 2003-09-30 devnull return regcomp1(s, 0, ANYNL);
563 b2cfc4e2 2003-09-30 devnull }