Blame


1 d1f529f4 2005-10-29 devnull #include <u.h>
2 d1f529f4 2005-10-29 devnull #include <libc.h>
3 d1f529f4 2005-10-29 devnull #include <bin.h>
4 d1f529f4 2005-10-29 devnull #include <bio.h>
5 d1f529f4 2005-10-29 devnull #include <regexp.h>
6 b5f65921 2006-02-11 devnull #include "../../../libregexp/regcomp.h"
7 d1f529f4 2005-10-29 devnull #include "dfa.h"
8 d1f529f4 2005-10-29 devnull
9 d1f529f4 2005-10-29 devnull void rdump(Reprog*);
10 d1f529f4 2005-10-29 devnull void dump(Dreprog*);
11 d1f529f4 2005-10-29 devnull
12 d1f529f4 2005-10-29 devnull /*
13 d1f529f4 2005-10-29 devnull * Standard NFA determinization and DFA minimization.
14 d1f529f4 2005-10-29 devnull */
15 d1f529f4 2005-10-29 devnull typedef struct Deter Deter;
16 d1f529f4 2005-10-29 devnull typedef struct Reiset Reiset;
17 d1f529f4 2005-10-29 devnull
18 d1f529f4 2005-10-29 devnull void ddump(Deter*);
19 d1f529f4 2005-10-29 devnull
20 d1f529f4 2005-10-29 devnull /* state of determinization */
21 d1f529f4 2005-10-29 devnull struct Deter
22 d1f529f4 2005-10-29 devnull {
23 d1f529f4 2005-10-29 devnull jmp_buf kaboom; /* jmp on error */
24 d1f529f4 2005-10-29 devnull
25 d1f529f4 2005-10-29 devnull Bin *bin; /* bin for temporary allocations */
26 d1f529f4 2005-10-29 devnull
27 d1f529f4 2005-10-29 devnull Reprog *p; /* program being determinized */
28 d1f529f4 2005-10-29 devnull uint ninst; /* number of instructions in program */
29 d1f529f4 2005-10-29 devnull
30 d1f529f4 2005-10-29 devnull Reiset *alloc; /* chain of all Reisets */
31 d1f529f4 2005-10-29 devnull Reiset **last;
32 d1f529f4 2005-10-29 devnull
33 d1f529f4 2005-10-29 devnull Reiset **hash; /* hash of all Reisets */
34 d1f529f4 2005-10-29 devnull uint nhash;
35 d1f529f4 2005-10-29 devnull
36 d1f529f4 2005-10-29 devnull Reiset *tmp; /* temporaries for walk */
37 d1f529f4 2005-10-29 devnull uchar *bits;
38 d1f529f4 2005-10-29 devnull
39 d1f529f4 2005-10-29 devnull Rune *c; /* ``interesting'' characters */
40 d1f529f4 2005-10-29 devnull uint nc;
41 d1f529f4 2005-10-29 devnull };
42 d1f529f4 2005-10-29 devnull
43 d1f529f4 2005-10-29 devnull /* set of Reinsts: perhaps we should use a bit list instead of the indices? */
44 d1f529f4 2005-10-29 devnull struct Reiset
45 d1f529f4 2005-10-29 devnull {
46 d1f529f4 2005-10-29 devnull uint *inst; /* indices of instructions in set */
47 d1f529f4 2005-10-29 devnull uint ninst; /* size of set */
48 d1f529f4 2005-10-29 devnull
49 d1f529f4 2005-10-29 devnull Reiset *next; /* d.alloc chain */
50 d1f529f4 2005-10-29 devnull Reiset *hash; /* d.hash chain */
51 d1f529f4 2005-10-29 devnull Reiset **delta; /* where to go on each interesting char */
52 d1f529f4 2005-10-29 devnull uint id; /* assigned id during minimization */
53 d1f529f4 2005-10-29 devnull uint isfinal; /* is an accepting (final) state */
54 d1f529f4 2005-10-29 devnull };
55 d1f529f4 2005-10-29 devnull
56 d1f529f4 2005-10-29 devnull static Reiset*
57 d1f529f4 2005-10-29 devnull ralloc(Deter *d, int ninst)
58 d1f529f4 2005-10-29 devnull {
59 d1f529f4 2005-10-29 devnull Reiset *t;
60 d1f529f4 2005-10-29 devnull
61 d1f529f4 2005-10-29 devnull t = binalloc(&d->bin, sizeof(Reiset)+2*d->nc*sizeof(Reiset*)+sizeof(uint)*ninst, 0);
62 d1f529f4 2005-10-29 devnull if(t == nil)
63 d1f529f4 2005-10-29 devnull longjmp(d->kaboom, 1);
64 d1f529f4 2005-10-29 devnull t->delta = (Reiset**)&t[1];
65 d1f529f4 2005-10-29 devnull t->inst = (uint*)&t->delta[2*d->nc];
66 d1f529f4 2005-10-29 devnull return t;
67 d1f529f4 2005-10-29 devnull }
68 d1f529f4 2005-10-29 devnull
69 d1f529f4 2005-10-29 devnull /* find the canonical form a given Reiset */
70 d1f529f4 2005-10-29 devnull static Reiset*
71 d1f529f4 2005-10-29 devnull findreiset(Deter *d, Reiset *s)
72 d1f529f4 2005-10-29 devnull {
73 d1f529f4 2005-10-29 devnull int i, szinst;
74 d1f529f4 2005-10-29 devnull uint h;
75 d1f529f4 2005-10-29 devnull Reiset *t;
76 d1f529f4 2005-10-29 devnull
77 d1f529f4 2005-10-29 devnull h = 0;
78 d1f529f4 2005-10-29 devnull for(i=0; i<s->ninst; i++)
79 d1f529f4 2005-10-29 devnull h = h*1000003 + s->inst[i];
80 d1f529f4 2005-10-29 devnull h %= d->nhash;
81 d1f529f4 2005-10-29 devnull
82 d1f529f4 2005-10-29 devnull szinst = s->ninst*sizeof(s->inst[0]);
83 d1f529f4 2005-10-29 devnull for(t=d->hash[h]; t; t=t->hash)
84 d1f529f4 2005-10-29 devnull if(t->ninst==s->ninst && memcmp(t->inst, s->inst, szinst)==0)
85 d1f529f4 2005-10-29 devnull return t;
86 d1f529f4 2005-10-29 devnull
87 d1f529f4 2005-10-29 devnull t = ralloc(d, s->ninst);
88 d1f529f4 2005-10-29 devnull t->hash = d->hash[h];
89 d1f529f4 2005-10-29 devnull d->hash[h] = t;
90 d1f529f4 2005-10-29 devnull
91 d1f529f4 2005-10-29 devnull *d->last = t;
92 d1f529f4 2005-10-29 devnull d->last = &t->next;
93 d1f529f4 2005-10-29 devnull t->next = 0;
94 d1f529f4 2005-10-29 devnull
95 d1f529f4 2005-10-29 devnull t->ninst = s->ninst;
96 d1f529f4 2005-10-29 devnull memmove(t->inst, s->inst, szinst);
97 d1f529f4 2005-10-29 devnull
98 d1f529f4 2005-10-29 devnull /* delta is filled in later */
99 d1f529f4 2005-10-29 devnull
100 d1f529f4 2005-10-29 devnull return t;
101 d1f529f4 2005-10-29 devnull }
102 d1f529f4 2005-10-29 devnull
103 d1f529f4 2005-10-29 devnull /* convert bits to a real reiset */
104 d1f529f4 2005-10-29 devnull static Reiset*
105 d1f529f4 2005-10-29 devnull bits2reiset(Deter *d, uchar *bits)
106 d1f529f4 2005-10-29 devnull {
107 d1f529f4 2005-10-29 devnull int k;
108 d1f529f4 2005-10-29 devnull Reiset *s;
109 d1f529f4 2005-10-29 devnull
110 d1f529f4 2005-10-29 devnull s = d->tmp;
111 d1f529f4 2005-10-29 devnull s->ninst = 0;
112 d1f529f4 2005-10-29 devnull for(k=0; k<d->ninst; k++)
113 d1f529f4 2005-10-29 devnull if(bits[k])
114 d1f529f4 2005-10-29 devnull s->inst[s->ninst++] = k;
115 d1f529f4 2005-10-29 devnull return findreiset(d, s);
116 d1f529f4 2005-10-29 devnull }
117 d1f529f4 2005-10-29 devnull
118 d1f529f4 2005-10-29 devnull /* add n to state set; if n < k, need to go around again */
119 d1f529f4 2005-10-29 devnull static int
120 d1f529f4 2005-10-29 devnull add(int n, uchar *bits, int k)
121 d1f529f4 2005-10-29 devnull {
122 d1f529f4 2005-10-29 devnull if(bits[n])
123 d1f529f4 2005-10-29 devnull return 0;
124 d1f529f4 2005-10-29 devnull bits[n] = 1;
125 d1f529f4 2005-10-29 devnull return n < k;
126 d1f529f4 2005-10-29 devnull }
127 d1f529f4 2005-10-29 devnull
128 d1f529f4 2005-10-29 devnull /* update bits to follow all the empty (non-character-related) transitions possible */
129 d1f529f4 2005-10-29 devnull static void
130 d1f529f4 2005-10-29 devnull followempty(Deter *d, uchar *bits, int bol, int eol)
131 d1f529f4 2005-10-29 devnull {
132 d1f529f4 2005-10-29 devnull int again, k;
133 d1f529f4 2005-10-29 devnull Reinst *i;
134 d1f529f4 2005-10-29 devnull
135 d1f529f4 2005-10-29 devnull do{
136 d1f529f4 2005-10-29 devnull again = 0;
137 d1f529f4 2005-10-29 devnull for(i=d->p->firstinst, k=0; k < d->ninst; i++, k++){
138 d1f529f4 2005-10-29 devnull if(!bits[k])
139 d1f529f4 2005-10-29 devnull continue;
140 d1f529f4 2005-10-29 devnull switch(i->type){
141 d1f529f4 2005-10-29 devnull case RBRA:
142 d1f529f4 2005-10-29 devnull case LBRA:
143 b5f65921 2006-02-11 devnull again |= add(i->u2.next - d->p->firstinst, bits, k);
144 d1f529f4 2005-10-29 devnull break;
145 d1f529f4 2005-10-29 devnull case OR:
146 b5f65921 2006-02-11 devnull again |= add(i->u2.left - d->p->firstinst, bits, k);
147 b5f65921 2006-02-11 devnull again |= add(i->u1.right - d->p->firstinst, bits, k);
148 d1f529f4 2005-10-29 devnull break;
149 d1f529f4 2005-10-29 devnull case BOL:
150 d1f529f4 2005-10-29 devnull if(bol)
151 b5f65921 2006-02-11 devnull again |= add(i->u2.next - d->p->firstinst, bits, k);
152 d1f529f4 2005-10-29 devnull break;
153 d1f529f4 2005-10-29 devnull case EOL:
154 d1f529f4 2005-10-29 devnull if(eol)
155 b5f65921 2006-02-11 devnull again |= add(i->u2.next - d->p->firstinst, bits, k);
156 d1f529f4 2005-10-29 devnull break;
157 d1f529f4 2005-10-29 devnull }
158 d1f529f4 2005-10-29 devnull }
159 d1f529f4 2005-10-29 devnull }while(again);
160 d1f529f4 2005-10-29 devnull
161 d1f529f4 2005-10-29 devnull /*
162 d1f529f4 2005-10-29 devnull * Clear bits for useless transitions. We could do this during
163 d1f529f4 2005-10-29 devnull * the switch above, but then we have no guarantee of termination
164 d1f529f4 2005-10-29 devnull * if we get a loop in the regexp.
165 d1f529f4 2005-10-29 devnull */
166 d1f529f4 2005-10-29 devnull for(i=d->p->firstinst, k=0; k < d->ninst; i++, k++){
167 d1f529f4 2005-10-29 devnull if(!bits[k])
168 d1f529f4 2005-10-29 devnull continue;
169 d1f529f4 2005-10-29 devnull switch(i->type){
170 d1f529f4 2005-10-29 devnull case RBRA:
171 d1f529f4 2005-10-29 devnull case LBRA:
172 d1f529f4 2005-10-29 devnull case OR:
173 d1f529f4 2005-10-29 devnull case BOL:
174 d1f529f4 2005-10-29 devnull case EOL:
175 d1f529f4 2005-10-29 devnull bits[k] = 0;
176 d1f529f4 2005-10-29 devnull break;
177 d1f529f4 2005-10-29 devnull }
178 d1f529f4 2005-10-29 devnull }
179 d1f529f4 2005-10-29 devnull }
180 d1f529f4 2005-10-29 devnull
181 d1f529f4 2005-10-29 devnull /*
182 d1f529f4 2005-10-29 devnull * Where does s go if it sees rune r?
183 d1f529f4 2005-10-29 devnull * Eol is true if a $ matches the string at the position just after r.
184 d1f529f4 2005-10-29 devnull */
185 d1f529f4 2005-10-29 devnull static Reiset*
186 d1f529f4 2005-10-29 devnull transition(Deter *d, Reiset *s, Rune r, uint eol)
187 d1f529f4 2005-10-29 devnull {
188 d1f529f4 2005-10-29 devnull int k;
189 d1f529f4 2005-10-29 devnull uchar *bits;
190 d1f529f4 2005-10-29 devnull Reinst *i, *inst0;
191 d1f529f4 2005-10-29 devnull Rune *rp, *ep;
192 d1f529f4 2005-10-29 devnull
193 d1f529f4 2005-10-29 devnull bits = d->bits;
194 d1f529f4 2005-10-29 devnull memset(bits, 0, d->ninst);
195 d1f529f4 2005-10-29 devnull
196 d1f529f4 2005-10-29 devnull inst0 = d->p->firstinst;
197 d1f529f4 2005-10-29 devnull for(k=0; k < s->ninst; k++){
198 d1f529f4 2005-10-29 devnull i = inst0 + s->inst[k];
199 d1f529f4 2005-10-29 devnull switch(i->type){
200 d1f529f4 2005-10-29 devnull default:
201 d1f529f4 2005-10-29 devnull werrstr("bad reprog: got type %d", i->type);
202 d1f529f4 2005-10-29 devnull longjmp(d->kaboom, 1);
203 d1f529f4 2005-10-29 devnull case RBRA:
204 d1f529f4 2005-10-29 devnull case LBRA:
205 d1f529f4 2005-10-29 devnull case OR:
206 d1f529f4 2005-10-29 devnull case BOL:
207 d1f529f4 2005-10-29 devnull case EOL:
208 d1f529f4 2005-10-29 devnull werrstr("internal error: got type %d", i->type);
209 d1f529f4 2005-10-29 devnull longjmp(d->kaboom, 1);
210 d1f529f4 2005-10-29 devnull
211 d1f529f4 2005-10-29 devnull case RUNE:
212 b5f65921 2006-02-11 devnull if(r == i->u1.r)
213 b5f65921 2006-02-11 devnull bits[i->u2.next - inst0] = 1;
214 d1f529f4 2005-10-29 devnull break;
215 d1f529f4 2005-10-29 devnull case ANY:
216 d1f529f4 2005-10-29 devnull if(r != L'\n')
217 b5f65921 2006-02-11 devnull bits[i->u2.next - inst0] = 1;
218 d1f529f4 2005-10-29 devnull break;
219 d1f529f4 2005-10-29 devnull case ANYNL:
220 b5f65921 2006-02-11 devnull bits[i->u2.next - inst0] = 1;
221 d1f529f4 2005-10-29 devnull break;
222 d1f529f4 2005-10-29 devnull case NCCLASS:
223 d1f529f4 2005-10-29 devnull if(r == L'\n')
224 d1f529f4 2005-10-29 devnull break;
225 d1f529f4 2005-10-29 devnull /* fall through */
226 d1f529f4 2005-10-29 devnull case CCLASS:
227 b5f65921 2006-02-11 devnull ep = i->u1.cp->end;
228 b5f65921 2006-02-11 devnull for(rp = i->u1.cp->spans; rp < ep; rp += 2)
229 d1f529f4 2005-10-29 devnull if(rp[0] <= r && r <= rp[1])
230 d1f529f4 2005-10-29 devnull break;
231 d1f529f4 2005-10-29 devnull if((rp < ep) ^! (i->type == CCLASS))
232 b5f65921 2006-02-11 devnull bits[i->u2.next - inst0] = 1;
233 d1f529f4 2005-10-29 devnull break;
234 d1f529f4 2005-10-29 devnull case END:
235 d1f529f4 2005-10-29 devnull break;
236 d1f529f4 2005-10-29 devnull }
237 d1f529f4 2005-10-29 devnull }
238 d1f529f4 2005-10-29 devnull
239 d1f529f4 2005-10-29 devnull followempty(d, bits, r=='\n', eol);
240 d1f529f4 2005-10-29 devnull return bits2reiset(d, bits);
241 d1f529f4 2005-10-29 devnull }
242 d1f529f4 2005-10-29 devnull
243 d1f529f4 2005-10-29 devnull static int
244 d1f529f4 2005-10-29 devnull countinst(Reprog *pp)
245 d1f529f4 2005-10-29 devnull {
246 d1f529f4 2005-10-29 devnull int n;
247 d1f529f4 2005-10-29 devnull Reinst *l;
248 d1f529f4 2005-10-29 devnull
249 d1f529f4 2005-10-29 devnull n = 0;
250 d1f529f4 2005-10-29 devnull l = pp->firstinst;
251 d1f529f4 2005-10-29 devnull while(l++->type)
252 d1f529f4 2005-10-29 devnull n++;
253 d1f529f4 2005-10-29 devnull return n;
254 d1f529f4 2005-10-29 devnull }
255 d1f529f4 2005-10-29 devnull
256 d1f529f4 2005-10-29 devnull static void
257 d1f529f4 2005-10-29 devnull set(Deter *d, u32int **tab, Rune r)
258 d1f529f4 2005-10-29 devnull {
259 d1f529f4 2005-10-29 devnull u32int *u;
260 d1f529f4 2005-10-29 devnull
261 d1f529f4 2005-10-29 devnull if((u = tab[r/4096]) == nil){
262 d1f529f4 2005-10-29 devnull u = binalloc(&d->bin, 4096/8, 1);
263 d1f529f4 2005-10-29 devnull if(u == nil)
264 d1f529f4 2005-10-29 devnull longjmp(d->kaboom, 1);
265 d1f529f4 2005-10-29 devnull tab[r/4096] = u;
266 d1f529f4 2005-10-29 devnull }
267 d1f529f4 2005-10-29 devnull u[(r%4096)/32] |= 1<<(r%32);
268 d1f529f4 2005-10-29 devnull }
269 d1f529f4 2005-10-29 devnull
270 d1f529f4 2005-10-29 devnull /*
271 fa325e9b 2020-01-10 cross * Compute the list of important characters.
272 d1f529f4 2005-10-29 devnull * Other characters behave like the ones that surround them.
273 d1f529f4 2005-10-29 devnull */
274 d1f529f4 2005-10-29 devnull static void
275 d1f529f4 2005-10-29 devnull findchars(Deter *d, Reprog *p)
276 d1f529f4 2005-10-29 devnull {
277 d1f529f4 2005-10-29 devnull u32int *tab[65536/4096], *u, x;
278 d1f529f4 2005-10-29 devnull Reinst *i;
279 d1f529f4 2005-10-29 devnull Rune *rp, *ep;
280 d1f529f4 2005-10-29 devnull int k, m, n, a;
281 d1f529f4 2005-10-29 devnull
282 d1f529f4 2005-10-29 devnull memset(tab, 0, sizeof tab);
283 d1f529f4 2005-10-29 devnull set(d, tab, 0);
284 d1f529f4 2005-10-29 devnull set(d, tab, 0xFFFF);
285 d1f529f4 2005-10-29 devnull for(i=p->firstinst; i->type; i++){
286 d1f529f4 2005-10-29 devnull switch(i->type){
287 d1f529f4 2005-10-29 devnull case ANY:
288 d1f529f4 2005-10-29 devnull set(d, tab, L'\n'-1);
289 d1f529f4 2005-10-29 devnull set(d, tab, L'\n');
290 d1f529f4 2005-10-29 devnull set(d, tab, L'\n'+1);
291 d1f529f4 2005-10-29 devnull break;
292 d1f529f4 2005-10-29 devnull case RUNE:
293 b5f65921 2006-02-11 devnull set(d, tab, i->u1.r-1);
294 b5f65921 2006-02-11 devnull set(d, tab, i->u1.r);
295 b5f65921 2006-02-11 devnull set(d, tab, i->u1.r+1);
296 d1f529f4 2005-10-29 devnull break;
297 d1f529f4 2005-10-29 devnull case NCCLASS:
298 d1f529f4 2005-10-29 devnull set(d, tab, L'\n'-1);
299 d1f529f4 2005-10-29 devnull set(d, tab, L'\n');
300 d1f529f4 2005-10-29 devnull set(d, tab, L'\n'+1);
301 d1f529f4 2005-10-29 devnull /* fall through */
302 d1f529f4 2005-10-29 devnull case CCLASS:
303 b5f65921 2006-02-11 devnull ep = i->u1.cp->end;
304 b5f65921 2006-02-11 devnull for(rp = i->u1.cp->spans; rp < ep; rp += 2){
305 d1f529f4 2005-10-29 devnull set(d, tab, rp[0]-1);
306 d1f529f4 2005-10-29 devnull set(d, tab, rp[0]);
307 d1f529f4 2005-10-29 devnull set(d, tab, rp[1]);
308 d1f529f4 2005-10-29 devnull set(d, tab, rp[1]+1);
309 d1f529f4 2005-10-29 devnull }
310 d1f529f4 2005-10-29 devnull break;
311 d1f529f4 2005-10-29 devnull }
312 d1f529f4 2005-10-29 devnull }
313 d1f529f4 2005-10-29 devnull
314 d1f529f4 2005-10-29 devnull n = 0;
315 d1f529f4 2005-10-29 devnull for(k=0; k<nelem(tab); k++){
316 d1f529f4 2005-10-29 devnull if((u = tab[k]) == nil)
317 d1f529f4 2005-10-29 devnull continue;
318 d1f529f4 2005-10-29 devnull for(m=0; m<4096/32; m++){
319 d1f529f4 2005-10-29 devnull if((x = u[m]) == 0)
320 d1f529f4 2005-10-29 devnull continue;
321 d1f529f4 2005-10-29 devnull for(a=0; a<32; a++)
322 d1f529f4 2005-10-29 devnull if(x&(1<<a))
323 d1f529f4 2005-10-29 devnull n++;
324 d1f529f4 2005-10-29 devnull }
325 d1f529f4 2005-10-29 devnull }
326 d1f529f4 2005-10-29 devnull
327 d1f529f4 2005-10-29 devnull d->c = binalloc(&d->bin, (n+1)*sizeof(Rune), 0);
328 d1f529f4 2005-10-29 devnull if(d->c == 0)
329 d1f529f4 2005-10-29 devnull longjmp(d->kaboom, 1);
330 d1f529f4 2005-10-29 devnull d->nc = n;
331 d1f529f4 2005-10-29 devnull
332 d1f529f4 2005-10-29 devnull n = 0;
333 d1f529f4 2005-10-29 devnull for(k=0; k<nelem(tab); k++){
334 d1f529f4 2005-10-29 devnull if((u = tab[k]) == nil)
335 d1f529f4 2005-10-29 devnull continue;
336 d1f529f4 2005-10-29 devnull for(m=0; m<4096/32; m++){
337 d1f529f4 2005-10-29 devnull if((x = u[m]) == 0)
338 d1f529f4 2005-10-29 devnull continue;
339 d1f529f4 2005-10-29 devnull for(a=0; a<32; a++)
340 d1f529f4 2005-10-29 devnull if(x&(1<<a))
341 d1f529f4 2005-10-29 devnull d->c[n++] = k*4096+m*32+a;
342 d1f529f4 2005-10-29 devnull }
343 d1f529f4 2005-10-29 devnull }
344 d1f529f4 2005-10-29 devnull
345 d1f529f4 2005-10-29 devnull d->c[n] = 0;
346 d1f529f4 2005-10-29 devnull if(n != d->nc)
347 d1f529f4 2005-10-29 devnull abort();
348 d1f529f4 2005-10-29 devnull }
349 d1f529f4 2005-10-29 devnull
350 d1f529f4 2005-10-29 devnull /*
351 d1f529f4 2005-10-29 devnull * convert the Deter and Reisets into a Dreprog.
352 d1f529f4 2005-10-29 devnull * if dp and c are nil, just return the count of Drecases needed.
353 d1f529f4 2005-10-29 devnull */
354 d1f529f4 2005-10-29 devnull static int
355 d1f529f4 2005-10-29 devnull buildprog(Deter *d, Reiset **id2set, int nid, Dreprog *dp, Drecase *c)
356 d1f529f4 2005-10-29 devnull {
357 d1f529f4 2005-10-29 devnull int i, j, id, n, nn;
358 d1f529f4 2005-10-29 devnull Dreinst *di;
359 d1f529f4 2005-10-29 devnull Reiset *s;
360 d1f529f4 2005-10-29 devnull
361 d1f529f4 2005-10-29 devnull nn = 0;
362 d1f529f4 2005-10-29 devnull di = 0;
363 d1f529f4 2005-10-29 devnull for(i=0; i<nid; i++){
364 d1f529f4 2005-10-29 devnull s = id2set[i];
365 d1f529f4 2005-10-29 devnull if(c){
366 d1f529f4 2005-10-29 devnull di = &dp->inst[i];
367 d1f529f4 2005-10-29 devnull di->isfinal = s->isfinal;
368 d1f529f4 2005-10-29 devnull }
369 d1f529f4 2005-10-29 devnull n = 0;
370 d1f529f4 2005-10-29 devnull id = -1;
371 d1f529f4 2005-10-29 devnull for(j=0; j<2*d->nc; j++){
372 d1f529f4 2005-10-29 devnull if(s->delta[j]->id != id){
373 d1f529f4 2005-10-29 devnull id = s->delta[j]->id;
374 d1f529f4 2005-10-29 devnull if(c){
375 d1f529f4 2005-10-29 devnull c[n].start = ((j/d->nc)<<16) | d->c[j%d->nc];
376 d1f529f4 2005-10-29 devnull c[n].next = &dp->inst[id];
377 d1f529f4 2005-10-29 devnull }
378 d1f529f4 2005-10-29 devnull n++;
379 d1f529f4 2005-10-29 devnull }
380 d1f529f4 2005-10-29 devnull }
381 d1f529f4 2005-10-29 devnull if(c){
382 d1f529f4 2005-10-29 devnull if(n == 1 && c[0].next == di)
383 d1f529f4 2005-10-29 devnull di->isloop = 1;
384 d1f529f4 2005-10-29 devnull di->c = c;
385 d1f529f4 2005-10-29 devnull di->nc = n;
386 d1f529f4 2005-10-29 devnull c += n;
387 d1f529f4 2005-10-29 devnull }
388 d1f529f4 2005-10-29 devnull nn += n;
389 d1f529f4 2005-10-29 devnull }
390 d1f529f4 2005-10-29 devnull return nn;
391 d1f529f4 2005-10-29 devnull }
392 d1f529f4 2005-10-29 devnull
393 d1f529f4 2005-10-29 devnull Dreprog*
394 d1f529f4 2005-10-29 devnull dregcvt(Reprog *p)
395 d1f529f4 2005-10-29 devnull {
396 d1f529f4 2005-10-29 devnull uchar *bits;
397 d1f529f4 2005-10-29 devnull uint again, n, nid, id;
398 d1f529f4 2005-10-29 devnull Deter d;
399 d1f529f4 2005-10-29 devnull Reiset **id2set, *s, *t, *start[4];
400 d1f529f4 2005-10-29 devnull Dreprog *dp;
401 d1f529f4 2005-10-29 devnull Drecase *c;
402 d1f529f4 2005-10-29 devnull
403 d1f529f4 2005-10-29 devnull memset(&d, 0, sizeof d);
404 d1f529f4 2005-10-29 devnull
405 d1f529f4 2005-10-29 devnull if(setjmp(d.kaboom)){
406 d1f529f4 2005-10-29 devnull binfree(&d.bin);
407 d1f529f4 2005-10-29 devnull return nil;
408 d1f529f4 2005-10-29 devnull }
409 d1f529f4 2005-10-29 devnull
410 d1f529f4 2005-10-29 devnull d.p = p;
411 d1f529f4 2005-10-29 devnull d.ninst = countinst(p);
412 d1f529f4 2005-10-29 devnull
413 d1f529f4 2005-10-29 devnull d.last = &d.alloc;
414 d1f529f4 2005-10-29 devnull
415 d1f529f4 2005-10-29 devnull n = d.ninst;
416 d1f529f4 2005-10-29 devnull /* round up to power of two; this loop is the least of our efficiency problems */
417 d1f529f4 2005-10-29 devnull while(n&(n-1))
418 d1f529f4 2005-10-29 devnull n++;
419 d1f529f4 2005-10-29 devnull d.nhash = n;
420 d1f529f4 2005-10-29 devnull d.hash = binalloc(&d.bin, d.nhash*sizeof(Reinst*), 1);
421 d1f529f4 2005-10-29 devnull
422 d1f529f4 2005-10-29 devnull /* get list of important runes */
423 d1f529f4 2005-10-29 devnull findchars(&d, p);
424 d1f529f4 2005-10-29 devnull
425 d1f529f4 2005-10-29 devnull #ifdef DUMP
426 d1f529f4 2005-10-29 devnull print("relevant chars are: «%S»\n", d.c+1);
427 d1f529f4 2005-10-29 devnull #endif
428 d1f529f4 2005-10-29 devnull
429 d1f529f4 2005-10-29 devnull d.bits = bits = binalloc(&d.bin, d.ninst, 0);
430 d1f529f4 2005-10-29 devnull d.tmp = ralloc(&d, d.ninst);
431 d1f529f4 2005-10-29 devnull
432 d1f529f4 2005-10-29 devnull /*
433 d1f529f4 2005-10-29 devnull * Convert to DFA
434 d1f529f4 2005-10-29 devnull */
435 d1f529f4 2005-10-29 devnull
436 d1f529f4 2005-10-29 devnull /* 4 start states, depending on initial bol, eol */
437 d1f529f4 2005-10-29 devnull for(n=0; n<4; n++){
438 d1f529f4 2005-10-29 devnull memset(bits, 0, d.ninst);
439 d1f529f4 2005-10-29 devnull bits[p->startinst - p->firstinst] = 1;
440 d1f529f4 2005-10-29 devnull followempty(&d, bits, n&1, n&2);
441 d1f529f4 2005-10-29 devnull start[n] = bits2reiset(&d, bits);
442 d1f529f4 2005-10-29 devnull }
443 d1f529f4 2005-10-29 devnull
444 d1f529f4 2005-10-29 devnull /* explore the reiset space */
445 d1f529f4 2005-10-29 devnull for(s=d.alloc; s; s=s->next)
446 d1f529f4 2005-10-29 devnull for(n=0; n<2*d.nc; n++)
447 d1f529f4 2005-10-29 devnull s->delta[n] = transition(&d, s, d.c[n%d.nc], n/d.nc);
448 d1f529f4 2005-10-29 devnull
449 d1f529f4 2005-10-29 devnull #ifdef DUMP
450 d1f529f4 2005-10-29 devnull nid = 0;
451 d1f529f4 2005-10-29 devnull for(s=d.alloc; s; s=s->next)
452 d1f529f4 2005-10-29 devnull s->id = nid++;
453 d1f529f4 2005-10-29 devnull ddump(&d);
454 d1f529f4 2005-10-29 devnull #endif
455 d1f529f4 2005-10-29 devnull
456 d1f529f4 2005-10-29 devnull /*
457 d1f529f4 2005-10-29 devnull * Minimize.
458 d1f529f4 2005-10-29 devnull */
459 d1f529f4 2005-10-29 devnull
460 d1f529f4 2005-10-29 devnull /* first class division is final or not */
461 d1f529f4 2005-10-29 devnull for(s=d.alloc; s; s=s->next){
462 d1f529f4 2005-10-29 devnull s->isfinal = 0;
463 d1f529f4 2005-10-29 devnull for(n=0; n<s->ninst; n++)
464 d1f529f4 2005-10-29 devnull if(p->firstinst[s->inst[n]].type == END)
465 d1f529f4 2005-10-29 devnull s->isfinal = 1;
466 d1f529f4 2005-10-29 devnull s->id = s->isfinal;
467 d1f529f4 2005-10-29 devnull }
468 d1f529f4 2005-10-29 devnull
469 d1f529f4 2005-10-29 devnull /* divide states with different transition tables in id space */
470 d1f529f4 2005-10-29 devnull nid = 2;
471 d1f529f4 2005-10-29 devnull do{
472 d1f529f4 2005-10-29 devnull again = 0;
473 d1f529f4 2005-10-29 devnull for(s=d.alloc; s; s=s->next){
474 d1f529f4 2005-10-29 devnull id = -1;
475 d1f529f4 2005-10-29 devnull for(t=s->next; t; t=t->next){
476 d1f529f4 2005-10-29 devnull if(s->id != t->id)
477 d1f529f4 2005-10-29 devnull continue;
478 d1f529f4 2005-10-29 devnull for(n=0; n<2*d.nc; n++){
479 d1f529f4 2005-10-29 devnull /* until we finish the for(t) loop, s->id and id are same */
480 d1f529f4 2005-10-29 devnull if((s->delta[n]->id == t->delta[n]->id)
481 d1f529f4 2005-10-29 devnull || (s->delta[n]->id == s->id && t->delta[n]->id == id)
482 d1f529f4 2005-10-29 devnull || (s->delta[n]->id == id && t->delta[n]->id == s->id))
483 d1f529f4 2005-10-29 devnull continue;
484 d1f529f4 2005-10-29 devnull break;
485 d1f529f4 2005-10-29 devnull }
486 d1f529f4 2005-10-29 devnull if(n == 2*d.nc)
487 d1f529f4 2005-10-29 devnull continue;
488 d1f529f4 2005-10-29 devnull if(id == -1)
489 d1f529f4 2005-10-29 devnull id = nid++;
490 d1f529f4 2005-10-29 devnull t->id = id;
491 d1f529f4 2005-10-29 devnull again = 1;
492 d1f529f4 2005-10-29 devnull }
493 d1f529f4 2005-10-29 devnull }
494 d1f529f4 2005-10-29 devnull }while(again);
495 d1f529f4 2005-10-29 devnull
496 d1f529f4 2005-10-29 devnull #ifdef DUMP
497 d1f529f4 2005-10-29 devnull ddump(&d);
498 d1f529f4 2005-10-29 devnull #endif
499 d1f529f4 2005-10-29 devnull
500 d1f529f4 2005-10-29 devnull /* build dreprog */
501 d1f529f4 2005-10-29 devnull id2set = binalloc(&d.bin, nid*sizeof(Reiset*), 1);
502 d1f529f4 2005-10-29 devnull if(id2set == nil)
503 d1f529f4 2005-10-29 devnull longjmp(d.kaboom, 1);
504 d1f529f4 2005-10-29 devnull for(s=d.alloc; s; s=s->next)
505 d1f529f4 2005-10-29 devnull id2set[s->id] = s;
506 d1f529f4 2005-10-29 devnull
507 d1f529f4 2005-10-29 devnull n = buildprog(&d, id2set, nid, nil, nil);
508 d1f529f4 2005-10-29 devnull dp = mallocz(sizeof(Dreprog)+nid*sizeof(Dreinst)+n*sizeof(Drecase), 1);
509 d1f529f4 2005-10-29 devnull if(dp == nil)
510 d1f529f4 2005-10-29 devnull longjmp(d.kaboom, 1);
511 d1f529f4 2005-10-29 devnull c = (Drecase*)&dp->inst[nid];
512 d1f529f4 2005-10-29 devnull buildprog(&d, id2set, nid, dp, c);
513 d1f529f4 2005-10-29 devnull
514 d1f529f4 2005-10-29 devnull for(n=0; n<4; n++)
515 d1f529f4 2005-10-29 devnull dp->start[n] = &dp->inst[start[n]->id];
516 d1f529f4 2005-10-29 devnull dp->ninst = nid;
517 d1f529f4 2005-10-29 devnull
518 d1f529f4 2005-10-29 devnull binfree(&d.bin);
519 d1f529f4 2005-10-29 devnull return dp;
520 d1f529f4 2005-10-29 devnull }
521 d1f529f4 2005-10-29 devnull
522 d1f529f4 2005-10-29 devnull int
523 d1f529f4 2005-10-29 devnull dregexec(Dreprog *p, char *s, int bol)
524 d1f529f4 2005-10-29 devnull {
525 d1f529f4 2005-10-29 devnull Rune r;
526 d1f529f4 2005-10-29 devnull ulong rr;
527 d1f529f4 2005-10-29 devnull Dreinst *i;
528 d1f529f4 2005-10-29 devnull Drecase *c, *ec;
529 d1f529f4 2005-10-29 devnull int best, n;
530 d1f529f4 2005-10-29 devnull char *os;
531 d1f529f4 2005-10-29 devnull
532 d1f529f4 2005-10-29 devnull i = p->start[(bol ? 1 : 0) | (s[1]=='\n' ? 2 : 0)];
533 d1f529f4 2005-10-29 devnull best = -1;
534 d1f529f4 2005-10-29 devnull os = s;
535 d1f529f4 2005-10-29 devnull for(; *s; s+=n){
536 d1f529f4 2005-10-29 devnull if(i->isfinal)
537 d1f529f4 2005-10-29 devnull best = s - os;
538 d1f529f4 2005-10-29 devnull if(i->isloop){
539 d1f529f4 2005-10-29 devnull if(i->isfinal)
540 d1f529f4 2005-10-29 devnull return strlen(os);
541 d1f529f4 2005-10-29 devnull else
542 d1f529f4 2005-10-29 devnull return best;
543 d1f529f4 2005-10-29 devnull }
544 d1f529f4 2005-10-29 devnull if((*s&0xFF) < Runeself){
545 d1f529f4 2005-10-29 devnull r = *s;
546 d1f529f4 2005-10-29 devnull n = 1;
547 d1f529f4 2005-10-29 devnull }else
548 d1f529f4 2005-10-29 devnull n = chartorune(&r, s);
549 d1f529f4 2005-10-29 devnull c = i->c;
550 d1f529f4 2005-10-29 devnull ec = c+i->nc;
551 d1f529f4 2005-10-29 devnull rr = r;
552 d1f529f4 2005-10-29 devnull if(s[n] == '\n' || s[n] == '\0')
553 d1f529f4 2005-10-29 devnull rr |= 0x10000;
554 d1f529f4 2005-10-29 devnull for(; c<ec; c++){
555 d1f529f4 2005-10-29 devnull if(c->start > rr){
556 d1f529f4 2005-10-29 devnull i = c[-1].next;
557 d1f529f4 2005-10-29 devnull goto Out;
558 d1f529f4 2005-10-29 devnull }
559 d1f529f4 2005-10-29 devnull }
560 d1f529f4 2005-10-29 devnull i = ec[-1].next;
561 d1f529f4 2005-10-29 devnull Out:;
562 d1f529f4 2005-10-29 devnull }
563 d1f529f4 2005-10-29 devnull if(i->isfinal)
564 d1f529f4 2005-10-29 devnull best = s - os;
565 d1f529f4 2005-10-29 devnull return best;
566 d1f529f4 2005-10-29 devnull }
567 d1f529f4 2005-10-29 devnull
568 d1f529f4 2005-10-29 devnull
569 d1f529f4 2005-10-29 devnull #ifdef DUMP
570 d1f529f4 2005-10-29 devnull void
571 d1f529f4 2005-10-29 devnull ddump(Deter *d)
572 d1f529f4 2005-10-29 devnull {
573 d1f529f4 2005-10-29 devnull int i, id;
574 d1f529f4 2005-10-29 devnull Reiset *s;
575 d1f529f4 2005-10-29 devnull
576 d1f529f4 2005-10-29 devnull for(s=d->alloc; s; s=s->next){
577 d1f529f4 2005-10-29 devnull print("%d ", s->id);
578 d1f529f4 2005-10-29 devnull id = -1;
579 d1f529f4 2005-10-29 devnull for(i=0; i<2*d->nc; i++){
580 d1f529f4 2005-10-29 devnull if(id != s->delta[i]->id){
581 d1f529f4 2005-10-29 devnull if(i==0)
582 d1f529f4 2005-10-29 devnull print(" [");
583 d1f529f4 2005-10-29 devnull else if(i/d->nc)
584 d1f529f4 2005-10-29 devnull print(" [%C$", d->c[i%d->nc]);
585 d1f529f4 2005-10-29 devnull else
586 d1f529f4 2005-10-29 devnull print(" [%C", d->c[i%d->nc]);
587 d1f529f4 2005-10-29 devnull print(" %d]", s->delta[i]->id);
588 d1f529f4 2005-10-29 devnull id = s->delta[i]->id;
589 d1f529f4 2005-10-29 devnull }
590 d1f529f4 2005-10-29 devnull }
591 d1f529f4 2005-10-29 devnull print("\n");
592 d1f529f4 2005-10-29 devnull }
593 d1f529f4 2005-10-29 devnull }
594 d1f529f4 2005-10-29 devnull
595 d1f529f4 2005-10-29 devnull void
596 d1f529f4 2005-10-29 devnull rdump(Reprog *pp)
597 d1f529f4 2005-10-29 devnull {
598 d1f529f4 2005-10-29 devnull Reinst *l;
599 d1f529f4 2005-10-29 devnull Rune *p;
600 d1f529f4 2005-10-29 devnull
601 d1f529f4 2005-10-29 devnull l = pp->firstinst;
602 d1f529f4 2005-10-29 devnull do{
603 d1f529f4 2005-10-29 devnull print("%ld:\t0%o\t%ld\t%ld", l-pp->firstinst, l->type,
604 d1f529f4 2005-10-29 devnull l->left-pp->firstinst, l->right-pp->firstinst);
605 d1f529f4 2005-10-29 devnull if(l->type == RUNE)
606 d1f529f4 2005-10-29 devnull print("\t%C\n", l->r);
607 d1f529f4 2005-10-29 devnull else if(l->type == CCLASS || l->type == NCCLASS){
608 d1f529f4 2005-10-29 devnull print("\t[");
609 d1f529f4 2005-10-29 devnull if(l->type == NCCLASS)
610 d1f529f4 2005-10-29 devnull print("^");
611 d1f529f4 2005-10-29 devnull for(p = l->cp->spans; p < l->cp->end; p += 2)
612 d1f529f4 2005-10-29 devnull if(p[0] == p[1])
613 d1f529f4 2005-10-29 devnull print("%C", p[0]);
614 d1f529f4 2005-10-29 devnull else
615 d1f529f4 2005-10-29 devnull print("%C-%C", p[0], p[1]);
616 d1f529f4 2005-10-29 devnull print("]\n");
617 d1f529f4 2005-10-29 devnull } else
618 d1f529f4 2005-10-29 devnull print("\n");
619 d1f529f4 2005-10-29 devnull }while(l++->type);
620 d1f529f4 2005-10-29 devnull }
621 d1f529f4 2005-10-29 devnull
622 d1f529f4 2005-10-29 devnull void
623 d1f529f4 2005-10-29 devnull dump(Dreprog *pp)
624 d1f529f4 2005-10-29 devnull {
625 d1f529f4 2005-10-29 devnull int i, j;
626 d1f529f4 2005-10-29 devnull Dreinst *l;
627 d1f529f4 2005-10-29 devnull
628 d1f529f4 2005-10-29 devnull print("start %ld %ld %ld %ld\n",
629 d1f529f4 2005-10-29 devnull pp->start[0]-pp->inst,
630 d1f529f4 2005-10-29 devnull pp->start[1]-pp->inst,
631 d1f529f4 2005-10-29 devnull pp->start[2]-pp->inst,
632 d1f529f4 2005-10-29 devnull pp->start[3]-pp->inst);
633 d1f529f4 2005-10-29 devnull
634 d1f529f4 2005-10-29 devnull for(i=0; i<pp->ninst; i++){
635 d1f529f4 2005-10-29 devnull l = &pp->inst[i];
636 d1f529f4 2005-10-29 devnull print("%d:", i);
637 d1f529f4 2005-10-29 devnull for(j=0; j<l->nc; j++){
638 d1f529f4 2005-10-29 devnull print(" [");
639 d1f529f4 2005-10-29 devnull if(j == 0)
640 d1f529f4 2005-10-29 devnull if(l->c[j].start != 1)
641 d1f529f4 2005-10-29 devnull abort();
642 d1f529f4 2005-10-29 devnull if(j != 0)
643 d1f529f4 2005-10-29 devnull print("%C%s", l->c[j].start&0xFFFF, (l->c[j].start&0x10000) ? "$" : "");
644 d1f529f4 2005-10-29 devnull print("-");
645 d1f529f4 2005-10-29 devnull if(j != l->nc-1)
646 d1f529f4 2005-10-29 devnull print("%C%s", (l->c[j+1].start&0xFFFF)-1, (l->c[j+1].start&0x10000) ? "$" : "");
647 d1f529f4 2005-10-29 devnull print("] %ld", l->c[j].next - pp->inst);
648 d1f529f4 2005-10-29 devnull }
649 d1f529f4 2005-10-29 devnull if(l->isfinal)
650 d1f529f4 2005-10-29 devnull print(" final");
651 d1f529f4 2005-10-29 devnull if(l->isloop)
652 d1f529f4 2005-10-29 devnull print(" loop");
653 d1f529f4 2005-10-29 devnull print("\n");
654 d1f529f4 2005-10-29 devnull }
655 d1f529f4 2005-10-29 devnull }
656 d1f529f4 2005-10-29 devnull
657 d1f529f4 2005-10-29 devnull
658 d1f529f4 2005-10-29 devnull void
659 d1f529f4 2005-10-29 devnull main(int argc, char **argv)
660 d1f529f4 2005-10-29 devnull {
661 d1f529f4 2005-10-29 devnull int i;
662 d1f529f4 2005-10-29 devnull Reprog *p;
663 d1f529f4 2005-10-29 devnull Dreprog *dp;
664 d1f529f4 2005-10-29 devnull
665 d1f529f4 2005-10-29 devnull i = 1;
666 d1f529f4 2005-10-29 devnull p = regcomp(argv[i]);
667 d1f529f4 2005-10-29 devnull if(p == 0){
668 d1f529f4 2005-10-29 devnull print("=== %s: bad regexp\n", argv[i]);
669 d1f529f4 2005-10-29 devnull }
670 cbeb0b26 2006-04-01 devnull /* print("=== %s\n", argv[i]); */
671 cbeb0b26 2006-04-01 devnull /* rdump(p); */
672 d1f529f4 2005-10-29 devnull dp = dregcvt(p);
673 d1f529f4 2005-10-29 devnull print("=== dfa\n");
674 d1f529f4 2005-10-29 devnull dump(dp);
675 fa325e9b 2020-01-10 cross
676 d1f529f4 2005-10-29 devnull for(i=2; i<argc; i++)
677 d1f529f4 2005-10-29 devnull print("match %d\n", dregexec(dp, argv[i], 0));
678 d1f529f4 2005-10-29 devnull exits(0);
679 d1f529f4 2005-10-29 devnull }
680 d1f529f4 2005-10-29 devnull #endif
681 d1f529f4 2005-10-29 devnull
682 d1f529f4 2005-10-29 devnull void
683 d1f529f4 2005-10-29 devnull Bprintdfa(Biobuf *b, Dreprog *p)
684 d1f529f4 2005-10-29 devnull {
685 d1f529f4 2005-10-29 devnull int i, j, nc;
686 d1f529f4 2005-10-29 devnull
687 d1f529f4 2005-10-29 devnull Bprint(b, "# dreprog\n");
688 d1f529f4 2005-10-29 devnull nc = 0;
689 d1f529f4 2005-10-29 devnull for(i=0; i<p->ninst; i++)
690 d1f529f4 2005-10-29 devnull nc += p->inst[i].nc;
691 d1f529f4 2005-10-29 devnull Bprint(b, "%d %d %ld %ld %ld %ld\n", p->ninst, nc,
692 d1f529f4 2005-10-29 devnull p->start[0]-p->inst, p->start[1]-p->inst,
693 d1f529f4 2005-10-29 devnull p->start[2]-p->inst, p->start[3]-p->inst);
694 d1f529f4 2005-10-29 devnull for(i=0; i<p->ninst; i++){
695 d1f529f4 2005-10-29 devnull Bprint(b, "%d %d %d", p->inst[i].isfinal, p->inst[i].isloop, p->inst[i].nc);
696 d1f529f4 2005-10-29 devnull for(j=0; j<p->inst[i].nc; j++)
697 d1f529f4 2005-10-29 devnull Bprint(b, " %d %ld", p->inst[i].c[j].start, p->inst[i].c[j].next-p->inst);
698 d1f529f4 2005-10-29 devnull Bprint(b, "\n");
699 d1f529f4 2005-10-29 devnull }
700 d1f529f4 2005-10-29 devnull }
701 d1f529f4 2005-10-29 devnull
702 d1f529f4 2005-10-29 devnull static char*
703 d1f529f4 2005-10-29 devnull egetline(Biobuf *b, int c, jmp_buf jb)
704 d1f529f4 2005-10-29 devnull {
705 d1f529f4 2005-10-29 devnull char *p;
706 d1f529f4 2005-10-29 devnull
707 d1f529f4 2005-10-29 devnull p = Brdline(b, c);
708 d1f529f4 2005-10-29 devnull if(p == nil)
709 d1f529f4 2005-10-29 devnull longjmp(jb, 1);
710 d1f529f4 2005-10-29 devnull p[Blinelen(b)-1] = '\0';
711 d1f529f4 2005-10-29 devnull return p;
712 d1f529f4 2005-10-29 devnull }
713 d1f529f4 2005-10-29 devnull
714 d1f529f4 2005-10-29 devnull static void
715 d1f529f4 2005-10-29 devnull egetc(Biobuf *b, int c, jmp_buf jb)
716 d1f529f4 2005-10-29 devnull {
717 d1f529f4 2005-10-29 devnull if(Bgetc(b) != c)
718 d1f529f4 2005-10-29 devnull longjmp(jb, 1);
719 d1f529f4 2005-10-29 devnull }
720 d1f529f4 2005-10-29 devnull
721 d1f529f4 2005-10-29 devnull static int
722 d1f529f4 2005-10-29 devnull egetnum(Biobuf *b, int want, jmp_buf jb)
723 d1f529f4 2005-10-29 devnull {
724 d1f529f4 2005-10-29 devnull int c;
725 d1f529f4 2005-10-29 devnull int n, first;
726 d1f529f4 2005-10-29 devnull
727 d1f529f4 2005-10-29 devnull n = 0;
728 d1f529f4 2005-10-29 devnull first = 1;
729 d1f529f4 2005-10-29 devnull while((c = Bgetc(b)) != Beof){
730 d1f529f4 2005-10-29 devnull if(c < '0' || c > '9'){
731 d1f529f4 2005-10-29 devnull if(want == 0){
732 d1f529f4 2005-10-29 devnull Bungetc(b);
733 d1f529f4 2005-10-29 devnull c = 0;
734 d1f529f4 2005-10-29 devnull }
735 d1f529f4 2005-10-29 devnull if(first || c != want){
736 d1f529f4 2005-10-29 devnull werrstr("format error");
737 d1f529f4 2005-10-29 devnull longjmp(jb, 1);
738 d1f529f4 2005-10-29 devnull }
739 d1f529f4 2005-10-29 devnull return n;
740 d1f529f4 2005-10-29 devnull }
741 d1f529f4 2005-10-29 devnull n = n*10 + c - '0';
742 d1f529f4 2005-10-29 devnull first = 0;
743 d1f529f4 2005-10-29 devnull }
744 d1f529f4 2005-10-29 devnull werrstr("unexpected eof");
745 d1f529f4 2005-10-29 devnull longjmp(jb, 1);
746 d1f529f4 2005-10-29 devnull return -1;
747 d1f529f4 2005-10-29 devnull }
748 d1f529f4 2005-10-29 devnull
749 d1f529f4 2005-10-29 devnull Dreprog*
750 d1f529f4 2005-10-29 devnull Breaddfa(Biobuf *b)
751 d1f529f4 2005-10-29 devnull {
752 d1f529f4 2005-10-29 devnull char *s;
753 d1f529f4 2005-10-29 devnull int ninst, nc;
754 d1f529f4 2005-10-29 devnull jmp_buf jb;
755 d1f529f4 2005-10-29 devnull Dreprog *p;
756 d1f529f4 2005-10-29 devnull Drecase *c;
757 d1f529f4 2005-10-29 devnull Dreinst *l;
758 d1f529f4 2005-10-29 devnull int j, k;
759 d1f529f4 2005-10-29 devnull
760 d1f529f4 2005-10-29 devnull p = nil;
761 d1f529f4 2005-10-29 devnull if(setjmp(jb)){
762 d1f529f4 2005-10-29 devnull free(p);
763 d1f529f4 2005-10-29 devnull return nil;
764 d1f529f4 2005-10-29 devnull }
765 d1f529f4 2005-10-29 devnull
766 d1f529f4 2005-10-29 devnull s = egetline(b, '\n', jb);
767 d1f529f4 2005-10-29 devnull if(strcmp(s, "# dreprog") != 0){
768 d1f529f4 2005-10-29 devnull werrstr("format error");
769 d1f529f4 2005-10-29 devnull longjmp(jb, 1);
770 d1f529f4 2005-10-29 devnull }
771 d1f529f4 2005-10-29 devnull
772 d1f529f4 2005-10-29 devnull ninst = egetnum(b, ' ', jb);
773 d1f529f4 2005-10-29 devnull nc = egetnum(b, ' ', jb);
774 d1f529f4 2005-10-29 devnull
775 d1f529f4 2005-10-29 devnull p = mallocz(sizeof(Dreprog)+ninst*sizeof(Dreinst)+nc*sizeof(Drecase), 1);
776 d1f529f4 2005-10-29 devnull if(p == nil)
777 d1f529f4 2005-10-29 devnull longjmp(jb, 1);
778 d1f529f4 2005-10-29 devnull c = (Drecase*)&p->inst[ninst];
779 d1f529f4 2005-10-29 devnull
780 d1f529f4 2005-10-29 devnull p->start[0] = &p->inst[egetnum(b, ' ', jb)];
781 d1f529f4 2005-10-29 devnull p->start[1] = &p->inst[egetnum(b, ' ', jb)];
782 d1f529f4 2005-10-29 devnull p->start[2] = &p->inst[egetnum(b, ' ', jb)];
783 d1f529f4 2005-10-29 devnull p->start[3] = &p->inst[egetnum(b, '\n', jb)];
784 d1f529f4 2005-10-29 devnull
785 d1f529f4 2005-10-29 devnull for(j=0; j<ninst; j++){
786 d1f529f4 2005-10-29 devnull l = &p->inst[j];
787 d1f529f4 2005-10-29 devnull l->isfinal = egetnum(b, ' ', jb);
788 d1f529f4 2005-10-29 devnull l->isloop = egetnum(b, ' ', jb);
789 d1f529f4 2005-10-29 devnull l->nc = egetnum(b, 0, jb);
790 d1f529f4 2005-10-29 devnull l->c = c;
791 d1f529f4 2005-10-29 devnull for(k=0; k<l->nc; k++){
792 d1f529f4 2005-10-29 devnull egetc(b, ' ', jb);
793 d1f529f4 2005-10-29 devnull c->start = egetnum(b, ' ', jb);
794 d1f529f4 2005-10-29 devnull c->next = &p->inst[egetnum(b, 0, jb)];
795 d1f529f4 2005-10-29 devnull c++;
796 d1f529f4 2005-10-29 devnull }
797 d1f529f4 2005-10-29 devnull egetc(b, '\n', jb);
798 d1f529f4 2005-10-29 devnull }
799 d1f529f4 2005-10-29 devnull return p;
800 d1f529f4 2005-10-29 devnull }