Blob


1 #include <u.h>
2 #include <libc.h>
3 #include <bio.h>
4 #include "hash.h"
6 #define mergesort bayesmergesort
8 /***
9 * String hash tables.
10 */
12 Stringtab *tfree;
14 Stringtab*
15 taballoc(void)
16 {
17 static Stringtab *t;
18 static uint nt;
20 if(tfree){
21 Stringtab *tt = tfree;
22 tfree = tt->link;
23 return tt;
24 }
26 if(nt == 0){
27 t = malloc(64000*sizeof(Stringtab));
28 if(t == 0)
29 sysfatal("out of memory");
30 nt = 64000;
31 }
32 nt--;
33 return t++;
34 }
36 void
37 tabfree(Stringtab *tt)
38 {
39 tt->link = tfree;
40 tfree = tt;
41 }
43 char*
44 xstrdup(char *s, int len)
45 {
46 char *r;
47 static char *t;
48 static int nt;
50 if(nt < len){
51 t = malloc(512*1024+len);
52 if(t == 0)
53 sysfatal("out of memory");
54 nt = 512*1024;
55 }
56 r = t;
57 t += len;
58 nt -= len;
59 memmove(r, s, len);
60 return r;
61 }
63 static uint
64 hash(char *s, int n)
65 {
66 uint h;
67 uchar *p, *ep;
68 h = 0;
69 for(p=(uchar*)s, ep=p+n; p<ep; p++)
70 h = h*37 + *p;
71 return h;
72 }
74 static void
75 rehash(Hash *hh)
76 {
77 int h;
78 Stringtab *s;
80 if(hh->nstab == 0)
81 hh->nstab = 1024;
82 else
83 hh->nstab = hh->ntab*2;
85 free(hh->stab);
86 hh->stab = mallocz(hh->nstab*sizeof(Stringtab*), 1);
87 if(hh->stab == nil)
88 sysfatal("out of memory");
90 for(s=hh->all; s; s=s->link){
91 h = hash(s->str, s->n) % hh->nstab;
92 s->hash = hh->stab[h];
93 hh->stab[h] = s;
94 }
95 }
97 Stringtab*
98 findstab(Hash *hh, char *str, int n, int create)
99 {
100 uint h;
101 Stringtab *tab, **l;
103 if(hh->nstab == 0)
104 rehash(hh);
106 h = hash(str, n) % hh->nstab;
107 for(tab=hh->stab[h], l=&hh->stab[h]; tab; l=&tab->hash, tab=tab->hash)
108 if(n==tab->n && memcmp(str, tab->str, n) == 0){
109 *l = tab->hash;
110 tab->hash = hh->stab[h];
111 hh->stab[h] = tab;
112 return tab;
115 if(!create)
116 return nil;
118 hh->sorted = 0;
119 tab = taballoc();
120 tab->str = xstrdup(str, n);
121 tab->hash = hh->stab[h];
122 tab->link = hh->all;
123 hh->all = tab;
124 tab->n = n;
125 tab->count = 0;
126 tab->date = 0;
127 hh->stab[h] = tab;
129 hh->ntab++;
130 if(hh->ntab > 2*hh->nstab && !(hh->ntab&(hh->ntab-1)))
131 rehash(hh);
132 return tab;
135 int
136 scmp(Stringtab *a, Stringtab *b)
138 int n, x;
140 if(a == 0)
141 return 1;
142 if(b == 0)
143 return -1;
144 n = a->n;
145 if(n > b->n)
146 n = b->n;
147 x = memcmp(a->str, b->str, n);
148 if(x != 0)
149 return x;
150 if(a->n < b->n)
151 return -1;
152 if(a->n > b->n)
153 return 1;
154 return 0; /* shouldn't happen */
157 Stringtab*
158 merge(Stringtab *a, Stringtab *b)
160 Stringtab *s, **l;
162 l = &s;
163 while(a || b){
164 if(scmp(a, b) < 0){
165 *l = a;
166 l = &a->link;
167 a = a->link;
168 }else{
169 *l = b;
170 l = &b->link;
171 b = b->link;
174 *l = 0;
175 return s;
178 Stringtab*
179 mergesort(Stringtab *s)
181 Stringtab *a, *b;
182 int delay;
184 if(s == nil)
185 return nil;
186 if(s->link == nil)
187 return s;
189 a = b = s;
190 delay = 1;
191 while(a && b){
192 if(delay) /* easy way to handle 2-element list */
193 delay = 0;
194 else
195 a = a->link;
196 if(b = b->link)
197 b = b->link;
200 b = a->link;
201 a->link = nil;
203 a = mergesort(s);
204 b = mergesort(b);
206 return merge(a, b);
209 Stringtab*
210 sortstab(Hash *hh)
212 if(!hh->sorted){
213 hh->all = mergesort(hh->all);
214 hh->sorted = 1;
216 return hh->all;
219 int
220 Bwritehash(Biobuf *b, Hash *hh)
222 Stringtab *s;
223 int now;
225 now = time(0);
226 s = sortstab(hh);
227 Bprint(b, "# hash table\n");
228 for(; s; s=s->link){
229 if(s->count <= 0)
230 continue;
231 /*
232 * Entries that haven't been updated in thirty days get tossed.
233 */
234 if(s->date+30*86400 < now)
235 continue;
236 Bwrite(b, s->str, s->n);
237 Bprint(b, "\t%d %d\n", s->count, s->date);
239 if(Bflush(b) == Beof)
240 return -1;
241 return 0;
244 void
245 Breadhash(Biobuf *b, Hash *hh, int scale)
247 char *s;
248 char *t;
249 int n;
250 int date;
251 Stringtab *st;
253 s = Brdstr(b, '\n', 1);
254 if(s == nil)
255 return;
256 if(strcmp(s, "# hash table") != 0)
257 sysfatal("bad hash table format");
259 while(s = Brdline(b, '\n')){
260 s[Blinelen(b)-1] = 0;
261 t = strrchr(s, '\t');
262 if(t == nil)
263 sysfatal("bad hash table format");
264 *t++ = '\0';
265 if(*t < '0' || *t > '9')
266 sysfatal("bad hash table format");
267 n = strtol(t, &t, 10);
268 date = time(0);
269 if(*t != 0){
270 if(*t == ' '){
271 t++;
272 date = strtol(t, &t, 10);
274 if(*t != 0)
275 sysfatal("bad hash table format");
277 st = findstab(hh, s, strlen(s), 1);
278 if(date > st->date)
279 st->date = date;
280 st->count += n*scale;
284 void
285 freehash(Hash *h)
287 Stringtab *s, *next;
289 for(s=h->all; s; s=next){
290 next = s->link;
291 tabfree(s);
293 free(h->stab);
294 free(h);
297 Biobuf*
298 Bopenlock(char *file, int mode)
300 int i;
301 Biobuf *b;
302 char err[ERRMAX];
304 b = nil;
305 for(i=0; i<120; i++){
306 if((b = Bopen(file, mode)) != nil)
307 break;
308 rerrstr(err, sizeof err);
309 if(strstr(err, "file is locked")==nil && strstr(err, "exclusive lock")==nil)
310 break;
311 sleep(1000);
313 return b;