Blob
1 #include <u.h>2 #include <libc.h>3 #include <bio.h>4 #include "hash.h"6 #define mergesort bayesmergesort8 /***9 * String hash tables.10 */12 Stringtab *tfree;14 Stringtab*15 taballoc(void)16 {17 static Stringtab *t;18 static uint nt;20 if(tfree){21 Stringtab *tt = tfree;22 tfree = tt->link;23 return tt;24 }26 if(nt == 0){27 t = malloc(64000*sizeof(Stringtab));28 if(t == 0)29 sysfatal("out of memory");30 nt = 64000;31 }32 nt--;33 return t++;34 }36 void37 tabfree(Stringtab *tt)38 {39 tt->link = tfree;40 tfree = tt;41 }43 char*44 xstrdup(char *s, int len)45 {46 char *r;47 static char *t;48 static int nt;50 if(nt < len){51 t = malloc(512*1024+len);52 if(t == 0)53 sysfatal("out of memory");54 nt = 512*1024;55 }56 r = t;57 t += len;58 nt -= len;59 memmove(r, s, len);60 return r;61 }63 static uint64 hash(char *s, int n)65 {66 uint h;67 uchar *p, *ep;68 h = 0;69 for(p=(uchar*)s, ep=p+n; p<ep; p++)70 h = h*37 + *p;71 return h;72 }74 static void75 rehash(Hash *hh)76 {77 int h;78 Stringtab *s;80 if(hh->nstab == 0)81 hh->nstab = 1024;82 else83 hh->nstab = hh->ntab*2;85 free(hh->stab);86 hh->stab = mallocz(hh->nstab*sizeof(Stringtab*), 1);87 if(hh->stab == nil)88 sysfatal("out of memory");90 for(s=hh->all; s; s=s->link){91 h = hash(s->str, s->n) % hh->nstab;92 s->hash = hh->stab[h];93 hh->stab[h] = s;94 }95 }97 Stringtab*98 findstab(Hash *hh, char *str, int n, int create)99 {100 uint h;101 Stringtab *tab, **l;103 if(hh->nstab == 0)104 rehash(hh);106 h = hash(str, n) % hh->nstab;107 for(tab=hh->stab[h], l=&hh->stab[h]; tab; l=&tab->hash, tab=tab->hash)108 if(n==tab->n && memcmp(str, tab->str, n) == 0){109 *l = tab->hash;110 tab->hash = hh->stab[h];111 hh->stab[h] = tab;112 return tab;113 }115 if(!create)116 return nil;118 hh->sorted = 0;119 tab = taballoc();120 tab->str = xstrdup(str, n);121 tab->hash = hh->stab[h];122 tab->link = hh->all;123 hh->all = tab;124 tab->n = n;125 tab->count = 0;126 tab->date = 0;127 hh->stab[h] = tab;129 hh->ntab++;130 if(hh->ntab > 2*hh->nstab && !(hh->ntab&(hh->ntab-1)))131 rehash(hh);132 return tab;133 }135 int136 scmp(Stringtab *a, Stringtab *b)137 {138 int n, x;140 if(a == 0)141 return 1;142 if(b == 0)143 return -1;144 n = a->n;145 if(n > b->n)146 n = b->n;147 x = memcmp(a->str, b->str, n);148 if(x != 0)149 return x;150 if(a->n < b->n)151 return -1;152 if(a->n > b->n)153 return 1;154 return 0; /* shouldn't happen */155 }157 Stringtab*158 merge(Stringtab *a, Stringtab *b)159 {160 Stringtab *s, **l;162 l = &s;163 while(a || b){164 if(scmp(a, b) < 0){165 *l = a;166 l = &a->link;167 a = a->link;168 }else{169 *l = b;170 l = &b->link;171 b = b->link;172 }173 }174 *l = 0;175 return s;176 }178 Stringtab*179 mergesort(Stringtab *s)180 {181 Stringtab *a, *b;182 int delay;184 if(s == nil)185 return nil;186 if(s->link == nil)187 return s;189 a = b = s;190 delay = 1;191 while(a && b){192 if(delay) /* easy way to handle 2-element list */193 delay = 0;194 else195 a = a->link;196 if(b = b->link)197 b = b->link;198 }200 b = a->link;201 a->link = nil;203 a = mergesort(s);204 b = mergesort(b);206 return merge(a, b);207 }209 Stringtab*210 sortstab(Hash *hh)211 {212 if(!hh->sorted){213 hh->all = mergesort(hh->all);214 hh->sorted = 1;215 }216 return hh->all;217 }219 int220 Bwritehash(Biobuf *b, Hash *hh)221 {222 Stringtab *s;223 int now;225 now = time(0);226 s = sortstab(hh);227 Bprint(b, "# hash table\n");228 for(; s; s=s->link){229 if(s->count <= 0)230 continue;231 /*232 * Entries that haven't been updated in thirty days get tossed.233 */234 if(s->date+30*86400 < now)235 continue;236 Bwrite(b, s->str, s->n);237 Bprint(b, "\t%d %d\n", s->count, s->date);238 }239 if(Bflush(b) == Beof)240 return -1;241 return 0;242 }244 void245 Breadhash(Biobuf *b, Hash *hh, int scale)246 {247 char *s;248 char *t;249 int n;250 int date;251 Stringtab *st;253 s = Brdstr(b, '\n', 1);254 if(s == nil)255 return;256 if(strcmp(s, "# hash table") != 0)257 sysfatal("bad hash table format");259 while(s = Brdline(b, '\n')){260 s[Blinelen(b)-1] = 0;261 t = strrchr(s, '\t');262 if(t == nil)263 sysfatal("bad hash table format");264 *t++ = '\0';265 if(*t < '0' || *t > '9')266 sysfatal("bad hash table format");267 n = strtol(t, &t, 10);268 date = time(0);269 if(*t != 0){270 if(*t == ' '){271 t++;272 date = strtol(t, &t, 10);273 }274 if(*t != 0)275 sysfatal("bad hash table format");276 }277 st = findstab(hh, s, strlen(s), 1);278 if(date > st->date)279 st->date = date;280 st->count += n*scale;281 }282 }284 void285 freehash(Hash *h)286 {287 Stringtab *s, *next;289 for(s=h->all; s; s=next){290 next = s->link;291 tabfree(s);292 }293 free(h->stab);294 free(h);295 }297 Biobuf*298 Bopenlock(char *file, int mode)299 {300 int i;301 Biobuf *b;302 char err[ERRMAX];304 b = nil;305 for(i=0; i<120; i++){306 if((b = Bopen(file, mode)) != nil)307 break;308 rerrstr(err, sizeof err);309 if(strstr(err, "file is locked")==nil && strstr(err, "exclusive lock")==nil)310 break;311 sleep(1000);312 }313 return b;314 }