Blob


1 /*
2 * Bloom filter tracking which scores are present in our arenas
3 * and (more importantly) which are not.
4 */
6 #include "stdinc.h"
7 #include "dat.h"
8 #include "fns.h"
10 int
11 bloominit(Bloom *b, vlong vsize, u8int *data)
12 {
13 ulong size;
15 size = vsize;
16 if(size != vsize){ /* truncation */
17 werrstr("bloom data too big");
18 return -1;
19 }
21 b->size = size;
22 b->nhash = 32; /* will be fixed by caller on initialization */
23 if(data != nil)
24 if(unpackbloomhead(b, data) < 0)
25 return -1;
27 b->mask = b->size-1;
28 b->data = data;
29 return 0;
30 }
32 void
33 wbbloomhead(Bloom *b)
34 {
35 packbloomhead(b, b->data);
36 }
38 Bloom*
39 readbloom(Part *p)
40 {
41 int i, n;
42 uint ones;
43 uchar buf[512];
44 uchar *data;
45 u32int *a;
46 Bloom *b;
48 b = vtmallocz(sizeof *b);
49 if(readpart(p, 0, buf, sizeof buf) < 0)
50 return nil;
51 if(bloominit(b, 0, buf) < 0){
52 vtfree(b);
53 return nil;
54 }
55 data = vtmallocz(b->size);
56 if(readpart(p, 0, data, b->size) < 0){
57 vtfree(b);
58 vtfree(data);
59 return nil;
60 }
61 b->data = data;
62 b->part = p;
64 a = (u32int*)b->data;
65 n = b->size/4;
66 ones = 0;
67 for(i=0; i<n; i++)
68 ones += countbits(a[i]);
69 addstat(StatBloomOnes, ones);
71 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
72 addstat(StatBloomBits, b->size*8-1);
73 else
74 addstat(StatBloomBits, b->size*8);
76 return b;
77 }
79 int
80 writebloom(Bloom *b)
81 {
82 wbbloomhead(b);
83 return writepart(b->part, 0, b->data, b->size);
84 }
86 /*
87 * Derive two random 32-bit quantities a, b from the score
88 * and then use a+b*i as a sequence of bloom filter indices.
89 * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
90 * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
91 */
92 static void
93 gethashes(u8int *score, ulong *h)
94 {
95 int i;
96 u32int a, b;
98 a = 0;
99 b = 0;
100 for(i=4; i+8<=VtScoreSize; i+=8){
101 a ^= *(u32int*)(score+i);
102 b ^= *(u32int*)(score+i+4);
104 if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
105 a ^= *(u32int*)(score+i);
106 for(i=0; i<BloomMaxHash; i++, a+=b)
107 h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
110 static void
111 _markbloomfilter(Bloom *b, u8int *score)
113 int i, nnew;
114 ulong h[BloomMaxHash];
115 u32int x, *y, z, *tab;
117 trace("markbloomfilter", "markbloomfilter %V", score);
118 gethashes(score, h);
119 nnew = 0;
120 tab = (u32int*)b->data;
121 for(i=0; i<b->nhash; i++){
122 x = h[i];
123 y = &tab[(x&b->mask)>>5];
124 z = 1<<(x&31);
125 if(!(*y&z)){
126 nnew++;
127 *y |= z;
130 if(nnew)
131 addstat(StatBloomOnes, nnew);
133 trace("markbloomfilter", "markbloomfilter exit");
136 static int
137 _inbloomfilter(Bloom *b, u8int *score)
139 int i;
140 ulong h[BloomMaxHash], x;
141 u32int *tab;
143 gethashes(score, h);
144 tab = (u32int*)b->data;
145 for(i=0; i<b->nhash; i++){
146 x = h[i];
147 if(!(tab[(x&b->mask)>>5] & (1<<(x&31))))
148 return 0;
150 return 1;
153 int
154 inbloomfilter(Bloom *b, u8int *score)
156 int r;
157 uint ms;
159 if(b == nil)
160 return 1;
162 ms = msec();
163 rlock(&b->lk);
164 r = _inbloomfilter(b, score);
165 runlock(&b->lk);
166 ms = ms - msec();
167 addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
168 if(r)
169 addstat(StatBloomMiss, 1);
170 else
171 addstat(StatBloomHit, 1);
172 return r;
175 void
176 markbloomfilter(Bloom *b, u8int *score)
178 if(b == nil)
179 return;
181 rlock(&b->lk);
182 qlock(&b->mod);
183 _markbloomfilter(b, score);
184 qunlock(&b->mod);
185 runlock(&b->lk);
188 static void
189 bloomwriteproc(void *v)
191 Bloom *b;
193 b = v;
194 for(;;){
195 recv(b->writechan, 0);
196 if(writebloom(b) < 0)
197 fprint(2, "oops! writing bloom: %r\n");
198 send(b->writedonechan, 0);
202 void
203 startbloomproc(Bloom *b)
205 b->writechan = chancreate(sizeof(void*), 0);
206 b->writedonechan = chancreate(sizeof(void*), 0);
207 vtproc(bloomwriteproc, b);