2 * Bloom filter tracking which scores are present in our arenas
3 * and (more importantly) which are not.
13 bloominit(Bloom *b, vlong vsize, u8int *data)
18 if(size != vsize){ /* truncation */
19 werrstr("bloom data too big");
24 b->nhash = 32; /* will be fixed by caller on initialization */
26 if(unpackbloomhead(b, data) < 0)
29 b->bitmask = (b->size<<3) - 1;
37 packbloomhead(b, b->data);
46 b = vtmallocz(sizeof *b);
47 if(readpart(p, 0, buf, sizeof buf) < 0)
50 * pass buf as b->data so that bloominit
51 * can parse header. won't be used for
52 * accessing bits (cleared below).
54 if(bloominit(b, 0, buf) < 0){
59 * default block size is system page size.
60 * the bloom filter is usually very big.
61 * bump the block size up to speed i/o.
63 if(p->blocksize < (1<<20)){
65 if(p->blocksize > p->size)
66 p->blocksize = p->size;
79 data = vtmallocz(b->size);
81 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
82 addstat(StatBloomBits, b->size*8-1);
84 addstat(StatBloomBits, b->size*8);
96 data = vtmallocz(b->size);
97 if(readpart(b->part, 0, data, b->size) < 0){
104 a = (u32int*)b->data;
108 ones += countbits(a[i]);
109 addstat(StatBloomOnes, ones);
111 if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
112 addstat(StatBloomBits, b->size*8-1);
114 addstat(StatBloomBits, b->size*8);
123 if(writepart(b->part, 0, b->data, b->size) < 0)
125 if(flushpart(b->part) < 0)
131 * Derive two random 32-bit quantities a, b from the score
132 * and then use a+b*i as a sequence of bloom filter indices.
133 * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
134 * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
137 gethashes(u8int *score, ulong *h)
144 for(i=4; i+8<=VtScoreSize; i+=8){
145 a ^= *(u32int*)(score+i);
146 b ^= *(u32int*)(score+i+4);
148 if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
149 a ^= *(u32int*)(score+i);
150 for(i=0; i<BloomMaxHash; i++, a+=b)
151 h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
155 _markbloomfilter(Bloom *b, u8int *score)
158 ulong h[BloomMaxHash];
159 u32int x, *y, z, *tab;
161 trace("markbloomfilter", "markbloomfilter %V", score);
164 tab = (u32int*)b->data;
165 for(i=0; i<b->nhash; i++){
167 y = &tab[(x&b->bitmask)>>5];
175 addstat(StatBloomOnes, nnew);
177 trace("markbloomfilter", "markbloomfilter exit");
181 _inbloomfilter(Bloom *b, u8int *score)
184 ulong h[BloomMaxHash], x;
188 tab = (u32int*)b->data;
189 for(i=0; i<b->nhash; i++){
191 if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31))))
198 inbloomfilter(Bloom *b, u8int *score)
202 if(b == nil || b->data == nil)
209 r = _inbloomfilter(b, score);
211 addstat(StatBloomLookup, 1);
213 addstat(StatBloomMiss, 1);
215 addstat(StatBloomHit, 1);
220 markbloomfilter(Bloom *b, u8int *score)
222 if(b == nil || b->data == nil)
227 _markbloomfilter(b, score);
233 markbloomfiltern(Bloom *b, u8int score[][20], int n)
237 if(b == nil || b->data == nil)
243 _markbloomfilter(b, score[i]);
249 bloomwriteproc(void *v)
254 threadsetname("bloomwriteproc");
257 recv(b->writechan, 0);
258 if((ret=writebloom(b)) < 0)
259 fprint(2, "oops! writing bloom: %r\n");
262 sendul(b->writedonechan, ret);
267 startbloomproc(Bloom *b)
269 b->writechan = chancreate(sizeof(void*), 0);
270 b->writedonechan = chancreate(sizeof(void*), 0);
271 vtproc(bloomwriteproc, b);