Blame


1 a0d146ed 2005-07-12 devnull /*
2 a0d146ed 2005-07-12 devnull * Bloom filter tracking which scores are present in our arenas
3 fa325e9b 2020-01-10 cross * and (more importantly) which are not.
4 a0d146ed 2005-07-12 devnull */
5 a0d146ed 2005-07-12 devnull
6 a0d146ed 2005-07-12 devnull #include "stdinc.h"
7 a0d146ed 2005-07-12 devnull #include "dat.h"
8 a0d146ed 2005-07-12 devnull #include "fns.h"
9 a0d146ed 2005-07-12 devnull
10 28b49df3 2006-07-18 devnull int ignorebloom;
11 28b49df3 2006-07-18 devnull
12 a0d146ed 2005-07-12 devnull int
13 a0d146ed 2005-07-12 devnull bloominit(Bloom *b, vlong vsize, u8int *data)
14 a0d146ed 2005-07-12 devnull {
15 a0d146ed 2005-07-12 devnull ulong size;
16 fa325e9b 2020-01-10 cross
17 a0d146ed 2005-07-12 devnull size = vsize;
18 a0d146ed 2005-07-12 devnull if(size != vsize){ /* truncation */
19 a0d146ed 2005-07-12 devnull werrstr("bloom data too big");
20 a0d146ed 2005-07-12 devnull return -1;
21 a0d146ed 2005-07-12 devnull }
22 fa325e9b 2020-01-10 cross
23 a0d146ed 2005-07-12 devnull b->size = size;
24 a0d146ed 2005-07-12 devnull b->nhash = 32; /* will be fixed by caller on initialization */
25 a0d146ed 2005-07-12 devnull if(data != nil)
26 a0d146ed 2005-07-12 devnull if(unpackbloomhead(b, data) < 0)
27 a0d146ed 2005-07-12 devnull return -1;
28 fa325e9b 2020-01-10 cross
29 27d28098 2007-04-21 devnull b->bitmask = (b->size<<3) - 1;
30 a0d146ed 2005-07-12 devnull b->data = data;
31 a0d146ed 2005-07-12 devnull return 0;
32 a0d146ed 2005-07-12 devnull }
33 a0d146ed 2005-07-12 devnull
34 a0d146ed 2005-07-12 devnull void
35 a0d146ed 2005-07-12 devnull wbbloomhead(Bloom *b)
36 a0d146ed 2005-07-12 devnull {
37 a0d146ed 2005-07-12 devnull packbloomhead(b, b->data);
38 a0d146ed 2005-07-12 devnull }
39 a0d146ed 2005-07-12 devnull
40 a0d146ed 2005-07-12 devnull Bloom*
41 a0d146ed 2005-07-12 devnull readbloom(Part *p)
42 a0d146ed 2005-07-12 devnull {
43 a0d146ed 2005-07-12 devnull uchar buf[512];
44 a0d146ed 2005-07-12 devnull Bloom *b;
45 fa325e9b 2020-01-10 cross
46 a0d146ed 2005-07-12 devnull b = vtmallocz(sizeof *b);
47 a0d146ed 2005-07-12 devnull if(readpart(p, 0, buf, sizeof buf) < 0)
48 a0d146ed 2005-07-12 devnull return nil;
49 27d28098 2007-04-21 devnull /*
50 27d28098 2007-04-21 devnull * pass buf as b->data so that bloominit
51 27d28098 2007-04-21 devnull * can parse header. won't be used for
52 27d28098 2007-04-21 devnull * accessing bits (cleared below).
53 27d28098 2007-04-21 devnull */
54 a0d146ed 2005-07-12 devnull if(bloominit(b, 0, buf) < 0){
55 a0d146ed 2005-07-12 devnull vtfree(b);
56 a0d146ed 2005-07-12 devnull return nil;
57 7e452401 2007-04-27 devnull }else{
58 7e452401 2007-04-27 devnull /*
59 7e452401 2007-04-27 devnull * default block size is system page size.
60 7e452401 2007-04-27 devnull * the bloom filter is usually very big.
61 7e452401 2007-04-27 devnull * bump the block size up to speed i/o.
62 7e452401 2007-04-27 devnull */
63 7e452401 2007-04-27 devnull if(p->blocksize < (1<<20)){
64 7e452401 2007-04-27 devnull p->blocksize = 1<<20;
65 7e452401 2007-04-27 devnull if(p->blocksize > p->size)
66 7e452401 2007-04-27 devnull p->blocksize = p->size;
67 7e452401 2007-04-27 devnull }
68 a0d146ed 2005-07-12 devnull }
69 28b49df3 2006-07-18 devnull b->part = p;
70 27d28098 2007-04-21 devnull b->data = nil;
71 28b49df3 2006-07-18 devnull return b;
72 28b49df3 2006-07-18 devnull }
73 28b49df3 2006-07-18 devnull
74 28b49df3 2006-07-18 devnull int
75 28b49df3 2006-07-18 devnull resetbloom(Bloom *b)
76 28b49df3 2006-07-18 devnull {
77 28b49df3 2006-07-18 devnull uchar *data;
78 fa325e9b 2020-01-10 cross
79 a0d146ed 2005-07-12 devnull data = vtmallocz(b->size);
80 28b49df3 2006-07-18 devnull b->data = data;
81 28b49df3 2006-07-18 devnull if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
82 28b49df3 2006-07-18 devnull addstat(StatBloomBits, b->size*8-1);
83 28b49df3 2006-07-18 devnull else
84 28b49df3 2006-07-18 devnull addstat(StatBloomBits, b->size*8);
85 28b49df3 2006-07-18 devnull return 0;
86 28b49df3 2006-07-18 devnull }
87 28b49df3 2006-07-18 devnull
88 28b49df3 2006-07-18 devnull int
89 28b49df3 2006-07-18 devnull loadbloom(Bloom *b)
90 28b49df3 2006-07-18 devnull {
91 28b49df3 2006-07-18 devnull int i, n;
92 28b49df3 2006-07-18 devnull uint ones;
93 28b49df3 2006-07-18 devnull uchar *data;
94 28b49df3 2006-07-18 devnull u32int *a;
95 fa325e9b 2020-01-10 cross
96 28b49df3 2006-07-18 devnull data = vtmallocz(b->size);
97 28b49df3 2006-07-18 devnull if(readpart(b->part, 0, data, b->size) < 0){
98 a0d146ed 2005-07-12 devnull vtfree(b);
99 a0d146ed 2005-07-12 devnull vtfree(data);
100 28b49df3 2006-07-18 devnull return -1;
101 a0d146ed 2005-07-12 devnull }
102 a0d146ed 2005-07-12 devnull b->data = data;
103 a0d146ed 2005-07-12 devnull
104 a0d146ed 2005-07-12 devnull a = (u32int*)b->data;
105 a0d146ed 2005-07-12 devnull n = b->size/4;
106 a0d146ed 2005-07-12 devnull ones = 0;
107 a0d146ed 2005-07-12 devnull for(i=0; i<n; i++)
108 fa325e9b 2020-01-10 cross ones += countbits(a[i]);
109 a0d146ed 2005-07-12 devnull addstat(StatBloomOnes, ones);
110 a0d146ed 2005-07-12 devnull
111 a0d146ed 2005-07-12 devnull if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
112 a0d146ed 2005-07-12 devnull addstat(StatBloomBits, b->size*8-1);
113 a0d146ed 2005-07-12 devnull else
114 a0d146ed 2005-07-12 devnull addstat(StatBloomBits, b->size*8);
115 fa325e9b 2020-01-10 cross
116 28b49df3 2006-07-18 devnull return 0;
117 a0d146ed 2005-07-12 devnull }
118 a0d146ed 2005-07-12 devnull
119 a0d146ed 2005-07-12 devnull int
120 a0d146ed 2005-07-12 devnull writebloom(Bloom *b)
121 a0d146ed 2005-07-12 devnull {
122 a0d146ed 2005-07-12 devnull wbbloomhead(b);
123 e46cacb0 2007-04-27 devnull if(writepart(b->part, 0, b->data, b->size) < 0)
124 e46cacb0 2007-04-27 devnull return -1;
125 e46cacb0 2007-04-27 devnull if(flushpart(b->part) < 0)
126 e46cacb0 2007-04-27 devnull return -1;
127 e46cacb0 2007-04-27 devnull return 0;
128 a0d146ed 2005-07-12 devnull }
129 a0d146ed 2005-07-12 devnull
130 a0d146ed 2005-07-12 devnull /*
131 a0d146ed 2005-07-12 devnull * Derive two random 32-bit quantities a, b from the score
132 a0d146ed 2005-07-12 devnull * and then use a+b*i as a sequence of bloom filter indices.
133 a0d146ed 2005-07-12 devnull * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
134 a0d146ed 2005-07-12 devnull * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
135 a0d146ed 2005-07-12 devnull */
136 a0d146ed 2005-07-12 devnull static void
137 a0d146ed 2005-07-12 devnull gethashes(u8int *score, ulong *h)
138 a0d146ed 2005-07-12 devnull {
139 a0d146ed 2005-07-12 devnull int i;
140 a0d146ed 2005-07-12 devnull u32int a, b;
141 a0d146ed 2005-07-12 devnull
142 a0d146ed 2005-07-12 devnull a = 0;
143 a0d146ed 2005-07-12 devnull b = 0;
144 a0d146ed 2005-07-12 devnull for(i=4; i+8<=VtScoreSize; i+=8){
145 a0d146ed 2005-07-12 devnull a ^= *(u32int*)(score+i);
146 a0d146ed 2005-07-12 devnull b ^= *(u32int*)(score+i+4);
147 a0d146ed 2005-07-12 devnull }
148 28b49df3 2006-07-18 devnull if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
149 28b49df3 2006-07-18 devnull a ^= *(u32int*)(score+i);
150 a0d146ed 2005-07-12 devnull for(i=0; i<BloomMaxHash; i++, a+=b)
151 a0d146ed 2005-07-12 devnull h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
152 a0d146ed 2005-07-12 devnull }
153 a0d146ed 2005-07-12 devnull
154 a0d146ed 2005-07-12 devnull static void
155 a0d146ed 2005-07-12 devnull _markbloomfilter(Bloom *b, u8int *score)
156 a0d146ed 2005-07-12 devnull {
157 a0d146ed 2005-07-12 devnull int i, nnew;
158 a0d146ed 2005-07-12 devnull ulong h[BloomMaxHash];
159 a0d146ed 2005-07-12 devnull u32int x, *y, z, *tab;
160 a0d146ed 2005-07-12 devnull
161 a0d146ed 2005-07-12 devnull trace("markbloomfilter", "markbloomfilter %V", score);
162 a0d146ed 2005-07-12 devnull gethashes(score, h);
163 a0d146ed 2005-07-12 devnull nnew = 0;
164 a0d146ed 2005-07-12 devnull tab = (u32int*)b->data;
165 a0d146ed 2005-07-12 devnull for(i=0; i<b->nhash; i++){
166 a0d146ed 2005-07-12 devnull x = h[i];
167 27d28098 2007-04-21 devnull y = &tab[(x&b->bitmask)>>5];
168 a0d146ed 2005-07-12 devnull z = 1<<(x&31);
169 a0d146ed 2005-07-12 devnull if(!(*y&z)){
170 a0d146ed 2005-07-12 devnull nnew++;
171 a0d146ed 2005-07-12 devnull *y |= z;
172 a0d146ed 2005-07-12 devnull }
173 a0d146ed 2005-07-12 devnull }
174 a0d146ed 2005-07-12 devnull if(nnew)
175 a0d146ed 2005-07-12 devnull addstat(StatBloomOnes, nnew);
176 a0d146ed 2005-07-12 devnull
177 a0d146ed 2005-07-12 devnull trace("markbloomfilter", "markbloomfilter exit");
178 a0d146ed 2005-07-12 devnull }
179 a0d146ed 2005-07-12 devnull
180 a0d146ed 2005-07-12 devnull static int
181 a0d146ed 2005-07-12 devnull _inbloomfilter(Bloom *b, u8int *score)
182 a0d146ed 2005-07-12 devnull {
183 a0d146ed 2005-07-12 devnull int i;
184 a0d146ed 2005-07-12 devnull ulong h[BloomMaxHash], x;
185 a0d146ed 2005-07-12 devnull u32int *tab;
186 a0d146ed 2005-07-12 devnull
187 a0d146ed 2005-07-12 devnull gethashes(score, h);
188 a0d146ed 2005-07-12 devnull tab = (u32int*)b->data;
189 a0d146ed 2005-07-12 devnull for(i=0; i<b->nhash; i++){
190 a0d146ed 2005-07-12 devnull x = h[i];
191 27d28098 2007-04-21 devnull if(!(tab[(x&b->bitmask)>>5] & (1<<(x&31))))
192 a0d146ed 2005-07-12 devnull return 0;
193 a0d146ed 2005-07-12 devnull }
194 a0d146ed 2005-07-12 devnull return 1;
195 a0d146ed 2005-07-12 devnull }
196 a0d146ed 2005-07-12 devnull
197 a0d146ed 2005-07-12 devnull int
198 a0d146ed 2005-07-12 devnull inbloomfilter(Bloom *b, u8int *score)
199 a0d146ed 2005-07-12 devnull {
200 a0d146ed 2005-07-12 devnull int r;
201 a0d146ed 2005-07-12 devnull
202 28b49df3 2006-07-18 devnull if(b == nil || b->data == nil)
203 a0d146ed 2005-07-12 devnull return 1;
204 a0d146ed 2005-07-12 devnull
205 28b49df3 2006-07-18 devnull if(ignorebloom)
206 28b49df3 2006-07-18 devnull return 1;
207 fa325e9b 2020-01-10 cross
208 a0d146ed 2005-07-12 devnull rlock(&b->lk);
209 a0d146ed 2005-07-12 devnull r = _inbloomfilter(b, score);
210 a0d146ed 2005-07-12 devnull runlock(&b->lk);
211 54dd92be 2008-01-30 rsc addstat(StatBloomLookup, 1);
212 a0d146ed 2005-07-12 devnull if(r)
213 a0d146ed 2005-07-12 devnull addstat(StatBloomMiss, 1);
214 a0d146ed 2005-07-12 devnull else
215 a0d146ed 2005-07-12 devnull addstat(StatBloomHit, 1);
216 a0d146ed 2005-07-12 devnull return r;
217 a0d146ed 2005-07-12 devnull }
218 a0d146ed 2005-07-12 devnull
219 a0d146ed 2005-07-12 devnull void
220 a0d146ed 2005-07-12 devnull markbloomfilter(Bloom *b, u8int *score)
221 a0d146ed 2005-07-12 devnull {
222 28b49df3 2006-07-18 devnull if(b == nil || b->data == nil)
223 a0d146ed 2005-07-12 devnull return;
224 a0d146ed 2005-07-12 devnull
225 a0d146ed 2005-07-12 devnull rlock(&b->lk);
226 a0d146ed 2005-07-12 devnull qlock(&b->mod);
227 a0d146ed 2005-07-12 devnull _markbloomfilter(b, score);
228 ac5a97e6 2008-07-04 rsc qunlock(&b->mod);
229 ac5a97e6 2008-07-04 rsc runlock(&b->lk);
230 ac5a97e6 2008-07-04 rsc }
231 ac5a97e6 2008-07-04 rsc
232 ac5a97e6 2008-07-04 rsc void
233 ac5a97e6 2008-07-04 rsc markbloomfiltern(Bloom *b, u8int score[][20], int n)
234 ac5a97e6 2008-07-04 rsc {
235 ac5a97e6 2008-07-04 rsc int i;
236 ac5a97e6 2008-07-04 rsc
237 ac5a97e6 2008-07-04 rsc if(b == nil || b->data == nil)
238 ac5a97e6 2008-07-04 rsc return;
239 fa325e9b 2020-01-10 cross
240 ac5a97e6 2008-07-04 rsc rlock(&b->lk);
241 ac5a97e6 2008-07-04 rsc qlock(&b->mod);
242 ac5a97e6 2008-07-04 rsc for(i=0; i<n; i++)
243 ac5a97e6 2008-07-04 rsc _markbloomfilter(b, score[i]);
244 a0d146ed 2005-07-12 devnull qunlock(&b->mod);
245 a0d146ed 2005-07-12 devnull runlock(&b->lk);
246 a0d146ed 2005-07-12 devnull }
247 a0d146ed 2005-07-12 devnull
248 a0d146ed 2005-07-12 devnull static void
249 a0d146ed 2005-07-12 devnull bloomwriteproc(void *v)
250 a0d146ed 2005-07-12 devnull {
251 28b49df3 2006-07-18 devnull int ret;
252 a0d146ed 2005-07-12 devnull Bloom *b;
253 28b49df3 2006-07-18 devnull
254 fa325e9b 2020-01-10 cross threadsetname("bloomwriteproc");
255 a0d146ed 2005-07-12 devnull b = v;
256 a0d146ed 2005-07-12 devnull for(;;){
257 a0d146ed 2005-07-12 devnull recv(b->writechan, 0);
258 28b49df3 2006-07-18 devnull if((ret=writebloom(b)) < 0)
259 a0d146ed 2005-07-12 devnull fprint(2, "oops! writing bloom: %r\n");
260 28b49df3 2006-07-18 devnull else
261 28b49df3 2006-07-18 devnull ret = 0;
262 28b49df3 2006-07-18 devnull sendul(b->writedonechan, ret);
263 a0d146ed 2005-07-12 devnull }
264 a0d146ed 2005-07-12 devnull }
265 a0d146ed 2005-07-12 devnull
266 a0d146ed 2005-07-12 devnull void
267 a0d146ed 2005-07-12 devnull startbloomproc(Bloom *b)
268 a0d146ed 2005-07-12 devnull {
269 a0d146ed 2005-07-12 devnull b->writechan = chancreate(sizeof(void*), 0);
270 d117737d 2012-05-02 0intro b->writedonechan = chancreate(sizeof(ulong), 0);
271 fa325e9b 2020-01-10 cross vtproc(bloomwriteproc, b);
272 a0d146ed 2005-07-12 devnull }