Blame


1 a0d146ed 2005-07-12 devnull /*
2 a0d146ed 2005-07-12 devnull * Bloom filter tracking which scores are present in our arenas
3 a0d146ed 2005-07-12 devnull * and (more importantly) which are not.
4 a0d146ed 2005-07-12 devnull */
5 a0d146ed 2005-07-12 devnull
6 a0d146ed 2005-07-12 devnull #include "stdinc.h"
7 a0d146ed 2005-07-12 devnull #include "dat.h"
8 a0d146ed 2005-07-12 devnull #include "fns.h"
9 a0d146ed 2005-07-12 devnull
10 a0d146ed 2005-07-12 devnull int
11 a0d146ed 2005-07-12 devnull bloominit(Bloom *b, vlong vsize, u8int *data)
12 a0d146ed 2005-07-12 devnull {
13 a0d146ed 2005-07-12 devnull ulong size;
14 a0d146ed 2005-07-12 devnull
15 a0d146ed 2005-07-12 devnull size = vsize;
16 a0d146ed 2005-07-12 devnull if(size != vsize){ /* truncation */
17 a0d146ed 2005-07-12 devnull werrstr("bloom data too big");
18 a0d146ed 2005-07-12 devnull return -1;
19 a0d146ed 2005-07-12 devnull }
20 a0d146ed 2005-07-12 devnull
21 a0d146ed 2005-07-12 devnull b->size = size;
22 a0d146ed 2005-07-12 devnull b->nhash = 32; /* will be fixed by caller on initialization */
23 a0d146ed 2005-07-12 devnull if(data != nil)
24 a0d146ed 2005-07-12 devnull if(unpackbloomhead(b, data) < 0)
25 a0d146ed 2005-07-12 devnull return -1;
26 a0d146ed 2005-07-12 devnull
27 a0d146ed 2005-07-12 devnull b->mask = b->size-1;
28 a0d146ed 2005-07-12 devnull b->data = data;
29 a0d146ed 2005-07-12 devnull return 0;
30 a0d146ed 2005-07-12 devnull }
31 a0d146ed 2005-07-12 devnull
32 a0d146ed 2005-07-12 devnull void
33 a0d146ed 2005-07-12 devnull wbbloomhead(Bloom *b)
34 a0d146ed 2005-07-12 devnull {
35 a0d146ed 2005-07-12 devnull packbloomhead(b, b->data);
36 a0d146ed 2005-07-12 devnull }
37 a0d146ed 2005-07-12 devnull
38 a0d146ed 2005-07-12 devnull Bloom*
39 a0d146ed 2005-07-12 devnull readbloom(Part *p)
40 a0d146ed 2005-07-12 devnull {
41 a0d146ed 2005-07-12 devnull int i, n;
42 a0d146ed 2005-07-12 devnull uint ones;
43 a0d146ed 2005-07-12 devnull uchar buf[512];
44 a0d146ed 2005-07-12 devnull uchar *data;
45 a0d146ed 2005-07-12 devnull u32int *a;
46 a0d146ed 2005-07-12 devnull Bloom *b;
47 a0d146ed 2005-07-12 devnull
48 a0d146ed 2005-07-12 devnull b = vtmallocz(sizeof *b);
49 a0d146ed 2005-07-12 devnull if(readpart(p, 0, buf, sizeof buf) < 0)
50 a0d146ed 2005-07-12 devnull return nil;
51 a0d146ed 2005-07-12 devnull if(bloominit(b, 0, buf) < 0){
52 a0d146ed 2005-07-12 devnull vtfree(b);
53 a0d146ed 2005-07-12 devnull return nil;
54 a0d146ed 2005-07-12 devnull }
55 a0d146ed 2005-07-12 devnull data = vtmallocz(b->size);
56 a0d146ed 2005-07-12 devnull if(readpart(p, 0, data, b->size) < 0){
57 a0d146ed 2005-07-12 devnull vtfree(b);
58 a0d146ed 2005-07-12 devnull vtfree(data);
59 a0d146ed 2005-07-12 devnull return nil;
60 a0d146ed 2005-07-12 devnull }
61 a0d146ed 2005-07-12 devnull b->data = data;
62 a0d146ed 2005-07-12 devnull b->part = p;
63 a0d146ed 2005-07-12 devnull
64 a0d146ed 2005-07-12 devnull a = (u32int*)b->data;
65 a0d146ed 2005-07-12 devnull n = b->size/4;
66 a0d146ed 2005-07-12 devnull ones = 0;
67 a0d146ed 2005-07-12 devnull for(i=0; i<n; i++)
68 a0d146ed 2005-07-12 devnull ones += countbits(a[i]);
69 a0d146ed 2005-07-12 devnull addstat(StatBloomOnes, ones);
70 a0d146ed 2005-07-12 devnull
71 a0d146ed 2005-07-12 devnull if(b->size == MaxBloomSize) /* 2^32 overflows ulong */
72 a0d146ed 2005-07-12 devnull addstat(StatBloomBits, b->size*8-1);
73 a0d146ed 2005-07-12 devnull else
74 a0d146ed 2005-07-12 devnull addstat(StatBloomBits, b->size*8);
75 a0d146ed 2005-07-12 devnull
76 a0d146ed 2005-07-12 devnull return b;
77 a0d146ed 2005-07-12 devnull }
78 a0d146ed 2005-07-12 devnull
79 a0d146ed 2005-07-12 devnull int
80 a0d146ed 2005-07-12 devnull writebloom(Bloom *b)
81 a0d146ed 2005-07-12 devnull {
82 a0d146ed 2005-07-12 devnull wbbloomhead(b);
83 a0d146ed 2005-07-12 devnull return writepart(b->part, 0, b->data, b->size);
84 a0d146ed 2005-07-12 devnull }
85 a0d146ed 2005-07-12 devnull
86 a0d146ed 2005-07-12 devnull /*
87 a0d146ed 2005-07-12 devnull * Derive two random 32-bit quantities a, b from the score
88 a0d146ed 2005-07-12 devnull * and then use a+b*i as a sequence of bloom filter indices.
89 a0d146ed 2005-07-12 devnull * Michael Mitzenmacher has a recent (2005) paper saying this is okay.
90 a0d146ed 2005-07-12 devnull * We reserve the bottom bytes (BloomHeadSize*8 bits) for the header.
91 a0d146ed 2005-07-12 devnull */
92 a0d146ed 2005-07-12 devnull static void
93 a0d146ed 2005-07-12 devnull gethashes(u8int *score, ulong *h)
94 a0d146ed 2005-07-12 devnull {
95 a0d146ed 2005-07-12 devnull int i;
96 a0d146ed 2005-07-12 devnull u32int a, b;
97 a0d146ed 2005-07-12 devnull
98 a0d146ed 2005-07-12 devnull a = 0;
99 a0d146ed 2005-07-12 devnull b = 0;
100 a0d146ed 2005-07-12 devnull for(i=4; i+8<=VtScoreSize; i+=8){
101 a0d146ed 2005-07-12 devnull a ^= *(u32int*)(score+i);
102 a0d146ed 2005-07-12 devnull b ^= *(u32int*)(score+i+4);
103 a0d146ed 2005-07-12 devnull }
104 a0d146ed 2005-07-12 devnull if(i+4 <= VtScoreSize) /* 20 is not 4-aligned */
105 a0d146ed 2005-07-12 devnull a ^= *(u32int*)(score+i);
106 a0d146ed 2005-07-12 devnull for(i=0; i<BloomMaxHash; i++, a+=b)
107 a0d146ed 2005-07-12 devnull h[i] = a < BloomHeadSize*8 ? BloomHeadSize*8 : a;
108 a0d146ed 2005-07-12 devnull }
109 a0d146ed 2005-07-12 devnull
110 a0d146ed 2005-07-12 devnull static void
111 a0d146ed 2005-07-12 devnull _markbloomfilter(Bloom *b, u8int *score)
112 a0d146ed 2005-07-12 devnull {
113 a0d146ed 2005-07-12 devnull int i, nnew;
114 a0d146ed 2005-07-12 devnull ulong h[BloomMaxHash];
115 a0d146ed 2005-07-12 devnull u32int x, *y, z, *tab;
116 a0d146ed 2005-07-12 devnull
117 a0d146ed 2005-07-12 devnull trace("markbloomfilter", "markbloomfilter %V", score);
118 a0d146ed 2005-07-12 devnull gethashes(score, h);
119 a0d146ed 2005-07-12 devnull nnew = 0;
120 a0d146ed 2005-07-12 devnull tab = (u32int*)b->data;
121 a0d146ed 2005-07-12 devnull for(i=0; i<b->nhash; i++){
122 a0d146ed 2005-07-12 devnull x = h[i];
123 a0d146ed 2005-07-12 devnull y = &tab[(x&b->mask)>>5];
124 a0d146ed 2005-07-12 devnull z = 1<<(x&31);
125 a0d146ed 2005-07-12 devnull if(!(*y&z)){
126 a0d146ed 2005-07-12 devnull nnew++;
127 a0d146ed 2005-07-12 devnull *y |= z;
128 a0d146ed 2005-07-12 devnull }
129 a0d146ed 2005-07-12 devnull }
130 a0d146ed 2005-07-12 devnull if(nnew)
131 a0d146ed 2005-07-12 devnull addstat(StatBloomOnes, nnew);
132 a0d146ed 2005-07-12 devnull
133 a0d146ed 2005-07-12 devnull trace("markbloomfilter", "markbloomfilter exit");
134 a0d146ed 2005-07-12 devnull }
135 a0d146ed 2005-07-12 devnull
136 a0d146ed 2005-07-12 devnull static int
137 a0d146ed 2005-07-12 devnull _inbloomfilter(Bloom *b, u8int *score)
138 a0d146ed 2005-07-12 devnull {
139 a0d146ed 2005-07-12 devnull int i;
140 a0d146ed 2005-07-12 devnull ulong h[BloomMaxHash], x;
141 a0d146ed 2005-07-12 devnull u32int *tab;
142 a0d146ed 2005-07-12 devnull
143 a0d146ed 2005-07-12 devnull gethashes(score, h);
144 a0d146ed 2005-07-12 devnull tab = (u32int*)b->data;
145 a0d146ed 2005-07-12 devnull for(i=0; i<b->nhash; i++){
146 a0d146ed 2005-07-12 devnull x = h[i];
147 a0d146ed 2005-07-12 devnull if(!(tab[(x&b->mask)>>5] & (1<<(x&31))))
148 a0d146ed 2005-07-12 devnull return 0;
149 a0d146ed 2005-07-12 devnull }
150 a0d146ed 2005-07-12 devnull return 1;
151 a0d146ed 2005-07-12 devnull }
152 a0d146ed 2005-07-12 devnull
153 a0d146ed 2005-07-12 devnull int
154 a0d146ed 2005-07-12 devnull inbloomfilter(Bloom *b, u8int *score)
155 a0d146ed 2005-07-12 devnull {
156 a0d146ed 2005-07-12 devnull int r;
157 a0d146ed 2005-07-12 devnull uint ms;
158 a0d146ed 2005-07-12 devnull
159 a0d146ed 2005-07-12 devnull if(b == nil)
160 a0d146ed 2005-07-12 devnull return 1;
161 a0d146ed 2005-07-12 devnull
162 a0d146ed 2005-07-12 devnull ms = msec();
163 a0d146ed 2005-07-12 devnull rlock(&b->lk);
164 a0d146ed 2005-07-12 devnull r = _inbloomfilter(b, score);
165 a0d146ed 2005-07-12 devnull runlock(&b->lk);
166 a0d146ed 2005-07-12 devnull ms = ms - msec();
167 a0d146ed 2005-07-12 devnull addstat2(StatBloomLookup, 1, StatBloomLookupTime, ms);
168 a0d146ed 2005-07-12 devnull if(r)
169 a0d146ed 2005-07-12 devnull addstat(StatBloomMiss, 1);
170 a0d146ed 2005-07-12 devnull else
171 a0d146ed 2005-07-12 devnull addstat(StatBloomHit, 1);
172 a0d146ed 2005-07-12 devnull return r;
173 a0d146ed 2005-07-12 devnull }
174 a0d146ed 2005-07-12 devnull
175 a0d146ed 2005-07-12 devnull void
176 a0d146ed 2005-07-12 devnull markbloomfilter(Bloom *b, u8int *score)
177 a0d146ed 2005-07-12 devnull {
178 a0d146ed 2005-07-12 devnull if(b == nil)
179 a0d146ed 2005-07-12 devnull return;
180 a0d146ed 2005-07-12 devnull
181 a0d146ed 2005-07-12 devnull rlock(&b->lk);
182 a0d146ed 2005-07-12 devnull qlock(&b->mod);
183 a0d146ed 2005-07-12 devnull _markbloomfilter(b, score);
184 a0d146ed 2005-07-12 devnull qunlock(&b->mod);
185 a0d146ed 2005-07-12 devnull runlock(&b->lk);
186 a0d146ed 2005-07-12 devnull }
187 a0d146ed 2005-07-12 devnull
188 a0d146ed 2005-07-12 devnull static void
189 a0d146ed 2005-07-12 devnull bloomwriteproc(void *v)
190 a0d146ed 2005-07-12 devnull {
191 a0d146ed 2005-07-12 devnull Bloom *b;
192 a0d146ed 2005-07-12 devnull
193 a0d146ed 2005-07-12 devnull b = v;
194 a0d146ed 2005-07-12 devnull for(;;){
195 a0d146ed 2005-07-12 devnull recv(b->writechan, 0);
196 a0d146ed 2005-07-12 devnull if(writebloom(b) < 0)
197 a0d146ed 2005-07-12 devnull fprint(2, "oops! writing bloom: %r\n");
198 a0d146ed 2005-07-12 devnull send(b->writedonechan, 0);
199 a0d146ed 2005-07-12 devnull }
200 a0d146ed 2005-07-12 devnull }
201 a0d146ed 2005-07-12 devnull
202 a0d146ed 2005-07-12 devnull void
203 a0d146ed 2005-07-12 devnull startbloomproc(Bloom *b)
204 a0d146ed 2005-07-12 devnull {
205 a0d146ed 2005-07-12 devnull b->writechan = chancreate(sizeof(void*), 0);
206 a0d146ed 2005-07-12 devnull b->writedonechan = chancreate(sizeof(void*), 0);
207 a0d146ed 2005-07-12 devnull vtproc(bloomwriteproc, b);
208 a0d146ed 2005-07-12 devnull }