2 a0d146ed 2005-07-12 devnull * Index, mapping scores to log positions.
4 a0d146ed 2005-07-12 devnull * The index is made up of some number of index sections, each of
5 a0d146ed 2005-07-12 devnull * which is typically stored on a different disk. The blocks in all the
6 a0d146ed 2005-07-12 devnull * index sections are logically numbered, with each index section
7 a0d146ed 2005-07-12 devnull * responsible for a range of blocks. Blocks are typically 8kB.
9 a0d146ed 2005-07-12 devnull * The N index blocks are treated as a giant hash table. The top 32 bits
10 a0d146ed 2005-07-12 devnull * of score are used as the key for a lookup. Each index block holds
11 a0d146ed 2005-07-12 devnull * one hash bucket, which is responsible for ceil(2^32 / N) of the key space.
13 a0d146ed 2005-07-12 devnull * The index is sized so that a particular bucket is extraordinarily
14 a0d146ed 2005-07-12 devnull * unlikely to overflow: assuming compressed data blocks are 4kB
15 a0d146ed 2005-07-12 devnull * on disk, and assuming each block has a 40 byte index entry,
16 a0d146ed 2005-07-12 devnull * the index data will be 1% of the total data. Since scores are essentially
17 a0d146ed 2005-07-12 devnull * random, all buckets should be about the same fullness.
18 a0d146ed 2005-07-12 devnull * A factor of 5 gives us a wide comfort boundary to account for
19 a0d146ed 2005-07-12 devnull * random variation. So the index disk space should be 5% of the arena disk space.
22 a0d146ed 2005-07-12 devnull #include "stdinc.h"
23 a0d146ed 2005-07-12 devnull #include "dat.h"
24 a0d146ed 2005-07-12 devnull #include "fns.h"
26 a0d146ed 2005-07-12 devnull //static int bucklook(u8int *score, int type, u8int *data, int n);
27 a0d146ed 2005-07-12 devnull //static int writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b);
28 a0d146ed 2005-07-12 devnull //static int okibucket(IBucket *ib, ISect *is);
29 a0d146ed 2005-07-12 devnull static int initindex1(Index*);
30 a0d146ed 2005-07-12 devnull static ISect *initisect1(ISect *is);
31 a0d146ed 2005-07-12 devnull //static int splitiblock(Index *ix, DBlock *b, ISect *is, u32int buck, IBucket *ib);
33 a0d146ed 2005-07-12 devnull #define KEY(k,d) ((d) ? (k)>>(32-(d)) : 0)
35 a0d146ed 2005-07-12 devnull //static QLock indexlock; //ZZZ
37 a0d146ed 2005-07-12 devnull static char IndexMagic[] = "venti index configuration";
40 a0d146ed 2005-07-12 devnull initindex(char *name, ISect **sects, int n)
43 a0d146ed 2005-07-12 devnull Index *ix;
44 a0d146ed 2005-07-12 devnull ISect *is;
45 a0d146ed 2005-07-12 devnull u32int last, blocksize, tabsize;
48 a0d146ed 2005-07-12 devnull if(n <= 0){
49 a0d146ed 2005-07-12 devnull fprint(2, "bad n\n");
50 a0d146ed 2005-07-12 devnull seterr(EOk, "no index sections to initialize index");
51 a0d146ed 2005-07-12 devnull return nil;
53 a0d146ed 2005-07-12 devnull ix = MKZ(Index);
54 a0d146ed 2005-07-12 devnull if(ix == nil){
55 a0d146ed 2005-07-12 devnull fprint(2, "no mem\n");
56 a0d146ed 2005-07-12 devnull seterr(EOk, "can't initialize index: out of memory");
57 a0d146ed 2005-07-12 devnull freeindex(ix);
58 a0d146ed 2005-07-12 devnull return nil;
61 a0d146ed 2005-07-12 devnull tabsize = sects[0]->tabsize;
62 a0d146ed 2005-07-12 devnull if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0)
63 a0d146ed 2005-07-12 devnull return nil;
64 a0d146ed 2005-07-12 devnull if(parseindex(&f, ix) < 0){
65 a0d146ed 2005-07-12 devnull freeifile(&f);
66 a0d146ed 2005-07-12 devnull freeindex(ix);
67 a0d146ed 2005-07-12 devnull return nil;
69 a0d146ed 2005-07-12 devnull freeifile(&f);
70 a0d146ed 2005-07-12 devnull if(namecmp(ix->name, name) != 0){
71 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
72 a0d146ed 2005-07-12 devnull return nil;
74 a0d146ed 2005-07-12 devnull if(ix->nsects != n){
75 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "mismatched number index sections: found %d expected %d", n, ix->nsects);
76 a0d146ed 2005-07-12 devnull freeindex(ix);
77 a0d146ed 2005-07-12 devnull return nil;
79 a0d146ed 2005-07-12 devnull ix->sects = sects;
80 a0d146ed 2005-07-12 devnull last = 0;
81 a0d146ed 2005-07-12 devnull blocksize = ix->blocksize;
82 a0d146ed 2005-07-12 devnull for(i = 0; i < ix->nsects; i++){
83 a0d146ed 2005-07-12 devnull is = sects[i];
84 a0d146ed 2005-07-12 devnull if(namecmp(ix->name, is->index) != 0
85 a0d146ed 2005-07-12 devnull || is->blocksize != blocksize
86 a0d146ed 2005-07-12 devnull || is->tabsize != tabsize
87 a0d146ed 2005-07-12 devnull || namecmp(is->name, ix->smap[i].name) != 0
88 a0d146ed 2005-07-12 devnull || is->start != ix->smap[i].start
89 a0d146ed 2005-07-12 devnull || is->stop != ix->smap[i].stop
90 a0d146ed 2005-07-12 devnull || last != is->start
91 a0d146ed 2005-07-12 devnull || is->start > is->stop){
92 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "inconsistent index sections in %s", ix->name);
93 a0d146ed 2005-07-12 devnull freeindex(ix);
94 a0d146ed 2005-07-12 devnull return nil;
96 a0d146ed 2005-07-12 devnull last = is->stop;
98 a0d146ed 2005-07-12 devnull ix->tabsize = tabsize;
99 a0d146ed 2005-07-12 devnull ix->buckets = last;
101 a0d146ed 2005-07-12 devnull if(initindex1(ix) < 0){
102 a0d146ed 2005-07-12 devnull freeindex(ix);
103 a0d146ed 2005-07-12 devnull return nil;
106 a0d146ed 2005-07-12 devnull ix->arenas = MKNZ(Arena*, ix->narenas);
107 a0d146ed 2005-07-12 devnull if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
108 a0d146ed 2005-07-12 devnull freeindex(ix);
109 a0d146ed 2005-07-12 devnull return nil;
112 a0d146ed 2005-07-12 devnull return ix;
115 a0d146ed 2005-07-12 devnull static int
116 a0d146ed 2005-07-12 devnull initindex1(Index *ix)
118 a0d146ed 2005-07-12 devnull u32int buckets;
120 a0d146ed 2005-07-12 devnull ix->div = (((u64int)1 << 32) + ix->buckets - 1) / ix->buckets;
121 a0d146ed 2005-07-12 devnull buckets = (((u64int)1 << 32) - 1) / ix->div + 1;
122 a0d146ed 2005-07-12 devnull if(buckets != ix->buckets){
123 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "inconsistent math for divisor and buckets in %s", ix->name);
124 a0d146ed 2005-07-12 devnull return -1;
127 a0d146ed 2005-07-12 devnull return 0;
131 a0d146ed 2005-07-12 devnull wbindex(Index *ix)
134 a0d146ed 2005-07-12 devnull ZBlock *b;
137 a0d146ed 2005-07-12 devnull if(ix->nsects == 0){
138 a0d146ed 2005-07-12 devnull seterr(EOk, "no sections in index %s", ix->name);
139 a0d146ed 2005-07-12 devnull return -1;
141 a0d146ed 2005-07-12 devnull b = alloczblock(ix->tabsize, 1, ix->blocksize);
142 a0d146ed 2005-07-12 devnull if(b == nil){
143 a0d146ed 2005-07-12 devnull seterr(EOk, "can't write index configuration: out of memory");
144 a0d146ed 2005-07-12 devnull return -1;
146 a0d146ed 2005-07-12 devnull fmtzbinit(&f, b);
147 a0d146ed 2005-07-12 devnull if(outputindex(&f, ix) < 0){
148 a0d146ed 2005-07-12 devnull seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
149 a0d146ed 2005-07-12 devnull freezblock(b);
150 a0d146ed 2005-07-12 devnull return -1;
152 a0d146ed 2005-07-12 devnull for(i = 0; i < ix->nsects; i++){
153 a0d146ed 2005-07-12 devnull if(writepart(ix->sects[i]->part, ix->sects[i]->tabbase, b->data, ix->tabsize) < 0){
154 a0d146ed 2005-07-12 devnull seterr(EOk, "can't write index: %r");
155 a0d146ed 2005-07-12 devnull freezblock(b);
156 a0d146ed 2005-07-12 devnull return -1;
159 a0d146ed 2005-07-12 devnull freezblock(b);
161 a0d146ed 2005-07-12 devnull for(i = 0; i < ix->nsects; i++)
162 a0d146ed 2005-07-12 devnull if(wbisect(ix->sects[i]) < 0)
163 a0d146ed 2005-07-12 devnull return -1;
165 a0d146ed 2005-07-12 devnull return 0;
169 a0d146ed 2005-07-12 devnull * index: IndexMagic '\n' version '\n' name '\n' blocksize '\n' [V2: bitblocks '\n'] sections arenas
170 a0d146ed 2005-07-12 devnull * version, blocksize: u32int
171 a0d146ed 2005-07-12 devnull * name: max. ANameSize string
172 a0d146ed 2005-07-12 devnull * sections, arenas: AMap
175 a0d146ed 2005-07-12 devnull outputindex(Fmt *f, Index *ix)
177 a0d146ed 2005-07-12 devnull if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0
178 a0d146ed 2005-07-12 devnull || outputamap(f, ix->smap, ix->nsects) < 0
179 a0d146ed 2005-07-12 devnull || outputamap(f, ix->amap, ix->narenas) < 0)
180 a0d146ed 2005-07-12 devnull return -1;
181 a0d146ed 2005-07-12 devnull return 0;
185 a0d146ed 2005-07-12 devnull parseindex(IFile *f, Index *ix)
187 a0d146ed 2005-07-12 devnull AMapN amn;
188 a0d146ed 2005-07-12 devnull u32int v;
189 a0d146ed 2005-07-12 devnull char *s;
194 a0d146ed 2005-07-12 devnull s = ifileline(f);
195 a0d146ed 2005-07-12 devnull if(s == nil || strcmp(s, IndexMagic) != 0){
196 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "bad index magic for %s", f->name);
197 a0d146ed 2005-07-12 devnull return -1;
201 a0d146ed 2005-07-12 devnull * version
203 a0d146ed 2005-07-12 devnull if(ifileu32int(f, &v) < 0){
204 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
205 a0d146ed 2005-07-12 devnull return -1;
207 a0d146ed 2005-07-12 devnull ix->version = v;
208 a0d146ed 2005-07-12 devnull if(ix->version != IndexVersion){
209 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "bad version number in %s", f->name);
210 a0d146ed 2005-07-12 devnull return -1;
216 a0d146ed 2005-07-12 devnull if(ifilename(f, ix->name) < 0){
217 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
218 a0d146ed 2005-07-12 devnull return -1;
222 a0d146ed 2005-07-12 devnull * block size
224 a0d146ed 2005-07-12 devnull if(ifileu32int(f, &v) < 0){
225 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "syntax error: bad block size number in %s", f->name);
226 a0d146ed 2005-07-12 devnull return -1;
228 a0d146ed 2005-07-12 devnull ix->blocksize = v;
230 a0d146ed 2005-07-12 devnull if(parseamap(f, &amn) < 0)
231 a0d146ed 2005-07-12 devnull return -1;
232 a0d146ed 2005-07-12 devnull ix->nsects = amn.n;
233 a0d146ed 2005-07-12 devnull ix->smap = amn.map;
235 a0d146ed 2005-07-12 devnull if(parseamap(f, &amn) < 0)
236 a0d146ed 2005-07-12 devnull return -1;
237 a0d146ed 2005-07-12 devnull ix->narenas = amn.n;
238 a0d146ed 2005-07-12 devnull ix->amap = amn.map;
240 a0d146ed 2005-07-12 devnull return 0;
244 a0d146ed 2005-07-12 devnull * initialize an entirely new index
247 a0d146ed 2005-07-12 devnull newindex(char *name, ISect **sects, int n)
249 a0d146ed 2005-07-12 devnull Index *ix;
250 a0d146ed 2005-07-12 devnull AMap *smap;
251 a0d146ed 2005-07-12 devnull u64int nb;
252 a0d146ed 2005-07-12 devnull u32int div, ub, xb, fb, start, stop, blocksize, tabsize;
253 a0d146ed 2005-07-12 devnull int i, j;
255 a0d146ed 2005-07-12 devnull if(n < 1){
256 a0d146ed 2005-07-12 devnull seterr(EOk, "creating index with no index sections");
257 a0d146ed 2005-07-12 devnull return nil;
261 a0d146ed 2005-07-12 devnull * compute the total buckets available in the index,
262 a0d146ed 2005-07-12 devnull * and the total buckets which are used.
265 a0d146ed 2005-07-12 devnull blocksize = sects[0]->blocksize;
266 a0d146ed 2005-07-12 devnull tabsize = sects[0]->tabsize;
267 a0d146ed 2005-07-12 devnull for(i = 0; i < n; i++){
268 a0d146ed 2005-07-12 devnull if(sects[i]->start != 0 || sects[i]->stop != 0
269 a0d146ed 2005-07-12 devnull || sects[i]->index[0] != '\0'){
270 a0d146ed 2005-07-12 devnull seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
271 a0d146ed 2005-07-12 devnull return nil;
273 a0d146ed 2005-07-12 devnull if(blocksize != sects[i]->blocksize){
274 a0d146ed 2005-07-12 devnull seterr(EOk, "mismatched block sizes in index sections");
275 a0d146ed 2005-07-12 devnull return nil;
277 a0d146ed 2005-07-12 devnull if(tabsize != sects[i]->tabsize){
278 a0d146ed 2005-07-12 devnull seterr(EOk, "mismatched config table sizes in index sections");
279 a0d146ed 2005-07-12 devnull return nil;
281 a0d146ed 2005-07-12 devnull nb += sects[i]->blocks;
285 a0d146ed 2005-07-12 devnull * check for duplicate names
287 a0d146ed 2005-07-12 devnull for(i = 0; i < n; i++){
288 a0d146ed 2005-07-12 devnull for(j = i + 1; j < n; j++){
289 a0d146ed 2005-07-12 devnull if(namecmp(sects[i]->name, sects[j]->name) == 0){
290 a0d146ed 2005-07-12 devnull seterr(EOk, "duplicate section name %s for index %s", sects[i]->name, name);
291 a0d146ed 2005-07-12 devnull return nil;
296 a0d146ed 2005-07-12 devnull if(nb >= ((u64int)1 << 32)){
297 a0d146ed 2005-07-12 devnull seterr(EBug, "index too large");
298 a0d146ed 2005-07-12 devnull return nil;
302 a0d146ed 2005-07-12 devnull div = (((u64int)1 << 32) + nb - 1) / nb;
303 a0d146ed 2005-07-12 devnull ub = (((u64int)1 << 32) - 1) / div + 1;
304 a0d146ed 2005-07-12 devnull if(div < 100){
305 a0d146ed 2005-07-12 devnull seterr(EBug, "index divisor too coarse");
306 a0d146ed 2005-07-12 devnull return nil;
308 a0d146ed 2005-07-12 devnull if(ub > nb){
309 a0d146ed 2005-07-12 devnull seterr(EBug, "index initialization math wrong");
310 a0d146ed 2005-07-12 devnull return nil;
312 a0d146ed 2005-07-12 devnull xb = nb - ub;
315 a0d146ed 2005-07-12 devnull * initialize each of the index sections
316 a0d146ed 2005-07-12 devnull * and the section map table
318 a0d146ed 2005-07-12 devnull smap = MKNZ(AMap, n);
319 a0d146ed 2005-07-12 devnull if(smap == nil){
320 a0d146ed 2005-07-12 devnull seterr(EOk, "can't create new index: out of memory");
321 a0d146ed 2005-07-12 devnull return nil;
323 a0d146ed 2005-07-12 devnull start = 0;
324 a0d146ed 2005-07-12 devnull for(i = 0; i < n; i++){
325 a0d146ed 2005-07-12 devnull stop = start + sects[i]->blocks - xb / n;
326 a0d146ed 2005-07-12 devnull if(i == n - 1)
327 a0d146ed 2005-07-12 devnull stop = ub;
328 a0d146ed 2005-07-12 devnull sects[i]->start = start;
329 a0d146ed 2005-07-12 devnull sects[i]->stop = stop;
330 a0d146ed 2005-07-12 devnull namecp(sects[i]->index, name);
332 a0d146ed 2005-07-12 devnull smap[i].start = start;
333 a0d146ed 2005-07-12 devnull smap[i].stop = stop;
334 a0d146ed 2005-07-12 devnull namecp(smap[i].name, sects[i]->name);
335 a0d146ed 2005-07-12 devnull start = stop;
339 a0d146ed 2005-07-12 devnull * initialize the index itself
341 a0d146ed 2005-07-12 devnull ix = MKZ(Index);
342 a0d146ed 2005-07-12 devnull if(ix == nil){
343 a0d146ed 2005-07-12 devnull seterr(EOk, "can't create new index: out of memory");
344 a0d146ed 2005-07-12 devnull free(smap);
345 a0d146ed 2005-07-12 devnull return nil;
347 a0d146ed 2005-07-12 devnull ix->version = IndexVersion;
348 a0d146ed 2005-07-12 devnull namecp(ix->name, name);
349 a0d146ed 2005-07-12 devnull ix->sects = sects;
350 a0d146ed 2005-07-12 devnull ix->smap = smap;
351 a0d146ed 2005-07-12 devnull ix->nsects = n;
352 a0d146ed 2005-07-12 devnull ix->blocksize = blocksize;
353 a0d146ed 2005-07-12 devnull ix->buckets = ub;
354 a0d146ed 2005-07-12 devnull ix->tabsize = tabsize;
355 a0d146ed 2005-07-12 devnull ix->div = div;
356 a0d146ed 2005-07-12 devnull ix->bitblocks = fb;
358 a0d146ed 2005-07-12 devnull if(initindex1(ix) < 0){
359 a0d146ed 2005-07-12 devnull free(smap);
360 a0d146ed 2005-07-12 devnull return nil;
363 a0d146ed 2005-07-12 devnull return ix;
367 a0d146ed 2005-07-12 devnull initisect(Part *part)
369 a0d146ed 2005-07-12 devnull ISect *is;
370 a0d146ed 2005-07-12 devnull ZBlock *b;
373 a0d146ed 2005-07-12 devnull b = alloczblock(HeadSize, 0, 0);
374 a0d146ed 2005-07-12 devnull if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
375 a0d146ed 2005-07-12 devnull seterr(EAdmin, "can't read index section header: %r");
376 a0d146ed 2005-07-12 devnull return nil;
379 a0d146ed 2005-07-12 devnull is = MKZ(ISect);
380 a0d146ed 2005-07-12 devnull if(is == nil){
381 a0d146ed 2005-07-12 devnull freezblock(b);
382 a0d146ed 2005-07-12 devnull return nil;
384 a0d146ed 2005-07-12 devnull is->part = part;
385 a0d146ed 2005-07-12 devnull ok = unpackisect(is, b->data);
386 a0d146ed 2005-07-12 devnull freezblock(b);
387 a0d146ed 2005-07-12 devnull if(ok < 0){
388 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "corrupted index section header: %r");
389 a0d146ed 2005-07-12 devnull freeisect(is);
390 a0d146ed 2005-07-12 devnull return nil;
393 a0d146ed 2005-07-12 devnull if(is->version != ISectVersion1 && is->version != ISectVersion2){
394 a0d146ed 2005-07-12 devnull seterr(EAdmin, "unknown index section version %d", is->version);
395 a0d146ed 2005-07-12 devnull freeisect(is);
396 a0d146ed 2005-07-12 devnull return nil;
399 a0d146ed 2005-07-12 devnull return initisect1(is);
403 a0d146ed 2005-07-12 devnull newisect(Part *part, u32int vers, char *name, u32int blocksize, u32int tabsize)
405 a0d146ed 2005-07-12 devnull ISect *is;
406 a0d146ed 2005-07-12 devnull u32int tabbase;
408 a0d146ed 2005-07-12 devnull is = MKZ(ISect);
409 a0d146ed 2005-07-12 devnull if(is == nil)
410 a0d146ed 2005-07-12 devnull return nil;
412 a0d146ed 2005-07-12 devnull namecp(is->name, name);
413 a0d146ed 2005-07-12 devnull is->version = vers;
414 a0d146ed 2005-07-12 devnull is->part = part;
415 a0d146ed 2005-07-12 devnull is->blocksize = blocksize;
416 a0d146ed 2005-07-12 devnull is->start = 0;
417 a0d146ed 2005-07-12 devnull is->stop = 0;
418 a0d146ed 2005-07-12 devnull tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
419 a0d146ed 2005-07-12 devnull is->blockbase = (tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
420 a0d146ed 2005-07-12 devnull is->blocks = is->part->size / blocksize - is->blockbase / blocksize;
421 a0d146ed 2005-07-12 devnull is->bucketmagic = 0;
422 a0d146ed 2005-07-12 devnull if(is->version == ISectVersion2){
424 a0d146ed 2005-07-12 devnull is->bucketmagic = fastrand();
425 a0d146ed 2005-07-12 devnull }while(is->bucketmagic==0);
427 a0d146ed 2005-07-12 devnull is = initisect1(is);
428 a0d146ed 2005-07-12 devnull if(is == nil)
429 a0d146ed 2005-07-12 devnull return nil;
431 a0d146ed 2005-07-12 devnull return is;
435 a0d146ed 2005-07-12 devnull * initialize the computed parameters for an index
437 a0d146ed 2005-07-12 devnull static ISect*
438 a0d146ed 2005-07-12 devnull initisect1(ISect *is)
440 a0d146ed 2005-07-12 devnull u64int v;
442 a0d146ed 2005-07-12 devnull is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
443 a0d146ed 2005-07-12 devnull is->blocklog = u64log2(is->blocksize);
444 a0d146ed 2005-07-12 devnull if(is->blocksize != (1 << is->blocklog)){
445 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
446 a0d146ed 2005-07-12 devnull freeisect(is);
447 a0d146ed 2005-07-12 devnull return nil;
449 a0d146ed 2005-07-12 devnull partblocksize(is->part, is->blocksize);
450 a0d146ed 2005-07-12 devnull is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
451 a0d146ed 2005-07-12 devnull if(is->tabbase >= is->blockbase){
452 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "index section config table overlaps bucket storage");
453 a0d146ed 2005-07-12 devnull freeisect(is);
454 a0d146ed 2005-07-12 devnull return nil;
456 a0d146ed 2005-07-12 devnull is->tabsize = is->blockbase - is->tabbase;
457 a0d146ed 2005-07-12 devnull v = is->part->size & ~(u64int)(is->blocksize - 1);
458 a0d146ed 2005-07-12 devnull if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
459 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "invalid blocks in index section %s", is->name);
460 a0d146ed 2005-07-12 devnull //ZZZZZZZZZ
461 a0d146ed 2005-07-12 devnull // freeisect(is);
462 a0d146ed 2005-07-12 devnull // return nil;
465 a0d146ed 2005-07-12 devnull if(is->stop - is->start > is->blocks){
466 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "index section overflows available space");
467 a0d146ed 2005-07-12 devnull freeisect(is);
468 a0d146ed 2005-07-12 devnull return nil;
470 a0d146ed 2005-07-12 devnull if(is->start > is->stop){
471 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "invalid index section range");
472 a0d146ed 2005-07-12 devnull freeisect(is);
473 a0d146ed 2005-07-12 devnull return nil;
476 a0d146ed 2005-07-12 devnull return is;
480 a0d146ed 2005-07-12 devnull wbisect(ISect *is)
482 a0d146ed 2005-07-12 devnull ZBlock *b;
484 a0d146ed 2005-07-12 devnull b = alloczblock(HeadSize, 1, 0);
485 a0d146ed 2005-07-12 devnull if(b == nil)
486 a0d146ed 2005-07-12 devnull //ZZZ set error?
487 a0d146ed 2005-07-12 devnull return -1;
489 a0d146ed 2005-07-12 devnull if(packisect(is, b->data) < 0){
490 a0d146ed 2005-07-12 devnull seterr(ECorrupt, "can't make index section header: %r");
491 a0d146ed 2005-07-12 devnull freezblock(b);
492 a0d146ed 2005-07-12 devnull return -1;
494 a0d146ed 2005-07-12 devnull if(writepart(is->part, PartBlank, b->data, HeadSize) < 0){
495 a0d146ed 2005-07-12 devnull seterr(EAdmin, "can't write index section header: %r");
496 a0d146ed 2005-07-12 devnull freezblock(b);
497 a0d146ed 2005-07-12 devnull return -1;
499 a0d146ed 2005-07-12 devnull freezblock(b);
501 a0d146ed 2005-07-12 devnull return 0;
505 a0d146ed 2005-07-12 devnull freeisect(ISect *is)
507 a0d146ed 2005-07-12 devnull if(is == nil)
509 a0d146ed 2005-07-12 devnull free(is);
513 a0d146ed 2005-07-12 devnull freeindex(Index *ix)
517 a0d146ed 2005-07-12 devnull if(ix == nil)
519 a0d146ed 2005-07-12 devnull free(ix->amap);
520 a0d146ed 2005-07-12 devnull free(ix->arenas);
521 a0d146ed 2005-07-12 devnull if(ix->sects)
522 a0d146ed 2005-07-12 devnull for(i = 0; i < ix->nsects; i++)
523 a0d146ed 2005-07-12 devnull freeisect(ix->sects[i]);
524 a0d146ed 2005-07-12 devnull free(ix->sects);
525 a0d146ed 2005-07-12 devnull free(ix->smap);
526 a0d146ed 2005-07-12 devnull free(ix);
530 a0d146ed 2005-07-12 devnull * write a clump to an available arena in the index
531 a0d146ed 2005-07-12 devnull * and return the address of the clump within the index.
532 a0d146ed 2005-07-12 devnull ZZZ question: should this distinguish between an arena
533 a0d146ed 2005-07-12 devnull filling up and real errors writing the clump?
536 a0d146ed 2005-07-12 devnull writeiclump(Index *ix, Clump *c, u8int *clbuf, u64int *pa)
538 a0d146ed 2005-07-12 devnull u64int a;
541 a0d146ed 2005-07-12 devnull trace(TraceLump, "writeiclump enter");
542 a0d146ed 2005-07-12 devnull for(i = ix->mapalloc; i < ix->narenas; i++){
543 a0d146ed 2005-07-12 devnull a = writeaclump(ix->arenas[i], c, clbuf, ix->amap[i].start, pa);
544 a0d146ed 2005-07-12 devnull if(a != TWID64){
545 a0d146ed 2005-07-12 devnull ix->mapalloc = i; /* assuming write is atomic, race is okay */
546 a0d146ed 2005-07-12 devnull trace(TraceLump, "writeiclump exit");
547 a0d146ed 2005-07-12 devnull return a;
551 a0d146ed 2005-07-12 devnull seterr(EAdmin, "no space left in arenas");
552 a0d146ed 2005-07-12 devnull trace(TraceLump, "writeiclump failed");
553 a0d146ed 2005-07-12 devnull return TWID64;
557 a0d146ed 2005-07-12 devnull * convert an arena index to an relative arena address
560 a0d146ed 2005-07-12 devnull amapitoa(Index *ix, u64int a, u64int *aa)
562 a0d146ed 2005-07-12 devnull int i, r, l, m;
565 a0d146ed 2005-07-12 devnull r = ix->narenas - 1;
566 a0d146ed 2005-07-12 devnull while(l <= r){
567 a0d146ed 2005-07-12 devnull m = (r + l) / 2;
568 a0d146ed 2005-07-12 devnull if(ix->amap[m].start <= a)
569 a0d146ed 2005-07-12 devnull l = m + 1;
571 a0d146ed 2005-07-12 devnull r = m - 1;
575 a0d146ed 2005-07-12 devnull if(a > ix->amap[l].stop){
576 a0d146ed 2005-07-12 devnull for(i=0; i<ix->narenas; i++)
577 a0d146ed 2005-07-12 devnull print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
578 a0d146ed 2005-07-12 devnull print("want arena %d for %llux\n", l, a);
579 a0d146ed 2005-07-12 devnull seterr(ECrash, "unmapped address passed to amapitoa");
580 a0d146ed 2005-07-12 devnull return nil;
583 a0d146ed 2005-07-12 devnull if(ix->arenas[l] == nil){
584 a0d146ed 2005-07-12 devnull seterr(ECrash, "unmapped arena selected in amapitoa");
585 a0d146ed 2005-07-12 devnull return nil;
587 a0d146ed 2005-07-12 devnull *aa = a - ix->amap[l].start;
588 a0d146ed 2005-07-12 devnull return ix->arenas[l];
592 a0d146ed 2005-07-12 devnull iaddrcmp(IAddr *ia1, IAddr *ia2)
594 a0d146ed 2005-07-12 devnull return ia1->type != ia2->type
595 a0d146ed 2005-07-12 devnull || ia1->size != ia2->size
596 a0d146ed 2005-07-12 devnull || ia1->blocks != ia2->blocks
597 a0d146ed 2005-07-12 devnull || ia1->addr != ia2->addr;
601 a0d146ed 2005-07-12 devnull * lookup the score in the partition
603 a0d146ed 2005-07-12 devnull * nothing needs to be explicitly locked:
604 a0d146ed 2005-07-12 devnull * only static parts of ix are used, and
605 a0d146ed 2005-07-12 devnull * the bucket is locked by the DBlock lock.
608 a0d146ed 2005-07-12 devnull loadientry(Index *ix, u8int *score, int type, IEntry *ie)
610 a0d146ed 2005-07-12 devnull ISect *is;
611 a0d146ed 2005-07-12 devnull DBlock *b;
612 a0d146ed 2005-07-12 devnull IBucket ib;
613 a0d146ed 2005-07-12 devnull u32int buck;
614 a0d146ed 2005-07-12 devnull int h, ok;
616 a0d146ed 2005-07-12 devnull ok = -1;
618 a0d146ed 2005-07-12 devnull trace(TraceLump, "loadientry enter");
621 a0d146ed 2005-07-12 devnull qlock(&stats.lock);
622 a0d146ed 2005-07-12 devnull stats.indexreads++;
623 a0d146ed 2005-07-12 devnull qunlock(&stats.lock);
626 a0d146ed 2005-07-12 devnull if(!inbloomfilter(mainindex->bloom, score)){
627 a0d146ed 2005-07-12 devnull trace(TraceLump, "loadientry bloomhit");
628 a0d146ed 2005-07-12 devnull return -1;
631 a0d146ed 2005-07-12 devnull trace(TraceLump, "loadientry loadibucket");
632 a0d146ed 2005-07-12 devnull b = loadibucket(ix, score, &is, &buck, &ib);
633 a0d146ed 2005-07-12 devnull trace(TraceLump, "loadientry loadedibucket");
634 a0d146ed 2005-07-12 devnull if(b == nil)
635 a0d146ed 2005-07-12 devnull return -1;
637 a0d146ed 2005-07-12 devnull if(okibucket(&ib, is) < 0){
638 a0d146ed 2005-07-12 devnull trace(TraceLump, "loadientry badbucket");
639 a0d146ed 2005-07-12 devnull goto out;
642 a0d146ed 2005-07-12 devnull h = bucklook(score, type, ib.data, ib.n);
643 a0d146ed 2005-07-12 devnull if(h & 1){
645 a0d146ed 2005-07-12 devnull trace(TraceLump, "loadientry found");
646 a0d146ed 2005-07-12 devnull unpackientry(ie, &ib.data[h]);
648 a0d146ed 2005-07-12 devnull goto out;
650 a0d146ed 2005-07-12 devnull trace(TraceLump, "loadientry notfound");
651 a0d146ed 2005-07-12 devnull addstat(StatBloomFalseMiss, 1);
653 a0d146ed 2005-07-12 devnull putdblock(b);
654 a0d146ed 2005-07-12 devnull trace(TraceLump, "loadientry exit");
655 a0d146ed 2005-07-12 devnull return ok;
659 a0d146ed 2005-07-12 devnull okibucket(IBucket *ib, ISect *is)
661 a0d146ed 2005-07-12 devnull if(ib->n <= is->buckmax)
662 a0d146ed 2005-07-12 devnull return 0;
664 a0d146ed 2005-07-12 devnull seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, range=[%lud,%lud)",
665 a0d146ed 2005-07-12 devnull ib->n, is->buckmax, is->start, is->stop);
666 a0d146ed 2005-07-12 devnull return -1;
670 a0d146ed 2005-07-12 devnull * look for score within data;
671 a0d146ed 2005-07-12 devnull * return 1 | byte index of matching index,
672 a0d146ed 2005-07-12 devnull * or 0 | index of least element > score
675 a0d146ed 2005-07-12 devnull bucklook(u8int *score, int otype, u8int *data, int n)
677 a0d146ed 2005-07-12 devnull int i, r, l, m, h, c, cc, type;
679 a0d146ed 2005-07-12 devnull type = vttodisktype(otype);
681 a0d146ed 2005-07-12 devnull r = n - 1;
682 a0d146ed 2005-07-12 devnull while(l <= r){
683 a0d146ed 2005-07-12 devnull m = (r + l) >> 1;
684 a0d146ed 2005-07-12 devnull h = m * IEntrySize;
685 a0d146ed 2005-07-12 devnull for(i = 0; i < VtScoreSize; i++){
686 a0d146ed 2005-07-12 devnull c = score[i];
687 a0d146ed 2005-07-12 devnull cc = data[h + i];
688 a0d146ed 2005-07-12 devnull if(c != cc){
689 a0d146ed 2005-07-12 devnull if(c > cc)
690 a0d146ed 2005-07-12 devnull l = m + 1;
692 a0d146ed 2005-07-12 devnull r = m - 1;
693 a0d146ed 2005-07-12 devnull goto cont;
696 a0d146ed 2005-07-12 devnull cc = data[h + IEntryTypeOff];
697 a0d146ed 2005-07-12 devnull if(type != cc){
698 a0d146ed 2005-07-12 devnull if(type > cc)
699 a0d146ed 2005-07-12 devnull l = m + 1;
701 a0d146ed 2005-07-12 devnull r = m - 1;
702 a0d146ed 2005-07-12 devnull goto cont;
704 a0d146ed 2005-07-12 devnull return h | 1;
708 a0d146ed 2005-07-12 devnull return l * IEntrySize;
712 a0d146ed 2005-07-12 devnull * compare two IEntries; consistent with bucklook
715 a0d146ed 2005-07-12 devnull ientrycmp(const void *vie1, const void *vie2)
717 a0d146ed 2005-07-12 devnull u8int *ie1, *ie2;
718 a0d146ed 2005-07-12 devnull int i, v1, v2;
720 a0d146ed 2005-07-12 devnull ie1 = (u8int*)vie1;
721 a0d146ed 2005-07-12 devnull ie2 = (u8int*)vie2;
722 a0d146ed 2005-07-12 devnull for(i = 0; i < VtScoreSize; i++){
723 a0d146ed 2005-07-12 devnull v1 = ie1[i];
724 a0d146ed 2005-07-12 devnull v2 = ie2[i];
725 a0d146ed 2005-07-12 devnull if(v1 != v2){
726 a0d146ed 2005-07-12 devnull if(v1 < v2)
727 a0d146ed 2005-07-12 devnull return -1;
728 a0d146ed 2005-07-12 devnull return 1;
731 a0d146ed 2005-07-12 devnull v1 = ie1[IEntryTypeOff];
732 a0d146ed 2005-07-12 devnull v2 = ie2[IEntryTypeOff];
733 a0d146ed 2005-07-12 devnull if(v1 != v2){
734 a0d146ed 2005-07-12 devnull if(v1 < v2)
735 a0d146ed 2005-07-12 devnull return -1;
736 a0d146ed 2005-07-12 devnull return 1;
738 a0d146ed 2005-07-12 devnull return 0;
742 a0d146ed 2005-07-12 devnull * find the number of the index section holding bucket #buck
745 a0d146ed 2005-07-12 devnull indexsect0(Index *ix, u32int buck)
747 a0d146ed 2005-07-12 devnull int r, l, m;
750 a0d146ed 2005-07-12 devnull r = ix->nsects - 1;
751 a0d146ed 2005-07-12 devnull while(l <= r){
752 a0d146ed 2005-07-12 devnull m = (r + l) >> 1;
753 a0d146ed 2005-07-12 devnull if(ix->sects[m]->start <= buck)
754 a0d146ed 2005-07-12 devnull l = m + 1;
756 a0d146ed 2005-07-12 devnull r = m - 1;
758 a0d146ed 2005-07-12 devnull return l - 1;
762 a0d146ed 2005-07-12 devnull * load the index block at bucket #buck
764 a0d146ed 2005-07-12 devnull static DBlock*
765 a0d146ed 2005-07-12 devnull loadibucket0(Index *ix, u32int buck, ISect **pis, u32int *pbuck, IBucket *ib, int mode)
767 a0d146ed 2005-07-12 devnull ISect *is;
768 a0d146ed 2005-07-12 devnull DBlock *b;
770 a0d146ed 2005-07-12 devnull is = ix->sects[indexsect0(ix, buck)];
771 a0d146ed 2005-07-12 devnull if(buck < is->start || is->stop <= buck){
772 a0d146ed 2005-07-12 devnull seterr(EAdmin, "index lookup out of range: %ud not found in index\n", buck);
773 a0d146ed 2005-07-12 devnull return nil;
776 a0d146ed 2005-07-12 devnull buck -= is->start;
777 a0d146ed 2005-07-12 devnull if((b = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), mode)) == nil)
778 a0d146ed 2005-07-12 devnull return nil;
781 a0d146ed 2005-07-12 devnull *pis = is;
782 a0d146ed 2005-07-12 devnull if(pbuck)
783 a0d146ed 2005-07-12 devnull *pbuck = buck;
785 a0d146ed 2005-07-12 devnull unpackibucket(ib, b->data, is->bucketmagic);
786 a0d146ed 2005-07-12 devnull return b;
790 a0d146ed 2005-07-12 devnull * find the number of the index section holding score
792 a0d146ed 2005-07-12 devnull static int
793 a0d146ed 2005-07-12 devnull indexsect1(Index *ix, u8int *score)
795 a0d146ed 2005-07-12 devnull return indexsect0(ix, hashbits(score, 32) / ix->div);
799 a0d146ed 2005-07-12 devnull * load the index block responsible for score.
801 a0d146ed 2005-07-12 devnull static DBlock*
802 a0d146ed 2005-07-12 devnull loadibucket1(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
804 a0d146ed 2005-07-12 devnull return loadibucket0(ix, hashbits(score, 32)/ix->div, pis, pbuck, ib, OREAD);
808 a0d146ed 2005-07-12 devnull indexsect(Index *ix, u8int *score)
810 a0d146ed 2005-07-12 devnull return indexsect1(ix, score);
814 a0d146ed 2005-07-12 devnull loadibucket(Index *ix, u8int *score, ISect **pis, u32int *pbuck, IBucket *ib)
816 a0d146ed 2005-07-12 devnull return loadibucket1(ix, score, pis, pbuck, ib);