Blob


1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
5 static int bucklook(u8int *score, int type, u8int *data, int n);
6 static int writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b);
7 static int okibucket(IBucket *ib, ISect *is);
8 static ISect *initisect1(ISect *is);
10 //static QLock indexlock; //ZZZ
12 static char IndexMagic[] = "venti index configuration";
14 Index*
15 initindex(char *name, ISect **sects, int n)
16 {
17 IFile f;
18 Index *ix;
19 ISect *is;
20 u32int last, blocksize, tabsize;
21 int i;
23 if(n <= 0){
24 seterr(EOk, "no index sections to initialize index");
25 return nil;
26 }
27 ix = MKZ(Index);
28 if(ix == nil){
29 seterr(EOk, "can't initialize index: out of memory");
30 freeindex(ix);
31 return nil;
32 }
34 tabsize = sects[0]->tabsize;
35 if(partifile(&f, sects[0]->part, sects[0]->tabbase, tabsize) < 0)
36 return nil;
37 if(parseindex(&f, ix) < 0){
38 freeifile(&f);
39 freeindex(ix);
40 return nil;
41 }
42 freeifile(&f);
43 if(namecmp(ix->name, name) != 0){
44 seterr(ECorrupt, "mismatched index name: found %s expected %s", ix->name, name);
45 return nil;
46 }
47 if(ix->nsects != n){
48 seterr(ECorrupt, "mismatched number index sections: found %d expected %d", n, ix->nsects);
49 freeindex(ix);
50 return nil;
51 }
52 ix->sects = sects;
53 last = 0;
54 blocksize = ix->blocksize;
55 for(i = 0; i < ix->nsects; i++){
56 is = sects[i];
57 if(namecmp(ix->name, is->index) != 0
58 || is->blocksize != blocksize
59 || is->tabsize != tabsize
60 || namecmp(is->name, ix->smap[i].name) != 0
61 || is->start != ix->smap[i].start
62 || is->stop != ix->smap[i].stop
63 || last != is->start
64 || is->start > is->stop){
65 seterr(ECorrupt, "inconsistent index sections in %s", ix->name);
66 freeindex(ix);
67 return nil;
68 }
69 last = is->stop;
70 }
71 ix->tabsize = tabsize;
72 ix->buckets = last;
74 ix->div = (((u64int)1 << 32) + last - 1) / last;
75 last = (((u64int)1 << 32) - 1) / ix->div + 1;
76 if(last != ix->buckets){
77 seterr(ECorrupt, "inconsistent math for buckets in %s", ix->name);
78 freeindex(ix);
79 return nil;
80 }
82 ix->arenas = MKNZ(Arena*, ix->narenas);
83 if(maparenas(ix->amap, ix->arenas, ix->narenas, ix->name) < 0){
84 freeindex(ix);
85 return nil;
86 }
87 return ix;
88 }
90 int
91 wbindex(Index *ix)
92 {
93 Fmt f;
94 ZBlock *b;
95 int i;
97 if(ix->nsects == 0){
98 seterr(EOk, "no sections in index %s", ix->name);
99 return -1;
101 b = alloczblock(ix->tabsize, 1);
102 if(b == nil){
103 seterr(EOk, "can't write index configuration: out of memory");
104 return -1;
106 fmtzbinit(&f, b);
107 if(outputindex(&f, ix) < 0){
108 seterr(EOk, "can't make index configuration: table storage too small %d", ix->tabsize);
109 freezblock(b);
110 return -1;
112 for(i = 0; i < ix->nsects; i++){
113 if(writepart(ix->sects[i]->part, ix->sects[i]->tabbase, b->data, ix->tabsize) < 0){
114 seterr(EOk, "can't write index: %r");
115 freezblock(b);
116 return -1;
119 freezblock(b);
121 for(i = 0; i < ix->nsects; i++)
122 if(wbisect(ix->sects[i]) < 0)
123 return -1;
125 return 0;
128 /*
129 * index: IndexMagic '\n' version '\n' name '\n' blocksize '\n' sections arenas
130 * version, blocksize: u32int
131 * name: max. ANameSize string
132 * sections, arenas: AMap
133 */
134 int
135 outputindex(Fmt *f, Index *ix)
137 if(fmtprint(f, "%s\n%ud\n%s\n%ud\n", IndexMagic, ix->version, ix->name, ix->blocksize) < 0
138 || outputamap(f, ix->smap, ix->nsects) < 0
139 || outputamap(f, ix->amap, ix->narenas) < 0)
140 return -1;
141 return 0;
144 int
145 parseindex(IFile *f, Index *ix)
147 AMapN amn;
148 u32int v;
149 char *s;
151 /*
152 * magic
153 */
154 s = ifileline(f);
155 if(s == nil || strcmp(s, IndexMagic) != 0){
156 seterr(ECorrupt, "bad index magic for %s", f->name);
157 return -1;
160 /*
161 * version
162 */
163 if(ifileu32int(f, &v) < 0){
164 seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
165 return -1;
167 ix->version = v;
168 if(ix->version != IndexVersion){
169 seterr(ECorrupt, "bad version number in %s", f->name);
170 return -1;
173 /*
174 * name
175 */
176 if(ifilename(f, ix->name) < 0){
177 seterr(ECorrupt, "syntax error: bad index name in %s", f->name);
178 return -1;
181 /*
182 * block size
183 */
184 if(ifileu32int(f, &v) < 0){
185 seterr(ECorrupt, "syntax error: bad version number in %s", f->name);
186 return -1;
188 ix->blocksize = v;
190 if(parseamap(f, &amn) < 0)
191 return -1;
192 ix->nsects = amn.n;
193 ix->smap = amn.map;
195 if(parseamap(f, &amn) < 0)
196 return -1;
197 ix->narenas = amn.n;
198 ix->amap = amn.map;
200 return 0;
203 /*
204 * initialize an entirely new index
205 */
206 Index *
207 newindex(char *name, ISect **sects, int n)
209 Index *ix;
210 AMap *smap;
211 u64int nb;
212 u32int div, ub, xb, start, stop, blocksize, tabsize;
213 int i, j;
215 if(n < 1){
216 seterr(EOk, "creating index with no index sections");
217 return nil;
220 /*
221 * compute the total buckets available in the index,
222 * and the total buckets which are used.
223 */
224 nb = 0;
225 blocksize = sects[0]->blocksize;
226 tabsize = sects[0]->tabsize;
227 for(i = 0; i < n; i++){
228 if(sects[i]->start != 0 || sects[i]->stop != 0
229 || sects[i]->index[0] != '\0'){
230 seterr(EOk, "creating new index using non-empty section %s", sects[i]->name);
231 return nil;
233 if(blocksize != sects[i]->blocksize){
234 seterr(EOk, "mismatched block sizes in index sections");
235 return nil;
237 if(tabsize != sects[i]->tabsize){
238 seterr(EOk, "mismatched config table sizes in index sections");
239 return nil;
241 nb += sects[i]->blocks;
244 /*
245 * check for duplicate names
246 */
247 for(i = 0; i < n; i++){
248 for(j = i + 1; j < n; j++){
249 if(namecmp(sects[i]->name, sects[j]->name) == 0){
250 seterr(EOk, "duplicate section name %s for index %s", sects[i]->name, name);
251 return nil;
256 if(nb >= ((u64int)1 << 32)){
257 seterr(EBug, "index too large");
258 return nil;
260 div = (((u64int)1 << 32) + nb - 1) / nb;
261 ub = (((u64int)1 << 32) - 1) / div + 1;
262 if(div < 100){
263 seterr(EBug, "index divisor too coarse");
264 return nil;
266 if(ub > nb){
267 seterr(EBug, "index initialization math wrong");
268 return nil;
271 /*
272 * initialize each of the index sections
273 * and the section map table
274 */
275 smap = MKNZ(AMap, n);
276 if(smap == nil){
277 seterr(EOk, "can't create new index: out of memory");
278 return nil;
280 xb = nb - ub;
281 start = 0;
282 for(i = 0; i < n; i++){
283 stop = start + sects[i]->blocks - xb / n;
284 if(i == n - 1)
285 stop = ub;
286 sects[i]->start = start;
287 sects[i]->stop = stop;
288 namecp(sects[i]->index, name);
290 smap[i].start = start;
291 smap[i].stop = stop;
292 namecp(smap[i].name, sects[i]->name);
293 start = stop;
296 /*
297 * initialize the index itself
298 */
299 ix = MKZ(Index);
300 if(ix == nil){
301 seterr(EOk, "can't create new index: out of memory");
302 free(smap);
303 return nil;
305 ix->version = IndexVersion;
306 namecp(ix->name, name);
307 ix->sects = sects;
308 ix->smap = smap;
309 ix->nsects = n;
310 ix->blocksize = blocksize;
311 ix->div = div;
312 ix->buckets = ub;
313 ix->tabsize = tabsize;
314 return ix;
317 ISect*
318 initisect(Part *part)
320 ISect *is;
321 ZBlock *b;
322 int ok;
324 b = alloczblock(HeadSize, 0);
325 if(b == nil || readpart(part, PartBlank, b->data, HeadSize) < 0){
326 seterr(EAdmin, "can't read index section header: %r");
327 return nil;
330 is = MKZ(ISect);
331 if(is == nil){
332 freezblock(b);
333 return nil;
335 is->part = part;
336 ok = unpackisect(is, b->data);
337 freezblock(b);
338 if(ok < 0){
339 seterr(ECorrupt, "corrupted index section header: %r");
340 freeisect(is);
341 return nil;
344 if(is->version != ISectVersion){
345 seterr(EAdmin, "unknown index section version %d", is->version);
346 freeisect(is);
347 return nil;
350 return initisect1(is);
353 ISect*
354 newisect(Part *part, char *name, u32int blocksize, u32int tabsize)
356 ISect *is;
357 u32int tabbase;
359 is = MKZ(ISect);
360 if(is == nil)
361 return nil;
363 namecp(is->name, name);
364 is->version = ISectVersion;
365 is->part = part;
366 is->blocksize = blocksize;
367 is->start = 0;
368 is->stop = 0;
369 tabbase = (PartBlank + HeadSize + blocksize - 1) & ~(blocksize - 1);
370 is->blockbase = (tabbase + tabsize + blocksize - 1) & ~(blocksize - 1);
371 is->blocks = is->part->size / blocksize - is->blockbase / blocksize;
373 is = initisect1(is);
374 if(is == nil)
375 return nil;
377 return is;
380 /*
381 * initialize the computed paramaters for an index
382 */
383 static ISect*
384 initisect1(ISect *is)
386 u64int v;
388 is->buckmax = (is->blocksize - IBucketSize) / IEntrySize;
389 is->blocklog = u64log2(is->blocksize);
390 if(is->blocksize != (1 << is->blocklog)){
391 seterr(ECorrupt, "illegal non-power-of-2 bucket size %d\n", is->blocksize);
392 freeisect(is);
393 return nil;
395 partblocksize(is->part, is->blocksize);
396 is->tabbase = (PartBlank + HeadSize + is->blocksize - 1) & ~(is->blocksize - 1);
397 if(is->tabbase >= is->blockbase){
398 seterr(ECorrupt, "index section config table overlaps bucket storage");
399 freeisect(is);
400 return nil;
402 is->tabsize = is->blockbase - is->tabbase;
403 v = is->part->size & ~(u64int)(is->blocksize - 1);
404 if(is->blockbase + (u64int)is->blocks * is->blocksize != v){
405 seterr(ECorrupt, "invalid blocks in index section %s", is->name);
406 //ZZZZZZZZZ
407 // freeisect(is);
408 // return nil;
411 if(is->stop - is->start > is->blocks){
412 seterr(ECorrupt, "index section overflows available space");
413 freeisect(is);
414 return nil;
416 if(is->start > is->stop){
417 seterr(ECorrupt, "invalid index section range");
418 freeisect(is);
419 return nil;
422 return is;
425 int
426 wbisect(ISect *is)
428 ZBlock *b;
430 b = alloczblock(HeadSize, 1);
431 if(b == nil)
432 //ZZZ set error?
433 return -1;
435 if(packisect(is, b->data) < 0){
436 seterr(ECorrupt, "can't make index section header: %r");
437 freezblock(b);
438 return -1;
440 if(writepart(is->part, PartBlank, b->data, HeadSize) < 0){
441 seterr(EAdmin, "can't write index section header: %r");
442 freezblock(b);
443 return -1;
445 freezblock(b);
447 return 0;
450 void
451 freeisect(ISect *is)
453 if(is == nil)
454 return;
455 free(is);
458 void
459 freeindex(Index *ix)
461 int i;
463 if(ix == nil)
464 return;
465 free(ix->amap);
466 free(ix->arenas);
467 if(ix->sects)
468 for(i = 0; i < ix->nsects; i++)
469 freeisect(ix->sects[i]);
470 free(ix->sects);
471 free(ix->smap);
472 free(ix);
475 /*
476 * write a clump to an available arena in the index
477 * and return the address of the clump within the index.
478 ZZZ question: should this distinguish between an arena
479 filling up and real errors writing the clump?
480 */
481 u64int
482 writeiclump(Index *ix, Clump *c, u8int *clbuf)
484 u64int a;
485 int i;
487 for(i = ix->mapalloc; i < ix->narenas; i++){
488 a = writeaclump(ix->arenas[i], c, clbuf);
489 if(a != TWID64)
490 return a + ix->amap[i].start;
493 seterr(EAdmin, "no space left in arenas");
494 return -1;
497 /*
498 * convert an arena index to an relative address address
499 */
500 Arena*
501 amapitoa(Index *ix, u64int a, u64int *aa)
503 int i, r, l, m;
505 l = 1;
506 r = ix->narenas - 1;
507 while(l <= r){
508 m = (r + l) / 2;
509 if(ix->amap[m].start <= a)
510 l = m + 1;
511 else
512 r = m - 1;
514 l--;
516 if(a > ix->amap[l].stop){
517 for(i=0; i<ix->narenas; i++)
518 print("arena %d: %llux - %llux\n", i, ix->amap[i].start, ix->amap[i].stop);
519 print("want arena %d for %llux\n", l, a);
520 seterr(ECrash, "unmapped address passed to amapitoa");
521 return nil;
524 if(ix->arenas[l] == nil){
525 seterr(ECrash, "unmapped arena selected in amapitoa");
526 return nil;
528 *aa = a - ix->amap[l].start;
529 return ix->arenas[l];
532 int
533 iaddrcmp(IAddr *ia1, IAddr *ia2)
535 return ia1->type != ia2->type
536 || ia1->size != ia2->size
537 || ia1->blocks != ia2->blocks
538 || ia1->addr != ia2->addr;
541 /*
542 * lookup the score in the partition
544 * nothing needs to be explicitly locked:
545 * only static parts of ix are used, and
546 * the bucket is locked by the DBlock lock.
547 */
548 int
549 loadientry(Index *ix, u8int *score, int type, IEntry *ie)
551 ISect *is;
552 DBlock *b;
553 IBucket ib;
554 u32int buck;
555 int h, ok;
557 fprint(2, "loadientry %V %d\n", score, type);
558 buck = hashbits(score, 32) / ix->div;
559 ok = -1;
560 for(;;){
561 qlock(&stats.lock);
562 stats.indexreads++;
563 qunlock(&stats.lock);
564 is = findisect(ix, buck);
565 if(is == nil){
566 seterr(EAdmin, "bad math in loadientry");
567 return -1;
569 buck -= is->start;
570 b = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), 1);
571 if(b == nil)
572 break;
574 unpackibucket(&ib, b->data);
575 if(okibucket(&ib, is) < 0)
576 break;
578 h = bucklook(score, type, ib.data, ib.n);
579 if(h & 1){
580 h ^= 1;
581 unpackientry(ie, &ib.data[h]);
582 ok = 0;
583 break;
586 break;
588 putdblock(b);
589 return ok;
592 /*
593 * insert or update an index entry into the appropriate bucket
594 */
595 int
596 storeientry(Index *ix, IEntry *ie)
598 ISect *is;
599 DBlock *b;
600 IBucket ib;
601 u32int buck;
602 int h, ok;
604 buck = hashbits(ie->score, 32) / ix->div;
605 ok = 0;
606 for(;;){
607 qlock(&stats.lock);
608 stats.indexwreads++;
609 qunlock(&stats.lock);
610 is = findisect(ix, buck);
611 if(is == nil){
612 seterr(EAdmin, "bad math in storeientry");
613 return -1;
615 buck -= is->start;
616 b = getdblock(is->part, is->blockbase + ((u64int)buck << is->blocklog), 1);
617 if(b == nil)
618 break;
620 unpackibucket(&ib, b->data);
621 if(okibucket(&ib, is) < 0)
622 break;
624 h = bucklook(ie->score, ie->ia.type, ib.data, ib.n);
625 if(h & 1){
626 h ^= 1;
627 packientry(ie, &ib.data[h]);
628 ok = writebucket(is, buck, &ib, b);
629 break;
632 if(ib.n < is->buckmax){
633 memmove(&ib.data[h + IEntrySize], &ib.data[h], ib.n * IEntrySize - h);
634 ib.n++;
636 packientry(ie, &ib.data[h]);
637 ok = writebucket(is, buck, &ib, b);
638 break;
641 break;
644 putdblock(b);
645 return ok;
648 static int
649 writebucket(ISect *is, u32int buck, IBucket *ib, DBlock *b)
651 if(buck >= is->blocks)
652 seterr(EAdmin, "index write out of bounds: %d >= %d\n",
653 buck, is->blocks);
654 qlock(&stats.lock);
655 stats.indexwrites++;
656 qunlock(&stats.lock);
657 packibucket(ib, b->data);
658 return writepart(is->part, is->blockbase + ((u64int)buck << is->blocklog), b->data, is->blocksize);
661 /*
662 * find the number of the index section holding score
663 */
664 int
665 indexsect(Index *ix, u8int *score)
667 u32int buck;
668 int r, l, m;
670 buck = hashbits(score, 32) / ix->div;
671 l = 1;
672 r = ix->nsects - 1;
673 while(l <= r){
674 m = (r + l) >> 1;
675 if(ix->sects[m]->start <= buck)
676 l = m + 1;
677 else
678 r = m - 1;
680 return l - 1;
683 /*
684 * find the index section which holds buck
685 */
686 ISect*
687 findisect(Index *ix, u32int buck)
689 ISect *is;
690 int r, l, m;
692 l = 1;
693 r = ix->nsects - 1;
694 while(l <= r){
695 m = (r + l) >> 1;
696 if(ix->sects[m]->start <= buck)
697 l = m + 1;
698 else
699 r = m - 1;
701 is = ix->sects[l - 1];
702 if(is->start <= buck && is->stop > buck)
703 return is;
704 return nil;
707 static int
708 okibucket(IBucket *ib, ISect *is)
710 if(ib->n <= is->buckmax && (ib->next == 0 || ib->next >= is->start && ib->next < is->stop))
711 return 0;
713 seterr(EICorrupt, "corrupted disk index bucket: n=%ud max=%ud, next=%lud range=[%lud,%lud)",
714 ib->n, is->buckmax, ib->next, is->start, is->stop);
715 return -1;
718 /*
719 * look for score within data;
720 * return 1 | byte index of matching index,
721 * or 0 | index of least element > score
722 */
723 static int
724 bucklook(u8int *score, int otype, u8int *data, int n)
726 int i, r, l, m, h, c, cc, type;
728 type = vttodisktype(otype);
729 fprint(2, "bucklook %V %d->%d %d\n", score, otype, type, n);
730 l = 0;
731 r = n - 1;
732 while(l <= r){
733 m = (r + l) >> 1;
734 h = m * IEntrySize;
735 fprint(2, "perhaps %V %d\n", data+h, data[h+IEntryTypeOff]);
736 for(i = 0; i < VtScoreSize; i++){
737 c = score[i];
738 cc = data[h + i];
739 if(c != cc){
740 if(c > cc)
741 l = m + 1;
742 else
743 r = m - 1;
744 goto cont;
747 cc = data[h + IEntryTypeOff];
748 if(type != cc){
749 if(type > cc)
750 l = m + 1;
751 else
752 r = m - 1;
753 goto cont;
755 return h | 1;
756 cont:;
759 return l * IEntrySize;
762 /*
763 * compare two IEntries; consistent with bucklook
764 */
765 int
766 ientrycmp(const void *vie1, const void *vie2)
768 u8int *ie1, *ie2;
769 int i, v1, v2;
771 ie1 = (u8int*)vie1;
772 ie2 = (u8int*)vie2;
773 for(i = 0; i < VtScoreSize; i++){
774 v1 = ie1[i];
775 v2 = ie2[i];
776 if(v1 != v2){
777 if(v1 < v2)
778 return -1;
779 return 0;
782 v1 = ie1[IEntryTypeOff];
783 v2 = ie2[IEntryTypeOff];
784 if(v1 != v2){
785 if(v1 < v2)
786 return -1;
787 return 0;
789 return -1;