Blob


1 #include "stdinc.h"
2 #include "dat.h"
3 #include "fns.h"
5 static int
6 writebucket(Index *ix, u32int buck, IBucket *ib, ZBlock *b)
7 {
8 ISect *is;
10 is = findisect(ix, buck);
11 if(is == nil){
12 seterr(EAdmin, "bad math in writebucket");
13 return -1;
14 }
15 if(buck < is->start || buck >= is->stop)
16 seterr(EAdmin, "index write out of bounds: %d not in [%d,%d)\n",
17 buck, is->start, is->stop);
18 buck -= is->start;
19 qlock(&stats.lock);
20 stats.indexwrites++;
21 qunlock(&stats.lock);
22 packibucket(ib, b->data);
23 return writepart(is->part, is->blockbase + ((u64int)buck << is->blocklog), b->data, is->blocksize);
24 }
26 static int
27 buildindex(Index *ix, Part *part, u64int off, u64int clumps, int zero)
28 {
29 IEStream *ies;
30 IBucket ib, zib;
31 ZBlock *z, *b;
32 u32int next, buck;
33 int ok;
34 u64int found = 0;
36 //ZZZ make buffer size configurable
37 b = alloczblock(ix->blocksize, 0);
38 z = alloczblock(ix->blocksize, 1);
39 ies = initiestream(part, off, clumps, 64*1024);
40 if(b == nil || z == nil || ies == nil){
41 ok = 0;
42 goto breakout;
43 return -1;
44 }
45 ok = 0;
46 next = 0;
47 ib.data = b->data + IBucketSize;
48 zib.data = z->data + IBucketSize;
49 zib.n = 0;
50 zib.next = 0;
51 for(;;){
52 buck = buildbucket(ix, ies, &ib);
53 found += ib.n;
54 if(zero){
55 for(; next != buck; next++){
56 if(next == ix->buckets){
57 if(buck != TWID32){
58 fprint(2, "bucket out of range\n");
59 ok = -1;
60 }
61 goto breakout;
62 }
63 if(writebucket(ix, next, &zib, z) < 0){
64 fprint(2, "can't write zero bucket to buck=%d: %r", next);
65 ok = -1;
66 }
67 }
68 }
69 if(buck >= ix->buckets){
70 if(buck == TWID32)
71 break;
72 fprint(2, "bucket out of range\n");
73 ok = -1;
74 goto breakout;
75 }
76 if(writebucket(ix, buck, &ib, b) < 0){
77 fprint(2, "bad bucket found=%lld: %r\n", found);
78 ok = -1;
79 }
80 next = buck + 1;
81 }
82 breakout:;
83 fprint(2, "constructed index with %lld entries\n", found);
84 freeiestream(ies);
85 freezblock(z);
86 freezblock(b);
87 return ok;
88 }
/*
 * Print the command-line synopsis on file descriptor 2 and terminate
 * every thread in the program.  The exit status is the non-nil string
 * "usage" so that callers see a failure rather than success.
 */
void
usage(void)
{
	fprint(2, "usage: buildindex [-Z] [-B blockcachesize] config tmppart\n");
	threadexitsall("usage");
}
97 void
98 threadmain(int argc, char *argv[])
99 {
100 Part *part;
101 u64int clumps, base;
102 u32int bcmem;
103 int zero;
105 zero = 1;
106 bcmem = 0;
107 ARGBEGIN{
108 case 'B':
109 bcmem = unittoull(ARGF());
110 break;
111 case 'Z':
112 zero = 0;
113 break;
114 default:
115 usage();
116 break;
117 }ARGEND
119 if(argc != 2)
120 usage();
122 if(initventi(argv[0]) < 0)
123 sysfatal("can't init venti: %r");
125 if(bcmem < maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16))
126 bcmem = maxblocksize * (mainindex->narenas + mainindex->nsects * 4 + 16);
127 fprint(2, "initialize %d bytes of disk block cache\n", bcmem);
128 initdcache(bcmem);
130 fprint(2, "building a new index %s using %s for temporary storage\n", mainindex->name, argv[1]);
132 part = initpart(argv[1], 1);
133 if(part == nil)
134 sysfatal("can't initialize temporary partition: %r");
136 clumps = sortrawientries(mainindex, part, &base);
137 if(clumps == TWID64)
138 sysfatal("can't build sorted index: %r");
139 fprint(2, "found and sorted index entries for clumps=%lld at %lld\n", clumps, base);
141 if(buildindex(mainindex, part, base, clumps, zero) < 0)
142 sysfatal("can't build new index: %r");
144 threadexitsall(0);