Blob


/*
 * Archiver.  In charge of sending blocks to Venti.
 */
5 #include "stdinc.h"
6 #include "dat.h"
7 #include "fns.h"
8 #include "error.h"
10 #include "9.h" /* for consPrint */
12 #define DEBUG 0
14 static void archThread(void*);
16 struct Arch
17 {
18 int ref;
19 uint blockSize;
20 uint diskSize;
21 Cache *c;
22 Fs *fs;
23 VtConn *z;
25 QLock lk;
26 Rendez starve;
27 Rendez die;
28 };
30 Arch *
31 archInit(Cache *c, Disk *disk, Fs *fs, VtConn *z)
32 {
33 Arch *a;
35 a = vtmallocz(sizeof(Arch));
37 a->c = c;
38 a->z = z;
39 a->fs = fs;
40 a->blockSize = diskBlockSize(disk);
41 a->starve.l = &a->lk;
43 a->ref = 2;
44 proccreate(archThread, a, STACK);
46 return a;
47 }
49 void
50 archFree(Arch *a)
51 {
52 /* kill slave */
53 qlock(&a->lk);
54 a->die.l = &a->lk;
55 rwakeup(&a->starve);
56 while(a->ref > 1)
57 rsleep(&a->die);
58 qunlock(&a->lk);
59 vtfree(a);
60 }
62 static int
63 ventiSend(Arch *a, Block *b, uchar *data)
64 {
65 uint n;
66 uchar score[VtScoreSize];
68 if(DEBUG > 1)
69 fprint(2, "ventiSend: sending %#ux %L to venti\n", b->addr, &b->l);
70 n = vtzerotruncate(vtType[b->l.type], data, a->blockSize);
71 if(DEBUG > 1)
72 fprint(2, "ventiSend: truncate %d to %d\n", a->blockSize, n);
73 if(vtwrite(a->z, score, vtType[b->l.type], data, n) < 0){
74 fprint(2, "ventiSend: vtwrite block %#ux failed: %r\n", b->addr);
75 return 0;
76 }
77 if(vtsha1check(score, data, n) < 0){
78 uchar score2[VtScoreSize];
79 vtsha1(score2, data, n);
80 fprint(2, "ventiSend: vtwrite block %#ux failed vtsha1check %V %V\n",
81 b->addr, score, score2);
82 return 0;
83 }
84 if(vtsync(a->z) < 0)
85 return 0;
86 return 1;
87 }
89 /*
90 * parameters for recursion; there are so many,
91 * and some only change occasionally. this is
92 * easier than spelling things out at each call.
93 */
94 typedef struct Param Param;
95 struct Param
96 {
97 /* these never change */
98 uint snapEpoch; /* epoch for snapshot being archived */
99 uint blockSize;
100 Cache *c;
101 Arch *a;
103 /* changes on every call */
104 uint depth;
106 /* statistics */
107 uint nfixed;
108 uint nsend;
109 uint nvisit;
110 uint nfailsend;
111 uint maxdepth;
112 uint nreclaim;
113 uint nfake;
114 uint nreal;
116 /* these occasionally change (must save old values and put back) */
117 uint dsize;
118 uint psize;
120 /* return value; avoids using stack space */
121 Label l;
122 uchar score[VtScoreSize];
123 };
125 static void
126 shaBlock(uchar score[VtScoreSize], Block *b, uchar *data, uint bsize)
128 vtsha1(score, data, vtzerotruncate(vtType[b->l.type], data, bsize));
131 static uchar*
132 copyBlock(Block *b, u32int blockSize)
134 uchar *data;
136 data = vtmalloc(blockSize);
137 if(data == nil)
138 return nil;
139 memmove(data, b->data, blockSize);
140 return data;
/*
 * Walk over the block tree, archiving it to Venti.
 *
 * We don't archive the snapshots.  Instead we zero the
 * entries in a temporary copy of the block and archive that.
 *
 * Return value is:
 *
 *	ArchFailure	some error occurred
 *	ArchSuccess	block and all children archived
 *	ArchFaked	success, but block or children got copied
 */
enum
{
	ArchFailure,
	ArchSuccess,
	ArchFaked,
};
161 static int
162 archWalk(Param *p, u32int addr, uchar type, u32int tag)
164 int ret, i, x, psize, dsize;
165 uchar *data, score[VtScoreSize];
166 Block *b;
167 Label l;
168 Entry *e;
169 WalkPtr w;
170 char err[ERRMAX];
172 p->nvisit++;
174 b = cacheLocalData(p->c, addr, type, tag, OReadWrite,0);
175 if(b == nil){
176 fprint(2, "archive(%ud, %#ux): cannot find block: %r\n", p->snapEpoch, addr);
177 rerrstr(err, sizeof err);
178 if(strcmp(err, ELabelMismatch) == 0){
179 /* might as well plod on so we write _something_ to Venti */
180 memmove(p->score, vtzeroscore, VtScoreSize);
181 return ArchFaked;
183 return ArchFailure;
186 if(DEBUG) fprint(2, "%*sarchive(%ud, %#ux): block label %L\n",
187 p->depth*2, "", p->snapEpoch, b->addr, &b->l);
188 p->depth++;
189 if(p->depth > p->maxdepth)
190 p->maxdepth = p->depth;
192 data = b->data;
193 if((b->l.state&BsVenti) == 0){
194 initWalk(&w, b, b->l.type==BtDir ? p->dsize : p->psize);
195 for(i=0; nextWalk(&w, score, &type, &tag, &e); i++){
196 if(e){
197 if(!(e->flags&VtEntryActive))
198 continue;
199 if((e->snap && !e->archive)
200 || (e->flags&VtEntryNoArchive)){
201 if(0) fprint(2, "snap; faking %#ux\n", b->addr);
202 if(data == b->data){
203 data = copyBlock(b, p->blockSize);
204 if(data == nil){
205 ret = ArchFailure;
206 goto Out;
208 w.data = data;
210 memmove(e->score, vtzeroscore, VtScoreSize);
211 e->depth = 0;
212 e->size = 0;
213 e->tag = 0;
214 e->flags &= ~VtEntryLocal;
215 entryPack(e, data, w.n-1);
216 continue;
219 addr = globalToLocal(score);
220 if(addr == NilBlock)
221 continue;
222 dsize = p->dsize;
223 psize = p->psize;
224 if(e){
225 p->dsize= e->dsize;
226 p->psize = e->psize;
228 qunlock(&b->lk);
229 x = archWalk(p, addr, type, tag);
230 qlock(&b->lk);
231 if(e){
232 p->dsize = dsize;
233 p->psize = psize;
235 while(b->iostate != BioClean && b->iostate != BioDirty)
236 rsleep(&b->ioready);
237 switch(x){
238 case ArchFailure:
239 fprint(2, "archWalk %#ux failed; ptr is in %#ux offset %d\n",
240 addr, b->addr, i);
241 ret = ArchFailure;
242 goto Out;
243 case ArchFaked:
244 /*
245 * When we're writing the entry for an archive directory
246 * (like /archive/2003/1215) then even if we've faked
247 * any data, record the score unconditionally.
248 * This way, we will always record the Venti score here.
249 * Otherwise, temporary data or corrupted file system
250 * would cause us to keep holding onto the on-disk
251 * copy of the archive.
252 */
253 if(e==nil || !e->archive)
254 if(data == b->data){
255 if(0) fprint(2, "faked %#ux, faking %#ux (%V)\n", addr, b->addr, p->score);
256 data = copyBlock(b, p->blockSize);
257 if(data == nil){
258 ret = ArchFailure;
259 goto Out;
261 w.data = data;
263 /* fall through */
264 if(0) fprint(2, "falling\n");
265 case ArchSuccess:
266 if(e){
267 memmove(e->score, p->score, VtScoreSize);
268 e->flags &= ~VtEntryLocal;
269 entryPack(e, data, w.n-1);
270 }else
271 memmove(data+(w.n-1)*VtScoreSize, p->score, VtScoreSize);
272 if(data == b->data){
273 blockDirty(b);
274 /*
275 * If b is in the active tree, then we need to note that we've
276 * just removed addr from the active tree (replacing it with the
277 * copy we just stored to Venti). If addr is in other snapshots,
278 * this will close addr but not free it, since it has a non-empty
279 * epoch range.
281 * If b is in the active tree but has been copied (this can happen
282 * if we get killed at just the right moment), then we will
283 * mistakenly leak its kids.
285 * The children of an archive directory (e.g., /archive/2004/0604)
286 * are not treated as in the active tree.
287 */
288 if((b->l.state&BsCopied)==0 && (e==nil || e->snap==0))
289 blockRemoveLink(b, addr, p->l.type, p->l.tag, 0);
291 break;
295 if(!ventiSend(p->a, b, data)){
296 p->nfailsend++;
297 ret = ArchFailure;
298 goto Out;
300 p->nsend++;
301 if(data != b->data)
302 p->nfake++;
303 if(data == b->data){ /* not faking it, so update state */
304 p->nreal++;
305 l = b->l;
306 l.state |= BsVenti;
307 if(!blockSetLabel(b, &l, 0)){
308 ret = ArchFailure;
309 goto Out;
314 shaBlock(p->score, b, data, p->blockSize);
315 if(0) fprint(2, "ventisend %V %p %p %p\n", p->score, data, b->data, w.data);
316 ret = data!=b->data ? ArchFaked : ArchSuccess;
317 p->l = b->l;
318 Out:
319 if(data != b->data)
320 vtfree(data);
321 p->depth--;
322 blockPut(b);
323 return ret;
326 static void
327 archThread(void *v)
329 Arch *a = v;
330 Block *b;
331 Param p;
332 Super super;
333 int ret;
334 u32int addr;
335 uchar rbuf[VtRootSize];
336 VtRoot root;
338 threadsetname("arch");
340 for(;;){
341 /* look for work */
342 wlock(&a->fs->elk);
343 b = superGet(a->c, &super);
344 if(b == nil){
345 wunlock(&a->fs->elk);
346 fprint(2, "archThread: superGet: %r\n");
347 sleep(60*1000);
348 continue;
350 addr = super.next;
351 if(addr != NilBlock && super.current == NilBlock){
352 super.current = addr;
353 super.next = NilBlock;
354 superPack(&super, b->data);
355 blockDirty(b);
356 }else
357 addr = super.current;
358 blockPut(b);
359 wunlock(&a->fs->elk);
361 if(addr == NilBlock){
362 /* wait for work */
363 qlock(&a->lk);
364 rsleep(&a->starve);
365 if(a->die.l != nil)
366 goto Done;
367 qunlock(&a->lk);
368 continue;
371 sleep(10*1000); /* window of opportunity to provoke races */
373 /* do work */
374 memset(&p, 0, sizeof p);
375 p.blockSize = a->blockSize;
376 p.dsize = 3*VtEntrySize; /* root has three Entries */
377 p.c = a->c;
378 p.a = a;
380 ret = archWalk(&p, addr, BtDir, RootTag);
381 switch(ret){
382 default:
383 abort();
384 case ArchFailure:
385 fprint(2, "archiveBlock %#ux: %r\n", addr);
386 sleep(60*1000);
387 continue;
388 case ArchSuccess:
389 case ArchFaked:
390 break;
393 if(0) fprint(2, "archiveSnapshot 0x%#ux: maxdepth %ud nfixed %ud"
394 " send %ud nfailsend %ud nvisit %ud"
395 " nreclaim %ud nfake %ud nreal %ud\n",
396 addr, p.maxdepth, p.nfixed,
397 p.nsend, p.nfailsend, p.nvisit,
398 p.nreclaim, p.nfake, p.nreal);
399 if(0) fprint(2, "archiveBlock %V (%ud)\n", p.score, p.blockSize);
401 /* tie up vac root */
402 memset(&root, 0, sizeof root);
403 strecpy(root.type, root.type+sizeof root.type, "vac");
404 strecpy(root.name, root.name+sizeof root.name, "fossil");
405 memmove(root.score, p.score, VtScoreSize);
406 memmove(root.prev, super.last, VtScoreSize);
407 root.blocksize = a->blockSize;
408 vtrootpack(&root, rbuf);
409 if(vtwrite(a->z, p.score, VtRootType, rbuf, VtRootSize) < 0
410 || vtsha1check(p.score, rbuf, VtRootSize) < 0){
411 fprint(2, "vtWriteBlock %#ux: %r\n", addr);
412 sleep(60*1000);
413 continue;
416 /* record success */
417 wlock(&a->fs->elk);
418 b = superGet(a->c, &super);
419 if(b == nil){
420 wunlock(&a->fs->elk);
421 fprint(2, "archThread: superGet: %r\n");
422 sleep(60*1000);
423 continue;
425 super.current = NilBlock;
426 memmove(super.last, p.score, VtScoreSize);
427 superPack(&super, b->data);
428 blockDirty(b);
429 blockPut(b);
430 wunlock(&a->fs->elk);
432 consPrint("archive vac:%V\n", p.score);
435 Done:
436 a->ref--;
437 rwakeup(&a->die);
438 qunlock(&a->lk);
441 void
442 archKick(Arch *a)
444 if(a == nil){
445 fprint(2, "warning: archKick nil\n");
446 return;
448 qlock(&a->lk);
449 rwakeup(&a->starve);
450 qunlock(&a->lk);