/*
 * Manage tree of VtFiles stored in the block cache.
 *
 * The single point of truth for the info about the VtFiles themselves
 * is the block data.  Because of this, there is no explicit locking of
 * VtFile structures, and indeed there may be more than one VtFile
 * structure for a given Venti file.  They synchronize through the
 * block cache.
 *
 * This is a bit simpler than fossil because there are no epochs
 * or tags or anything else.  Just mutable local blocks and immutable
 * Venti blocks.
 */
15 #include <u.h>
16 #include <libc.h>
17 #include <venti.h>
19 #define MaxBlock (1UL<<31)
21 static char ENotDir[] = "walk in non-directory";
22 static char ETooBig[] = "file too big";
23 /* static char EBadAddr[] = "bad address"; */
24 static char ELabelMismatch[] = "label mismatch";
26 static int sizetodepth(uvlong s, int psize, int dsize);
27 static VtBlock *fileload(VtFile *r, VtEntry *e);
28 static int shrinkdepth(VtFile*, VtBlock*, VtEntry*, int);
29 static int shrinksize(VtFile*, VtEntry*, uvlong);
30 static int growdepth(VtFile*, VtBlock*, VtEntry*, int);
32 #define ISLOCKED(r) ((r)->b != nil)
33 #define DEPTH(t) ((t)&VtTypeDepthMask)
35 static VtFile *
36 vtfilealloc(VtCache *c, VtBlock *b, VtFile *p, u32int offset, int mode)
37 {
38 int epb;
39 VtEntry e;
40 VtFile *r;
42 assert(p==nil || ISLOCKED(p));
44 if(p == nil){
45 assert(offset == 0);
46 epb = 1;
47 }else
48 epb = p->dsize / VtEntrySize;
50 if(b->type != VtDirType){
51 werrstr("bad block type %#uo", b->type);
52 return nil;
53 }
55 /*
56 * a non-active entry is the only thing that
57 * can legitimately happen here. all the others
58 * get prints.
59 */
60 if(vtentryunpack(&e, b->data, offset % epb) < 0){
61 fprint(2, "vtentryunpack failed: %r (%.*H)\n", VtEntrySize, b->data+VtEntrySize*(offset%epb));
62 return nil;
63 }
64 if(!(e.flags & VtEntryActive)){
65 werrstr("entry not active");
66 return nil;
67 }
69 if(DEPTH(e.type) < sizetodepth(e.size, e.psize, e.dsize)){
70 fprint(2, "depth %ud size %llud psize %ud dsize %ud\n",
71 DEPTH(e.type), e.size, e.psize, e.dsize);
72 werrstr("bad depth");
73 return nil;
74 }
76 r = vtmallocz(sizeof(VtFile));
77 r->c = c;
78 r->bsize = b->size;
79 r->mode = mode;
80 r->dsize = e.dsize;
81 r->psize = e.psize;
82 r->gen = e.gen;
83 r->dir = (e.type & VtTypeBaseMask) == VtDirType;
84 r->ref = 1;
85 r->parent = p;
86 if(p){
87 qlock(&p->lk);
88 assert(mode == VtOREAD || p->mode == VtORDWR);
89 p->ref++;
90 qunlock(&p->lk);
91 }else{
92 assert(b->addr != NilBlock);
93 r->local = 1;
94 }
95 memmove(r->score, b->score, VtScoreSize);
96 r->offset = offset;
97 r->epb = epb;
99 return r;
102 VtFile *
103 vtfileroot(VtCache *c, u32int addr, int mode)
105 VtFile *r;
106 VtBlock *b;
108 b = vtcachelocal(c, addr, VtDirType);
109 if(b == nil)
110 return nil;
111 r = vtfilealloc(c, b, nil, 0, mode);
112 vtblockput(b);
113 return r;
116 VtFile*
117 vtfileopenroot(VtCache *c, VtEntry *e)
119 VtBlock *b;
120 VtFile *f;
122 b = vtcacheallocblock(c, VtDirType, VtEntrySize);
123 if(b == nil)
124 return nil;
126 vtentrypack(e, b->data, 0);
127 f = vtfilealloc(c, b, nil, 0, VtORDWR);
128 vtblockput(b);
129 return f;
132 VtFile *
133 vtfilecreateroot(VtCache *c, int psize, int dsize, int type)
135 VtEntry e;
137 memset(&e, 0, sizeof e);
138 e.flags = VtEntryActive;
139 e.psize = psize;
140 e.dsize = dsize;
141 e.type = type;
142 memmove(e.score, vtzeroscore, VtScoreSize);
144 return vtfileopenroot(c, &e);
147 VtFile *
148 vtfileopen(VtFile *r, u32int offset, int mode)
150 ulong bn;
151 VtBlock *b;
153 assert(ISLOCKED(r));
154 if(!r->dir){
155 werrstr(ENotDir);
156 return nil;
159 bn = offset/(r->dsize/VtEntrySize);
161 b = vtfileblock(r, bn, mode);
162 if(b == nil)
163 return nil;
164 r = vtfilealloc(r->c, b, r, offset, mode);
165 vtblockput(b);
166 return r;
169 VtFile*
170 vtfilecreate(VtFile *r, int psize, int dsize, int type)
172 return _vtfilecreate(r, -1, psize, dsize, type);
175 VtFile*
176 _vtfilecreate(VtFile *r, int o, int psize, int dsize, int type)
178 int i;
179 VtBlock *b;
180 u32int bn, size;
181 VtEntry e;
182 int epb;
183 VtFile *rr;
184 u32int offset;
186 assert(ISLOCKED(r));
187 assert(type == VtDirType || type == VtDataType);
189 if(!r->dir){
190 werrstr(ENotDir);
191 return nil;
194 epb = r->dsize/VtEntrySize;
196 size = vtfilegetdirsize(r);
197 /*
198 * look at a random block to see if we can find an empty entry
199 */
200 if(o == -1){
201 offset = lnrand(size+1);
202 offset -= offset % epb;
203 }else
204 offset = o;
206 /* try the given block and then try the last block */
207 for(;;){
208 bn = offset/epb;
209 b = vtfileblock(r, bn, VtORDWR);
210 if(b == nil)
211 return nil;
212 for(i=offset%r->epb; i<epb; i++){
213 if(vtentryunpack(&e, b->data, i) < 0)
214 continue;
215 if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
216 goto Found;
218 vtblockput(b);
219 if(offset == size){
220 fprint(2, "vtfilecreate: cannot happen\n");
221 werrstr("vtfilecreate: cannot happen");
222 return nil;
224 offset = size;
227 Found:
228 /* found an entry - gen already set */
229 e.psize = psize;
230 e.dsize = dsize;
231 e.flags = VtEntryActive;
232 e.type = type;
233 e.size = 0;
234 memmove(e.score, vtzeroscore, VtScoreSize);
235 vtentrypack(&e, b->data, i);
237 offset = bn*epb + i;
238 if(offset+1 > size){
239 if(vtfilesetdirsize(r, offset+1) < 0){
240 vtblockput(b);
241 return nil;
245 rr = vtfilealloc(r->c, b, r, offset, VtORDWR);
246 vtblockput(b);
247 return rr;
250 static int
251 vtfilekill(VtFile *r, int doremove)
253 VtEntry e;
254 VtBlock *b;
256 assert(ISLOCKED(r));
257 b = fileload(r, &e);
258 if(b == nil)
259 return -1;
261 if(doremove==0 && e.size == 0){
262 /* already truncated */
263 vtblockput(b);
264 return 0;
267 if(doremove){
268 if(e.gen != ~0)
269 e.gen++;
270 e.dsize = 0;
271 e.psize = 0;
272 e.flags = 0;
273 }else
274 e.flags &= ~VtEntryLocal;
275 e.type = 0;
276 e.size = 0;
277 memmove(e.score, vtzeroscore, VtScoreSize);
278 vtentrypack(&e, b->data, r->offset % r->epb);
279 vtblockput(b);
281 if(doremove){
282 vtfileunlock(r);
283 vtfileclose(r);
286 return 0;
289 int
290 vtfileremove(VtFile *r)
292 return vtfilekill(r, 1);
295 int
296 vtfiletruncate(VtFile *r)
298 return vtfilekill(r, 0);
301 uvlong
302 vtfilegetsize(VtFile *r)
304 VtEntry e;
305 VtBlock *b;
307 assert(ISLOCKED(r));
308 b = fileload(r, &e);
309 if(b == nil)
310 return ~(uvlong)0;
311 vtblockput(b);
313 return e.size;
316 static int
317 shrinksize(VtFile *r, VtEntry *e, uvlong size)
319 int i, depth, bsiz, type, isdir, ppb;
320 uvlong ptrsz;
321 uchar score[VtScoreSize];
322 VtBlock *b;
324 b = vtcacheglobal(r->c, e->score, e->type, r->dsize);
325 if(b == nil)
326 return -1;
328 ptrsz = e->dsize;
329 ppb = e->psize/VtScoreSize;
330 type = e->type;
331 depth = DEPTH(type);
332 for(i=0; i+1<depth; i++)
333 ptrsz *= ppb;
335 isdir = r->dir;
336 while(DEPTH(type) > 0){
337 if(b->addr == NilBlock){
338 /* not worth copying the block just so we can zero some of it */
339 vtblockput(b);
340 return -1;
343 /*
344 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
345 */
347 /* zero the pointers to unnecessary blocks */
348 i = (size+ptrsz-1)/ptrsz;
349 for(; i<ppb; i++)
350 memmove(b->data+i*VtScoreSize, vtzeroscore, VtScoreSize);
352 /* recurse (go around again) on the partially necessary block */
353 i = size/ptrsz;
354 size = size%ptrsz;
355 if(size == 0){
356 vtblockput(b);
357 return 0;
359 ptrsz /= ppb;
360 type--;
361 memmove(score, b->data+i*VtScoreSize, VtScoreSize);
362 vtblockput(b);
363 if(type == VtDataType || type == VtDirType)
364 bsiz = r->dsize;
365 else
366 bsiz = r->psize;
367 b = vtcacheglobal(r->c, score, type, bsiz);
368 if(b == nil)
369 return -1;
372 if(b->addr == NilBlock){
373 vtblockput(b);
374 return -1;
377 /*
378 * No one ever truncates BtDir blocks.
379 */
380 if(depth==0 && !isdir && e->dsize > size)
381 memset(b->data+size, 0, e->dsize-size);
382 vtblockput(b);
383 return 0;
386 int
387 vtfilesetsize(VtFile *r, u64int size)
389 int depth, edepth;
390 VtEntry e;
391 VtBlock *b;
393 assert(ISLOCKED(r));
394 if(size == 0)
395 return vtfiletruncate(r);
397 if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
398 werrstr(ETooBig);
399 return -1;
402 b = fileload(r, &e);
403 if(b == nil)
404 return -1;
406 /* quick out */
407 if(e.size == size){
408 vtblockput(b);
409 return 0;
412 depth = sizetodepth(size, e.psize, e.dsize);
413 edepth = DEPTH(e.type);
414 if(depth < edepth){
415 if(shrinkdepth(r, b, &e, depth) < 0){
416 vtblockput(b);
417 return -1;
419 }else if(depth > edepth){
420 if(growdepth(r, b, &e, depth) < 0){
421 vtblockput(b);
422 return -1;
426 if(size < e.size)
427 shrinksize(r, &e, size);
429 e.size = size;
430 vtentrypack(&e, b->data, r->offset % r->epb);
431 vtblockput(b);
433 return 0;
436 int
437 vtfilesetdirsize(VtFile *r, u32int ds)
439 uvlong size;
440 int epb;
442 assert(ISLOCKED(r));
443 epb = r->dsize/VtEntrySize;
445 size = (uvlong)r->dsize*(ds/epb);
446 size += VtEntrySize*(ds%epb);
447 return vtfilesetsize(r, size);
450 u32int
451 vtfilegetdirsize(VtFile *r)
453 ulong ds;
454 uvlong size;
455 int epb;
457 assert(ISLOCKED(r));
458 epb = r->dsize/VtEntrySize;
460 size = vtfilegetsize(r);
461 ds = epb*(size/r->dsize);
462 ds += (size%r->dsize)/VtEntrySize;
463 return ds;
466 int
467 vtfilegetentry(VtFile *r, VtEntry *e)
469 VtBlock *b;
471 assert(ISLOCKED(r));
472 b = fileload(r, e);
473 if(b == nil)
474 return -1;
475 vtblockput(b);
477 return 0;
480 int
481 vtfilesetentry(VtFile *r, VtEntry *e)
483 VtBlock *b;
484 VtEntry ee;
486 assert(ISLOCKED(r));
487 b = fileload(r, &ee);
488 if(b == nil)
489 return -1;
490 vtentrypack(e, b->data, r->offset % r->epb);
491 vtblockput(b);
492 return 0;
495 static VtBlock *
496 blockwalk(VtFile *r, VtBlock *p, int index, VtCache *c, int mode, VtEntry *e)
498 VtBlock *b;
499 int type, size;
500 uchar *score;
501 VtEntry oe;
503 switch(p->type){
504 case VtDataType:
505 assert(0);
506 case VtDirType:
507 type = e->type;
508 score = e->score;
509 break;
510 default:
511 type = p->type - 1;
512 score = p->data+index*VtScoreSize;
513 break;
515 /*print("walk from %V/%d ty %d to %V ty %d\n", p->score, index, p->type, score, type); */
517 if(type == VtDirType || type == VtDataType)
518 size = r->dsize;
519 else
520 size = r->psize;
521 if(mode == VtOWRITE && vtglobaltolocal(score) == NilBlock){
522 b = vtcacheallocblock(c, type, size);
523 if(b)
524 goto HaveCopy;
525 }else
526 b = vtcacheglobal(c, score, type, size);
528 if(b == nil || mode == VtOREAD)
529 return b;
531 if(vtglobaltolocal(b->score) != NilBlock)
532 return b;
534 oe = *e;
536 /*
537 * Copy on write.
538 */
539 e->flags |= VtEntryLocal;
541 b = vtblockcopy(b/*, e->tag, fs->ehi, fs->elo*/);
542 if(b == nil)
543 return nil;
545 HaveCopy:
546 if(p->type == VtDirType){
547 memmove(e->score, b->score, VtScoreSize);
548 vtentrypack(e, p->data, index);
549 }else{
550 memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
552 return b;
555 /*
556 * Change the depth of the VtFile r.
557 * The entry e for r is contained in block p.
558 */
559 static int
560 growdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
562 VtBlock *b, *bb;
563 VtEntry oe;
565 assert(ISLOCKED(r));
566 assert(depth <= VtPointerDepth);
568 b = vtcacheglobal(r->c, e->score, e->type, r->dsize);
569 if(b == nil)
570 return -1;
572 oe = *e;
574 /*
575 * Keep adding layers until we get to the right depth
576 * or an error occurs.
577 */
578 while(DEPTH(e->type) < depth){
579 bb = vtcacheallocblock(r->c, e->type+1, r->psize);
580 if(bb == nil)
581 break;
582 memmove(bb->data, b->score, VtScoreSize);
583 memmove(e->score, bb->score, VtScoreSize);
584 e->type++;
585 e->flags |= VtEntryLocal;
586 vtblockput(b);
587 b = bb;
590 vtentrypack(e, p->data, r->offset % r->epb);
591 vtblockput(b);
593 if(DEPTH(e->type) == depth)
594 return 0;
595 return -1;
598 static int
599 shrinkdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
601 VtBlock *b, *nb, *ob, *rb;
602 VtEntry oe;
604 assert(ISLOCKED(r));
605 assert(depth <= VtPointerDepth);
607 rb = vtcacheglobal(r->c, e->score, e->type, r->psize);
608 if(rb == nil)
609 return -1;
611 /*
612 * Walk down to the new root block.
613 * We may stop early, but something is better than nothing.
614 */
615 oe = *e;
617 ob = nil;
618 b = rb;
619 for(; DEPTH(e->type) > depth; e->type--){
620 nb = vtcacheglobal(r->c, b->data, e->type-1, r->psize);
621 if(nb == nil)
622 break;
623 if(ob!=nil && ob!=rb)
624 vtblockput(ob);
625 ob = b;
626 b = nb;
629 if(b == rb){
630 vtblockput(rb);
631 return 0;
634 /*
635 * Right now, e points at the root block rb, b is the new root block,
636 * and ob points at b. To update:
638 * (i) change e to point at b
639 * (ii) zero the pointer ob -> b
640 * (iii) free the root block
642 * p (the block containing e) must be written before
643 * anything else.
644 */
646 /* (i) */
647 memmove(e->score, b->score, VtScoreSize);
648 vtentrypack(e, p->data, r->offset % r->epb);
650 /* (ii) */
651 memmove(ob->data, vtzeroscore, VtScoreSize);
653 /* (iii) */
654 vtblockput(rb);
655 if(ob!=nil && ob!=rb)
656 vtblockput(ob);
657 vtblockput(b);
659 if(DEPTH(e->type) == depth)
660 return 0;
661 return -1;
664 static int
665 mkindices(VtEntry *e, u32int bn, int *index)
667 int i, np;
669 memset(index, 0, (VtPointerDepth+1)*sizeof(int));
671 np = e->psize/VtScoreSize;
672 for(i=0; bn > 0; i++){
673 if(i >= VtPointerDepth){
674 werrstr("bad address 0x%lux", (ulong)bn);
675 return -1;
677 index[i] = bn % np;
678 bn /= np;
680 return i;
683 VtBlock *
684 vtfileblock(VtFile *r, u32int bn, int mode)
686 VtBlock *b, *bb;
687 int index[VtPointerDepth+1];
688 VtEntry e;
689 int i;
690 int m;
692 assert(ISLOCKED(r));
693 assert(bn != NilBlock);
695 b = fileload(r, &e);
696 if(b == nil)
697 return nil;
699 i = mkindices(&e, bn, index);
700 if(i < 0)
701 goto Err;
702 if(i > DEPTH(e.type)){
703 if(mode == VtOREAD){
704 werrstr("bad address 0x%lux", (ulong)bn);
705 goto Err;
707 index[i] = 0;
708 if(growdepth(r, b, &e, i) < 0)
709 goto Err;
712 assert(b->type == VtDirType);
714 index[DEPTH(e.type)] = r->offset % r->epb;
716 /* mode for intermediate block */
717 m = mode;
718 if(m == VtOWRITE)
719 m = VtORDWR;
721 for(i=DEPTH(e.type); i>=0; i--){
722 bb = blockwalk(r, b, index[i], r->c, i==0 ? mode : m, &e);
723 if(bb == nil)
724 goto Err;
725 vtblockput(b);
726 b = bb;
728 b->pc = getcallerpc(&r);
729 return b;
730 Err:
731 vtblockput(b);
732 return nil;
735 int
736 vtfileblockscore(VtFile *r, u32int bn, uchar score[VtScoreSize])
738 VtBlock *b, *bb;
739 int index[VtPointerDepth+1];
740 VtEntry e;
741 int i;
743 assert(ISLOCKED(r));
744 assert(bn != NilBlock);
746 b = fileload(r, &e);
747 if(b == nil)
748 return -1;
750 if(DEPTH(e.type) == 0){
751 memmove(score, e.score, VtScoreSize);
752 vtblockput(b);
753 return 0;
756 i = mkindices(&e, bn, index);
757 if(i < 0){
758 vtblockput(b);
759 return -1;
761 if(i > DEPTH(e.type)){
762 memmove(score, vtzeroscore, VtScoreSize);
763 vtblockput(b);
764 return 0;
767 index[DEPTH(e.type)] = r->offset % r->epb;
769 for(i=DEPTH(e.type); i>=1; i--){
770 bb = blockwalk(r, b, index[i], r->c, VtOREAD, &e);
771 if(bb == nil)
772 goto Err;
773 vtblockput(b);
774 b = bb;
775 if(memcmp(b->score, vtzeroscore, VtScoreSize) == 0)
776 break;
779 memmove(score, b->data+index[0]*VtScoreSize, VtScoreSize);
780 vtblockput(b);
781 return 0;
783 Err:
784 vtblockput(b);
785 return -1;
788 void
789 vtfileincref(VtFile *r)
791 qlock(&r->lk);
792 r->ref++;
793 qunlock(&r->lk);
796 void
797 vtfileclose(VtFile *r)
799 if(r == nil)
800 return;
801 qlock(&r->lk);
802 r->ref--;
803 if(r->ref){
804 qunlock(&r->lk);
805 return;
807 assert(r->ref == 0);
808 qunlock(&r->lk);
809 if(r->parent)
810 vtfileclose(r->parent);
811 memset(r, ~0, sizeof(*r));
812 vtfree(r);
815 /*
816 * Retrieve the block containing the entry for r.
817 * If a snapshot has happened, we might need
818 * to get a new copy of the block. We avoid this
819 * in the common case by caching the score for
820 * the block and the last epoch in which it was valid.
822 * We use r->mode to tell the difference between active
823 * file system VtFiles (VtORDWR) and VtFiles for the
824 * snapshot file system (VtOREAD).
825 */
826 static VtBlock*
827 fileloadblock(VtFile *r, int mode)
829 char e[ERRMAX];
830 u32int addr;
831 VtBlock *b;
833 switch(r->mode){
834 default:
835 assert(0);
836 case VtORDWR:
837 assert(r->mode == VtORDWR);
838 if(r->local == 1){
839 b = vtcacheglobal(r->c, r->score, VtDirType, r->bsize);
840 if(b == nil)
841 return nil;
842 b->pc = getcallerpc(&r);
843 return b;
845 assert(r->parent != nil);
846 if(vtfilelock(r->parent, VtORDWR) < 0)
847 return nil;
848 b = vtfileblock(r->parent, r->offset/r->epb, VtORDWR);
849 vtfileunlock(r->parent);
850 if(b == nil)
851 return nil;
852 memmove(r->score, b->score, VtScoreSize);
853 r->local = 1;
854 return b;
856 case VtOREAD:
857 if(mode == VtORDWR){
858 werrstr("read/write lock of read-only file");
859 return nil;
861 addr = vtglobaltolocal(r->score);
862 if(addr == NilBlock)
863 return vtcacheglobal(r->c, r->score, VtDirType, r->bsize);
865 b = vtcachelocal(r->c, addr, VtDirType);
866 if(b)
867 return b;
869 /*
870 * If it failed because the epochs don't match, the block has been
871 * archived and reclaimed. Rewalk from the parent and get the
872 * new pointer. This can't happen in the VtORDWR case
873 * above because blocks in the current epoch don't get
874 * reclaimed. The fact that we're VtOREAD means we're
875 * a snapshot. (Or else the file system is read-only, but then
876 * the archiver isn't going around deleting blocks.)
877 */
878 rerrstr(e, sizeof e);
879 if(strcmp(e, ELabelMismatch) == 0){
880 if(vtfilelock(r->parent, VtOREAD) < 0)
881 return nil;
882 b = vtfileblock(r->parent, r->offset/r->epb, VtOREAD);
883 vtfileunlock(r->parent);
884 if(b){
885 fprint(2, "vtfilealloc: lost %V found %V\n",
886 r->score, b->score);
887 memmove(r->score, b->score, VtScoreSize);
888 return b;
891 return nil;
895 int
896 vtfilelock(VtFile *r, int mode)
898 VtBlock *b;
900 if(mode == -1)
901 mode = r->mode;
903 b = fileloadblock(r, mode);
904 if(b == nil)
905 return -1;
906 /*
907 * The fact that we are holding b serves as the
908 * lock entitling us to write to r->b.
909 */
910 assert(r->b == nil);
911 r->b = b;
912 b->pc = getcallerpc(&r);
913 return 0;
916 /*
917 * Lock two (usually sibling) VtFiles. This needs special care
918 * because the Entries for both vtFiles might be in the same block.
919 * We also try to lock blocks in left-to-right order within the tree.
920 */
921 int
922 vtfilelock2(VtFile *r, VtFile *rr, int mode)
924 VtBlock *b, *bb;
926 if(rr == nil)
927 return vtfilelock(r, mode);
929 if(mode == -1)
930 mode = r->mode;
932 if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
933 b = fileloadblock(r, mode);
934 if(b == nil)
935 return -1;
936 vtblockduplock(b);
937 bb = b;
938 }else if(r->parent==rr->parent || r->offset > rr->offset){
939 bb = fileloadblock(rr, mode);
940 b = fileloadblock(r, mode);
941 }else{
942 b = fileloadblock(r, mode);
943 bb = fileloadblock(rr, mode);
945 if(b == nil || bb == nil){
946 if(b)
947 vtblockput(b);
948 if(bb)
949 vtblockput(bb);
950 return -1;
953 /*
954 * The fact that we are holding b and bb serves
955 * as the lock entitling us to write to r->b and rr->b.
956 */
957 r->b = b;
958 rr->b = bb;
959 b->pc = getcallerpc(&r);
960 bb->pc = getcallerpc(&r);
961 return 0;
964 void
965 vtfileunlock(VtFile *r)
967 VtBlock *b;
969 if(r->b == nil){
970 fprint(2, "vtfileunlock: already unlocked\n");
971 abort();
973 b = r->b;
974 r->b = nil;
975 vtblockput(b);
978 static VtBlock*
979 fileload(VtFile *r, VtEntry *e)
981 VtBlock *b;
983 assert(ISLOCKED(r));
984 b = r->b;
985 if(vtentryunpack(e, b->data, r->offset % r->epb) < 0)
986 return nil;
987 vtblockduplock(b);
988 return b;
991 static int
992 sizetodepth(uvlong s, int psize, int dsize)
994 int np;
995 int d;
997 /* determine pointer depth */
998 np = psize/VtScoreSize;
999 s = (s + dsize - 1)/dsize;
1000 for(d = 0; s > 1; d++)
1001 s = (s + np - 1)/np;
1002 return d;
1005 long
1006 vtfileread(VtFile *f, void *data, long count, vlong offset)
1008 int frag;
1009 VtBlock *b;
1010 VtEntry e;
1012 assert(ISLOCKED(f));
1014 vtfilegetentry(f, &e);
1015 if(count == 0)
1016 return 0;
1017 if(count < 0 || offset < 0){
1018 werrstr("vtfileread: bad offset or count");
1019 return -1;
1021 if(offset >= e.size)
1022 return 0;
1024 if(offset+count > e.size)
1025 count = e.size - offset;
1027 frag = offset % e.dsize;
1028 if(frag+count > e.dsize)
1029 count = e.dsize - frag;
1031 b = vtfileblock(f, offset/e.dsize, VtOREAD);
1032 if(b == nil)
1033 return -1;
1035 memmove(data, b->data+frag, count);
1036 vtblockput(b);
1037 return count;
1040 static long
1041 filewrite1(VtFile *f, void *data, long count, vlong offset)
1043 int frag, m;
1044 VtBlock *b;
1045 VtEntry e;
1047 vtfilegetentry(f, &e);
1048 if(count < 0 || offset < 0){
1049 werrstr("vtfilewrite: bad offset or count");
1050 return -1;
1053 frag = offset % e.dsize;
1054 if(frag+count > e.dsize)
1055 count = e.dsize - frag;
1057 m = VtORDWR;
1058 if(frag == 0 && count == e.dsize)
1059 m = VtOWRITE;
1061 b = vtfileblock(f, offset/e.dsize, m);
1062 if(b == nil)
1063 return -1;
1065 memmove(b->data+frag, data, count);
1066 if(m == VtOWRITE && frag+count < e.dsize)
1067 memset(b->data+frag+count, 0, e.dsize-frag-count);
1069 if(offset+count > e.size){
1070 vtfilegetentry(f, &e);
1071 e.size = offset+count;
1072 vtfilesetentry(f, &e);
1075 vtblockput(b);
1076 return count;
1079 long
1080 vtfilewrite(VtFile *f, void *data, long count, vlong offset)
1082 long tot, m;
1084 assert(ISLOCKED(f));
1086 tot = 0;
1087 m = 0;
1088 while(tot < count){
1089 m = filewrite1(f, (char*)data+tot, count-tot, offset+tot);
1090 if(m <= 0)
1091 break;
1092 tot += m;
1094 if(tot==0)
1095 return m;
1096 return tot;
1099 static int
1100 flushblock(VtCache *c, VtBlock *bb, uchar score[VtScoreSize], int ppb, int epb,
1101 int type)
1103 u32int addr;
1104 VtBlock *b;
1105 VtEntry e;
1106 int i;
1108 addr = vtglobaltolocal(score);
1109 if(addr == NilBlock)
1110 return 0;
1112 if(bb){
1113 b = bb;
1114 if(memcmp(b->score, score, VtScoreSize) != 0)
1115 abort();
1116 }else
1117 if((b = vtcachelocal(c, addr, type)) == nil)
1118 return -1;
1120 switch(type){
1121 case VtDataType:
1122 break;
1124 case VtDirType:
1125 for(i=0; i<epb; i++){
1126 if(vtentryunpack(&e, b->data, i) < 0)
1127 goto Err;
1128 if(!(e.flags&VtEntryActive))
1129 continue;
1130 if(flushblock(c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
1131 e.type) < 0)
1132 goto Err;
1133 vtentrypack(&e, b->data, i);
1135 break;
1137 default: /* VtPointerTypeX */
1138 for(i=0; i<ppb; i++){
1139 if(flushblock(c, nil, b->data+VtScoreSize*i, ppb, epb, type-1) < 0)
1140 goto Err;
1142 break;
1145 if(vtblockwrite(b) < 0)
1146 goto Err;
1147 memmove(score, b->score, VtScoreSize);
1148 if(b != bb)
1149 vtblockput(b);
1150 return 0;
1152 Err:
1153 if(b != bb)
1154 vtblockput(b);
1155 return -1;
1158 int
1159 vtfileflush(VtFile *f)
1161 int ret;
1162 VtBlock *b;
1163 VtEntry e;
1165 assert(ISLOCKED(f));
1166 b = fileload(f, &e);
1167 if(!(e.flags&VtEntryLocal)){
1168 vtblockput(b);
1169 return 0;
1172 ret = flushblock(f->c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
1173 e.type);
1174 if(ret < 0){
1175 vtblockput(b);
1176 return -1;
1179 vtentrypack(&e, b->data, f->offset % f->epb);
1180 vtblockput(b);
1181 return 0;
1184 int
1185 vtfileflushbefore(VtFile *r, u64int offset)
1187 VtBlock *b, *bb;
1188 VtEntry e;
1189 int i, base, depth, ppb, epb, doflush;
1190 int index[VtPointerDepth+1], j, ret;
1191 VtBlock *bi[VtPointerDepth+2];
1192 uchar *score;
1194 assert(ISLOCKED(r));
1195 if(offset == 0)
1196 return 0;
1198 b = fileload(r, &e);
1199 if(b == nil)
1200 return -1;
1203 * compute path through tree for the last written byte and the next one.
1205 ret = -1;
1206 memset(bi, 0, sizeof bi);
1207 depth = DEPTH(e.type);
1208 bi[depth+1] = b;
1209 i = mkindices(&e, (offset-1)/e.dsize, index);
1210 if(i < 0)
1211 goto Err;
1212 if(i > depth)
1213 goto Err;
1214 ppb = e.psize / VtScoreSize;
1215 epb = e.dsize / VtEntrySize;
1218 * load the blocks along the last written byte
1220 index[depth] = r->offset % r->epb;
1221 for(i=depth; i>=0; i--){
1222 bb = blockwalk(r, b, index[i], r->c, VtORDWR, &e);
1223 if(bb == nil)
1224 goto Err;
1225 bi[i] = bb;
1226 b = bb;
1228 ret = 0;
1231 * walk up the path from leaf to root, flushing anything that
1232 * has been finished.
1234 base = e.type&~VtTypeDepthMask;
1235 for(i=0; i<=depth; i++){
1236 doflush = 0;
1237 if(i == 0){
1238 /* leaf: data or dir block */
1239 if(offset%e.dsize == 0)
1240 doflush = 1;
1241 }else{
1243 * interior node: pointer blocks.
1244 * specifically, b = bi[i] is a block whose index[i-1]'th entry
1245 * points at bi[i-1].
1247 b = bi[i];
1250 * the index entries up to but not including index[i-1] point at
1251 * finished blocks, so flush them for sure.
1253 for(j=0; j<index[i-1]; j++)
1254 if(flushblock(r->c, nil, b->data+j*VtScoreSize, ppb, epb, base+i-1) < 0)
1255 goto Err;
1258 * if index[i-1] is the last entry in the block and is global
1259 * (i.e. the kid is flushed), then we can flush this block.
1261 if(j==ppb-1 && vtglobaltolocal(b->data+j*VtScoreSize)==NilBlock)
1262 doflush = 1;
1264 if(doflush){
1265 if(i == depth)
1266 score = e.score;
1267 else
1268 score = bi[i+1]->data+index[i]*VtScoreSize;
1269 if(flushblock(r->c, bi[i], score, ppb, epb, base+i) < 0)
1270 goto Err;
1274 Err:
1275 /* top: entry. do this always so that the score is up-to-date */
1276 vtentrypack(&e, bi[depth+1]->data, index[depth]);
1277 for(i=0; i<nelem(bi); i++)
1278 if(bi[i])
1279 vtblockput(bi[i]);
1280 return ret;