Blob


1 /*
2 * Manage tree of VtFiles stored in the block cache.
3 *
4 * The single point of truth for the info about the VtFiles themselves
5 * is the block data. Because of this, there is no explicit locking of
6 * VtFile structures, and indeed there may be more than one VtFile
7 * structure for a given Venti file. They synchronize through the
8 * block cache.
9 *
10 * This is a bit simpler than fossil because there are no epochs
11 * or tags or anything else. Just mutable local blocks and immutable
12 * Venti blocks.
13 */
15 #include <u.h>
16 #include <libc.h>
17 #include <venti.h>
19 #define MaxBlock (1UL<<31)
21 static char EBadEntry[] = "bad VtEntry";
22 static char ENotDir[] = "walk in non-directory";
23 static char ETooBig[] = "file too big";
24 /* static char EBadAddr[] = "bad address"; */
25 static char ELabelMismatch[] = "label mismatch";
27 static int sizetodepth(uvlong s, int psize, int dsize);
28 static VtBlock *fileload(VtFile *r, VtEntry *e);
29 static int shrinkdepth(VtFile*, VtBlock*, VtEntry*, int);
30 static int shrinksize(VtFile*, VtEntry*, uvlong);
31 static int growdepth(VtFile*, VtBlock*, VtEntry*, int);
33 #define ISLOCKED(r) ((r)->b != nil)
34 #define DEPTH(t) ((t)&VtTypeDepthMask)
36 static VtFile *
37 vtfilealloc(VtCache *c, VtBlock *b, VtFile *p, u32int offset, int mode)
38 {
39 int epb;
40 u32int size;
41 VtEntry e;
42 VtFile *r;
44 assert(p==nil || ISLOCKED(p));
46 if(p == nil){
47 assert(offset == 0);
48 epb = 1;
49 }else
50 epb = p->dsize / VtEntrySize;
52 if(b->type != VtDirType)
53 goto Bad;
55 /*
56 * a non-active entry is the only thing that
57 * can legitimately happen here. all the others
58 * get prints.
59 */
60 if(vtentryunpack(&e, b->data, offset % epb) < 0){
61 fprint(2, "vtentryunpack failed\n");
62 goto Bad;
63 }
64 if(!(e.flags & VtEntryActive)){
65 if(0)fprint(2, "not active\n");
66 goto Bad;
67 }
68 if(e.psize < 256 || e.dsize < 256){
69 fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
70 goto Bad;
71 }
73 if(DEPTH(e.type) < sizetodepth(e.size, e.psize, e.dsize)){
74 fprint(2, "depth %ud size %llud psize %ud dsize %ud\n",
75 DEPTH(e.type), e.size, e.psize, e.dsize);
76 goto Bad;
77 }
79 size = vtcacheblocksize(c);
80 if(e.dsize > size || e.psize > size){
81 fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, size);
82 goto Bad;
83 }
85 r = vtmallocz(sizeof(VtFile));
86 r->c = c;
87 r->mode = mode;
88 r->dsize = e.dsize;
89 r->psize = e.psize;
90 r->gen = e.gen;
91 r->dir = (e.type & VtTypeBaseMask) == VtDirType;
92 r->ref = 1;
93 r->parent = p;
94 if(p){
95 qlock(&p->lk);
96 assert(mode == VtOREAD || p->mode == VtORDWR);
97 p->ref++;
98 qunlock(&p->lk);
99 }else{
100 assert(b->addr != NilBlock);
101 r->local = 1;
103 memmove(r->score, b->score, VtScoreSize);
104 r->offset = offset;
105 r->epb = epb;
107 return r;
108 Bad:
109 werrstr(EBadEntry);
110 return nil;
114 VtFile *
115 vtfileroot(VtCache *c, u32int addr, int mode)
117 VtFile *r;
118 VtBlock *b;
120 b = vtcachelocal(c, addr, VtDirType);
121 if(b == nil)
122 return nil;
123 r = vtfilealloc(c, b, nil, 0, mode);
124 vtblockput(b);
125 return r;
128 VtFile*
129 vtfileopenroot(VtCache *c, VtEntry *e)
131 VtBlock *b;
132 VtFile *f;
134 b = vtcacheallocblock(c, VtDirType);
135 if(b == nil)
136 return nil;
138 vtentrypack(e, b->data, 0);
139 f = vtfilealloc(c, b, nil, 0, VtORDWR);
140 vtblockput(b);
141 return f;
144 VtFile *
145 vtfilecreateroot(VtCache *c, int psize, int dsize, int type)
147 VtEntry e;
149 memset(&e, 0, sizeof e);
150 e.flags = VtEntryActive;
151 e.psize = psize;
152 e.dsize = dsize;
153 e.type = type;
154 memmove(e.score, vtzeroscore, VtScoreSize);
156 return vtfileopenroot(c, &e);
159 VtFile *
160 vtfileopen(VtFile *r, u32int offset, int mode)
162 ulong bn;
163 VtBlock *b;
165 assert(ISLOCKED(r));
166 if(!r->dir){
167 werrstr(ENotDir);
168 return nil;
171 bn = offset/(r->dsize/VtEntrySize);
173 b = vtfileblock(r, bn, mode);
174 if(b == nil)
175 return nil;
176 r = vtfilealloc(r->c, b, r, offset, mode);
177 vtblockput(b);
178 return r;
181 VtFile *
182 vtfilecreate(VtFile *r, int psize, int dsize, int type)
184 int i;
185 VtBlock *b;
186 u32int bn, size;
187 VtEntry e;
188 int epb;
189 VtFile *rr;
190 u32int offset;
192 assert(ISLOCKED(r));
193 assert(psize <= VtMaxLumpSize);
194 assert(dsize <= VtMaxLumpSize);
195 assert(type == VtDirType || type == VtDataType);
197 if(!r->dir){
198 werrstr(ENotDir);
199 return nil;
202 epb = r->dsize/VtEntrySize;
204 size = vtfilegetdirsize(r);
205 /*
206 * look at a random block to see if we can find an empty entry
207 */
208 offset = lnrand(size+1);
209 offset -= offset % epb;
211 /* try the given block and then try the last block */
212 for(;;){
213 bn = offset/epb;
214 b = vtfileblock(r, bn, VtORDWR);
215 if(b == nil)
216 return nil;
217 for(i=offset%r->epb; i<epb; i++){
218 if(vtentryunpack(&e, b->data, i) < 0)
219 continue;
220 if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
221 goto Found;
223 vtblockput(b);
224 if(offset == size){
225 fprint(2, "vtfilecreate: cannot happen\n");
226 werrstr("vtfilecreate: cannot happen");
227 return nil;
229 offset = size;
232 Found:
233 /* found an entry - gen already set */
234 e.psize = psize;
235 e.dsize = dsize;
236 e.flags = VtEntryActive;
237 e.type = type;
238 e.size = 0;
239 memmove(e.score, vtzeroscore, VtScoreSize);
240 vtentrypack(&e, b->data, i);
242 offset = bn*epb + i;
243 if(offset+1 > size){
244 if(vtfilesetdirsize(r, offset+1) < 0){
245 vtblockput(b);
246 return nil;
250 rr = vtfilealloc(r->c, b, r, offset, VtORDWR);
251 vtblockput(b);
252 return rr;
255 static int
256 vtfilekill(VtFile *r, int doremove)
258 VtEntry e;
259 VtBlock *b;
261 assert(ISLOCKED(r));
262 b = fileload(r, &e);
263 if(b == nil)
264 return -1;
266 if(doremove==0 && e.size == 0){
267 /* already truncated */
268 vtblockput(b);
269 return 0;
272 if(doremove){
273 if(e.gen != ~0)
274 e.gen++;
275 e.dsize = 0;
276 e.psize = 0;
277 e.flags = 0;
278 }else
279 e.flags &= ~VtEntryLocal;
280 e.type = 0;
281 e.size = 0;
282 memmove(e.score, vtzeroscore, VtScoreSize);
283 vtentrypack(&e, b->data, r->offset % r->epb);
284 vtblockput(b);
286 if(doremove){
287 vtfileunlock(r);
288 vtfileclose(r);
291 return 0;
294 int
295 vtfileremove(VtFile *r)
297 return vtfilekill(r, 1);
300 int
301 vtfiletruncate(VtFile *r)
303 return vtfilekill(r, 0);
306 uvlong
307 vtfilegetsize(VtFile *r)
309 VtEntry e;
310 VtBlock *b;
312 assert(ISLOCKED(r));
313 b = fileload(r, &e);
314 if(b == nil)
315 return ~(uvlong)0;
316 vtblockput(b);
318 return e.size;
321 static int
322 shrinksize(VtFile *r, VtEntry *e, uvlong size)
324 int i, depth, type, isdir, ppb;
325 uvlong ptrsz;
326 uchar score[VtScoreSize];
327 VtBlock *b;
329 b = vtcacheglobal(r->c, e->score, e->type);
330 if(b == nil)
331 return -1;
333 ptrsz = e->dsize;
334 ppb = e->psize/VtScoreSize;
335 type = e->type;
336 depth = DEPTH(type);
337 for(i=0; i+1<depth; i++)
338 ptrsz *= ppb;
340 isdir = r->dir;
341 while(depth > 0){
342 if(b->addr == NilBlock){
343 /* not worth copying the block just so we can zero some of it */
344 vtblockput(b);
345 return -1;
348 /*
349 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
350 */
352 /* zero the pointers to unnecessary blocks */
353 i = (size+ptrsz-1)/ptrsz;
354 for(; i<ppb; i++)
355 memmove(b->data+i*VtScoreSize, vtzeroscore, VtScoreSize);
357 /* recurse (go around again) on the partially necessary block */
358 i = size/ptrsz;
359 size = size%ptrsz;
360 if(size == 0){
361 vtblockput(b);
362 return 0;
364 ptrsz /= ppb;
365 type--;
366 memmove(score, b->data+i*VtScoreSize, VtScoreSize);
367 vtblockput(b);
368 b = vtcacheglobal(r->c, score, type);
369 if(b == nil)
370 return -1;
373 if(b->addr == NilBlock){
374 vtblockput(b);
375 return -1;
378 /*
379 * No one ever truncates BtDir blocks.
380 */
381 if(depth==0 && !isdir && e->dsize > size)
382 memset(b->data+size, 0, e->dsize-size);
383 vtblockput(b);
384 return 0;
387 int
388 vtfilesetsize(VtFile *r, uvlong size)
390 int depth, edepth;
391 VtEntry e;
392 VtBlock *b;
394 assert(ISLOCKED(r));
395 if(size == 0)
396 return vtfiletruncate(r);
398 if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
399 werrstr(ETooBig);
400 return -1;
403 b = fileload(r, &e);
404 if(b == nil)
405 return -1;
407 /* quick out */
408 if(e.size == size){
409 vtblockput(b);
410 return 0;
413 depth = sizetodepth(size, e.psize, e.dsize);
414 edepth = DEPTH(e.type);
415 if(depth < edepth){
416 if(shrinkdepth(r, b, &e, depth) < 0){
417 vtblockput(b);
418 return -1;
420 }else if(depth > edepth){
421 if(growdepth(r, b, &e, depth) < 0){
422 vtblockput(b);
423 return -1;
427 if(size < e.size)
428 shrinksize(r, &e, size);
430 e.size = size;
431 vtentrypack(&e, b->data, r->offset % r->epb);
432 vtblockput(b);
434 return 0;
437 int
438 vtfilesetdirsize(VtFile *r, u32int ds)
440 uvlong size;
441 int epb;
443 assert(ISLOCKED(r));
444 epb = r->dsize/VtEntrySize;
446 size = (uvlong)r->dsize*(ds/epb);
447 size += VtEntrySize*(ds%epb);
448 return vtfilesetsize(r, size);
451 u32int
452 vtfilegetdirsize(VtFile *r)
454 ulong ds;
455 uvlong size;
456 int epb;
458 assert(ISLOCKED(r));
459 epb = r->dsize/VtEntrySize;
461 size = vtfilegetsize(r);
462 ds = epb*(size/r->dsize);
463 ds += (size%r->dsize)/VtEntrySize;
464 return ds;
467 int
468 vtfilegetentry(VtFile *r, VtEntry *e)
470 VtBlock *b;
472 assert(ISLOCKED(r));
473 b = fileload(r, e);
474 if(b == nil)
475 return -1;
476 vtblockput(b);
478 return 0;
481 int
482 vtfilesetentry(VtFile *r, VtEntry *e)
484 VtBlock *b;
485 VtEntry ee;
487 assert(ISLOCKED(r));
488 b = fileload(r, &ee);
489 if(b == nil)
490 return -1;
491 vtentrypack(e, b->data, r->offset % r->epb);
492 vtblockput(b);
493 return 0;
496 static VtBlock *
497 blockwalk(VtBlock *p, int index, VtCache *c, int mode, VtEntry *e)
499 VtBlock *b;
500 int type;
501 uchar *score;
502 VtEntry oe;
504 switch(p->type){
505 case VtDataType:
506 assert(0);
507 case VtDirType:
508 type = e->type;
509 score = e->score;
510 break;
511 default:
512 type = p->type - 1;
513 score = p->data+index*VtScoreSize;
514 break;
516 //print("walk from %V/%d ty %d to %V ty %d\n", p->score, index, p->type, score, type);
518 if(mode == VtOWRITE && vtglobaltolocal(score) == NilBlock){
519 b = vtcacheallocblock(c, type);
520 if(b)
521 goto HaveCopy;
522 }else
523 b = vtcacheglobal(c, score, type);
525 if(b == nil || mode == VtOREAD)
526 return b;
528 if(vtglobaltolocal(b->score) != NilBlock)
529 return b;
531 oe = *e;
533 /*
534 * Copy on write.
535 */
536 e->flags |= VtEntryLocal;
538 b = vtblockcopy(b/*, e->tag, fs->ehi, fs->elo*/);
539 if(b == nil)
540 return nil;
542 HaveCopy:
543 if(p->type == VtDirType){
544 memmove(e->score, b->score, VtScoreSize);
545 vtentrypack(e, p->data, index);
546 }else{
547 memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
549 return b;
552 /*
553 * Change the depth of the VtFile r.
554 * The entry e for r is contained in block p.
555 */
556 static int
557 growdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
559 VtBlock *b, *bb;
560 VtEntry oe;
562 assert(ISLOCKED(r));
563 assert(depth <= VtPointerDepth);
565 b = vtcacheglobal(r->c, e->score, e->type);
566 if(b == nil)
567 return -1;
569 oe = *e;
571 /*
572 * Keep adding layers until we get to the right depth
573 * or an error occurs.
574 */
575 while(DEPTH(e->type) < depth){
576 bb = vtcacheallocblock(r->c, e->type+1);
577 if(bb == nil)
578 break;
579 memmove(bb->data, b->score, VtScoreSize);
580 memmove(e->score, bb->score, VtScoreSize);
581 e->type++;
582 e->flags |= VtEntryLocal;
583 vtblockput(b);
584 b = bb;
587 vtentrypack(e, p->data, r->offset % r->epb);
588 vtblockput(b);
590 if(DEPTH(e->type) == depth)
591 return 0;
592 return -1;
595 static int
596 shrinkdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
598 VtBlock *b, *nb, *ob, *rb;
599 VtEntry oe;
601 assert(ISLOCKED(r));
602 assert(depth <= VtPointerDepth);
604 rb = vtcacheglobal(r->c, e->score, e->type);
605 if(rb == nil)
606 return 0;
608 /*
609 * Walk down to the new root block.
610 * We may stop early, but something is better than nothing.
611 */
612 oe = *e;
614 ob = nil;
615 b = rb;
616 for(; DEPTH(e->type) > depth; e->type--){
617 nb = vtcacheglobal(r->c, b->data, e->type-1);
618 if(nb == nil)
619 break;
620 if(ob!=nil && ob!=rb)
621 vtblockput(ob);
622 ob = b;
623 b = nb;
626 if(b == rb){
627 vtblockput(rb);
628 return 0;
631 /*
632 * Right now, e points at the root block rb, b is the new root block,
633 * and ob points at b. To update:
635 * (i) change e to point at b
636 * (ii) zero the pointer ob -> b
637 * (iii) free the root block
639 * p (the block containing e) must be written before
640 * anything else.
641 */
643 /* (i) */
644 memmove(e->score, b->score, VtScoreSize);
645 vtentrypack(e, p->data, r->offset % r->epb);
647 /* (ii) */
648 memmove(ob->data, vtzeroscore, VtScoreSize);
650 /* (iii) */
651 vtblockput(rb);
652 if(ob!=nil && ob!=rb)
653 vtblockput(ob);
654 vtblockput(b);
656 if(DEPTH(e->type) == depth)
657 return 0;
658 return -1;
661 static int
662 mkindices(VtEntry *e, u32int bn, int *index)
664 int i, np;
666 memset(index, 0, (VtPointerDepth+1)*sizeof(int));
668 np = e->psize/VtScoreSize;
669 for(i=0; bn > 0; i++){
670 if(i >= VtPointerDepth){
671 werrstr("bad address 0x%lux", (ulong)bn);
672 return -1;
674 index[i] = bn % np;
675 bn /= np;
677 return i;
680 VtBlock *
681 vtfileblock(VtFile *r, u32int bn, int mode)
683 VtBlock *b, *bb;
684 int index[VtPointerDepth+1];
685 VtEntry e;
686 int i;
687 int m;
689 assert(ISLOCKED(r));
690 assert(bn != NilBlock);
692 b = fileload(r, &e);
693 if(b == nil)
694 return nil;
696 i = mkindices(&e, bn, index);
697 if(i < 0)
698 return nil;
699 if(i > DEPTH(e.type)){
700 if(mode == VtOREAD){
701 werrstr("bad address 0x%lux", (ulong)bn);
702 goto Err;
704 index[i] = 0;
705 if(growdepth(r, b, &e, i) < 0)
706 goto Err;
709 assert(b->type == VtDirType);
711 index[DEPTH(e.type)] = r->offset % r->epb;
713 /* mode for intermediate block */
714 m = mode;
715 if(m == VtOWRITE)
716 m = VtORDWR;
718 for(i=DEPTH(e.type); i>=0; i--){
719 bb = blockwalk(b, index[i], r->c, i==0 ? mode : m, &e);
720 if(bb == nil)
721 goto Err;
722 vtblockput(b);
723 b = bb;
725 return b;
726 Err:
727 vtblockput(b);
728 return nil;
731 int
732 vtfileblockscore(VtFile *r, u32int bn, uchar score[VtScoreSize])
734 VtBlock *b, *bb;
735 int index[VtPointerDepth+1];
736 VtEntry e;
737 int i;
739 assert(ISLOCKED(r));
740 assert(bn != NilBlock);
742 b = fileload(r, &e);
743 if(b == nil)
744 return -1;
746 i = mkindices(&e, bn, index);
747 if(i < 0){
748 vtblockput(b);
749 return -1;
751 if(i > DEPTH(e.type)){
752 memmove(score, vtzeroscore, VtScoreSize);
753 vtblockput(b);
754 return 0;
757 index[DEPTH(e.type)] = r->offset % r->epb;
759 for(i=DEPTH(e.type); i>=1; i--){
760 bb = blockwalk(b, index[i], r->c, VtOREAD, &e);
761 if(bb == nil)
762 goto Err;
763 vtblockput(b);
764 b = bb;
765 if(memcmp(b->score, vtzeroscore, VtScoreSize) == 0)
766 break;
769 memmove(score, b->data+index[0]*VtScoreSize, VtScoreSize);
770 vtblockput(b);
771 return 0;
773 Err:
774 vtblockput(b);
775 return -1;
778 void
779 vtfileincref(VtFile *r)
781 qlock(&r->lk);
782 r->ref++;
783 qunlock(&r->lk);
786 void
787 vtfileclose(VtFile *r)
789 if(r == nil)
790 return;
791 qlock(&r->lk);
792 r->ref--;
793 if(r->ref){
794 qunlock(&r->lk);
795 return;
797 assert(r->ref == 0);
798 qunlock(&r->lk);
799 if(r->parent)
800 vtfileclose(r->parent);
801 memset(r, ~0, sizeof(*r));
802 vtfree(r);
805 /*
806 * Retrieve the block containing the entry for r.
807 * If a snapshot has happened, we might need
808 * to get a new copy of the block. We avoid this
809 * in the common case by caching the score for
810 * the block and the last epoch in which it was valid.
812 * We use r->mode to tell the difference between active
813 * file system VtFiles (VtORDWR) and VtFiles for the
814 * snapshot file system (VtOREAD).
815 */
816 static VtBlock*
817 fileloadblock(VtFile *r, int mode)
819 char e[ERRMAX];
820 u32int addr;
821 VtBlock *b;
823 switch(r->mode){
824 default:
825 assert(0);
826 case VtORDWR:
827 assert(r->mode == VtORDWR);
828 if(r->local == 1){
829 b = vtcacheglobal(r->c, r->score, VtDirType);
830 if(b == nil)
831 return nil;
832 return b;
834 assert(r->parent != nil);
835 if(vtfilelock(r->parent, VtORDWR) < 0)
836 return nil;
837 b = vtfileblock(r->parent, r->offset/r->epb, VtORDWR);
838 vtfileunlock(r->parent);
839 if(b == nil)
840 return nil;
841 memmove(r->score, b->score, VtScoreSize);
842 r->local = 1;
843 return b;
845 case VtOREAD:
846 if(mode == VtORDWR){
847 werrstr("read/write lock of read-only file");
848 return nil;
850 addr = vtglobaltolocal(r->score);
851 if(addr == NilBlock)
852 return vtcacheglobal(r->c, r->score, VtDirType);
854 b = vtcachelocal(r->c, addr, VtDirType);
855 if(b)
856 return b;
858 /*
859 * If it failed because the epochs don't match, the block has been
860 * archived and reclaimed. Rewalk from the parent and get the
861 * new pointer. This can't happen in the VtORDWR case
862 * above because blocks in the current epoch don't get
863 * reclaimed. The fact that we're VtOREAD means we're
864 * a snapshot. (Or else the file system is read-only, but then
865 * the archiver isn't going around deleting blocks.)
866 */
867 rerrstr(e, sizeof e);
868 if(strcmp(e, ELabelMismatch) == 0){
869 if(vtfilelock(r->parent, VtOREAD) < 0)
870 return nil;
871 b = vtfileblock(r->parent, r->offset/r->epb, VtOREAD);
872 vtfileunlock(r->parent);
873 if(b){
874 fprint(2, "vtfilealloc: lost %V found %V\n",
875 r->score, b->score);
876 memmove(r->score, b->score, VtScoreSize);
877 return b;
880 return nil;
884 int
885 vtfilelock(VtFile *r, int mode)
887 VtBlock *b;
889 if(mode == -1)
890 mode = r->mode;
892 b = fileloadblock(r, mode);
893 if(b == nil)
894 return -1;
895 /*
896 * The fact that we are holding b serves as the
897 * lock entitling us to write to r->b.
898 */
899 assert(r->b == nil);
900 r->b = b;
901 return 0;
904 /*
905 * Lock two (usually sibling) VtFiles. This needs special care
906 * because the Entries for both vtFiles might be in the same block.
907 * We also try to lock blocks in left-to-right order within the tree.
908 */
909 int
910 vtfilelock2(VtFile *r, VtFile *rr, int mode)
912 VtBlock *b, *bb;
914 if(rr == nil)
915 return vtfilelock(r, mode);
917 if(mode == -1)
918 mode = r->mode;
920 if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
921 b = fileloadblock(r, mode);
922 if(b == nil)
923 return -1;
924 vtblockduplock(b);
925 bb = b;
926 }else if(r->parent==rr->parent || r->offset > rr->offset){
927 bb = fileloadblock(rr, mode);
928 b = fileloadblock(r, mode);
929 }else{
930 b = fileloadblock(r, mode);
931 bb = fileloadblock(rr, mode);
933 if(b == nil || bb == nil){
934 if(b)
935 vtblockput(b);
936 if(bb)
937 vtblockput(bb);
938 return -1;
941 /*
942 * The fact that we are holding b and bb serves
943 * as the lock entitling us to write to r->b and rr->b.
944 */
945 r->b = b;
946 rr->b = bb;
947 return 0;
950 void
951 vtfileunlock(VtFile *r)
953 VtBlock *b;
955 if(r->b == nil){
956 fprint(2, "vtfileunlock: already unlocked\n");
957 abort();
959 b = r->b;
960 r->b = nil;
961 vtblockput(b);
964 static VtBlock*
965 fileload(VtFile *r, VtEntry *e)
967 VtBlock *b;
969 assert(ISLOCKED(r));
970 b = r->b;
971 if(vtentryunpack(e, b->data, r->offset % r->epb) < 0)
972 return nil;
973 vtblockduplock(b);
974 return b;
977 static int
978 sizetodepth(uvlong s, int psize, int dsize)
980 int np;
981 int d;
983 /* determine pointer depth */
984 np = psize/VtScoreSize;
985 s = (s + dsize - 1)/dsize;
986 for(d = 0; s > 1; d++)
987 s = (s + np - 1)/np;
988 return d;
991 long
992 vtfileread(VtFile *f, void *data, long count, vlong offset)
994 int frag;
995 VtBlock *b;
996 VtEntry e;
998 assert(ISLOCKED(f));
1000 vtfilegetentry(f, &e);
1001 if(count == 0)
1002 return 0;
1003 if(count < 0 || offset < 0){
1004 werrstr("vtfileread: bad offset or count");
1005 return -1;
1007 if(offset >= e.size)
1008 return 0;
1010 if(offset+count > e.size)
1011 count = e.size - offset;
1013 frag = offset % e.dsize;
1014 if(frag+count > e.dsize)
1015 count = e.dsize - frag;
1017 b = vtfileblock(f, offset/e.dsize, VtOREAD);
1018 if(b == nil)
1019 return -1;
1021 memmove(data, b->data+frag, count);
1022 vtblockput(b);
1023 return count;
1026 static long
1027 filewrite1(VtFile *f, void *data, long count, vlong offset)
1029 int frag, m;
1030 VtBlock *b;
1031 VtEntry e;
1033 vtfilegetentry(f, &e);
1034 if(count < 0 || offset < 0){
1035 werrstr("vtfilewrite: bad offset or count");
1036 return -1;
1039 frag = offset % e.dsize;
1040 if(frag+count > e.dsize)
1041 count = e.dsize - frag;
1043 m = VtORDWR;
1044 if(frag == 0 && count == e.dsize)
1045 m = VtOWRITE;
1047 b = vtfileblock(f, offset/e.dsize, m);
1048 if(b == nil)
1049 return -1;
1051 memmove(b->data+frag, data, count);
1053 if(offset+count > e.size){
1054 vtfilegetentry(f, &e);
1055 e.size = offset+count;
1056 vtfilesetentry(f, &e);
1059 vtblockput(b);
1060 return count;
1063 long
1064 vtfilewrite(VtFile *f, void *data, long count, vlong offset)
1066 long tot, m;
1068 assert(ISLOCKED(f));
1070 tot = 0;
1071 m = 0;
1072 while(tot < count){
1073 m = filewrite1(f, (char*)data+tot, count-tot, offset+tot);
1074 if(m <= 0)
1075 break;
1076 tot += m;
1078 if(tot==0)
1079 return m;
1080 return tot;
1083 static int
1084 flushblock(VtCache *c, VtBlock *bb, uchar score[VtScoreSize], int ppb, int epb,
1085 int type)
1087 u32int addr;
1088 VtBlock *b;
1089 VtEntry e;
1090 int i;
1092 addr = vtglobaltolocal(score);
1093 if(addr == NilBlock)
1094 return 0;
1096 if(bb){
1097 b = bb;
1098 if(memcmp(b->score, score, VtScoreSize) != 0)
1099 abort();
1100 }else
1101 if((b = vtcachelocal(c, addr, type)) == nil)
1102 return -1;
1104 switch(type){
1105 case VtDataType:
1106 break;
1108 case VtDirType:
1109 for(i=0; i<epb; i++){
1110 if(vtentryunpack(&e, b->data, i) < 0)
1111 goto Err;
1112 if(flushblock(c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
1113 e.type) < 0)
1114 goto Err;
1116 break;
1118 default: /* VtPointerTypeX */
1119 for(i=0; i<ppb; i++){
1120 if(flushblock(c, nil, b->data+VtScoreSize*i, ppb, epb, type-1) < 0)
1121 goto Err;
1123 break;
1126 if(vtblockwrite(b) < 0)
1127 goto Err;
1128 memmove(score, b->score, VtScoreSize);
1129 if(b != bb)
1130 vtblockput(b);
1131 return 0;
1133 Err:
1134 if(b != bb)
1135 vtblockput(b);
1136 return -1;
1139 int
1140 vtfileflush(VtFile *f)
1142 int ret;
1143 VtBlock *b;
1144 VtEntry e;
1146 assert(ISLOCKED(f));
1147 b = fileload(f, &e);
1148 if(!(e.flags&VtEntryLocal)){
1149 vtblockput(b);
1150 return 0;
1153 ret = flushblock(f->c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
1154 e.type);
1155 if(ret < 0){
1156 vtblockput(b);
1157 return -1;
1160 vtentrypack(&e, b->data, f->offset % f->epb);
1161 vtblockput(b);
1162 return 0;
1165 int
1166 vtfileflushbefore(VtFile *r, u64int offset)
1168 VtBlock *b, *bb;
1169 VtEntry e;
1170 int i, base, depth, ppb, epb, doflush;
1171 int index[VtPointerDepth+1], j, ret;
1172 VtBlock *bi[VtPointerDepth+2];
1173 uchar *score;
1175 assert(ISLOCKED(r));
1176 if(offset == 0)
1177 return 0;
1179 b = fileload(r, &e);
1180 if(b == nil)
1181 return -1;
1184 * compute path through tree for the last written byte and the next one.
1186 ret = -1;
1187 memset(bi, 0, sizeof bi);
1188 depth = DEPTH(e.type);
1189 bi[depth+1] = b;
1190 i = mkindices(&e, (offset-1)/e.dsize, index);
1191 if(i < 0)
1192 goto Err;
1193 if(i > depth)
1194 goto Err;
1195 ppb = e.psize / VtScoreSize;
1196 epb = e.dsize / VtEntrySize;
1199 * load the blocks along the last written byte
1201 index[depth] = r->offset % r->epb;
1202 for(i=depth; i>=0; i--){
1203 bb = blockwalk(b, index[i], r->c, VtORDWR, &e);
1204 if(bb == nil)
1205 goto Err;
1206 bi[i] = bb;
1207 b = bb;
1209 ret = 0;
1212 * walk up the path from leaf to root, flushing anything that
1213 * has been finished.
1215 base = e.type&~VtTypeDepthMask;
1216 for(i=0; i<=depth; i++){
1217 doflush = 0;
1218 if(i == 0){
1219 /* leaf: data or dir block */
1220 if(offset%e.dsize == 0)
1221 doflush = 1;
1222 }else{
1224 * interior node: pointer blocks.
1225 * specifically, b = bi[i] is a block whose index[i-1]'th entry
1226 * points at bi[i-1].
1228 b = bi[i];
1231 * the index entries up to but not including index[i-1] point at
1232 * finished blocks, so flush them for sure.
1234 for(j=0; j<index[i-1]; j++)
1235 if(flushblock(r->c, nil, b->data+j*VtScoreSize, ppb, epb, base+i-1) < 0)
1236 goto Err;
1239 * if index[i-1] is the last entry in the block and is global
1240 * (i.e. the kid is flushed), then we can flush this block.
1242 if(j==ppb-1 && vtglobaltolocal(b->data+j*VtScoreSize)==NilBlock)
1243 doflush = 1;
1245 if(doflush){
1246 if(i == depth)
1247 score = e.score;
1248 else
1249 score = bi[i+1]->data+index[i]*VtScoreSize;
1250 if(flushblock(r->c, bi[i], score, ppb, epb, base+i) < 0)
1251 goto Err;
1255 Err:
1256 /* top: entry. do this always so that the score is up-to-date */
1257 vtentrypack(&e, bi[depth+1]->data, index[depth]);
1258 for(i=0; i<nelem(bi); i++)
1259 if(bi[i])
1260 vtblockput(bi[i]);
1261 return ret;