/*
 * Manage tree of VtFiles stored in the block cache.
 *
 * The single point of truth for the info about the VtFiles themselves
 * is the block data.  Because of this, there is no explicit locking of
 * VtFile structures, and indeed there may be more than one VtFile
 * structure for a given Venti file.  They synchronize through the
 * block cache.
 *
 * This is a bit simpler than fossil because there are no epochs
 * or tags or anything else.  Just mutable local blocks and immutable
 * Venti blocks.
 */

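/*
 * A minimal usage sketch, assuming a VtCache c created elsewhere;
 * error checks omitted.  Create a data file, write to it, flush it
 * to Venti, and release it:
 *
 *	VtFile *f;
 *	char buf[] = "hello, venti";
 *
 *	f = vtfilecreateroot(c, 8192, 8192, VtDataType);
 *	vtfilelock(f, VtORDWR);
 *	vtfilewrite(f, buf, sizeof buf, 0);
 *	vtfileflush(f);
 *	vtfileunlock(f);
 *	vtfileclose(f);
 *
 * Every read, write, or resize must happen between vtfilelock and
 * vtfileunlock; the routines below assert ISLOCKED.
 */
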
#include <u.h>
#include <libc.h>
#include <venti.h>

#define MaxBlock (1UL<<31)

static char EBadEntry[] = "bad VtEntry";
static char ENotDir[] = "walk in non-directory";
static char ETooBig[] = "file too big";
/* static char EBadAddr[] = "bad address"; */
static char ELabelMismatch[] = "label mismatch";

static int	sizetodepth(uvlong s, int psize, int dsize);
static VtBlock	*fileload(VtFile *r, VtEntry *e);
static int	shrinkdepth(VtFile*, VtBlock*, VtEntry*, int);
static int	shrinksize(VtFile*, VtEntry*, uvlong);
static int	growdepth(VtFile*, VtBlock*, VtEntry*, int);

#define ISLOCKED(r)	((r)->b != nil)
#define DEPTH(t)	((t)&VtTypeDepthMask)

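/*
 * A block or entry type encodes both the base type and the tree depth:
 * for example, VtDataType+2 is file data reached through two levels of
 * pointer blocks.  DEPTH extracts the depth bits; VtTypeBaseMask strips
 * them off to recover the base type.
 */
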
static VtFile *
vtfilealloc(VtCache *c, VtBlock *b, VtFile *p, u32int offset, int mode)
{
	int epb;
	u32int size;
	VtEntry e;
	VtFile *r;

	assert(p==nil || ISLOCKED(p));

	if(p == nil){
		assert(offset == 0);
		epb = 1;
	}else
		epb = p->dsize / VtEntrySize;

	if(b->type != VtDirType)
		goto Bad;

	/*
	 * a non-active entry is the only thing that
	 * can legitimately happen here. all the others
	 * get prints.
	 */
	if(vtentryunpack(&e, b->data, offset % epb) < 0){
		fprint(2, "vtentryunpack failed\n");
		goto Bad;
	}
	if(!(e.flags & VtEntryActive)){
		if(0)fprint(2, "not active\n");
		goto Bad;
	}
	if(e.psize < 256 || e.dsize < 256){
		fprint(2, "psize %ud dsize %ud\n", e.psize, e.dsize);
		goto Bad;
	}

	if(DEPTH(e.type) < sizetodepth(e.size, e.psize, e.dsize)){
		fprint(2, "depth %ud size %llud psize %ud dsize %ud\n",
			DEPTH(e.type), e.size, e.psize, e.dsize);
		goto Bad;
	}

	size = vtcacheblocksize(c);
	if(e.dsize > size || e.psize > size){
		fprint(2, "psize %ud dsize %ud blocksize %ud\n", e.psize, e.dsize, size);
		goto Bad;
	}

	r = vtmallocz(sizeof(VtFile));
	r->c = c;
	r->mode = mode;
	r->dsize = e.dsize;
	r->gen = e.gen;
	r->dir = (e.type & VtTypeBaseMask) == VtDirType;
	r->ref = 1;
	r->parent = p;
	if(p){
		qlock(&p->lk);
		assert(mode == VtOREAD || p->mode == VtORDWR);
		p->ref++;
		qunlock(&p->lk);
	}else{
		assert(b->addr != NilBlock);
		r->local = 1;
	}
	memmove(r->score, b->score, VtScoreSize);
	r->offset = offset;
	r->epb = epb;

	return r;

Bad:
	werrstr(EBadEntry);
	return nil;
}

VtFile *
vtfileroot(VtCache *c, u32int addr, int mode)
{
	VtFile *r;
	VtBlock *b;

	b = vtcachelocal(c, addr, VtDirType);
	if(b == nil)
		return nil;
	r = vtfilealloc(c, b, nil, 0, mode);
	vtblockput(b);
	return r;
}

VtFile*
vtfileopenroot(VtCache *c, VtEntry *e)
{
	VtBlock *b;
	VtFile *f;

	b = vtcacheallocblock(c, VtDirType);
	if(b == nil)
		return nil;

	vtentrypack(e, b->data, 0);
	f = vtfilealloc(c, b, nil, 0, VtORDWR);
	vtblockput(b);
	return f;
}

VtFile *
vtfilecreateroot(VtCache *c, int psize, int dsize, int type)
{
	VtEntry e;

	memset(&e, 0, sizeof e);
	e.flags = VtEntryActive;
	e.psize = psize;
	e.dsize = dsize;
	e.type = type;
	memmove(e.score, vtzeroscore, VtScoreSize);

	return vtfileopenroot(c, &e);
}

VtFile *
vtfileopen(VtFile *r, u32int offset, int mode)
{
	ulong bn;
	VtBlock *b;

	assert(ISLOCKED(r));
	if(!r->dir){
		werrstr(ENotDir);
		return nil;
	}

	bn = offset/(r->dsize/VtEntrySize);

	b = vtfileblock(r, bn, mode);
	if(b == nil)
		return nil;
	r = vtfilealloc(r->c, b, r, offset, mode);
	vtblockput(b);
	return r;
}

VtFile *
vtfilecreate(VtFile *r, int psize, int dsize, int dir)
{
	int i;
	VtBlock *b;
	u32int bn, size;
	VtEntry e;
	int epb;
	VtFile *rr;
	u32int offset;

	assert(ISLOCKED(r));
	assert(psize <= VtMaxLumpSize);
	assert(dsize <= VtMaxLumpSize);

	if(!r->dir){
		werrstr(ENotDir);
		return nil;
	}

	epb = r->dsize/VtEntrySize;

	size = vtfilegetdirsize(r);
	/*
	 * look at a random block to see if we can find an empty entry
	 */
	offset = lnrand(size+1);
	offset -= offset % epb;

	/* try the given block and then try the last block */
	for(;;){
		bn = offset/epb;
		b = vtfileblock(r, bn, VtORDWR);
		if(b == nil)
			return nil;
		for(i=offset%r->epb; i<epb; i++){
			if(vtentryunpack(&e, b->data, i) < 0)
				continue;
			if((e.flags&VtEntryActive) == 0 && e.gen != ~0)
				goto Found;
		}
		vtblockput(b);
		if(offset == size){
			fprint(2, "vtfilecreate: cannot happen\n");
			werrstr("vtfilecreate: cannot happen");
			return nil;
		}
		offset = size;
	}

Found:
	/* found an entry - gen already set */
	e.psize = psize;
	e.dsize = dsize;
	e.flags = VtEntryActive;
	e.type = dir ? VtDirType : VtDataType;
	e.size = 0;
	memmove(e.score, vtzeroscore, VtScoreSize);
	vtentrypack(&e, b->data, i);

	offset = bn*epb + i;
	if(offset+1 > size){
		if(vtfilesetdirsize(r, offset+1) < 0){
			vtblockput(b);
			return nil;
		}
	}

	rr = vtfilealloc(r->c, b, r, offset, VtORDWR);
	vtblockput(b);
	return rr;
}

static int
vtfilekill(VtFile *r, int doremove)
{
	VtEntry e;
	VtBlock *b;

	assert(ISLOCKED(r));
	b = fileload(r, &e);
	if(b == nil)
		return -1;

	if(doremove==0 && e.size == 0){
		/* already truncated */
		vtblockput(b);
		return 0;
	}

	if(doremove){
		if(e.gen != ~0)
			e.gen++;
		e.dsize = 0;
		e.psize = 0;
		e.flags = 0;
	}else
		e.flags &= ~VtEntryLocal;
	e.type = 0;
	e.size = 0;
	memmove(e.score, vtzeroscore, VtScoreSize);
	vtentrypack(&e, b->data, r->offset % r->epb);
	vtblockput(b);

	if(doremove){
		vtfileunlock(r);
		vtfileclose(r);
	}

	return 0;
}

int
vtfileremove(VtFile *r)
{
	return vtfilekill(r, 1);
}

int
vtfiletruncate(VtFile *r)
{
	return vtfilekill(r, 0);
}

uvlong
vtfilegetsize(VtFile *r)
{
	VtEntry e;
	VtBlock *b;

	assert(ISLOCKED(r));
	b = fileload(r, &e);
	if(b == nil)
		return ~(uvlong)0;
	vtblockput(b);

	return e.size;
}

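/*
 * shrinksize zeros out the part of the tree beyond the new size:
 * at each level it clears the pointers past the cut and then descends
 * into the one partially retained block.  As a rough illustration,
 * assuming psize = dsize = 8192 (409 scores per pointer block), each
 * pointer in the root block of a depth-2 file covers 8192*409 bytes,
 * and each pointer one level down covers 8192 bytes.
 */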
static int
shrinksize(VtFile *r, VtEntry *e, uvlong size)
{
	int i, depth, type, isdir, ppb;
	uvlong ptrsz;
	uchar score[VtScoreSize];
	VtBlock *b;

	b = vtcacheglobal(r->c, e->score, e->type);
	if(b == nil)
		return -1;

	ptrsz = e->dsize;
	ppb = e->psize/VtScoreSize;
	type = e->type;
	depth = DEPTH(type);
	for(i=0; i+1<depth; i++)
		ptrsz *= ppb;

	isdir = r->dir;
	while(depth > 0){
		if(b->addr == NilBlock){
			/* not worth copying the block just so we can zero some of it */
			vtblockput(b);
			return -1;
		}

		/*
		 * invariant: each pointer in the tree rooted at b accounts for ptrsz bytes
		 */

		/* zero the pointers to unnecessary blocks */
		i = (size+ptrsz-1)/ptrsz;
		for(; i<ppb; i++)
			memmove(b->data+i*VtScoreSize, vtzeroscore, VtScoreSize);

		/* recurse (go around again) on the partially necessary block */
		i = size/ptrsz;
		size = size%ptrsz;
		if(size == 0){
			vtblockput(b);
			return 0;
		}
		ptrsz /= ppb;
		type--;
		depth--;
		memmove(score, b->data+i*VtScoreSize, VtScoreSize);
		vtblockput(b);
		b = vtcacheglobal(r->c, score, type);
		if(b == nil)
			return -1;
	}

	if(b->addr == NilBlock){
		vtblockput(b);
		return -1;
	}

	/*
	 * No one ever truncates BtDir blocks.
	 */
	if(depth==0 && !isdir && e->dsize > size)
		memset(b->data+size, 0, e->dsize-size);
	vtblockput(b);
	return 0;
}

int
vtfilesetsize(VtFile *r, uvlong size)
{
	int depth, edepth;
	VtEntry e;
	VtBlock *b;

	assert(ISLOCKED(r));
	if(size == 0)
		return vtfiletruncate(r);

	if(size > VtMaxFileSize || size > ((uvlong)MaxBlock)*r->dsize){
		werrstr(ETooBig);
		return -1;
	}

	b = fileload(r, &e);
	if(b == nil)
		return -1;

	/* quick out */
	if(e.size == size){
		vtblockput(b);
		return 0;
	}

	depth = sizetodepth(size, e.psize, e.dsize);
	edepth = DEPTH(e.type);
	if(depth < edepth){
		if(shrinkdepth(r, b, &e, depth) < 0){
			vtblockput(b);
			return -1;
		}
	}else if(depth > edepth){
		if(growdepth(r, b, &e, depth) < 0){
			vtblockput(b);
			return -1;
		}
	}

	if(size < e.size)
		shrinksize(r, &e, size);

	e.size = size;
	vtentrypack(&e, b->data, r->offset % r->epb);
	vtblockput(b);

	return 0;
}

int
vtfilesetdirsize(VtFile *r, u32int ds)
{
	uvlong size;
	int epb;

	assert(ISLOCKED(r));
	epb = r->dsize/VtEntrySize;

	size = (uvlong)r->dsize*(ds/epb);
	size += VtEntrySize*(ds%epb);
	return vtfilesetsize(r, size);
}

u32int
vtfilegetdirsize(VtFile *r)
{
	ulong ds;
	uvlong size;
	int epb;

	assert(ISLOCKED(r));
	epb = r->dsize/VtEntrySize;

	size = vtfilegetsize(r);
	ds = epb*(size/r->dsize);
	ds += (size%r->dsize)/VtEntrySize;
	return ds;
}

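/*
 * The directory size conversions above count VtEntries, not bytes.
 * As a worked example, assuming dsize = 8192 and VtEntrySize = 40
 * (so epb = 204): 500 entries pack into 8192*2 + 40*92 = 20064 bytes,
 * and 20064 bytes map back to 204*2 + 92 = 500 entries.
 */
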
int
vtfilegetentry(VtFile *r, VtEntry *e)
{
	VtBlock *b;

	assert(ISLOCKED(r));
	b = fileload(r, e);
	if(b == nil)
		return -1;
	vtblockput(b);

	return 0;
}

int
vtfilesetentry(VtFile *r, VtEntry *e)
{
	VtBlock *b;
	VtEntry ee;

	assert(ISLOCKED(r));
	b = fileload(r, &ee);
	if(b == nil)
		return -1;
	vtentrypack(e, b->data, r->offset % r->epb);
	vtblockput(b);
	return 0;
}

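/*
 * blockwalk descends one level: starting from block p, it follows the
 * pointer at the given index (or the entry's score when p is the
 * directory block holding the entry).  For VtOWRITE the old contents
 * are about to be overwritten, so a fresh local block is allocated
 * rather than fetched; for VtORDWR an immutable Venti block is
 * replaced by a local copy.  Either way the pointer (or entry) in p
 * is updated, so a writable caller gets back a block it may modify.
 */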
static VtBlock *
blockwalk(VtBlock *p, int index, VtCache *c, int mode, VtEntry *e)
{
	VtBlock *b;
	int type;
	uchar *score;
	VtEntry oe;

	switch(p->type){
	case VtDataType:
		assert(0);
	case VtDirType:
		type = e->type;
		score = e->score;
		break;
	default:
		type = p->type - 1;
		score = p->data+index*VtScoreSize;
		break;
	}
	//print("walk from %V/%d ty %d to %V ty %d\n", p->score, index, p->type, score, type);

	if(mode == VtOWRITE && vtglobaltolocal(score) == NilBlock){
		b = vtcacheallocblock(c, type);
		if(b)
			goto HaveCopy;
	}else
		b = vtcacheglobal(c, score, type);

	if(b == nil || mode == VtOREAD)
		return b;

	if(vtglobaltolocal(b->score) != NilBlock)
		return b;

	oe = *e;

	/*
	 * Copy on write.
	 */
	e->flags |= VtEntryLocal;

	b = vtblockcopy(b/*, e->tag, fs->ehi, fs->elo*/);
	if(b == nil)
		return nil;

HaveCopy:
	if(p->type == VtDirType){
		memmove(e->score, b->score, VtScoreSize);
		vtentrypack(e, p->data, index);
	}else{
		memmove(p->data+index*VtScoreSize, b->score, VtScoreSize);
	}

	return b;
}

/*
 * Change the depth of the VtFile r.
 * The entry e for r is contained in block p.
 */
static int
growdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
{
	VtBlock *b, *bb;
	VtEntry oe;

	assert(ISLOCKED(r));
	assert(depth <= VtPointerDepth);

	b = vtcacheglobal(r->c, e->score, e->type);
	if(b == nil)
		return -1;

	oe = *e;

	/*
	 * Keep adding layers until we get to the right depth
	 * or an error occurs.
	 */
	while(DEPTH(e->type) < depth){
		bb = vtcacheallocblock(r->c, e->type+1);
		if(bb == nil)
			break;
		memmove(bb->data, b->score, VtScoreSize);
		memmove(e->score, bb->score, VtScoreSize);
		e->type++;
		e->flags |= VtEntryLocal;
		vtblockput(b);
		b = bb;
	}

	vtentrypack(e, p->data, r->offset % r->epb);
	vtblockput(b);

	if(DEPTH(e->type) == depth)
		return 0;
	return -1;
}

static int
shrinkdepth(VtFile *r, VtBlock *p, VtEntry *e, int depth)
{
	VtBlock *b, *nb, *ob, *rb;
	VtEntry oe;

	assert(ISLOCKED(r));
	assert(depth <= VtPointerDepth);

	rb = vtcacheglobal(r->c, e->score, e->type);
	if(rb == nil)
		return 0;

	/*
	 * Walk down to the new root block.
	 * We may stop early, but something is better than nothing.
	 */
	oe = *e;

	ob = nil;
	b = rb;
	for(; DEPTH(e->type) > depth; e->type--){
		nb = vtcacheglobal(r->c, b->data, e->type-1);
		if(nb == nil)
			break;
		if(ob!=nil && ob!=rb)
			vtblockput(ob);
		ob = b;
		b = nb;
	}

	if(b == rb){
		vtblockput(rb);
		return 0;
	}

	/*
	 * Right now, e points at the root block rb, b is the new root block,
	 * and ob points at b.  To update:
	 *
	 *	(i) change e to point at b
	 *	(ii) zero the pointer ob -> b
	 *	(iii) free the root block
	 *
	 * p (the block containing e) must be written before
	 * anything else.
	 */

	/* (i) */
	memmove(e->score, b->score, VtScoreSize);
	vtentrypack(e, p->data, r->offset % r->epb);

	/* (ii) */
	memmove(ob->data, vtzeroscore, VtScoreSize);

	/* (iii) */
	vtblockput(rb);
	if(ob!=nil && ob!=rb)
		vtblockput(ob);
	vtblockput(b);

	if(DEPTH(e->type) == depth)
		return 0;
	return -1;
}

static int
mkindices(VtEntry *e, u32int bn, int *index)
{
	int i, np;

	memset(index, 0, (VtPointerDepth+1)*sizeof(int));

	np = e->psize/VtScoreSize;
	for(i=0; bn > 0; i++){
		if(i >= VtPointerDepth){
			werrstr("bad address 0x%lux", (ulong)bn);
			return -1;
		}
		index[i] = bn % np;
		bn /= np;
	}
	return i;
}

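/*
 * For instance, assuming psize = 8192 (np = 409), block number 1000
 * decomposes as index[0] = 182, index[1] = 2, and mkindices returns 2:
 * reaching that block requires at least two levels of pointer blocks.
 */
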
VtBlock *
vtfileblock(VtFile *r, u32int bn, int mode)
{
	VtBlock *b, *bb;
	int index[VtPointerDepth+1];
	VtEntry e;
	int i;
	int m;

	assert(ISLOCKED(r));
	assert(bn != NilBlock);

	b = fileload(r, &e);
	if(b == nil)
		return nil;

	i = mkindices(&e, bn, index);
	if(i < 0){
		vtblockput(b);
		return nil;
	}
	if(i > DEPTH(e.type)){
		if(mode == VtOREAD){
			werrstr("bad address 0x%lux", (ulong)bn);
			goto Err;
		}
		index[i] = 0;
		if(growdepth(r, b, &e, i) < 0)
			goto Err;
	}

	assert(b->type == VtDirType);

	index[DEPTH(e.type)] = r->offset % r->epb;

	/* mode for intermediate block */
	m = mode;
	if(m == VtOWRITE)
		m = VtORDWR;

	for(i=DEPTH(e.type); i>=0; i--){
		bb = blockwalk(b, index[i], r->c, i==0 ? mode : m, &e);
		if(bb == nil)
			goto Err;
		vtblockput(b);
		b = bb;
	}
	return b;
Err:
	vtblockput(b);
	return nil;
}

int
vtfileblockscore(VtFile *r, u32int bn, uchar score[VtScoreSize])
{
	VtBlock *b, *bb;
	int index[VtPointerDepth+1];
	VtEntry e;
	int i;

	assert(ISLOCKED(r));
	assert(bn != NilBlock);

	b = fileload(r, &e);
	if(b == nil)
		return -1;

	i = mkindices(&e, bn, index);
	if(i < 0){
		vtblockput(b);
		return -1;
	}
	if(i > DEPTH(e.type)){
		memmove(score, vtzeroscore, VtScoreSize);
		vtblockput(b);
		return 0;
	}

	index[DEPTH(e.type)] = r->offset % r->epb;

	for(i=DEPTH(e.type); i>=1; i--){
		bb = blockwalk(b, index[i], r->c, VtOREAD, &e);
		if(bb == nil)
			goto Err;
		vtblockput(b);
		b = bb;
		if(memcmp(b->score, vtzeroscore, VtScoreSize) == 0)
			break;
	}

	memmove(score, b->data+index[0]*VtScoreSize, VtScoreSize);
	vtblockput(b);
	return 0;

Err:
	vtblockput(b);
	return -1;
}

void
vtfileincref(VtFile *r)
{
	qlock(&r->lk);
	r->ref++;
	qunlock(&r->lk);
}

void
vtfileclose(VtFile *r)
{
	if(r == nil)
		return;
	qlock(&r->lk);
	r->ref--;
	if(r->ref){
		qunlock(&r->lk);
		return;
	}
	assert(r->ref == 0);
	qunlock(&r->lk);
	if(r->parent)
		vtfileclose(r->parent);
	memset(r, ~0, sizeof(*r));
	vtfree(r);
}

/*
 * Retrieve the block containing the entry for r.
 * If a snapshot has happened, we might need
 * to get a new copy of the block.  We avoid this
 * in the common case by caching the score for
 * the block and the last epoch in which it was valid.
 *
 * We use r->mode to tell the difference between active
 * file system VtFiles (VtORDWR) and VtFiles for the
 * snapshot file system (VtOREAD).
 */
static VtBlock*
fileloadblock(VtFile *r, int mode)
{
	char e[ERRMAX];
	u32int addr;
	VtBlock *b;

	switch(r->mode){
	default:
		assert(0);
	case VtORDWR:
		assert(r->mode == VtORDWR);
		if(r->local == 1){
			b = vtcacheglobal(r->c, r->score, VtDirType);
			if(b == nil)
				return nil;
			return b;
		}
		assert(r->parent != nil);
		if(vtfilelock(r->parent, VtORDWR) < 0)
			return nil;
		b = vtfileblock(r->parent, r->offset/r->epb, VtORDWR);
		vtfileunlock(r->parent);
		if(b == nil)
			return nil;
		memmove(r->score, b->score, VtScoreSize);
		r->local = 1;
		return b;

	case VtOREAD:
		if(mode == VtORDWR){
			werrstr("read/write lock of read-only file");
			return nil;
		}
		addr = vtglobaltolocal(r->score);
		if(addr == NilBlock)
			return vtcacheglobal(r->c, r->score, VtDirType);

		b = vtcachelocal(r->c, addr, VtDirType);
		if(b)
			return b;

		/*
		 * If it failed because the epochs don't match, the block has been
		 * archived and reclaimed.  Rewalk from the parent and get the
		 * new pointer.  This can't happen in the VtORDWR case
		 * above because blocks in the current epoch don't get
		 * reclaimed.  The fact that we're VtOREAD means we're
		 * a snapshot.  (Or else the file system is read-only, but then
		 * the archiver isn't going around deleting blocks.)
		 */
		rerrstr(e, sizeof e);
		if(strcmp(e, ELabelMismatch) == 0){
			if(vtfilelock(r->parent, VtOREAD) < 0)
				return nil;
			b = vtfileblock(r->parent, r->offset/r->epb, VtOREAD);
			vtfileunlock(r->parent);
			if(b){
				fprint(2, "vtfilealloc: lost %V found %V\n",
					r->score, b->score);
				memmove(r->score, b->score, VtScoreSize);
				return b;
			}
		}
		return nil;
	}
}

int
vtfilelock(VtFile *r, int mode)
{
	VtBlock *b;

	if(mode == -1)
		mode = r->mode;

	b = fileloadblock(r, mode);
	if(b == nil)
		return -1;
	/*
	 * The fact that we are holding b serves as the
	 * lock entitling us to write to r->b.
	 */
	assert(r->b == nil);
	r->b = b;
	return 0;
}

/*
 * Lock two (usually sibling) VtFiles.  This needs special care
 * because the Entries for both vtFiles might be in the same block.
 * We also try to lock blocks in left-to-right order within the tree.
 */
int
vtfilelock2(VtFile *r, VtFile *rr, int mode)
{
	VtBlock *b, *bb;

	if(rr == nil)
		return vtfilelock(r, mode);

	if(mode == -1)
		mode = r->mode;

	if(r->parent==rr->parent && r->offset/r->epb == rr->offset/rr->epb){
		b = fileloadblock(r, mode);
		if(b == nil)
			return -1;
		vtblockduplock(b);
		bb = b;
	}else if(r->parent==rr->parent || r->offset > rr->offset){
		bb = fileloadblock(rr, mode);
		b = fileloadblock(r, mode);
	}else{
		b = fileloadblock(r, mode);
		bb = fileloadblock(rr, mode);
	}
	if(b == nil || bb == nil){
		if(b)
			vtblockput(b);
		if(bb)
			vtblockput(bb);
		return -1;
	}

	/*
	 * The fact that we are holding b and bb serves
	 * as the lock entitling us to write to r->b and rr->b.
	 */
	r->b = b;
	rr->b = bb;
	return 0;
}

void
vtfileunlock(VtFile *r)
{
	VtBlock *b;

	if(r->b == nil){
		fprint(2, "vtfileunlock: already unlocked\n");
		abort();
	}
	b = r->b;
	r->b = nil;
	vtblockput(b);
}

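/*
 * fileload unpacks r's entry from the locked block r->b into *e and
 * returns r->b with an extra reference (vtblockduplock); the caller
 * must vtblockput the returned block when done with the entry.
 */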
static VtBlock*
fileload(VtFile *r, VtEntry *e)
{
	VtBlock *b;

	assert(ISLOCKED(r));
	b = r->b;
	if(vtentryunpack(e, b->data, r->offset % r->epb) < 0)
		return nil;
	vtblockduplock(b);
	return b;
}

static int
sizetodepth(uvlong s, int psize, int dsize)
{
	int np;
	int d;

	/* determine pointer depth */
	np = psize/VtScoreSize;
	s = (s + dsize - 1)/dsize;
	for(d = 0; s > 1; d++)
		s = (s + np - 1)/np;
	return d;
}

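/*
 * Rough illustration, assuming psize = dsize = 8192 (409 scores per
 * pointer block): a 1 GB file needs 131072 data blocks, so the loop
 * yields depth 2 (131072 -> 321 -> 1); anything that fits in a single
 * data block gets depth 0.
 */
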
long
vtfileread(VtFile *f, void *data, long count, vlong offset)
{
	int frag;
	VtBlock *b;
	VtEntry e;

	assert(ISLOCKED(f));

	vtfilegetentry(f, &e);
	if(count == 0)
		return 0;
	if(count < 0 || offset < 0){
		werrstr("vtfileread: bad offset or count");
		return -1;
	}
	if(offset >= e.size)
		return 0;

	if(offset+count > e.size)
		count = e.size - offset;

	frag = offset % e.dsize;
	if(frag+count > e.dsize)
		count = e.dsize - frag;

	b = vtfileblock(f, offset/e.dsize, VtOREAD);
	if(b == nil)
		return -1;

	memmove(data, b->data+frag, count);
	vtblockput(b);
	return count;
}

static long
filewrite1(VtFile *f, void *data, long count, vlong offset)
{
	int frag, m;
	VtBlock *b;
	VtEntry e;

	vtfilegetentry(f, &e);
	if(count < 0 || offset < 0){
		werrstr("vtfilewrite: bad offset or count");
		return -1;
	}

	frag = offset % e.dsize;
	if(frag+count > e.dsize)
		count = e.dsize - frag;

	m = VtORDWR;
	if(frag == 0 && count == e.dsize)
		m = VtOWRITE;

	b = vtfileblock(f, offset/e.dsize, m);
	if(b == nil)
		return -1;

	memmove(b->data+frag, data, count);

	if(offset+count > e.size){
		vtfilegetentry(f, &e);
		e.size = offset+count;
		vtfilesetentry(f, &e);
	}

	vtblockput(b);
	return count;
}

long
vtfilewrite(VtFile *f, void *data, long count, vlong offset)
{
	long tot, m;

	assert(ISLOCKED(f));

	tot = 0;
	m = 0;
	while(tot < count){
		m = filewrite1(f, (char*)data+tot, count-tot, offset+tot);
		if(m <= 0)
			break;
		tot += m;
	}
	if(tot==0)
		return m;
	return tot;
}

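/*
 * flushblock writes the subtree rooted at score out to Venti,
 * depth-first: children are flushed before their parent so that the
 * parent can be rewritten with the children's permanent scores, and
 * score itself is rewritten from the local cache address to the
 * Venti score once the block has been written.
 */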
static int
flushblock(VtCache *c, VtBlock *bb, uchar score[VtScoreSize], int ppb, int epb,
	int type)
{
	u32int addr;
	VtBlock *b;
	VtEntry e;
	int i;

	addr = vtglobaltolocal(score);
	if(addr == NilBlock)
		return 0;

	if(bb){
		b = bb;
		if(memcmp(b->score, score, VtScoreSize) != 0)
			abort();
	}else
	if((b = vtcachelocal(c, addr, type)) == nil)
		return -1;

	switch(type){
	case VtDataType:
		break;

	case VtDirType:
		for(i=0; i<epb; i++){
			if(vtentryunpack(&e, b->data, i) < 0)
				goto Err;
			if(flushblock(c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
				e.type) < 0)
				goto Err;
		}
		break;

	default:	/* VtPointerTypeX */
		for(i=0; i<ppb; i++){
			if(flushblock(c, nil, b->data+VtScoreSize*i, ppb, epb, type-1) < 0)
				goto Err;
		}
		break;
	}

	if(vtblockwrite(b) < 0)
		goto Err;
	memmove(score, b->score, VtScoreSize);
	if(b != bb)
		vtblockput(b);
	return 0;

Err:
	if(b != bb)
		vtblockput(b);
	return -1;
}

int
vtfileflush(VtFile *f)
{
	int ret;
	VtBlock *b;
	VtEntry e;

	assert(ISLOCKED(f));
	b = fileload(f, &e);
	if(b == nil)
		return -1;
	if(!(e.flags&VtEntryLocal)){
		vtblockput(b);
		return 0;
	}
	ret = flushblock(f->c, nil, e.score, e.psize/VtScoreSize, e.dsize/VtEntrySize,
		e.type);
	if(ret < 0){
		vtblockput(b);
		return -1;
	}

	vtentrypack(&e, b->data, f->offset % f->epb);
	vtblockput(b);
	return 0;
}

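/*
 * vtfileflushbefore flushes every block that can no longer change
 * given that all bytes before offset have been written: it walks the
 * path to the last written byte and writes out the finished data and
 * pointer blocks, which keeps the cache footprint small when a file
 * is written sequentially.
 */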
int
vtfileflushbefore(VtFile *r, u64int offset)
{
	VtBlock *b, *bb;
	VtEntry e;
	int i, base, depth, ppb, epb, doflush;
	int index[VtPointerDepth+1], j, ret;
	VtBlock *bi[VtPointerDepth+2];
	uchar *score;

	assert(ISLOCKED(r));
	if(offset == 0)
		return 0;

	b = fileload(r, &e);
	if(b == nil)
		return -1;

	/*
	 * compute path through tree for the last written byte and the next one.
	 */
	ret = -1;
	memset(bi, 0, sizeof bi);
	depth = DEPTH(e.type);
	bi[depth+1] = b;
	i = mkindices(&e, (offset-1)/e.dsize, index);
	if(i < 0)
		goto Err;
	if(i > depth)
		goto Err;
	ppb = e.psize / VtScoreSize;
	epb = e.dsize / VtEntrySize;

	/*
	 * load the blocks along the last written byte
	 */
	index[depth] = r->offset % r->epb;
	for(i=depth; i>=0; i--){
		bb = blockwalk(b, index[i], r->c, VtORDWR, &e);
		if(bb == nil)
			goto Err;
		bi[i] = bb;
		b = bb;
	}
	ret = 0;

	/*
	 * walk up the path from leaf to root, flushing anything that
	 * has been finished.
	 */
	base = e.type&~VtTypeDepthMask;
	for(i=0; i<=depth; i++){
		doflush = 0;
		if(i == 0){
			/* leaf: data or dir block */
			if(offset%e.dsize == 0)
				doflush = 1;
		}else{
			/*
			 * interior node: pointer blocks.
			 * specifically, b = bi[i] is a block whose index[i-1]'th entry
			 * points at bi[i-1].
			 */
			b = bi[i];

			/*
			 * the index entries up to but not including index[i-1] point at
			 * finished blocks, so flush them for sure.
			 */
			for(j=0; j<index[i-1]; j++)
				if(flushblock(r->c, nil, b->data+j*VtScoreSize, ppb, epb, base+i-1) < 0)
					goto Err;

			/*
			 * if index[i-1] is the last entry in the block and is global
			 * (i.e. the kid is flushed), then we can flush this block.
			 */
			if(j==ppb-1 && vtglobaltolocal(b->data+j*VtScoreSize)==NilBlock)
				doflush = 1;
		}
		if(doflush){
			if(i == depth)
				score = e.score;
			else
				score = bi[i+1]->data+index[i]*VtScoreSize;
			if(flushblock(r->c, bi[i], score, ppb, epb, base+i) < 0)
				goto Err;
		}
	}

Err:
	/* top: entry.  do this always so that the score is up-to-date */
	vtentrypack(&e, bi[depth+1]->data, index[depth]);
	for(i=0; i<nelem(bi); i++)
		if(bi[i])
			vtblockput(bi[i]);
	return ret;
}