6 static void fsMetaFlush(void *a);
7 static Snap *snapInit(Fs*);
8 static void snapClose(Snap*);
11 fsOpen(char *file, VtConn *z, long ncache, int mode)
14 uchar oscore[VtScoreSize];
34 werrstr("open %s: %r", file);
41 werrstr("diskAlloc: %r");
46 fs = vtmallocz(sizeof(Fs));
48 fs->name = vtstrdup(file);
49 fs->blockSize = diskBlockSize(disk);
50 fs->cache = cacheAlloc(disk, z, ncache, mode);
51 if(mode == OReadWrite && z)
52 fs->arch = archInit(fs->cache, disk, fs, z);
55 b = cacheLocal(fs->cache, PartSuper, 0, mode);
58 if(!superUnpack(&super, b->data)){
60 werrstr("bad super block");
65 fs->ehi = super.epochHigh;
66 fs->elo = super.epochLow;
68 //fprint(2, "%s: fs->ehi %d fs->elo %d active=%d\n", argv0, fs->ehi, fs->elo, super.active);
70 fs->source = sourceRoot(fs, super.active, mode);
71 if(fs->source == nil){
73 * Perhaps it failed because the block is copy-on-write.
74 * Do the copy and try again.
77 if(mode == OReadOnly || strcmp(e, EBadRoot) != 0)
79 b = cacheLocalData(fs->cache, super.active, BtDir, RootTag,
82 werrstr("cacheLocalData: %r");
85 if(b->l.epoch == fs->ehi){
87 werrstr("bad root source block");
90 b = blockCopy(b, RootTag, fs->ehi, fs->elo);
93 localToGlobal(super.active, oscore);
94 super.active = b->addr;
95 bs = cacheLocal(fs->cache, PartSuper, 0, OReadWrite);
98 werrstr("cacheLocal: %r");
101 superPack(&super, bs->data);
102 blockDependency(bs, b, 0, oscore, nil);
105 blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
107 fs->source = sourceRoot(fs, super.active, mode);
108 if(fs->source == nil){
109 werrstr("sourceRoot: %r");
114 //fprint(2, "%s: got fs source\n", argv0);
117 fs->file = fileRoot(fs->source);
118 fs->source->file = fs->file; /* point back */
121 werrstr("fileRoot: %r");
125 //fprint(2, "%s: got file root\n", argv0);
127 if(mode == OReadWrite){
128 fs->metaFlush = periodicAlloc(fsMetaFlush, fs, 1000);
129 fs->snap = snapInit(fs);
134 fprint(2, "%s: fsOpen error\n", argv0);
143 periodicKill(fs->metaFlush);
146 fileMetaFlush(fs->file, 0);
147 if(!fileDecRef(fs->file))
148 sysfatal("fsClose: files still in use: %r");
151 sourceClose(fs->source);
152 cacheFree(fs->cache);
157 memset(fs, ~0, sizeof(Fs));
162 fsRedial(Fs *fs, char *host)
164 if(vtredial(fs->z, host) < 0)
166 if(vtconnect(fs->z) < 0)
174 return fileIncRef(fs->file);
178 fsGetBlockSize(Fs *fs)
180 return fs->blockSize;
184 superGet(Cache *c, Super* super)
188 if((b = cacheLocal(c, PartSuper, 0, OReadWrite)) == nil){
189 fprint(2, "%s: superGet: cacheLocal failed: %r\n", argv0);
192 if(!superUnpack(super, b->data)){
193 fprint(2, "%s: superGet: superUnpack failed: %r\n", argv0);
202 superWrite(Block* b, Super* super, int forceWrite)
204 superPack(super, b->data);
207 while(!blockWrite(b, Waitlock)){
208 /* this should no longer happen */
209 fprint(2, "%s: could not write super block; "
210 "waiting 10 seconds\n", argv0);
213 while(b->iostate != BioClean && b->iostate != BioDirty){
214 assert(b->iostate == BioWriting);
218 * it's okay that b might still be dirty.
219 * that means it got written out but with an old root pointer,
220 * but the other fields went out, and those are the ones
221 * we really care about. (specifically, epochHigh; see fsSnapshot).
227 * Prepare the directory to store a snapshot.
228 * Temporary snapshots go into /snapshot/yyyy/mmdd/hhmm[.#]
229 * Archival snapshots go into /archive/yyyy/mmdd[.#].
231 * TODO This should be rewritten to eliminate most of the duplication.
234 fileOpenSnapshot(Fs *fs, char *dstpath, int doarchive)
237 char buf[30], *s, *p, *elem;
242 if((p = strrchr(dstpath, '/')) != nil){
252 if((dir = fileOpen(fs, p)) == nil)
254 f = fileCreate(dir, elem, ModeDir|ModeSnapshot|0555, "adm");
259 * a snapshot intended to be archived to venti.
261 dir = fileOpen(fs, "/archive");
264 now = *localtime(time(0));
267 snprint(buf, sizeof(buf), "%d", now.year+1900);
268 f = fileWalk(dir, buf);
270 f = fileCreate(dir, buf, ModeDir|0555, "adm");
277 snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
281 seprint(s, buf+sizeof(buf), ".%d", n);
282 f = fileWalk(dir, buf);
287 f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
294 * Just a temporary snapshot
295 * We'll use /snapshot/yyyy/mmdd/hhmm.
296 * There may well be a better naming scheme.
297 * (I'd have used hh:mm but ':' is reserved in Microsoft file systems.)
299 dir = fileOpen(fs, "/snapshot");
303 now = *localtime(time(0));
306 snprint(buf, sizeof(buf), "%d", now.year+1900);
307 f = fileWalk(dir, buf);
309 f = fileCreate(dir, buf, ModeDir|0555, "adm");
316 snprint(buf, sizeof(buf), "%02d%02d", now.mon+1, now.mday);
317 f = fileWalk(dir, buf);
319 f = fileCreate(dir, buf, ModeDir|0555, "adm");
326 snprint(buf, sizeof buf, "%02d%02d", now.hour, now.min);
330 seprint(s, buf+sizeof(buf), ".%d", n);
331 f = fileWalk(dir, buf);
336 f = fileCreate(dir, buf, ModeDir|ModeSnapshot|0555, "adm");
345 fsNeedArch(Fs *fs, uint archMinute)
354 now = *localtime(then);
356 /* back up to yesterday if necessary */
357 if(now.hour < archMinute/60
358 || now.hour == archMinute/60 && now.min < archMinute%60)
359 now = *localtime(then-86400);
361 snprint(buf, sizeof buf, "/archive/%d/%02d%02d",
362 now.year+1900, now.mon+1, now.mday);
365 f = fileOpen(fs, buf);
375 fsEpochLow(Fs *fs, u32int low)
382 werrstr("bad low epoch (must be <= %ud)", fs->ehi);
387 if((bs = superGet(fs->cache, &super)) == nil){
392 super.epochLow = low;
394 superWrite(bs, &super, 1);
402 bumpEpoch(Fs *fs, int doarchive)
404 uchar oscore[VtScoreSize];
412 * Duplicate the root block.
414 * As a hint to flchk, the garbage collector,
415 * and any (human) debuggers, store a pointer
416 * to the old root block in entry 1 of the new root block.
419 b = cacheGlobal(fs->cache, r->score, BtDir, RootTag, OReadOnly);
423 memset(&e, 0, sizeof e);
424 e.flags = VtEntryActive | VtEntryLocal | _VtEntryDir;
425 memmove(e.score, b->score, VtScoreSize);
429 b = blockCopy(b, RootTag, fs->ehi+1, fs->elo);
431 fprint(2, "%s: bumpEpoch: blockCopy: %r\n", argv0);
435 if(0) fprint(2, "%s: snapshot root from %d to %d\n", argv0, oldaddr, b->addr);
436 entryPack(&e, b->data, 1);
440 * Update the superblock with the new root and epoch.
442 if((bs = superGet(fs->cache, &super)) == nil)
446 memmove(r->score, b->score, VtScoreSize);
449 super.epochHigh = fs->ehi;
450 oldaddr = super.active;
451 super.active = b->addr;
453 super.next = oldaddr;
456 * Record that the new super.active can't get written out until
457 * the new b gets written out. Until then, use the old value.
459 localToGlobal(oldaddr, oscore);
460 blockDependency(bs, b, 0, oscore, nil);
464 * We force the super block to disk so that super.epochHigh gets updated.
465 * Otherwise, if we crash and come back, we might incorrectly treat as active
466 * some of the blocks that make up the snapshot we just created.
467 * Basically every block in the active file system and all the blocks in
468 * the recently-created snapshot depend on the super block now.
469 * Rather than record all those dependencies, we just force the block to disk.
471 * Note that blockWrite might actually (will probably) send a slightly outdated
472 * super.active to disk. It will be the address of the most recent root that has
475 superWrite(bs, &super, 1);
476 blockRemoveLink(bs, globalToLocal(oscore), BtDir, RootTag, 0);
489 if((b = superGet(fs->cache, &super)) == nil)
494 if(!fileSetQidSpace(fs->file, 0, qidMax))
501 fsSnapshot(Fs *fs, char *srcpath, char *dstpath, int doarchive)
505 assert(fs->mode == OReadWrite);
510 werrstr("file system is halted");
515 * Freeze file system activity.
520 * Get the root of the directory we're going to save.
524 src = fileOpen(fs, srcpath);
529 * It is important that we maintain the invariant that:
530 * if both b and bb are marked as Active with start epoch e
531 * and b points at bb, then no other pointers to bb exist.
533 * When bb is unlinked from b, its close epoch is set to b's epoch.
534 * A block with epoch == close epoch is
535 * treated as free by cacheAllocBlock; this aggressively
536 * reclaims blocks after they have been stored to Venti.
538 * Let's say src->source is block sb, and src->msource is block
539 * mb. Let's also say that block b holds the Entry structures for
540 * both src->source and src->msource (their Entry structures might
541 * be in different blocks, but the argument is the same).
542 * That is, right now we have:
544 * b Active w/ epoch e, holds ptrs to sb and mb.
545 * sb Active w/ epoch e.
546 * mb Active w/ epoch e.
548 * With things as they are now, the invariant requires that
549 * b holds the only pointers to sb and mb. We want to record
550 * pointers to sb and mb in new Entries corresponding to dst,
551 * which breaks the invariant. Thus we need to do something
552 * about b. Specifically, we bump the file system's epoch and
553 * then rewalk the path from the root down to and including b.
554 * This will copy-on-write as we walk, so now the state will be:
556 * b Snap w/ epoch e, holds ptrs to sb and mb.
557 * new-b Active w/ epoch e+1, holds ptrs to sb and mb.
558 * sb Active w/ epoch e.
559 * mb Active w/ epoch e.
561 * In this state, it's perfectly okay to make more pointers to sb and mb.
563 if(!bumpEpoch(fs, 0) || !fileWalkSources(src))
567 * Sync to disk. I'm not sure this is necessary, but better safe than sorry.
569 cacheFlush(fs->cache, 1);
572 * Create the directory where we will store the copy of src.
574 dst = fileOpenSnapshot(fs, dstpath, doarchive);
579 * Actually make the copy by setting dst's source and msource
582 if(!fileSnapshot(dst, src, fs->ehi-1, doarchive))
591 * Make another copy of the file system. This one is for the
592 * archiver, so that the file system we archive has the recently
593 * added snapshot both in /active and in /archive/yyyy/mmdd[.#].
598 if(!bumpEpoch(fs, 1))
604 /* BUG? can fs->arch fall out from under us here? */
605 if(doarchive && fs->arch)
611 fprint(2, "%s: fsSnapshot: %r\n", argv0);
621 fsVac(Fs *fs, char *name, uchar score[VtScoreSize])
629 f = fileOpen(fs, name);
635 if(!fileGetSources(f, &e, &ee) || !fileGetDir(f, &de)){
642 r = mkVac(fs->z, fs->blockSize, &e, &ee, &de, score);
648 vtWriteBlock(VtConn *z, uchar *buf, uint n, uint type, uchar score[VtScoreSize])
650 if(vtwrite(z, score, type, buf, n) < 0)
652 if(vtsha1check(score, buf, n) < 0)
658 mkVac(VtConn *z, uint blockSize, Entry *pe, Entry *pee, DirEntry *pde, uchar score[VtScoreSize])
674 if(globalToLocal(e.score) != NilBlock
675 || (ee.flags&VtEntryActive && globalToLocal(ee.score) != NilBlock)){
676 werrstr("can only vac paths already stored on venti");
681 * Build metadata source for root.
684 if(n+MetaHeaderSize+MetaIndexSize > sizeof buf){
685 werrstr("DirEntry too big");
688 memset(buf, 0, sizeof buf);
689 mbInit(&mb, buf, n+MetaHeaderSize+MetaIndexSize, 1);
693 mbSearch(&mb, de.elem, &i, &me);
698 mbInsert(&mb, i, &me);
701 eee.size = n+MetaHeaderSize+MetaIndexSize;
702 if(!vtWriteBlock(z, buf, eee.size, VtDataType, eee.score))
707 eee.flags = VtEntryActive;
710 * Build root source with three entries in it.
712 entryPack(&e, buf, 0);
713 entryPack(&ee, buf, 1);
714 entryPack(&eee, buf, 2);
717 memset(&root, 0, sizeof root);
718 if(!vtWriteBlock(z, buf, n, VtDirType, root.score))
724 strecpy(root.type, root.type+sizeof root.type, "vac");
725 strecpy(root.name, root.name+sizeof root.name, de.elem);
726 root.blocksize = blockSize;
727 vtrootpack(&root, buf);
728 if(!vtWriteBlock(z, buf, VtRootSize, VtRootType, score))
738 fileMetaFlush(fs->file, 1);
739 cacheFlush(fs->cache, 1);
749 fileMetaFlush(fs->file, 1);
750 cacheFlush(fs->cache, 1);
765 fsNextQid(Fs *fs, u64int *qid)
770 if((b = superGet(fs->cache, &super)) == nil)
776 * It's okay if the super block doesn't go to disk immediately,
777 * since fileMetaAlloc will record a dependency between the
778 * block holding this qid and the super block. See file.c:/^fileMetaAlloc.
780 superWrite(b, &super, 0);
792 rv = fileMetaFlush(fs->file, 1);
795 cacheFlush(fs->cache, 0);
799 fsEsearch1(File *f, char *path, u32int savetime, u32int *plo)
814 r = deeRead(dee, &de);
817 if(de.mode & ModeSnapshot){
818 if((ff = fileWalk(f, de.elem)) != nil){
819 if(fileGetSources(ff, &e, &ee))
820 if(de.mtime >= savetime && e.snap != 0)
826 else if(de.mode & ModeDir){
827 if((ff = fileWalk(f, de.elem)) != nil){
828 t = smprint("%s/%s", path, de.elem);
829 n += fsEsearch1(ff, t, savetime, plo);
844 fsEsearch(Fs *fs, char *path, u32int savetime, u32int *plo)
850 f = fileOpen(fs, path);
853 if(!fileGetDir(f, &de)){
857 if((de.mode & ModeDir) == 0){
863 n = fsEsearch1(f, path, savetime, plo);
869 fsSnapshotCleanup(Fs *fs, u32int age)
874 * Find the best low epoch we can use,
875 * given that we need to save all the unventied archives
876 * and all the snapshots younger than age.
880 fsEsearch(fs, "/archive", 0, &lo);
881 fsEsearch(fs, "/snapshot", time(0)-age*60, &lo);
885 fsSnapshotRemove(fs);
888 /* remove all snapshots that have expired */
889 /* return number of directory entries remaining */
891 fsRsearch1(File *f, char *s)
905 r = deeRead(dee, &de);
909 if(de.mode & ModeSnapshot){
910 rerrstr(e, sizeof e);
911 if((ff = fileWalk(f, de.elem)) != nil)
913 else if(strcmp(e, ESnapOld) == 0){
914 if(fileClri(f, de.elem, "adm"))
918 else if(de.mode & ModeDir){
919 if((ff = fileWalk(f, de.elem)) != nil){
920 t = smprint("%s/%s", s, de.elem);
921 if(fsRsearch1(ff, t) == 0)
922 if(fileRemove(ff, "adm"))
938 fsRsearch(Fs *fs, char *path)
943 f = fileOpen(fs, path);
946 if(!fileGetDir(f, &de)){
950 if((de.mode & ModeDir) == 0){
962 fsSnapshotRemove(Fs *fs)
965 fsRsearch(fs, "/snapshot");
998 * Snapshots happen every snapMinutes minutes.
999 * If we miss a snapshot (for example, because we
1000 * were down), we wait for the next one.
1002 if(s->snapMinutes != ~0 && s->snapMinutes != 0
1003 && now%s->snapMinutes==0 && now != s->lastSnap){
1004 if(!fsSnapshot(s->fs, nil, nil, 0))
1005 fprint(2, "%s: fsSnapshot snap: %r\n", argv0);
1010 * Archival snapshots happen at archMinute.
1011 * If we miss an archive (for example, because we
1012 * were down), we do it as soon as possible.
1014 tm = *localtime(now*60);
1015 min = tm.hour*60+tm.min;
1016 if(s->archMinute != ~0){
1018 if(min == s->archMinute && now != s->lastArch)
1020 if(s->lastArch == 0){
1022 if(fsNeedArch(s->fs, s->archMinute))
1026 fsSnapshot(s->fs, nil, nil, 1);
1032 * Snapshot cleanup happens every snaplife or every day.
1034 snaplife = s->snapLife;
1037 if(s->lastCleanup+snaplife < now){
1038 fsSnapshotCleanup(s->fs, s->snapLife);
1039 s->lastCleanup = now;
1049 s = vtmallocz(sizeof(Snap));
1051 s->tick = periodicAlloc(snapEvent, s, 10*1000);
1052 s->snapMinutes = -1;
1055 s->ignore = 5*2; /* wait five minutes for clock to stabilize */
1060 snapGetTimes(Snap *s, u32int *arch, u32int *snap, u32int *snaplen)
1070 *snap = s->snapMinutes;
1071 *arch = s->archMinute;
1072 *snaplen = s->snapLife;
1077 snapSetTimes(Snap *s, u32int arch, u32int snap, u32int snaplen)
1083 s->snapMinutes = snap;
1084 s->archMinute = arch;
1085 s->snapLife = snaplen;
1095 periodicKill(s->tick);