commit 7e4524011b432ca5225d9768ba7f11a377776fbf from: rsc date: Fri Apr 27 17:52:24 2007 UTC checkpoint commit - 9ec61f3ede67c5df2726c9e67736fec8af6d8b80 commit + 7e4524011b432ca5225d9768ba7f11a377776fbf blob - 0121d4512d7bca8a694f226c10db52baca86068e blob + 4d8ab578e2cf177bcb3dcc2546627bfd48e1004f --- src/cmd/venti/srv/arena.c +++ src/cmd/venti/srv/arena.c @@ -295,7 +295,7 @@ writeaclump(Arena *arena, Clump *c, u8int *clbuf, u64i if(arena->memstats.sealed || aa + n + U32Size + arenadirsize(arena, arena->memstats.clumps + 1) > arena->size){ if(!arena->memstats.sealed){ - trace(0, "seal memstats %s", arena->name); + logerr(EOk, "seal memstats %s", arena->name); arena->memstats.sealed = 1; as.arena = arena; as.aa = start+aa; @@ -422,16 +422,10 @@ setatailstate(AState *as) return; } - /* - * Walk backward until we find the last time these were in sync. - */ - for(j=i; --j>=0; ){ + for(j=0; j<=i; j++){ a = ix->arenas[j]; if(atailcmp(&a->diskstats, &a->memstats) == 0) - break; - } - for(j++; j<=i; j++){ - a = ix->arenas[j]; + continue; qlock(&a->lock); osealed = a->diskstats.sealed; if(j == i) blob - 04fd65105eae165c1ebddc1d074517a683ee50d2 blob + bd398815e031cf80ba718ab1f5357ac67c690048 --- src/cmd/venti/srv/bloom.c +++ src/cmd/venti/srv/bloom.c @@ -26,7 +26,6 @@ bloominit(Bloom *b, vlong vsize, u8int *data) if(unpackbloomhead(b, data) < 0) return -1; -fprint(2, "bloom size %lud nhash %d\n", b->size, b->nhash); b->bitmask = (b->size<<3) - 1; b->data = data; return 0; @@ -54,7 +53,19 @@ readbloom(Part *p) */ if(bloominit(b, 0, buf) < 0){ vtfree(b); + freepart(p); return nil; + }else{ + /* + * default block size is system page size. + * the bloom filter is usually very big. + * bump the block size up to speed i/o. + */ + if(p->blocksize < (1<<20)){ + p->blocksize = 1<<20; + if(p->blocksize > p->size) + p->blocksize = p->size; + } } b->part = p; b->data = nil; blob - 275d4fc91130eca836cd0615ba5fd68313d8136f blob + 2f69a53b6595a2d5403151eb5b19902f7585b25d --- src/cmd/venti/srv/buildindex.c +++ src/cmd/venti/srv/buildindex.c @@ -119,9 +119,10 @@ threadmain(int argc, char *argv[]) fprint(2, "%T read index\n"); isectdonechan = chancreate(sizeof(void*), 0); for(i=0; insects; i++){ - if(shouldprocess(ix->sects[i])) + if(shouldprocess(ix->sects[i])){ ix->sects[i]->writechan = chancreate(sizeof(IEntry), 0); - vtproc(isectproc, ix->sects[i]); + vtproc(isectproc, ix->sects[i]); + } } for(i=0; idirty == dirty; p++){ assert(b<=p && pwritedonechan); + } + + /* + * Flush the partitions that have been written to. + */ + part = nil; + for(p=b; ppart){ + part = (*p)->part; + flushpart(part); + } } return p-b; blob - 2d3ed47c55d88b185454f3999ddc0cfac3a06b55 blob + 8f8fed4ed3cda8b7d79a84837e9af819e7b77250 --- src/cmd/venti/srv/icachewrite.c +++ src/cmd/venti/srv/icachewrite.c @@ -181,6 +181,7 @@ icachewritesect(Index *ix, ISect *is, u8int *buf) err = -1; continue; } + flushpart(is->part); addstat(StatIsectWriteBytes, nbuf); addstat(StatIsectWrite, 1); icacheclean(chunk); blob - 16b37a07c28b737ff1d2612ed1aaf742072f32ba blob + 7d477e7609db96c7bbc9d0b2c2061eb1960a823b --- src/cmd/venti/srv/index.c +++ src/cmd/venti/srv/index.c @@ -259,8 +259,14 @@ newindex(char *name, ISect **sects, int n) blocksize = sects[0]->blocksize; tabsize = sects[0]->tabsize; for(i = 0; i < n; i++){ - if(sects[i]->start != 0 || sects[i]->stop != 0 - || sects[i]->index[0] != '\0'){ + /* + * allow index, start, and stop to be set if index is correct + * and start and stop are what we would have picked. + * this allows calling fmtindex to reformat the index after + * replacing a bad index section with a freshly formatted one. + * start and stop are checked below. + */ + if(sects[i]->index[0] != '\0' && strcmp(sects[i]->index, name) != 0){ seterr(EOk, "creating new index using non-empty section %s", sects[i]->name); return nil; } @@ -318,6 +324,13 @@ newindex(char *name, ISect **sects, int n) stop = start + sects[i]->blocks - xb / n; if(i == n - 1) stop = ub; + + if(sects[i]->start != 0 || sects[i]->stop != 0) + if(sects[i]->start != start || sects[i]->stop != stop){ + seterr(EOk, "creating new index using non-empty section %s", sects[i]->name); + return nil; + } + sects[i]->start = start; sects[i]->stop = stop; namecp(sects[i]->index, name); @@ -367,8 +380,6 @@ initisect(Part *part) seterr(EAdmin, "can't read index section header: %r"); return nil; } -print("read %s at %d: %.2ux %.2ux %.2ux %.2ux\n", - part->name, PartBlank, b->data[0], b->data[1], b->data[2], b->data[3]); is = MKZ(ISect); if(is == nil){ blob - a726dfde34f324f996e5ce0ac87dc3be9ba15abf blob + e33c7b76ad268e08cd5a772fe181ece6510f2258 --- src/cmd/venti/srv/part.c +++ src/cmd/venti/srv/part.c @@ -15,6 +15,16 @@ #include #include "dat.h" #include "fns.h" + +/* TODO for linux: +don't use O_DIRECT. +use + posix_fadvise(fd, 0, 0, POSIX_FADV_NOREUSE); +after block is read and also use + posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM); +to disable readahead on the index partition. +bump block size of bloom filter higher. +*/ u32int maxblocksize; int readonly; @@ -157,6 +167,11 @@ initpart(char *name, int mode) } void +flushpart(Part *part) +{ +} + +void freepart(Part *part) { if(part == nil) @@ -397,11 +412,7 @@ rwpart(Part *part, int isread, u64int offset, u8int *b * Try to fix things up and continue. */ rerrstr(err, sizeof err); - if(strstr(err, "i/o timeout") || strstr(err, "i/o error")){ - if(sdreset(part) >= 0) - reopen(part); - continue; - }else if(strstr(err, "partition has changed")){ + if(strstr(err, "i/o timeout") || strstr(err, "i/o error") || strstr(err, "partition has changed")){ reopen(part); continue; } @@ -583,9 +594,15 @@ reopen(Part *part) fprint(2, "reopen %s\n", part->filename); if((fd = open(part->filename, ORDWR)) < 0){ - fprint(2, "reopen %s: %r\n", part->filename); - return -1; - } + if(access(part->filename, AEXIST) < 0){ + sdreset(part); + fd = open(part->filename, ORDWR); + } + if(fd < 0){ + fprint(2, "reopen %s: %r\n", part->filename); + return -1; + } + } if(fd != part->fd){ dup(fd, part->fd); close(fd); blob - b9e1eb5f4bccb691af505d3a0e9035b6b0d9e451 blob + 73c1d0689f671f7a88c689a73cb04c23b24a396f --- src/cmd/venti/srv/utils.c +++ src/cmd/venti/srv/utils.c @@ -94,9 +94,9 @@ logit(int severity, char *fmt, va_list args) return nil; if(severity != EOk){ if(argv0 == nil) - fprint(2, "%s: err %d: %s\n", argv0, severity, s); + fprint(2, "%T %s: err %d: %s\n", argv0, severity, s); else - fprint(2, "err %d: %s\n", severity, s); + fprint(2, "%T err %d: %s\n", severity, s); } return s; } blob - e9ca05364653a4486e854bc578ae14b8a0c4494c blob + af3175a5251b21527c71d9ed41f6028f4e129836 --- src/cmd/venti/srv/venti.c +++ src/cmd/venti/srv/venti.c @@ -92,10 +92,10 @@ threadmain(int argc, char *argv[]) } #endif + ventifmtinstall(); trace(TraceQuiet, "venti started"); - fprint(2, "venti: "); + fprint(2, "%T venti: "); - ventifmtinstall(); if(configfile == nil) configfile = "venti.conf"; blob - 2cdb7ba05356c28e70d3c04a0ea5b2e0bac11c5a blob + eb63d15d731afd5ca4b938d60a2c807c7dd461c6 --- src/cmd/venti/srv/verifyarena.c +++ src/cmd/venti/srv/verifyarena.c @@ -59,7 +59,7 @@ verifyarena(char *name, vlong len) u32int bs; u8int score[VtScoreSize]; - fprint(2, "verify %s\n", name); + fprint(2, "%T verify %s\n", name); memset(&arena, 0, sizeof arena); memset(&s, 0, sizeof s); @@ -68,20 +68,20 @@ verifyarena(char *name, vlong len) * read a little bit, which will include the header */ if(readblock(data, HeadSize) < 0){ - fprint(2, "%s: reading header: %r\n", name); + fprint(2, "%T %s: reading header: %r\n", name); return; } sha1(data, HeadSize, nil, &s); if(unpackarenahead(&head, data) < 0){ - fprint(2, "%s: corrupt arena header: %r\n", name); + fprint(2, "%T %s: corrupt arena header: %r\n", name); return; } if(head.version != ArenaVersion4 && head.version != ArenaVersion5) - fprint(2, "%s: warning: unknown arena version %d\n", name, head.version); + fprint(2, "%T %s: warning: unknown arena version %d\n", name, head.version); if(len != 0 && len != head.size) - fprint(2, "%s: warning: unexpected length %lld != %lld\n", name, head.size, len); + fprint(2, "%T %s: warning: unexpected length %lld != %lld\n", name, head.size, len); if(strcmp(name, "") != 0 && strcmp(head.name, name) != 0) - fprint(2, "%s: warning: unexpected name %s\n", name, head.name); + fprint(2, "%T %s: warning: unexpected name %s\n", name, head.name); /* * now we know how much to read @@ -93,7 +93,7 @@ verifyarena(char *name, vlong len) if(n + bs > e) bs = e - n; if(readblock(data, bs) < 0){ - fprint(2, "%s: read data: %r\n", name); + fprint(2, "%T %s: read data: %r\n", name); return; } sha1(data, bs, nil, &s); @@ -107,7 +107,7 @@ verifyarena(char *name, vlong len) */ bs = head.blocksize; if(readblock(data, bs) < 0){ - fprint(2, "%s: read last block: %r\n", name); + fprint(2, "%T %s: read last block: %r\n", name); return; } sha1(data, bs-VtScoreSize, nil, &s); @@ -119,18 +119,18 @@ verifyarena(char *name, vlong len) */ arena.blocksize = head.blocksize; if(unpackarena(&arena, data) < 0){ - fprint(2, "%s: corrupt arena trailer: %r\n", name); + fprint(2, "%T %s: corrupt arena trailer: %r\n", name); return; } scorecp(arena.score, &data[arena.blocksize - VtScoreSize]); if(namecmp(arena.name, head.name) != 0){ - fprint(2, "%s: wrong name in trailer: %s vs. %s\n", + fprint(2, "%T %s: wrong name in trailer: %s vs. %s\n", name, head.name, arena.name); return; } if(arena.version != head.version){ - fprint(2, "%s: wrong version in trailer: %d vs. %d\n", + fprint(2, "%T %s: wrong version in trailer: %d vs. %d\n", name, head.version, arena.version); return; } @@ -140,11 +140,11 @@ verifyarena(char *name, vlong len) * check for no checksum or the same */ if(scorecmp(score, arena.score) == 0) - fprint(2, "%s: verified score\n", name); + fprint(2, "%T %s: verified score\n", name); else if(scorecmp(zeroscore, arena.score) == 0) - fprint(2, "%s: unsealed\n", name); + fprint(2, "%T %s: unsealed\n", name); else{ - fprint(2, "%s: mismatch checksum - found=%V calculated=%V\n", + fprint(2, "%T %s: mismatch checksum - found=%V calculated=%V\n", name, arena.score, score); return; } @@ -207,7 +207,7 @@ threadmain(int argc, char *argv[]) sysfatal("read arena part header: %r"); if(unpackarenapart(&ap, data) < 0) sysfatal("corrupted arena part header: %r"); - fprint(2, "# arena part version=%d blocksize=%d arenabase=%d\n", + fprint(2, "%T # arena part version=%d blocksize=%d arenabase=%d\n", ap.version, ap.blocksize, ap.arenabase); ap.tabbase = (PartBlank+HeadSize+ap.blocksize-1)&~(ap.blocksize-1); ap.tabsize = ap.arenabase - ap.tabbase; @@ -222,21 +222,21 @@ threadmain(int argc, char *argv[]) p++; for(i=0; i= sizeof line){ - fprint(2, "warning: long arena table line: %s\n", p); + fprint(2, "%T warning: long arena table line: %s\n", p); p = q; continue; } strcpy(line, p); memset(f, 0, sizeof f); if(tokenize(line, f, nelem(f)) < 3){ - fprint(2, "warning: bad arena table line: %s\n", p); + fprint(2, "%T warning: bad arena table line: %s\n", p); p = q; continue; } @@ -245,17 +245,17 @@ threadmain(int argc, char *argv[]) start = strtoull(f[1], 0, 0); stop = strtoull(f[2], 0, 0); if(stop <= start){ - fprint(2, "%s: bad start,stop %lld,%lld\n", f[0], stop, start); + fprint(2, "%T %s: bad start,stop %lld,%lld\n", f[0], stop, start); continue; } if(seek(fd, start, 0) < 0) - fprint(2, "%s: seek to start: %r\n", f[0]); + fprint(2, "%T %s: seek to start: %r\n", f[0]); verifyarena(f[0], stop - start); } } for(i=1; i