Commit Diff


commit - be7cbb4ef2cb02aa9ac48c02dc1ee585a8e49043
commit + 0c98da8bf8ea51d0288222f6c6ba3c125cf20f46
blob - /dev/null
blob + 9d69bfff622a0bf9122281607156cda864fc88d2 (mode 644)
--- /dev/null
+++ include/diskfs.h
@@ -0,0 +1,94 @@
+/* Copyright (c) Russ Cox, MIT; see src/libdiskfs/COPYRIGHT */
+
+AUTOLIB(diskfs)
+
+typedef struct Block Block;
+typedef struct Disk Disk;
+typedef struct Fsys Fsys;
+
+#ifndef _NFS3H_	/* in case sunrpc.h, nfs3.h are not included */
+typedef struct SunAuthUnix SunAuthUnix;
+typedef struct Nfs3Attr Nfs3Attr;
+typedef struct Nfs3Entry Nfs3Entry;
+typedef struct Nfs3Handle Nfs3Handle;
+typedef int Nfs3Status;
+#endif
+struct VtCache;
+
+/*
+ * A block device that can be read.  The function pointers are
+ * the implementation's dispatch table; callers use diskread,
+ * disksync and diskclose, which check for nil before dispatching.
+ */
+struct Disk
+{
+	Block *(*_read)(Disk *disk, u32int count, u64int offset);	/* read count bytes at offset */
+	int (*_sync)(Disk*);	/* optional: disksync returns 0 if nil */
+	void (*_close)(Disk*);	/* required: diskclose aborts if nil */
+	void *priv;		/* implementation-private data */
+};
+
+/*
+ * A run of bytes obtained from a Disk.  Release it with blockput,
+ * which calls _close.
+ */
+struct Block
+{
+	Disk *disk;
+	u32int len;		/* number of valid bytes at data */
+	uchar *data;
+	void (*_close)(Block*);	/* required: blockput aborts if nil */
+	void *priv;		/* implementation-private data */
+};
+
+/*
+ * A file system found on a Disk.  The _xxx members are filled in
+ * by the file system implementation (see fsysopenext2, fsysopenffs);
+ * the Nfs3Status operations mirror the NFS3 calls of the same name.
+ */
+struct Fsys
+{
+	u32int blocksize;	/* file system block size in bytes */
+	u32int nblock;		/* number of blocks in the file system */
+	char *type;		/* short name, e.g. "ext2" or "ffs" */
+
+	Disk *disk;
+	Block *(*_readblock)(Fsys *fsys, u64int blockno);
+	int (*_sync)(Fsys *fsys);
+	void (*_close)(Fsys *fsys);
+
+	Nfs3Status (*_root)(Fsys*, Nfs3Handle*);
+	Nfs3Status (*_access)(Fsys*, SunAuthUnix*, Nfs3Handle*, u32int, u32int*, Nfs3Attr*);
+	Nfs3Status (*_lookup)(Fsys*, SunAuthUnix*, Nfs3Handle*, char*, Nfs3Handle*);
+	Nfs3Status (*_getattr)(Fsys*, SunAuthUnix*, Nfs3Handle*, Nfs3Attr*);
+	Nfs3Status (*_readdir)(Fsys *fsys, SunAuthUnix*, Nfs3Handle *h, u32int count, u64int cookie, uchar**, u32int*, uchar*);
+	Nfs3Status (*_readfile)(Fsys *fsys, SunAuthUnix*, Nfs3Handle *h, u32int count, u64int offset, uchar**, u32int*, uchar*);
+	Nfs3Status (*_readlink)(Fsys *fsys, SunAuthUnix*, Nfs3Handle *h, char **link);
+
+	void *priv;		/* implementation-private data (Ext2*, Ffs*, ...) */
+};
+
+/*
+ * An opaque handle of up to 64 bytes.
+ * NOTE(review): there is no typedef for this struct and nothing in
+ * the code shown here refers to it -- confirm whether it is still needed.
+ */
+struct Handle
+{
+	uchar h[64];
+	int len;
+};
+
+/* blocks: hex dump for debugging, and release */
+void		blockdump(Block *b, char *desc);
+void		blockput(Block *b);
+
+/* disk constructors; diskcache(subdisk, blocksize, ncache) layers a cache over subdisk */
+Disk*	diskcache(Disk*, uint, uint);
+Disk*	diskopenventi(struct VtCache*, uchar*);
+Disk*	diskopenfile(char *file);
+
+/* generic disk operations: dispatch through the Disk's function pointers */
+Disk*	diskopen(char *file);
+void		diskclose(Disk *disk);
+Block*	diskread(Disk *disk, u32int, u64int offset);
+int		disksync(Disk *disk);
+
+/* per-format openers: return nil if the disk does not hold that format */
+Fsys*	fsysopenffs(Disk*);
+Fsys*	fsysopenkfs(Disk*);
+Fsys*	fsysopenext2(Disk*);
+Fsys*	fsysopenfat(Disk*);
+
+/* generic file system operations */
+Fsys*	fsysopen(Disk *disk);
+Block*	fsysreadblock(Fsys *fsys, u64int blockno);
+int		fsyssync(Fsys *fsys);
+void		fsysclose(Fsys *fsys);
+
+/* NFS3-style operations; each corresponds to the Fsys member of the same name */
+Nfs3Status		fsysroot(Fsys *fsys, Nfs3Handle *h);
+Nfs3Status		fsyslookup(Fsys *fsys, SunAuthUnix*, Nfs3Handle *h, char *name, Nfs3Handle *nh);
+Nfs3Status		fsysgetattr(Fsys *fsys, SunAuthUnix*, Nfs3Handle *h, Nfs3Attr *attr);
+Nfs3Status		fsysreaddir(Fsys *fsys, SunAuthUnix*, Nfs3Handle *h, u32int count, u64int cookie, uchar **e, u32int *ne, uchar*);
+Nfs3Status		fsysreadfile(Fsys *fsys, SunAuthUnix*, Nfs3Handle *h, u32int, u64int, uchar**, u32int*, uchar*);
+Nfs3Status		fsysreadlink(Fsys *fsys, SunAuthUnix*, Nfs3Handle *h, char **plink);
+Nfs3Status		fsysaccess(Fsys *fsys, SunAuthUnix*, Nfs3Handle*, u32int, u32int*, Nfs3Attr*);
+void*	emalloc(ulong size);	/* provided by caller */
+
+/* when nonzero, the ext2 permission check (inoperm) always succeeds */
+extern int allowall;
blob - /dev/null
blob + 2af95d83d9ac96aed1a71134c4b3e9ec6095b9b7 (mode 644)
--- /dev/null
+++ src/libdiskfs/COPYRIGHT
@@ -0,0 +1,27 @@
+This software was developed as part of a project at MIT:
+	$PLAN9/src/libdiskfs/*
+	$PLAN9/include/diskfs.h
+	$PLAN9/src/cmd/vbackup/*
+
+Copyright (c) 2005 Russ Cox,
+                   Massachusetts Institute of Technology
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
blob - /dev/null
blob + 1c73ed34cb4396bf3511b350cd713fee94d4a078 (mode 644)
--- /dev/null
+++ src/libdiskfs/block.c
@@ -0,0 +1,51 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <diskfs.h>
+
+/*
+ * Write a hex and ASCII dump of the bytes in block bb to file
+ * descriptor 2, preceded by a line containing desc.  Each line
+ * shows up to 16 bytes; bytes outside printable ASCII appear as '.'.
+ */
+void
+blockdump(Block *bb, char *desc)
+{
+	uchar *p, *ep;
+	int i;
+	Biobuf b;
+
+	Binit(&b, 2, OWRITE);
+
+	Bprint(&b, "%s\n", desc);
+
+	p = bb->data;
+	ep = bb->data + bb->len;
+
+	while(p < ep){
+		/* hex column; short last line is padded to keep the ASCII column aligned */
+		for(i=0; i<16; i++){
+			if(p+i < ep)
+				Bprint(&b, "%.2ux ", p[i]);
+			else
+				Bprint(&b, "   ");
+			if(i==7)
+				Bprint(&b, "- ");
+		}
+		Bprint(&b, " ");
+		/* ASCII column; 0x7F (DEL) is not printable */
+		for(i=0; i<16; i++){
+			if(p+i < ep)
+				Bprint(&b, "%c", p[i] >= 0x20 && p[i] < 0x7F ? p[i] : '.');
+			else
+				Bprint(&b, " ");
+		}
+		p += 16;
+		Bprint(&b, "\n");
+	}
+
+	/* b is on the stack: flush whatever is still buffered before returning */
+	Bterm(&b);
+}
+
+/*
+ * Release block b through its _close hook.  A nil block is
+ * ignored; a block with no hook is a programming error and aborts.
+ */
+void
+blockput(Block *b)
+{
+	if(b == nil)
+		return;
+	if(b->_close != nil){
+		(*b->_close)(b);
+		return;
+	}
+	fprint(2, "no blockPut\n");
+	abort();
+}
blob - /dev/null
blob + cdef865ea448754afb608fb1f1705c65f2c41c76 (mode 644)
--- /dev/null
+++ src/libdiskfs/cache.c
@@ -0,0 +1,311 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+
+/*
+ * Disk cache.  Caches by offset, so higher levels have
+ * to deal with alignment issues (if we get asked for the
+ * blocks at offsets 0 and 1, we'll do two reads).
+ */
+
+typedef struct DiskCache DiskCache;
+typedef struct DiskCacheBlock DiskCacheBlock;
+
+/*
+ * A caching Disk layered over subdisk.  Allocated by diskcache as a
+ * single chunk: this header, then the DiskCacheBlocks, then the
+ * hash table.
+ */
+struct DiskCache
+{
+	Disk disk;		/* must be first: Disk* is cast to DiskCache* */
+	Disk *subdisk;		/* where cache misses are read from */
+	DiskCacheBlock **h;	/* hash table, nhash chains keyed by offset */
+	DiskCacheBlock *lruhead;	/* unreferenced blocks, most recently used first */
+	DiskCacheBlock *lrutail;	/* next block to be recycled */
+	int nhash;
+	int blocksize;		/* size of the reads issued to subdisk */
+	Lock lk;		/* protects hash table, lru list, and refs */
+};
+
+/*
+ * One cache slot.  A slot is on the lru list exactly when ref is 0
+ * and it is not being filled by diskcachereadbig.
+ */
+struct DiskCacheBlock
+{
+	Block block;		/* only _close is initialized (in diskcache) */
+	Block *subblock;	/* the block read from the subdisk, or nil */
+	Lock lk;		/* NOTE(review): not used by the code shown here */
+	int ref;		/* number of outstanding Blocks handed to callers */
+	DiskCache *dc;		/* owning cache */
+	DiskCacheBlock *next;	/* hash chain */
+	DiskCacheBlock *lrunext;
+	DiskCacheBlock *lruprev;
+	u64int offset;		/* subdisk offset, or ~0 when not in the hash table */
+};
+
+/*
+ * Enter b into the hash table under offset.  The block must not
+ * already be hashed (its offset must be ~0); otherwise complain
+ * and leave everything untouched.
+ */
+static void
+addtohash(DiskCache *d, DiskCacheBlock *b, u64int offset)
+{
+	DiskCacheBlock **bucket;
+
+	if(b->offset == ~(u64int)0){
+		b->offset = offset;
+		bucket = &d->h[offset % d->nhash];
+		b->next = *bucket;
+		*bucket = b;
+		return;
+	}
+	fprint(2, "bad offset in addtohash\n");
+}
+
+/*
+ * Remove b from the hash table and mark it unhashed (offset ~0).
+ * Does nothing if b is already unhashed.
+ */
+static void
+delfromhash(DiskCache *d, DiskCacheBlock *b)
+{
+	DiskCacheBlock **pp;
+
+	if(b->offset == ~(u64int)0)
+		return;
+
+	pp = &d->h[b->offset % d->nhash];
+	while(*pp != nil){
+		if(*pp == b){
+			*pp = b->next;
+			b->offset = ~(u64int)0;
+			return;
+		}
+		pp = &(*pp)->next;
+	}
+	fprint(2, "delfromhash: didn't find in hash table\n");
+}
+
+/*
+ * Insert b at the head of the lru list, the end that
+ * getlru reaches last.
+ */
+static void
+putmru(DiskCache *d, DiskCacheBlock *b)
+{
+	DiskCacheBlock *oldhead;
+
+	oldhead = d->lruhead;
+	b->lruprev = nil;
+	b->lrunext = oldhead;
+	d->lruhead = b;
+	if(oldhead != nil)
+		oldhead->lruprev = b;
+	else
+		d->lrutail = b;
+}
+
+/*
+ * Append b at the tail of the lru list, making it the
+ * next block getlru will hand out.
+ */
+static void
+putlru(DiskCache *d, DiskCacheBlock *b)
+{
+	DiskCacheBlock *oldtail;
+
+	oldtail = d->lrutail;
+	b->lrunext = nil;
+	b->lruprev = oldtail;
+	d->lrutail = b;
+	if(oldtail != nil)
+		oldtail->lrunext = b;
+	else
+		d->lruhead = b;
+}
+
+/*
+ * Unlink b from the lru list.  b's own link fields are left as they were.
+ */
+static void
+delfromlru(DiskCache *d, DiskCacheBlock *b)
+{
+	DiskCacheBlock *prev, *next;
+
+	prev = b->lruprev;
+	next = b->lrunext;
+
+	if(prev != nil)
+		prev->lrunext = next;
+	else
+		d->lruhead = next;
+
+	if(next != nil)
+		next->lruprev = prev;
+	else
+		d->lrutail = prev;
+}
+
+/*
+ * Take the block at the tail of the lru list for reuse: unlink it
+ * from the list and the hash table and release the subdisk block
+ * it was holding.  Returns nil if the list is empty.
+ */
+static DiskCacheBlock*
+getlru(DiskCache *d)
+{
+	DiskCacheBlock *victim;
+
+	victim = d->lrutail;
+	if(victim == nil)
+		return nil;
+
+	delfromlru(d, victim);
+	delfromhash(d, victim);
+	blockput(victim->subblock);
+	victim->subblock = nil;
+	return victim;
+}
+
+/*
+ * Look up the cached block for offset; nil if it is not in the hash table.
+ */
+static DiskCacheBlock*
+findblock(DiskCache *d, u64int offset)
+{
+	DiskCacheBlock *b;
+
+	b = d->h[offset % d->nhash];
+	while(b != nil && b->offset != offset)
+		b = b->next;
+	return b;
+}
+
+/*
+ * Return the cache slot holding the blocksize bytes at offset,
+ * with its reference count incremented, reading from the subdisk
+ * on a miss.  Returns nil if every slot is in use or the read fails.
+ *
+ * NOTE(review): the cache lock is dropped around diskread and the
+ * hash table is not rechecked afterward, so two concurrent misses
+ * for the same offset would each insert a slot -- confirm that
+ * callers serialize or that duplicates are harmless.
+ */
+static DiskCacheBlock*
+diskcachereadbig(DiskCache *d, u64int offset)
+{
+	Block *b;
+	DiskCacheBlock *dcb;
+
+	lock(&d->lk);
+	dcb = findblock(d, offset);
+	if(dcb){
+//fprint(2, "found %llud in cache %p\n", (uvlong)offset, dcb);
+		/* first reference: the slot is leaving the idle list */
+		if(dcb->ref++ == 0)
+			delfromlru(d, dcb);
+		unlock(&d->lk);
+		return dcb;
+	}
+
+	/* miss: recycle the least recently used idle slot */
+	dcb = getlru(d);
+	unlock(&d->lk);
+	if(dcb == nil){
+		fprint(2, "diskcacheread: all blocks in use\n");
+		return nil;
+	}
+
+	b = diskread(d->subdisk, d->blocksize, offset);
+	lock(&d->lk);
+	if(b == nil){
+		/* give the slot back at the tail so it is reused first */
+		putlru(d, dcb);
+		dcb = nil;
+	}else{
+//fprint(2, "read %llud from disk %p\n", (uvlong)offset, dcb);
+		dcb->subblock = b;
+		dcb->ref++;
+		addtohash(d, dcb, offset);
+	}
+	unlock(&d->lk);
+	return dcb;
+}
+
+/*
+ * _close hook for Blocks returned by diskcacheread: drop the
+ * reference on the underlying cache slot, returning the slot to
+ * the lru list when the last reference goes, and free the Block.
+ */
+static void
+diskcacheblockclose(Block *bb)
+{
+	DiskCacheBlock *dcb;
+	DiskCache *dc;
+
+	dcb = bb->priv;
+	dc = dcb->dc;
+
+	lock(&dc->lk);
+	dcb->ref--;
+	if(dcb->ref == 0)
+		putmru(dc, dcb);
+	unlock(&dc->lk);
+	free(bb);
+}
+
+/*
+ * _read hook of the cache: return a Block for len bytes at offset.
+ * The request must lie within a single cache block.  The returned
+ * Block points into the cached data (no copy is made) and holds a
+ * reference on the cache slot until it is released with blockput.
+ * If the cached block is shorter than requested, the result is
+ * truncated; nil is returned if nothing at all is available.
+ *
+ * NOTE(review): with len == 0, offset+len-1 refers to the byte
+ * before offset, so a zero-length request at a block boundary is
+ * rejected as crossing a boundary -- confirm this is acceptable.
+ */
+static Block*
+diskcacheread(Disk *dd, u32int len, u64int offset)
+{
+	int frag, dlen;
+	DiskCache *d = (DiskCache*)dd;
+	DiskCacheBlock *dcb;
+	Block *b;
+
+	if(offset/d->blocksize != (offset+len-1)/d->blocksize){
+		fprint(2, "diskBigRead: request for block crossing big block boundary\n");
+		return nil;
+	}
+
+	b = mallocz(sizeof(Block), 1);
+	if(b == nil)
+		return nil;
+
+	/* frag is the position of the request inside its cache block */
+	frag = offset%d->blocksize;
+
+	dcb = diskcachereadbig(d, offset-frag);
+	if(dcb == nil){
+		free(b);
+		return nil;
+	}
+	b->priv = dcb;
+	b->_close = diskcacheblockclose;
+	b->data = dcb->subblock->data+frag;
+
+	dlen = dcb->subblock->len;
+	if(frag+len >= dlen){
+		if(frag >= dlen){
+			/* blockput drops the slot reference and frees b */
+			blockput(b);
+			return nil;
+		}
+		len = dlen-frag;
+	}
+	b->len = len;
+//fprint(2, "offset %llud at pointer %p %lux\n", (uvlong)offset, b->data, *(ulong*)(b->data+4));
+	return b;
+}
+
+/* 
+ * It's okay to remove these from the hash table. 
+ * Either the block is in use by someone or it is on
+ * the lru list.  If it's in use it will get put on the lru
+ * list once the refs go away.
+ */
+static int
+diskcachesync(Disk *dd)
+{
+	DiskCache *d;
+	DiskCacheBlock *b, *nextb;
+	int i;
+
+	d = (DiskCache*)dd;
+
+	/* empty every hash chain so that later reads go back to the subdisk */
+	lock(&d->lk);
+	for(i=0; i<d->nhash; i++){
+		b = d->h[i];
+		d->h[i] = nil;
+		while(b != nil){
+			nextb = b->next;
+			b->next = nil;
+			b->offset = ~(u64int)0;
+			b = nextb;
+		}
+	}
+	unlock(&d->lk);
+
+	return disksync(d->subdisk);
+}
+
+/*
+ * _close hook of the cache: release the subdisk blocks held by
+ * idle cache slots, close the subdisk, and free the cache.
+ * Slots that are still referenced are not on the lru list and
+ * are not released here; callers must put their blocks first.
+ */
+static void
+diskcacheclose(Disk *dd)
+{
+	DiskCacheBlock *b;
+	DiskCache *d = (DiskCache*)dd;
+
+	/*
+	 * Release the blocks before closing the disk they came from:
+	 * a block's _close hook may need its disk to still be open.
+	 */
+	for(b=d->lruhead; b; b=b->lrunext){
+		blockput(b->subblock);
+		b->subblock = nil;
+	}
+	diskclose(d->subdisk);
+	free(d);
+}
+		
+/*
+ * Report whether n is prime, by trial division.
+ * Needn't be fast: it is only used to pick the hash table size.
+ */
+static int
+isprime(int n)
+{
+	int i;
+
+	if(n < 2)
+		return 0;
+	/* i <= n/i instead of i*i <= n, so that large n cannot overflow int */
+	for(i=2; i<=n/i; i++)
+		if(n%i == 0)
+			return 0;
+	return 1;
+}
+
+/*
+ * Create a cache of ncache blocks of blocksize bytes each in
+ * front of subdisk and return it as a Disk.  Closing the returned
+ * disk also closes subdisk.  Returns nil, with the error string
+ * set, if the arguments are unusable or memory cannot be allocated.
+ */
+Disk*
+diskcache(Disk *subdisk, uint blocksize, uint ncache)
+{
+	int nhash;
+	uint i;
+	DiskCache *d;
+	DiskCacheBlock *b;
+
+	/* blocksize is stored in an int and used as a divisor by diskcacheread */
+	if(blocksize == 0 || blocksize > 0x7FFFFFFF){
+		werrstr("diskcache: bad block size %ud", blocksize);
+		return nil;
+	}
+	/* nhash is a divisor too; also keep the allocation size from overflowing */
+	if(ncache == 0 || ncache > 0x7FFFFFFF/(sizeof(DiskCacheBlock)+sizeof(DiskCacheBlock*))){
+		werrstr("diskcache: bad cache size %ud", ncache);
+		return nil;
+	}
+
+	/* hash table size: the largest prime not exceeding ncache (or 1) */
+	nhash = ncache;
+	while(nhash > 1 && !isprime(nhash))
+		nhash--;
+	d = mallocz(sizeof(DiskCache)+ncache*sizeof(DiskCacheBlock)+nhash*sizeof(DiskCacheBlock*), 1);
+	if(d == nil)
+		return nil;
+
+	/* layout of the single allocation: header, blocks, hash table */
+	b = (DiskCacheBlock*)&d[1];
+	d->h = (DiskCacheBlock**)&b[ncache];
+	d->nhash = nhash;
+	d->blocksize = blocksize;
+	d->subdisk = subdisk;
+	d->disk._read = diskcacheread;
+	d->disk._sync = diskcachesync;
+	d->disk._close = diskcacheclose;
+
+	/* every slot starts out unhashed and idle */
+	for(i=0; i<ncache; i++){
+		b[i].block._close = diskcacheblockclose;
+		b[i].offset = ~(u64int)0;
+		b[i].dc = d;
+		putlru(d, &b[i]);
+	}
+
+	return &d->disk;
+}
blob - /dev/null
blob + 001a19de034aa8740eaa6208845458d78af7b6de (mode 644)
--- /dev/null
+++ src/libdiskfs/disk.c
@@ -0,0 +1,39 @@
+#include <u.h>
+#include <libc.h>
+#include <bio.h>
+#include <diskfs.h>
+
+/*
+ * Read count bytes at offset from disk by calling its _read hook.
+ * Returns nil if disk is nil, if the disk has no _read hook
+ * (the error string is set), or if the hook itself returns nil.
+ */
+Block*
+diskread(Disk *disk, u32int count, u64int offset)
+{
+	if(disk == nil)
+		return nil;
+	if(disk->_read != nil)
+		return (*disk->_read)(disk, count, offset);
+
+	werrstr("no disk read dispatch function");
+	return nil;
+}
+
+/*
+ * Call the disk's _sync hook and return its result.
+ * A nil disk or a disk without the hook counts as success (0).
+ */
+int
+disksync(Disk *disk)
+{
+	if(disk == nil || disk->_sync == nil)
+		return 0;
+	return (*disk->_sync)(disk);
+}
+
+/*
+ * Close disk through its _close hook.  A nil disk is ignored;
+ * a disk with no hook is a programming error and aborts.
+ */
+void
+diskclose(Disk *disk)
+{
+	if(disk == nil)
+		return;
+	if(disk->_close != nil){
+		(*disk->_close)(disk);
+		return;
+	}
+	fprint(2, "no diskClose\n");
+	abort();
+}
blob - /dev/null
blob + 17039c0f396aeeb885f98c745e5dc884d3bcca9e (mode 644)
--- /dev/null
+++ src/libdiskfs/ext2.c
@@ -0,0 +1,742 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <sunrpc.h>
+#include <nfs3.h>
+#include <diskfs.h>
+#include "ext2.h"
+
+/*
+ * Set to 1 to trace block lookups and directory parsing.
+ * Off by default, as in ffs.c: the traces are written on
+ * every lookup and readdir.
+ */
+#define debug 0
+
+static int ext2sync(Fsys*);
+static void ext2close(Fsys*);
+static Block* ext2blockread(Fsys*, u64int);
+
+static Nfs3Status ext2root(Fsys*, Nfs3Handle*);
+static Nfs3Status ext2getattr(Fsys*, SunAuthUnix *au, Nfs3Handle*, Nfs3Attr*);
+static Nfs3Status ext2lookup(Fsys*, SunAuthUnix *au, Nfs3Handle*, char*, Nfs3Handle*);
+static Nfs3Status ext2readfile(Fsys*, SunAuthUnix *au, Nfs3Handle*, u32int, u64int, uchar**, u32int*, u1int*);
+static Nfs3Status ext2readlink(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char **link);
+static Nfs3Status ext2readdir(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int, u64int, uchar**, u32int*, u1int*);
+static Nfs3Status ext2access(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr);
+
+/*
+ * Try to interpret disk as an ext2 file system.  Returns a new
+ * Fsys on success.  If the superblock cannot be read or is not
+ * ext2 (ext2sync fails), frees what was allocated here and
+ * returns nil; the disk itself is left open for the caller.
+ */
+Fsys*
+fsysopenext2(Disk *disk)
+{
+	Ext2 *fs;
+	Fsys *fsys;
+
+	fsys = emalloc(sizeof(Fsys));
+	fs = emalloc(sizeof(Ext2));
+	fs->disk = disk;
+	fsys->priv = fs;
+	fs->fsys = fsys;
+	fsys->type = "ext2";
+	fsys->_readblock = ext2blockread;
+	fsys->_sync = ext2sync;
+	/* without this hook there is no way to free fs and fsys through the Fsys */
+	fsys->_close = ext2close;
+	fsys->_root = ext2root;
+	fsys->_getattr = ext2getattr;
+	fsys->_access = ext2access;
+	fsys->_lookup = ext2lookup;
+	fsys->_readfile = ext2readfile;
+	fsys->_readlink = ext2readlink;
+	fsys->_readdir = ext2readdir;
+
+	/* ext2sync reads and checks the superblock and fills in the geometry */
+	if(ext2sync(fsys) < 0)
+		goto error;
+
+	return fsys;
+
+error:
+	ext2close(fsys);
+	return nil;
+}
+
+/*
+ * Free the Ext2 state and the Fsys itself.  The disk is not closed.
+ */
+static void
+ext2close(Fsys *fsys)
+{
+	free(fsys->priv);
+	free(fsys);
+}
+
+/*
+ * Return the descriptor of block group i, or nil if i is out of
+ * range or the read fails.  The descriptor points into the block
+ * stored in *pb, which the caller must blockput when done with it.
+ */
+static Group*
+ext2group(Ext2 *fs, u32int i, Block **pb)
+{
+	Block *b;
+	u64int blockno;
+	uint slot;
+
+	if(i >= fs->ngroup)
+		return nil;
+
+	/* descriptors are packed descperblock to a block, starting at groupaddr */
+	blockno = fs->groupaddr + i/fs->descperblock;
+	slot = i%fs->descperblock;
+
+	b = diskread(fs->disk, fs->blocksize, blockno*fs->blocksize);
+	if(b == nil)
+		return nil;
+	*pb = b;
+	return (Group*)(b->data + slot*GroupSize);
+}
+
+/*
+ * Read file system block vbno.  Returns nil if the block number
+ * is out of range, lies before the first data block, is not
+ * marked allocated in its group's block bitmap, or cannot be read.
+ */
+static Block*
+ext2blockread(Fsys *fsys, u64int vbno)
+{
+	Block *bitb;
+	Group *g;
+	Block *gb;
+	uchar *bits;
+	u32int bno, boff, bitblock;
+	int allocated;
+	Ext2 *fs;
+
+	fs = fsys->priv;
+	if(vbno >= fs->nblock)
+		return nil;
+	bno = vbno;
+	if(bno != vbno)
+		return nil;
+
+	/* blocks before the first data block are not handed out */
+	if(bno < fs->firstblock)
+		return nil;
+
+	bno -= fs->firstblock;
+	if((g = ext2group(fs, bno/fs->blockspergroup, &gb)) == nil){
+		if(debug)
+			fprint(2, "loading group: %r...");
+		return nil;
+	}
+	/* g points into gb: copy the one field needed and release gb on every path */
+	bitblock = g->bitblock;
+	blockput(gb);
+
+	if((bitb = diskread(fs->disk, fs->blocksize, (u64int)bitblock*fs->blocksize)) == nil){
+		if(debug)
+			fprint(2, "loading bitblock: %r...");
+		return nil;
+	}
+	bits = bitb->data;
+	boff = bno%fs->blockspergroup;
+	/* a bit beyond the end of the bitmap block counts as unallocated */
+	allocated = (boff>>3) < bitb->len && (bits[boff>>3] & (1<<(boff&7))) != 0;
+	blockput(bitb);
+	if(!allocated){
+		if(debug)
+			fprint(2, "block %d not allocated...", bno);
+		return nil;
+	}
+
+	bno += fs->firstblock;
+	return diskread(fs->disk, fs->blocksize, (u64int)bno*fs->blocksize);
+}
+
+/*
+ * Read the block with number bno as found in an inode or
+ * indirect block.  size is currently ignored.
+ * NOTE(review): firstblock is added to bno before the lookup --
+ * confirm that this matches the numbering used on disk.
+ */
+static Block*
+ext2datablock(Ext2 *fs, u32int bno, int size)
+{
+	u32int vbno;
+
+	vbno = bno+fs->firstblock;
+	return ext2blockread(fs->fsys, vbno);
+}
+
+/*
+ * Return the bno'th block of the file described by ino, or nil
+ * if it cannot be read.  The first NDIRBLOCKS blocks are listed
+ * in the inode itself; later ones are found by walking the
+ * single, double, or triple indirect tree rooted in the inode.
+ * size is passed through to ext2datablock.
+ */
+static Block*
+ext2fileblock(Ext2 *fs, Inode *ino, u32int bno, int size)
+{
+	int depth, slot;
+	Block *b;
+	u32int *a;
+	u32int obno, addr, ppb, idx;
+	u64int span;
+
+	obno = bno;
+	if(bno < NDIRBLOCKS){
+		if(debug)
+			fprint(2, "fileblock %d -> %d...",
+				bno, ino->block[bno]);
+		return ext2datablock(fs, ino->block[bno], size);
+	}
+	bno -= NDIRBLOCKS;
+	ppb = fs->blocksize/4;	/* block pointers per indirect block */
+	if(ppb == 0)
+		return nil;
+
+	/*
+	 * Find the tree that holds bno.  A tree of depth n covers
+	 * ppb^n file blocks.  On exit bno is relative to the start
+	 * of the chosen tree and span is that tree's coverage.
+	 * span is 64-bit so that ppb^3 cannot overflow.
+	 */
+	slot = INDBLOCK;	/* INDBLOCK, DINDBLOCK, TINDBLOCK are consecutive */
+	span = ppb;
+	for(depth=1; depth<=3; depth++){
+		if(bno < span)
+			break;
+		bno -= span;
+		span *= ppb;
+		slot++;
+	}
+	if(depth > 3){
+		fprint(2, "ext2fileblock %ud: too big\n", obno);
+		return nil;
+	}
+
+	/*
+	 * Walk down the tree.  The index at every level is derived
+	 * from bno, so bno must not be overwritten by the block
+	 * addresses read along the way.
+	 */
+	addr = ino->block[slot];
+	for(; depth>0; depth--){
+		span /= ppb;
+		idx = (bno/span)%ppb;
+		b = ext2datablock(fs, addr, fs->blocksize);
+		if(b == nil)
+			return nil;
+		if(b->len < (idx+1)*4){
+			blockput(b);
+			return nil;
+		}
+		a = (u32int*)b->data;
+		addr = a[idx];
+		blockput(b);
+	}
+	return ext2datablock(fs, addr, size);
+}
+
+/*
+ * Check the superblock magic number.
+ * Returns 0 if it matches, -1 with the error string set otherwise.
+ */
+static int
+checksuper(Super *super)
+{
+	if(super->magic == SUPERMAGIC)
+		return 0;
+
+	werrstr("bad magic 0x%ux wanted 0x%ux", super->magic, SUPERMAGIC);
+	return -1;
+}
+
+/*
+ * Read the superblock and (re)compute the file system geometry
+ * kept in the Ext2 and Fsys structures.  Returns 0 on success,
+ * or -1 with the error string set if the superblock cannot be
+ * read, has the wrong magic, or has unusable geometry.
+ */
+static int
+ext2sync(Fsys *fsys)
+{
+	enum { MaxLogBlockSize = 20 };	/* MINBLOCKSIZE<<20 still fits in an int */
+	int i;
+	Group *g;
+	Block *b;
+	Super *super;
+	Ext2 *fs;
+	Disk *disk;
+
+	fs = fsys->priv;
+	disk = fs->disk;
+	if((b = diskread(disk, SBSIZE, SBOFF)) == nil)
+		goto error;
+	if(b->len < SBSIZE){
+		werrstr("short superblock");
+		goto error;
+	}
+	super = (Super*)b->data;
+	if(checksuper(super) < 0)
+		goto error;
+	/*
+	 * These fields are used below as a shift count and as
+	 * divisors here and in the lookup code; refuse values
+	 * that would overflow or divide by zero.
+	 */
+	if(super->logblocksize > MaxLogBlockSize
+	|| super->blockspergroup == 0 || super->inospergroup == 0){
+		werrstr("bad superblock: logblocksize %ud blockspergroup %ud inospergroup %ud",
+			super->logblocksize, super->blockspergroup, super->inospergroup);
+		goto error;
+	}
+	fs->blocksize = MINBLOCKSIZE<<super->logblocksize;
+	fs->nblock = super->nblock;
+	fs->ngroup = (super->nblock+super->blockspergroup-1)
+		/ super->blockspergroup;
+	fs->inospergroup = super->inospergroup;
+	fs->blockspergroup = super->blockspergroup;
+	fs->inosperblock = fs->blocksize / InodeSize;
+	/* the group descriptors start in the block after the superblock */
+	if(fs->blocksize == SBOFF)
+		fs->groupaddr = 2;
+	else
+		fs->groupaddr = 1;
+	fs->descperblock = fs->blocksize / GroupSize;
+	fs->firstblock = super->firstdatablock;
+	blockput(b);
+
+	fsys->blocksize = fs->blocksize;
+	fsys->nblock = fs->nblock;
+	fprint(2, "ext2 %d %d-byte blocks, first data block %d, %d groups of %d\n",
+		fs->nblock, fs->blocksize, fs->firstblock, fs->ngroup, fs->blockspergroup);
+
+	/* change to if(1) to list the bitmap block of every group */
+	if(0){
+		for(i=0; i<fs->ngroup; i++)
+			if((g = ext2group(fs, i, &b)) != nil){
+				fprint(2, "grp %d: bitblock=%d\n", i, g->bitblock);
+				blockput(b);
+			}
+	}
+	return 0;
+
+error:
+	/* b is nil if the read itself failed; blockput accepts nil */
+	blockput(b);
+	return -1;
+}
+
+/*
+ * Build the 4-byte handle for inode number ino: the low 32 bits
+ * of ino, most significant byte first.
+ */
+static void
+mkhandle(Nfs3Handle *h, u64int ino)
+{
+	u32int v;
+
+	v = ino;
+	h->len = 4;
+	h->h[0] = v>>24;
+	h->h[1] = v>>16;
+	h->h[2] = v>>8;
+	h->h[3] = v;
+}
+
+/*
+ * Decode a 32-bit big-endian integer from the 4 bytes at p.
+ */
+static u32int
+byte2u32(uchar *p)
+{
+	/* widen before shifting: p[0]<<24 as an int would shift into the sign bit */
+	return ((u32int)p[0]<<24) | ((u32int)p[1]<<16) | ((u32int)p[2]<<8) | (u32int)p[3];
+}
+
+/*
+ * Translate handle h into an inode number and load that inode.
+ * If pinum is not nil, the inode number is stored there.  The
+ * inode is copied into *ino.  Returns Nfs3ErrBadHandle if the
+ * handle is malformed or names an inode that cannot exist, and
+ * Nfs3ErrIo if the disk cannot be read.
+ */
+static Nfs3Status
+handle2ino(Ext2 *fs, Nfs3Handle *h, u32int *pinum, Inode *ino)
+{
+	uint i, ioff, slot;
+	u32int inum;
+	u32int addr;
+	Block *gb, *b;
+	Group *g;
+
+	if(h->len != 4)
+		return Nfs3ErrBadHandle;
+	inum = byte2u32(h->h);
+	if(pinum)
+		*pinum = inum;
+	/* inode numbers start at 1; with 0, inum-1 below would wrap around */
+	if(inum == 0)
+		return Nfs3ErrBadHandle;
+	i = (inum-1) / fs->inospergroup;
+	if(i >= fs->ngroup)
+		return Nfs3ErrBadHandle;
+	ioff = (inum-1) % fs->inospergroup;
+	if((g = ext2group(fs, i, &gb)) == nil)
+		return Nfs3ErrIo;
+	/* block of the group's inode table that holds this inode */
+	addr = g->inodeaddr + ioff/fs->inosperblock;
+	blockput(gb);
+	if((b = diskread(fs->disk, fs->blocksize, (u64int)addr*fs->blocksize)) == nil)
+		return Nfs3ErrIo;
+	slot = ioff%fs->inosperblock;
+	/* a short read must not be indexed past its end */
+	if(b->len < (slot+1)*sizeof(Inode)){
+		blockput(b);
+		return Nfs3ErrIo;
+	}
+	*ino = ((Inode*)b->data)[slot];
+	blockput(b);
+	return Nfs3Ok;
+}
+
+/*
+ * Store the handle of the root directory (inode ROOTINODE) in *h.
+ */
+static Nfs3Status
+ext2root(Fsys *fsys, Nfs3Handle *h)
+{
+	USED(fsys);
+	mkhandle(h, ROOTINODE);
+	return Nfs3Ok;
+}
+
+/*
+ * Fill in the NFS3 attributes for inode ino, whose number is inum.
+ * Returns Nfs3ErrBadHandle if the inode's type is not one that
+ * NFS3 can describe, Nfs3Ok otherwise.
+ */
+static Nfs3Status
+ino2attr(Ext2 *fs, Inode *ino, u32int inum, Nfs3Attr *attr)
+{
+	u32int rdev;
+
+	USED(fs);
+	attr->type = -1;
+	switch(ino->mode&IFMT){
+	case IFIFO:
+		attr->type = Nfs3FileFifo;
+		break;
+	case IFCHR:
+		attr->type = Nfs3FileChar;
+		break;
+	case IFDIR:
+		attr->type = Nfs3FileDir;
+		break;
+	case IFBLK:
+		attr->type = Nfs3FileBlock;
+		break;
+	case IFREG:
+		attr->type = Nfs3FileReg;
+		break;
+	case IFLNK:
+		attr->type = Nfs3FileSymlink;
+		break;
+	case IFSOCK:
+		attr->type = Nfs3FileSocket;
+		break;
+	case IFWHT:
+	default:
+		return Nfs3ErrBadHandle;
+	}
+
+	attr->mode = ino->mode&07777;
+	attr->nlink = ino->nlink;
+	attr->uid = ino->uid;
+	attr->gid = ino->gid;
+	attr->size = ino->size;
+	/*
+	 * The inode's block count is kept in 512-byte sectors, not
+	 * in file system blocks.  Multiply in 64 bits so that large
+	 * files do not wrap.
+	 */
+	attr->used = (u64int)ino->nblock*BYTESPERSEC;
+	if(attr->type==Nfs3FileBlock || attr->type==Nfs3FileChar){
+		/* device files keep the device number in the first block pointer */
+		rdev = ino->block[0];
+		attr->major = (rdev>>8)&0xFF;
+		attr->minor = rdev & 0xFFFF00FF;
+	}else{
+		attr->major = 0;
+		attr->minor = 0;
+	}
+	attr->fsid = 0;
+	attr->fileid = inum;
+	attr->atime.sec = ino->atime;
+	attr->atime.nsec = 0;
+	attr->mtime.sec = ino->mtime;
+	attr->mtime.nsec = 0;
+	attr->ctime.sec = ino->ctime;
+	attr->ctime.nsec = 0;
+	return Nfs3Ok;
+}
+
+/*
+ * Report whether gid is one of the supplementary groups listed in au.
+ */
+static int
+ingroup(SunAuthUnix *au, uint gid)
+{
+	int i, found;
+
+	found = 0;
+	for(i=0; !found && i<au->ng; i++)
+		found = au->g[i] == gid;
+	return found;
+}
+
+/*
+ * Check that the caller described by au has all the permission
+ * bits in need (AREAD, AEXEC, ...) on inode ino.  The owner bits
+ * apply if the uid matches, else the group bits if the caller is
+ * in the inode's group, else the bits for others.  Always
+ * succeeds when allowall is set.
+ */
+static Nfs3Status
+inoperm(Inode *ino, SunAuthUnix *au, int need)
+{
+	int have, shift;
+
+	if(allowall)
+		return Nfs3Ok;
+
+	if(ino->uid == au->uid)
+		shift = 6;
+	else if(ino->gid == au->gid || ingroup(au, ino->gid))
+		shift = 3;
+	else
+		shift = 0;
+	have = (ino->mode&0777) >> shift;
+
+	if((have&need) == need)
+		return Nfs3Ok;
+	return Nfs3ErrNotOwner;	/* really EPERM */
+}
+
+/*
+ * Return the attributes of the file named by handle h.
+ * No permission check is made: anyone can getattr.
+ */
+static Nfs3Status
+ext2getattr(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, Nfs3Attr *attr)
+{
+	Ext2 *fs;
+	Inode ino;
+	u32int inum;
+	Nfs3Status ok;
+
+	USED(au);	/* anyone can getattr */
+
+	fs = fsys->priv;
+	ok = handle2ino(fs, h, &inum, &ino);
+	if(ok != Nfs3Ok)
+		return ok;
+	return ino2attr(fs, &ino, inum, attr);
+}
+
+/*
+ * Report in *got which of the access rights in want the caller
+ * described by au has on the file named by h, and return the
+ * file's attributes in *attr.  Only read, lookup and execute
+ * rights are ever granted.
+ */
+static Nfs3Status
+ext2access(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr)
+{
+	int have;
+	Inode ino;
+	u32int inum;
+	Ext2 *fs;
+	Nfs3Status ok;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, &inum, &ino)) != Nfs3Ok)
+		return ok;
+
+	if(allowall){
+		/* agree with inoperm, which lets every request through when allowall is set */
+		have = AREAD|AEXEC;
+	}else{
+		have = ino.mode&0777;
+		if(ino.uid == au->uid)
+			have >>= 6;
+		else if(ino.gid == au->gid || ingroup(au, ino.gid))
+			have >>= 3;
+	}
+
+	*got = 0;
+	if((want&Nfs3AccessRead) && (have&AREAD))
+		*got |= Nfs3AccessRead;
+	/* the execute bit means lookup on directories and execute on everything else */
+	if((want&Nfs3AccessLookup) && (ino.mode&IFMT)==IFDIR && (have&AEXEC))
+		*got |= Nfs3AccessLookup;
+	if((want&Nfs3AccessExecute) && (ino.mode&IFMT)!=IFDIR && (have&AEXEC))
+		*got |= Nfs3AccessExecute;
+
+	return ino2attr(fs, &ino, inum, attr);
+}
+
+/*
+ * Look up name in the directory named by handle h and store the
+ * handle of the entry found in *nh.  Returns Nfs3ErrNotDir if h
+ * is not a directory, Nfs3ErrNoEnt if there is no such entry, or
+ * the error from the handle translation or permission check.
+ * Directory blocks that cannot be read are skipped, and a
+ * malformed entry ends the scan of its block.
+ */
+static Nfs3Status
+ext2lookup(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char *name, Nfs3Handle *nh)
+{
+	u32int nblock;
+	u32int i;
+	uchar *p, *ep;
+	Dirent *de;
+	Inode ino;
+	Block *b;
+	Ext2 *fs;
+	Nfs3Status ok;
+	int len, want, doff;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+
+	if((ino.mode&IFMT) != IFDIR)
+		return Nfs3ErrNotDir;
+
+	if((ok = inoperm(&ino, au, AEXEC)) != Nfs3Ok)
+		return ok;
+
+	len = strlen(name);
+	nblock = (ino.size+fs->blocksize-1) / fs->blocksize;
+	if(debug) fprint(2, "%d blocks in dir...", nblock);
+	for(i=0; i<nblock; i++){
+		/* the last block may be partial; a remainder of 0 means it is full */
+		want = fs->blocksize;
+		if(i==nblock-1 && ino.size%fs->blocksize != 0)
+			want = ino.size % fs->blocksize;
+		b = ext2fileblock(fs, &ino, i, want);
+		if(b == nil){
+			if(debug) fprint(2, "empty block...");
+			continue;
+		}
+		p = b->data;
+		ep = p+b->len;
+		while(p < ep){
+			doff = p - b->data;	/* offset of this entry, for diagnostics */
+			/* the fixed part of an entry must fit before it is examined */
+			if(ep - p < MinDirentSize){
+				if(debug)
+					fprint(2, "truncated entry at offset %d of %d\n", doff, b->len);
+				break;
+			}
+			de = (Dirent*)p;
+			if(de->reclen == 0){
+				if(debug)
+					fprint(2, "reclen 0 at offset %d of %d\n", doff, b->len);
+				break;
+			}
+			p += de->reclen;
+			if(p > ep){
+				if(debug)
+					fprint(2, "bad len %d at offset %d of %d\n", de->reclen, doff, b->len);
+				break;
+			}
+			if(de->ino == 0)	/* unused entry */
+				continue;
+			if(MinDirentSize+de->namlen > de->reclen){
+				if(debug)
+					fprint(2, "bad namelen %d at offset %d of %d\n", de->namlen, doff, b->len);
+				break;
+			}
+			if(de->namlen == len && memcmp(de->name, name, len) == 0){
+				mkhandle(nh, de->ino);
+				blockput(b);
+				return Nfs3Ok;
+			}
+		}
+		blockput(b);
+	}
+	return Nfs3ErrNoEnt;
+}
+
+/*
+ * Read entries from the directory named by handle h, starting at
+ * the byte offset cookie, and pack as many as fit into a freshly
+ * malloc'ed buffer of count bytes.  The buffer is returned in
+ * *pdata with its used length in *pcount.  *peof is set to 1 only
+ * if the end of the directory was reached.  Each entry's cookie
+ * is the directory offset of the entry that follows it.
+ */
+static Nfs3Status
+ext2readdir(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count, u64int cookie, uchar **pdata, u32int *pcount, u1int *peof)
+{
+	u32int nblock;
+	u32int i;
+	int off, done, doff;
+	uchar *data, *dp, *dep, *p, *ep, *ndp;
+	char name[NAMELEN+1];
+	Dirent *de;
+	Inode ino;
+	Block *b;
+	Ext2 *fs;
+	Nfs3Status ok;
+	Nfs3Entry e;
+	int want;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+
+	if((ino.mode&IFMT) != IFDIR)
+		return Nfs3ErrNotDir;
+
+	if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+		return ok;
+
+	if(cookie >= ino.size){
+		/* nothing left: say so, as ext2readfile does past end of file */
+		*pcount = 0;
+		*pdata = 0;
+		*peof = 1;
+		return Nfs3Ok;
+	}
+
+	dp = malloc(count);
+	data = dp;
+	if(dp == nil)
+		return Nfs3ErrNoMem;
+	dep = dp+count;
+	*peof = 0;
+	nblock = (ino.size+fs->blocksize-1) / fs->blocksize;
+	i = cookie/fs->blocksize;
+	off = cookie%fs->blocksize;	/* entries before off in the first block were already returned */
+	done = 0;
+	for(; i<nblock && !done; i++){
+		/* the last block may be partial; a remainder of 0 means it is full */
+		want = fs->blocksize;
+		if(i==nblock-1 && ino.size%fs->blocksize != 0)
+			want = ino.size % fs->blocksize;
+		b = ext2fileblock(fs, &ino, i, want);
+		if(b == nil){
+			/* off applies to the first block only, readable or not */
+			off = 0;
+			continue;
+		}
+		p = b->data;
+		ep = p+b->len;
+		memset(&e, 0, sizeof e);
+		while(p < ep){
+			doff = p - b->data;	/* offset of this entry in the block */
+			/* the fixed part of an entry must fit before it is examined */
+			if(ep - p < MinDirentSize){
+				if(debug) fprint(2, "truncated entry at offset %d of %d\n", doff, b->len);
+				break;
+			}
+			de = (Dirent*)p;
+			if(de->reclen == 0){
+				if(debug) fprint(2, "reclen 0 at offset %d of %d\n", doff, b->len);
+				break;
+			}
+			p += de->reclen;
+			if(p > ep){
+				if(debug) fprint(2, "reclen %d at offset %d of %d\n", de->reclen, doff, b->len);
+				break;
+			}
+			if(de->ino == 0){
+				if(debug) fprint(2, "zero inode\n");
+				continue;
+			}
+			if(MinDirentSize+de->namlen > de->reclen){
+				if(debug) fprint(2, "bad namlen %d reclen %d at offset %d of %d\n", de->namlen, de->reclen, doff, b->len);
+				break;
+			}
+			/*
+			 * Names are not NUL-terminated on disk (the byte after
+			 * a name may belong to the next entry), so pack from a
+			 * terminated copy.
+			 */
+			memmove(name, de->name, de->namlen);
+			name[de->namlen] = 0;
+			if(debug) fprint(2, "%s/%d ", name, (int)de->ino);
+			if(doff < off)
+				continue;
+			e.fileid = de->ino;
+			e.name = name;
+			e.cookie = (u64int)i*fs->blocksize + (p - b->data);
+			if(nfs3entrypack(dp, dep, &ndp, &e) < 0){
+				/* this entry was not packed; stop here */
+				done = 1;
+				break;
+			}
+			dp = ndp;
+		}
+		off = 0;
+		blockput(b);
+	}
+	/* only a scan that ran off the end of the directory has seen everything */
+	if(!done)
+		*peof = 1;
+
+	*pcount = dp - data;
+	*pdata = data;
+	return Nfs3Ok;
+}
+
+/*
+ * Read up to count bytes at offset from the file named by handle
+ * h into a freshly malloc'ed buffer returned in *pdata, with the
+ * length in *pcount.  A read never crosses a file system block
+ * boundary, so fewer bytes than requested may be returned.
+ * *peof is set if the data returned ends at the end of the file.
+ */
+static Nfs3Status
+ext2readfile(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count,
+	u64int offset, uchar **pdata, u32int *pcount, u1int *peof)
+{
+	uchar *data;
+	Block *b;
+	Ext2 *fs;
+	int off, want, fragcount;
+	Inode ino;
+	Nfs3Status ok;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+
+	if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+		return ok;
+
+	if(offset >= ino.size){
+		*pdata = 0;
+		*pcount = 0;
+		*peof = 1;
+		return Nfs3Ok;
+	}
+	if(count == 0){
+		/*
+		 * Nothing to copy.  Returning here also keeps the
+		 * offset+count-1 test below from looking at the previous
+		 * block and turning an empty read into a full block.
+		 */
+		*pdata = 0;
+		*pcount = 0;
+		*peof = 0;
+		return Nfs3Ok;
+	}
+	if(offset+count > ino.size)
+		count = ino.size-offset;
+	/* trim the request so that it ends at the end of its block */
+	if(offset/fs->blocksize != (offset+count-1)/fs->blocksize)
+		count = fs->blocksize - offset%fs->blocksize;
+
+	data = malloc(count);
+	if(data == nil)
+		return Nfs3ErrNoMem;
+
+	/* want: bytes needed from the start of the block, rounded up to a whole block */
+	want = offset%fs->blocksize+count;
+	if(want%fs->blocksize)
+		want += fs->blocksize - want%fs->blocksize;
+
+	b = ext2fileblock(fs, &ino, offset/fs->blocksize, want);
+	if(b == nil){
+		/* BUG: distinguish sparse file from I/O error */
+		memset(data, 0, count);
+	}else{
+		off = offset%fs->blocksize;
+		fragcount = count;	/* need signed variable */
+		if(off+fragcount > b->len){
+			fragcount = b->len - off;
+			if(fragcount < 0)
+				fragcount = 0;
+		}
+		if(fragcount > 0)
+			memmove(data, b->data+off, fragcount);
+		count = fragcount;
+		blockput(b);
+	}
+	*peof = (offset+count == ino.size);
+	*pcount = count;
+	*pdata = data;
+	return Nfs3Ok;
+}
+
+/*
+ * Return the target of the symbolic link named by handle h as a
+ * freshly malloc'ed, NUL-terminated string in *link.  Targets
+ * longer than 1024 bytes are refused with Nfs3ErrIo.
+ */
+static Nfs3Status
+ext2readlink(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char **link)
+{
+	Ext2 *fs;
+	Nfs3Status ok;
+	int len;
+	Inode ino;
+	Block *b;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+	if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+		return ok;
+
+	if(ino.size > 1024)
+		return Nfs3ErrIo;
+	len = ino.size;
+
+	if(ino.nblock != 0){
+		/* the target is stored in the file's first data block */
+		/* BUG: assumes symlink fits in one block */
+		b = ext2fileblock(fs, &ino, 0, len);
+		if(b == nil)
+			return Nfs3ErrIo;
+		/* the block must hold the whole target, which must not contain a NUL */
+		if(b->len < len || memchr(b->data, 0, len) != nil){
+			blockput(b);
+			return Nfs3ErrIo;
+		}
+		*link = malloc(len+1);
+		if(*link == 0){
+			blockput(b);
+			return Nfs3ErrNoMem;
+		}
+		memmove(*link, b->data, len);
+		(*link)[len] = 0;
+		blockput(b);
+		return Nfs3Ok;
+	}
+
+	/* no data blocks: the target is stored in the inode's block pointer array */
+	if(len > sizeof ino.block)
+		return Nfs3ErrIo;
+
+	*link = malloc(len+1);
+	if(*link == 0)
+		return Nfs3ErrNoMem;
+	memmove(*link, ino.block, len);
+	(*link)[len] = 0;
+	return Nfs3Ok;
+}
+
blob - /dev/null
blob + 29bf60b5cd83aedbba83833f37195b21052b0f5a (mode 644)
--- /dev/null
+++ src/libdiskfs/ext2.h
@@ -0,0 +1,167 @@
+typedef struct Super Super;
+typedef struct Group Group;
+typedef struct Inode Inode;
+typedef struct Dirent Dirent;
+typedef struct Ext2 Ext2;
+
+/*
+ * Constants of the ext2 on-disk format.
+ */
+enum
+{
+	BYTESPERSEC	= 512,
+
+	/* the superblock: SBSIZE bytes at byte offset SBOFF (see ext2sync) */
+	SBOFF = 1024,
+	SBSIZE = 1024,
+
+	SUPERMAGIC = 0xEF53,	/* expected Super.magic */
+	MINBLOCKSIZE = 1024,	/* block size is MINBLOCKSIZE<<Super.logblocksize */
+	MAXBLOCKSIZE = 4096,
+	ROOTINODE = 2,		/* inode number of the root directory */
+	FIRSTINODE = 11,
+	VALIDFS = 0x0001,
+	ERRORFS = 0x0002,
+
+	/* indices into Inode.block: direct pointers, then the three indirect trees */
+	NDIRBLOCKS = 12,
+	INDBLOCK = NDIRBLOCKS,
+	DINDBLOCK = INDBLOCK+1,
+	TINDBLOCK = DINDBLOCK+1,
+	NBLOCKS = TINDBLOCK+1,
+
+	NAMELEN = 255,		/* size of Dirent.name */
+
+	/* permissions in Inode.mode */
+	IEXEC = 00100,
+	IWRITE = 0200,
+	IREAD = 0400,
+	ISVTX = 01000,
+	ISGID = 02000,
+	ISUID = 04000,
+
+	/* type in Inode.mode */
+	IFMT = 0170000,		/* mask selecting the type bits */
+	IFIFO = 0010000,
+	IFCHR = 0020000,
+	IFDIR = 0040000,
+	IFBLK = 0060000,
+	IFREG = 0100000,
+	IFLNK = 0120000,
+	IFSOCK = 0140000,
+	IFWHT = 0160000,	/* rejected by ino2attr */
+};
+
+/* namlen plus the 8-byte entry header, rounded up to a multiple of 4 */
+#define DIRLEN(namlen)	(((namlen)+8+3)&~3)
+
+
+/*
+ * Super block on-disk format.
+ * ext2sync overlays this structure directly on the SBSIZE bytes
+ * read from disk, so the field order and sizes must not change.
+ */
+struct Super
+{
+	u32int	ninode;		/* Inodes count */
+	u32int	nblock;		/* Blocks count */
+	u32int	rblockcount;	/* Reserved blocks count */
+	u32int	freeblockcount;	/* Free blocks count */
+	u32int	freeinodecount;	/* Free inodes count */
+	u32int	firstdatablock;	/* First Data Block */
+	u32int	logblocksize;	/* Block size */
+	u32int	logfragsize;	/* Fragment size */
+	u32int	blockspergroup;	/* # Blocks per group */
+	u32int	fragpergroup;	/* # Fragments per group */
+	u32int	inospergroup;	/* # Inodes per group */
+	u32int	mtime;		/* Mount time */
+	u32int	wtime;		/* Write time */
+	u16int	mntcount;		/* Mount count */
+	u16int	maxmntcount;	/* Maximal mount count */
+	u16int	magic;		/* Magic signature */
+	u16int	state;		/* File system state */
+	u16int	errors;		/* Behaviour when detecting errors */
+	u16int	pad;
+	u32int	lastcheck;		/* time of last check */
+	u32int	checkinterval;	/* max. time between checks */
+	u32int	creatoros;		/* OS */
+	u32int	revlevel;		/* Revision level */
+	u16int	defresuid;		/* Default uid for reserved blocks */
+	u16int	defresgid;		/* Default gid for reserved blocks */
+	u32int	reserved[235];	/* Padding to the end of the block */
+};
+
+/*
+ * Block group on-disk format.
+ */
+/* one group descriptor; GroupSize bytes each, packed into blocks (see ext2group) */
+struct Group
+{
+	u32int	bitblock;		/* Blocks bitmap block */
+	u32int	inodebitblock;		/* Inodes bitmap block */
+	u32int	inodeaddr;		/* Inodes table block */
+	u16int	freeblockscount;	/* Free blocks count */
+	u16int	freeinodescount;	/* Free inodes count */
+	u16int	useddirscount;	/* Directories count */
+	u16int	pad;
+	u32int	reserved[3];
+};
+enum
+{
+	GroupSize = 32	/* bytes per descriptor on disk */
+};
+
+/*
+ * Structure of an inode on the disk
+ * handle2ino indexes an inode-table block as an array of these,
+ * so the structure must be exactly as large as an on-disk inode.
+ */
+struct Inode
+{
+	u16int	mode;		/* File mode */
+	u16int	uid;		/* Owner Uid */
+	u32int	size;		/* Size in bytes */
+	u32int	atime;		/* Access time */
+	u32int	ctime;		/* Creation time */
+	u32int	mtime;		/* Modification time */
+	u32int	dtime;		/* Deletion Time */
+	u16int	gid;		/* Group Id */
+	u16int	nlink;	/* Links count */
+	u32int	nblock;	/* Blocks count */
+	u32int	flags;		/* File flags */
+	u32int	osd1;				
+	u32int	block[NBLOCKS];/* Pointers to blocks */
+	u32int	version;	/* File version (for NFS) */
+	u32int	fileacl;	/* File ACL */
+	u32int	diracl;	/* Directory ACL */
+	u32int	faddr;		/* Fragment address */
+	uchar	osd2[12];
+};
+enum
+{
+	InodeSize = 128	/* bytes per inode on disk; used to compute inodes per block */
+};
+
+/*
+ * Directory entry on-disk structure.
+ * Entries are variable length: reclen is the distance to the
+ * next entry and only the first namlen bytes of name are valid.
+ */
+struct Dirent
+{
+	u32int	ino;			/* Inode number */
+	u16int	reclen;		/* Directory entry length */
+	u8int	namlen;		/* Name length */
+	u8int	pad;
+	char	name[NAMELEN];	/* File name */
+};
+enum
+{
+	MinDirentSize = 4+2+1+1,	/* size of the fixed part that precedes name */
+};
+
+/*
+ * In-core fs info.
+ * Filled in from the superblock by ext2sync.
+ */
+struct Ext2
+{
+	uint	blocksize;	/* bytes per block */
+	uint	nblock;		/* blocks in the file system */
+	uint	ngroup;		/* number of block groups */
+	uint	inospergroup;
+	uint	blockspergroup;
+	uint	inosperblock;	/* blocksize / InodeSize */
+	uint	groupaddr;	/* block number of the first group descriptor block */
+	uint	descperblock;	/* blocksize / GroupSize */
+	uint	firstblock;	/* Super.firstdatablock */
+	Disk *disk;
+	Fsys *fsys;		/* the Fsys whose priv points back here */
+};
+
blob - /dev/null
blob + 4d12512c832090dafb8f0d810353305b12b676de (mode 644)
--- /dev/null
+++ src/libdiskfs/fat.c
@@ -0,0 +1,11 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+
+/*
+ * Placeholder for FAT support: ignores disk and always returns nil,
+ * i.e. it never recognizes a file system.
+ */
+Fsys*
+fsysopenfat(Disk *disk)
+{
+	USED(disk);
+	return nil;
+}
+
blob - /dev/null
blob + 2342171f5b76f038bda72ba914a37f5b3feb6458 (mode 644)
--- /dev/null
+++ src/libdiskfs/ffs.c
@@ -0,0 +1,791 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <sunrpc.h>
+#include <nfs3.h>
+#include <diskfs.h>
+#include "ffs.h"
+
+#define checkcg 0
+#define debug 0
+
+static int checkfsblk(Fsblk*);
+static int checkcgblk(Cgblk*);
+static Block *ffsblockread(Fsys*, u64int);
+static int ffssync(Fsys*);
+static void ffsclose(Fsys*);
+
+static Nfs3Status ffsroot(Fsys*, Nfs3Handle*);
+static Nfs3Status ffsgetattr(Fsys*, SunAuthUnix *au, Nfs3Handle*, Nfs3Attr*);
+static Nfs3Status ffslookup(Fsys*, SunAuthUnix *au, Nfs3Handle*, char*, Nfs3Handle*);
+static Nfs3Status ffsreadfile(Fsys*, SunAuthUnix *au, Nfs3Handle*, u32int, u64int, uchar**, u32int*, u1int*);
+static Nfs3Status ffsreadlink(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char **link);
+static Nfs3Status ffsreaddir(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int, u64int, uchar**, u32int*, u1int*);
+static Nfs3Status ffsaccess(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr);
+
+/*
+ * Try to interpret disk as an FFS file system.
+ * Allocates the Fsys and its private Ffs state, installs the FFS
+ * method table, and loads the super block via ffssync.
+ * Returns the new Fsys, or nil (after freeing everything allocated
+ * here) if the super block cannot be read or is not FFS.
+ * The disk itself is not closed on failure; it still belongs to the caller.
+ */
+Fsys*
+fsysopenffs(Disk *disk)
+{
+	Ffs *fs;
+	Fsys *fsys;
+
+	fsys = emalloc(sizeof(Fsys));
+	fs = emalloc(sizeof(Ffs));
+	fs->disk = disk;
+	fsys->priv = fs;
+	fsys->type = "ffs";
+	/* fsyssync hands fsys->disk to disksync, so it must be set */
+	fsys->disk = disk;
+	fsys->_readblock = ffsblockread;
+	fsys->_sync = ffssync;
+	/* fsysclose aborts when _close is missing */
+	fsys->_close = ffsclose;
+	fsys->_root = ffsroot;
+	fsys->_getattr = ffsgetattr;
+	fsys->_access = ffsaccess;
+	fsys->_lookup = ffslookup;
+	fsys->_readfile = ffsreadfile;
+	fsys->_readlink = ffsreadlink;
+	fsys->_readdir = ffsreaddir;
+
+	if(ffssync(fsys) < 0)
+		goto error;
+
+	return fsys;
+
+error:
+	ffsclose(fsys);
+	return nil;
+}
+
+/*
+ * Read the header block of cylinder group i.
+ * On success returns a pointer into the block's data and stores the
+ * block in *pb; the caller must blockput it when done.
+ * Returns nil if i is out of range, the read fails, or the magic is wrong.
+ */
+static Cgblk*
+ffscylgrp(Ffs *fs, int i, Block **pb)
+{
+	Block *blk;
+	Cgblk *hdr;
+	u64int addr;
+
+	if(i >= fs->ncg)
+		return nil;
+
+	addr = (u64int)fs->cg[i].cgblkno*fs->blocksize;
+	blk = diskread(fs->disk, fs->blocksize, addr);
+	if(blk == nil)
+		return nil;
+
+	hdr = (Cgblk*)blk->data;
+	if(checkcgblk(hdr) >= 0){
+		*pb = blk;
+		return hdr;
+	}
+
+	/* report before releasing the block: %r prints the error set by checkcgblk */
+	fprint(2, "checkcgblk %d %lud: %r\n", i, (ulong)fs->cg[i].cgblkno);
+	blockput(blk);
+	return nil;
+}
+
+/*
+ * (Re)load the file system geometry from the on-disk super block
+ * into fs and fsys, and compute the block addresses of every
+ * cylinder group.  Returns 0 on success, -1 on failure.
+ */
+static int
+ffssync(Fsys *fsys)
+{
+	int i;
+	Block *b, *cgb;
+	Cgblk *cgblk;
+	Cylgrp *cg;
+	Disk *disk;
+	Ffs *fs;
+	Fsblk *fsblk;
+
+	fs = fsys->priv;
+	disk = fs->disk;
+
+	/*
+	 * Read super block.
+	 */
+	/* NOTE(review): on failure b is nil at the error label, so this relies on blockput accepting nil -- confirm */
+	if((b = diskread(disk, SBSIZE, SBOFF)) == nil)
+		goto error;
+	fsblk = (Fsblk*)b->data;
+	if(checkfsblk(fsblk) < 0)
+		goto error;
+
+	fs->blocksize = fsblk->blocksize;
+	/* round the fragment count up to whole blocks */
+	fs->nblock = (fsblk->nfrag+fsblk->fragsperblock-1) / fsblk->fragsperblock;
+	fs->fragsize = fsblk->fragsize;
+	fs->fragspergroup = fsblk->fragspergroup;
+	fs->fragsperblock = fsblk->fragsperblock;
+	fs->inosperblock = fsblk->inosperblock;
+	fs->inospergroup = fsblk->inospergroup;
+
+	fs->nfrag = fsblk->nfrag;
+	fs->ndfrag = fsblk->ndfrag;
+	/* bytes per group (cylinders * sectors per cylinder * bytes per sector) divided by the block size */
+	fs->blockspergroup = (u64int)fsblk->cylspergroup * 
+		fsblk->secspercyl * BYTESPERSEC / fsblk->blocksize;
+	fs->ncg = fsblk->ncg;
+
+	fsys->blocksize = fs->blocksize;
+	fsys->nblock = fs->nblock;
+
+	if(0) fprint(2, "ffs %d %d-byte blocks, %d cylinder groups\n",
+		fs->nblock, fs->blocksize, fs->ncg);
+
+	/*
+	 * The table is allocated only on the first call.
+	 * NOTE(review): it is not resized if ncg differs on a later sync.
+	 */
+	if(fs->cg == nil)
+		fs->cg = emalloc(fs->ncg*sizeof(Cylgrp));
+	for(i=0; i<fs->ncg; i++){
+		cg = &fs->cg[i];
+		/* the super block's fragment offsets are converted to block numbers */
+		cg->bno = fs->blockspergroup*i + fsblk->cgoffset * (i & ~fsblk->cgmask);
+		cg->cgblkno = cg->bno + fsblk->cfragno/fs->fragsperblock;
+		cg->ibno = cg->bno + fsblk->ifragno/fs->fragsperblock;
+		cg->dbno = cg->bno + fsblk->dfragno/fs->fragsperblock;
+
+		/* checkcg is defined as 0 at the top of the file, so this verification is compiled out */
+		if(checkcg){
+			if((cgb = diskread(disk, fs->blocksize, (u64int)cg->cgblkno*fs->blocksize)) == nil)
+				goto error;
+
+			cgblk = (Cgblk*)cgb->data;
+			if(checkcgblk(cgblk) < 0){
+				blockput(cgb);
+				goto error;
+			}
+			if(cgblk->nfrag % fs->fragsperblock && i != fs->ncg-1){
+				werrstr("fractional number of blocks in non-last cylinder group %d", cgblk->nfrag);
+				blockput(cgb);
+				goto error;
+			}
+			// cg->nfrag = cgblk->nfrag;
+			// cg->nblock = (cgblk->nfrag+fs->fragsperblock-1) / fs->fragsperblock;
+			// fprint(2, "cg #%d: cgblk %lud, %d blocks, %d inodes\n", cgblk->num, (ulong)cg->cgblkno, cg->nblock, cg->nino);
+		}
+	}
+	/* fsblk points into b, so b is released only after the loop */
+	blockput(b);
+	return 0;
+
+error:
+	blockput(b);
+	return -1;
+}
+
+/*
+ * Release the memory owned by an FFS Fsys: the cylinder group table,
+ * the private Ffs, and the Fsys itself.  The disk is left open.
+ */
+static void
+ffsclose(Fsys *fsys)
+{
+	Ffs *ffs = fsys->priv;
+
+	if(ffs->cg != nil)
+		free(ffs->cg);
+	free(ffs);
+	free(fsys);
+}
+	
+/*
+ * Sanity-check a super block before its fields are trusted.
+ * Returns 0 if it looks like FFS, -1 (with the error string set) if not.
+ * Besides the magic number, the geometry fields that the rest of
+ * this file divides by must be nonzero; otherwise a corrupt
+ * image would cause a division by zero.
+ */
+static int
+checkfsblk(Fsblk *super)
+{
+	if(super->magic != FSMAGIC){
+		werrstr("bad super block");
+		return -1;
+	}
+	if(super->blocksize == 0 || super->fragsize == 0
+	|| super->fragsperblock == 0 || super->inosperblock == 0
+	|| super->inospergroup == 0){
+		werrstr("bad super block geometry");
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Verify the magic number of a cylinder group block.
+ * Returns 0 if it matches, -1 with the error string set otherwise.
+ */
+static int
+checkcgblk(Cgblk *cg)
+{
+	if(cg->magic == CGMAGIC)
+		return 0;
+	werrstr("bad cylinder group block");
+	return -1;
+}
+
+/*
+ * Read block #bno from the disk, zeroing unused data.
+ * If there is no data whatsoever, it's okay to return nil.
+ */
+int nskipx;
+static Block*
+ffsblockread(Fsys *fsys, u64int bno)
+{
+	u32int i, o;
+	u8int *fmap;
+	int frag, fsize, avail;
+	Block *b;
+//	Cylgrp *cg;
+	Cgblk *cgblk;
+	Ffs *fs;
+
+	fs = fsys->priv;
+	/* i is the cylinder group holding bno, o the block's index within that group */
+	i = bno / fs->blockspergroup;
+	o = bno % fs->blockspergroup;
+	if(i >= fs->ncg)
+		return nil;
+//	cg = &fs->cg[i];
+
+//	if(o >= cg->nblock)
+//		return nil;
+
+	if((cgblk = ffscylgrp(fs, i, &b)) == nil)
+		return nil;
+
+	/*
+	 * The free-fragment map holds one bit per fragment, so each
+	 * block owns fragsperblock consecutive bits; extract them as avail.
+	 */
+	fmap = (u8int*)cgblk+cgblk->fmapoff;
+	frag = fs->fragsperblock;
+	switch(frag){
+	default:
+		sysfatal("bad frag");
+	case 8:
+		avail = fmap[o];
+		break;
+	case 4:
+		avail = (fmap[o>>1] >> ((o&1)*4)) & 0xF;
+		break;
+	case 2:
+		avail = (fmap[o>>2] >> ((o&3)*2)) & 0x3;
+		break;
+	case 1:
+		avail = (fmap[o>>3] >> (o&7)) & 0x1;
+		break;
+	}
+	/* fmap points into b, so b is released only after avail is extracted */
+	blockput(b);
+
+	/* every fragment of the block is marked free: there is no data, so skip the read */
+	if(avail == ((1<<frag)-1))
+{
+nskipx++;
+		return nil;
+}
+	if((b = diskread(fs->disk, fs->blocksize, bno*fs->blocksize)) == nil){
+		fprint(2, "diskread failed!!!\n");
+		return nil;
+	}
+
+	/* zero the fragments whose bit is set in the map, as promised for unused data */
+	/* NOTE(review): assumes diskread returned a full block; b->len is not checked here */
+	fsize = fs->fragsize;
+	for(i=0; i<frag; i++)
+		if(avail & (1<<i))
+			memset(b->data + fsize*i, 0, fsize);
+	return b;
+}
+
+/*
+ * Read size bytes (but never less than one fragment) starting at
+ * fragment address bno.  Address 0 means "no block" and yields nil.
+ * Returns nil, after printing a diagnostic, if the address is out of
+ * range, the read fails, or the read comes back short.
+ */
+static Block*
+ffsdatablock(Ffs *fs, u32int bno, int size)
+{
+	int nbytes;
+	u64int addr;
+	Block *blk;
+
+	if(bno == 0)
+		return nil;
+
+	nbytes = size < fs->fragsize ? fs->fragsize : size;
+
+	if(bno >= fs->nfrag){
+		fprint(2, "ffs: request for block %#lux; nfrag %#x\n", (ulong)bno, fs->nfrag);
+		return nil;
+	}
+
+	addr = (u64int)bno*fs->fragsize;
+	blk = diskread(fs->disk, nbytes, addr);
+	if(blk == nil){
+		fprint(2, "ffs: disk i/o at %#llux for %#ux: %r\n", addr, nbytes);
+		return nil;
+	}
+	if(blk->len >= nbytes)
+		return blk;
+
+	fprint(2, "ffs: disk i/o at %#llux for %#ux got %#ux\n", addr, nbytes,
+		blk->len);
+	blockput(blk);
+	return nil;
+}
+
+/*
+ * Return the first size bytes of block number bno of the file
+ * described by ino, or nil if the block is a hole, is beyond what
+ * the inode can address, or cannot be read.
+ *
+ * Blocks 0..NDADDR-1 are addressed directly by ino->db.  After
+ * that, ino->ib[0] is a single-indirect block, ino->ib[1] a
+ * double-indirect block and ino->ib[2] a triple-indirect block;
+ * each indirect block holds blocksize/4 32-bit addresses.
+ */
+static Block*
+ffsfileblock(Ffs *fs, Inode *ino, u32int bno, int size)
+{
+	int level;
+	u32int ppb, fbno, addr;
+	u64int span;
+	Block *b;
+
+	fbno = bno;	/* remembered for diagnostics */
+	if(bno < NDADDR){
+		if(debug) fprint(2, "ffsfileblock %lud: direct %#lux\n", (ulong)bno, (ulong)ino->db[bno]);
+		return ffsdatablock(fs, ino->db[bno], size);
+	}
+	bno -= NDADDR;
+	ppb = fs->blocksize/4;
+	if(ppb == 0)
+		return nil;
+
+	/*
+	 * Find which indirect pointer covers bno.  Level n covers
+	 * ppb^(n+1) blocks; span is kept in 64 bits so the power
+	 * cannot wrap.
+	 */
+	span = ppb;
+	for(level=0; level<NIADDR; level++){
+		if(bno < span)
+			break;
+		bno -= span;
+		span *= ppb;
+	}
+	if(level == NIADDR){
+		fprint(2, "ffsfileblock %lud: too big\n", (ulong)fbno);
+		return nil;
+	}
+
+	/*
+	 * Walk down the tree of pointer blocks.  A zero address at
+	 * any level makes ffsdatablock return nil, i.e. a hole.
+	 */
+	addr = ino->ib[level];
+	if(debug) fprint(2, "ffsfileblock %lud: indirect %#lux\n", (ulong)fbno, (ulong)addr);
+	for(; level>=0; level--){
+		b = ffsdatablock(fs, addr, fs->blocksize);
+		if(b == nil)
+			return nil;
+		span /= ppb;	/* blocks covered by each pointer at this level */
+		addr = ((u32int*)b->data)[bno/span];
+		bno %= span;
+		blockput(b);
+	}
+	if(debug) fprint(2, "ffsfileblock: indirect fetch %#lux size %d\n", (ulong)addr, size);
+	return ffsdatablock(fs, addr, size);
+}
+
+/*
+ * NFS handles are 4-byte inode number.
+ */
+/*
+ * Store the low 32 bits of ino in h, most significant byte first.
+ */
+static void
+mkhandle(Nfs3Handle *h, u64int ino)
+{
+	int k;
+
+	for(k=0; k<4; k++)
+		h->h[k] = ino >> (8*(3-k));
+	h->len = 4;
+}
+
+/*
+ * Decode a 4-byte big-endian integer (the inverse of mkhandle).
+ * Each byte is widened to u32int before shifting so that a top
+ * byte >= 0x80 is not shifted into the sign bit of an int.
+ */
+static u32int
+byte2u32(uchar *p)
+{
+	return ((u32int)p[0]<<24) | ((u32int)p[1]<<16) | ((u32int)p[2]<<8) | (u32int)p[3];
+}
+
+/*
+ * Decode the inode number stored in handle h and copy the
+ * corresponding on-disk inode into *ino.  If pinum is non-nil the
+ * inode number is stored there too.  Returns Nfs3ErrBadHandle for a
+ * malformed or out-of-range handle and Nfs3ErrIo if the read fails.
+ */
+static Nfs3Status
+handle2ino(Ffs *fs, Nfs3Handle *h, u32int *pinum, Inode *ino)
+{
+	int i;
+	u32int ioff;
+	u32int inum;
+	Block *b;
+	Cylgrp *cg;
+
+	if(h->len != 4)
+		return Nfs3ErrBadHandle;
+	inum = byte2u32(h->h);
+	if(pinum)
+		*pinum = inum;
+	if(debug) print("inum %d...", (int)inum);
+
+	/* fetch inode from disk */
+	/* i is the cylinder group, ioff the inode's index within that group */
+	i = inum / fs->inospergroup;
+	ioff = inum % fs->inospergroup;
+	if(debug)print("cg %d off %d...", i, (int)ioff);
+	if(i >= fs->ncg)
+		return Nfs3ErrBadHandle;
+	cg = &fs->cg[i];
+/*
+	if(ioff >= cg->nino)
+		return Nfs3ErrBadHandle;
+*/
+
+	if(debug) print("cg->ibno %d...", cg->ibno);
+	/* read the block of the group's inode area that contains inode ioff */
+	if((b = diskread(fs->disk, fs->blocksize,
+		(cg->ibno+ioff/fs->inosperblock)*(vlong)fs->blocksize)) == nil)
+		return Nfs3ErrIo;
+	/* NOTE(review): b->len is not checked; a short read would be indexed past its end */
+	*ino = ((Inode*)b->data)[ioff%fs->inosperblock];
+	blockput(b);
+
+	return Nfs3Ok;
+}
+
+/*
+ * Produce the handle of the root directory, which is always
+ * inode ROOTINODE.  Always returns Nfs3Ok.
+ */
+static Nfs3Status
+ffsroot(Fsys *fsys, Nfs3Handle *h)
+{
+	USED(fsys);
+	mkhandle(h, ROOTINODE);
+	return Nfs3Ok;
+}
+
+/*
+ * Translate on-disk inode ino (number inum) into NFS attributes.
+ * Returns Nfs3ErrBadHandle if the inode's type is a whiteout or is
+ * not recognized; in that case only attr->type has been written.
+ */
+static Nfs3Status
+ino2attr(Ffs *fs, Inode *ino, u32int inum, Nfs3Attr *attr)
+{
+	u32int rdev;
+
+	attr->type = -1;
+	switch(ino->mode&IFMT){
+	case IFIFO:
+		attr->type = Nfs3FileFifo;
+		break;
+	case IFCHR:
+		attr->type = Nfs3FileChar;
+		break;
+	case IFDIR:
+		attr->type = Nfs3FileDir;
+		break;
+	case IFBLK:
+		attr->type = Nfs3FileBlock;
+		break;
+	case IFREG:
+		attr->type = Nfs3FileReg;
+		break;
+	case IFLNK:
+		attr->type = Nfs3FileSymlink;
+		break;
+	case IFSOCK:
+		attr->type = Nfs3FileSocket;
+		break;
+	case IFWHT:
+	default:
+		return Nfs3ErrBadHandle;
+	}
+
+	attr->mode = ino->mode&07777;
+	attr->nlink = ino->nlink;
+	attr->uid = ino->uid;
+	attr->gid = ino->gid;
+	attr->size = ino->size;
+	/* NOTE(review): u32int*int is evaluated before widening, so this may wrap for large files -- confirm */
+	attr->used = ino->nblock*fs->blocksize;
+	if(attr->type==Nfs3FileBlock || attr->type==Nfs3FileChar){
+		/* device files keep their device number in the first direct block slot */
+		rdev = ino->db[0];
+		/* major is bits 8-15; minor is every other bit, left in place */
+		attr->major = (rdev>>8)&0xFF;
+		attr->minor = rdev & 0xFFFF00FF;
+	}else{
+		attr->major = 0;
+		attr->minor = 0;
+	}
+	attr->fsid = 0;
+	attr->fileid = inum;
+	attr->atime.sec = ino->atime;
+	attr->atime.nsec = ino->atimensec;
+	attr->mtime.sec = ino->mtime;
+	attr->mtime.nsec = ino->mtimensec;
+	attr->ctime.sec = ino->ctime;
+	attr->ctime.nsec = ino->ctimensec;
+	return Nfs3Ok;
+}
+
+/*
+ * Report whether gid appears in the group list au->g[0..au->ng-1].
+ * Returns 1 if it does, 0 otherwise.
+ */
+static int
+ingroup(SunAuthUnix *au, uint gid)
+{
+	int k;
+
+	k = 0;
+	while(k < au->ng){
+		if(au->g[k] == gid)
+			return 1;
+		k++;
+	}
+	return 0;
+}
+
+/*
+ * Check that the caller au holds every permission bit in need
+ * on inode ino.  The owner bits apply if the uids match, the group
+ * bits if the caller's primary or supplementary groups match, and
+ * the "other" bits otherwise.
+ * Returns Nfs3Ok or Nfs3ErrNotOwner.
+ */
+static Nfs3Status
+inoperm(Inode *ino, SunAuthUnix *au, int need)
+{
+	int granted;
+
+	granted = ino->mode&0777;
+	if(ino->uid == au->uid)
+		granted >>= 6;
+	else if(ino->gid == au->gid || ingroup(au, ino->gid))
+		granted >>= 3;
+
+	/* Nfs3ErrNotOwner is really EPERM */
+	return (granted&need) == need ? Nfs3Ok : Nfs3ErrNotOwner;
+}
+
+/*
+ * NFS getattr: look up the inode named by h and fill in attr.
+ * No permission check is made; anyone can getattr.
+ */
+static Nfs3Status
+ffsgetattr(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, Nfs3Attr *attr)
+{
+	Ffs *fs;
+	Inode ino;
+	u32int inum;
+	Nfs3Status st;
+
+	USED(au);
+	fs = fsys->priv;
+	st = handle2ino(fs, h, &inum, &ino);
+	if(st != Nfs3Ok)
+		return st;
+	return ino2attr(fs, &ino, inum, attr);
+}
+
+/*
+ * NFS access: report in *got which of the requested access bits in
+ * want the caller au holds on the file named by h, and fill in attr.
+ * Only read, lookup (directories) and execute (non-directories) are
+ * ever granted.
+ */
+static Nfs3Status
+ffsaccess(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr)
+{
+	int bits, isdir;
+	Inode ino;
+	u32int inum, granted;
+	Ffs *fs;
+	Nfs3Status st;
+
+	fs = fsys->priv;
+	st = handle2ino(fs, h, &inum, &ino);
+	if(st != Nfs3Ok)
+		return st;
+
+	/* select the owner, group or other permission bits */
+	bits = ino.mode&0777;
+	if(ino.uid == au->uid)
+		bits >>= 6;
+	else if(ino.gid == au->gid || ingroup(au, ino.gid))
+		bits >>= 3;
+
+	isdir = (ino.mode&IFMT) == IFDIR;
+	granted = 0;
+	if((want&Nfs3AccessRead) && (bits&AREAD))
+		granted |= Nfs3AccessRead;
+	if((want&Nfs3AccessLookup) && isdir && (bits&AEXEC))
+		granted |= Nfs3AccessLookup;
+	if((want&Nfs3AccessExecute) && !isdir && (bits&AEXEC))
+		granted |= Nfs3AccessExecute;
+	*got = granted;
+
+	return ino2attr(fs, &ino, inum, attr);
+}
+
+/*
+ * NFS lookup: find name in the directory named by h and store the
+ * handle of the matching entry in nh.
+ * Returns Nfs3ErrNotDir if h is not a directory, a permission error
+ * if the caller lacks execute permission on it, and Nfs3ErrNoEnt if
+ * no entry matches.  Directory blocks that cannot be read are skipped.
+ */
+static Nfs3Status
+ffslookup(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char *name, Nfs3Handle *nh)
+{
+	u32int nblock;
+	u32int i;
+	uchar *p, *ep;
+	Dirent *de;
+	Inode ino;
+	Block *b;
+	Ffs *fs;
+	Nfs3Status ok;
+	int len, want;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+
+	if((ino.mode&IFMT) != IFDIR)
+		return Nfs3ErrNotDir;
+
+	if((ok = inoperm(&ino, au, AEXEC)) != Nfs3Ok)
+		return ok;
+
+	len = strlen(name);
+	nblock = (ino.size+fs->blocksize-1) / fs->blocksize;
+	for(i=0; i<nblock; i++){
+		/*
+		 * Every block is full except possibly the last one.  When the
+		 * size is an exact multiple of the block size the remainder is
+		 * 0, which must mean "whole block", not "one fragment".
+		 */
+		want = fs->blocksize;
+		if(i==nblock-1 && ino.size%fs->blocksize != 0)
+			want = ino.size % fs->blocksize;
+		b = ffsfileblock(fs, &ino, i, want);
+		if(b == nil)
+			continue;
+		p = b->data;
+		ep = p+b->len;
+		while(p < ep){
+			/* need the whole fixed header before looking at reclen */
+			if(ep - p < 4+2+2)
+				break;
+			de = (Dirent*)p;
+			if(de->reclen == 0){
+				if(debug)
+					fprint(2, "reclen 0 at offset %d of %d\n", (int)(p-b->data), b->len);
+				break;
+			}
+			/* p now points at the next entry; de still points at this one */
+			p += de->reclen;
+			if(p > ep){
+				if(debug)
+					fprint(2, "bad len %d at offset %d of %d\n", de->reclen, (int)(p-b->data), b->len);
+				break;
+			}
+			if(de->ino == 0)
+				continue;
+			if(4+2+2+de->namlen > de->reclen){
+				if(debug)
+					fprint(2, "bad namelen %d at offset %d of %d\n", de->namlen, (int)(p-b->data), b->len);
+				break;
+			}
+			if(de->namlen == len && memcmp(de->name, name, len) == 0){
+				mkhandle(nh, de->ino);
+				blockput(b);
+				return Nfs3Ok;
+			}
+		}
+		blockput(b);
+	}
+	return Nfs3ErrNoEnt;
+}
+
+/*
+ * NFS readdir: pack entries of the directory named by h into a
+ * freshly malloc'ed buffer of at most count bytes, starting at
+ * byte offset cookie within the directory.
+ * On success *pdata and *pcount describe the buffer (the caller owns
+ * it) and *peof is set iff the end of the directory was reached.
+ * Each entry's cookie is the directory offset of the entry after it.
+ * Directory blocks that cannot be read are skipped.
+ */
+static Nfs3Status
+ffsreaddir(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count, u64int cookie, uchar **pdata, u32int *pcount, u1int *peof)
+{
+	u32int nblock;
+	u32int i;
+	int off, done;
+	uchar *data, *dp, *dep, *p, *ep, *ndp;
+	Dirent *de;
+	Inode ino;
+	Block *b;
+	Ffs *fs;
+	Nfs3Status ok;
+	Nfs3Entry e;
+	int want;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+
+	if((ino.mode&IFMT) != IFDIR)
+		return Nfs3ErrNotDir;
+
+	if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+		return ok;
+
+	if(cookie >= ino.size){
+		/* past the last entry: nothing to return, and say so */
+		*pcount = 0;
+		*pdata = 0;
+		*peof = 1;
+		return Nfs3Ok;
+	}
+
+	dp = malloc(count);
+	data = dp;
+	if(dp == nil)
+		return Nfs3ErrNoMem;
+	dep = dp+count;
+	*peof = 0;
+	nblock = (ino.size+fs->blocksize-1) / fs->blocksize;
+	i = cookie/fs->blocksize;
+	off = cookie%fs->blocksize;
+	done = 0;
+	for(; i<nblock && !done; i++){
+		/*
+		 * Every block is full except possibly the last one.  When the
+		 * size is an exact multiple of the block size the remainder is
+		 * 0, which must mean "whole block", not "one fragment".
+		 */
+		want = fs->blocksize;
+		if(i==nblock-1 && ino.size%fs->blocksize != 0)
+			want = ino.size % fs->blocksize;
+		b = ffsfileblock(fs, &ino, i, want);
+		if(b == nil)
+			continue;
+		p = b->data;
+		ep = p+b->len;
+		memset(&e, 0, sizeof e);
+		while(p < ep){
+			de = (Dirent*)p;
+			if(de->reclen == 0){
+				if(debug) fprint(2, "reclen 0 at offset %d of %d\n", (int)(p-b->data), b->len);
+				break;
+			}
+			/* p now points at the next entry; de still points at this one */
+			p += de->reclen;
+			if(p > ep){
+				if(debug) fprint(2, "reclen %d at offset %d of %d\n", de->reclen, (int)(p-b->data), b->len);
+				break;
+			}
+			if(de->ino == 0){
+				if(debug) fprint(2, "zero inode\n");
+				continue;
+			}
+			if(4+2+2+de->namlen > de->reclen){
+				if(debug) fprint(2, "bad namlen %d reclen %d at offset %d of %d\n", de->namlen, de->reclen, (int)(p-b->data), b->len);
+				break;
+			}
+			if(de->name[de->namlen] != 0){
+				if(debug) fprint(2, "bad name %d %.*s\n", de->namlen, de->namlen, de->name);
+				continue;
+			}
+			if(debug) print("%s/%d ", de->name, (int)de->ino);
+			/* skip entries before the resume point in the first block */
+			if((uchar*)de - b->data < off)
+				continue;
+			e.fileid = de->ino;
+			e.name = de->name;
+			e.cookie = (u64int)i*fs->blocksize + (p - b->data);
+			if(nfs3entrypack(dp, dep, &ndp, &e) < 0){
+				/* reply buffer is full; stop after releasing this block */
+				done = 1;
+				break;
+			}
+			dp = ndp;
+		}
+		off = 0;
+		blockput(b);
+	}
+	/*
+	 * Running out of buffer inside the last block also leaves
+	 * i == nblock, but entries remain, so that is not end of file.
+	 */
+	if(i==nblock && !done)
+		*peof = 1;
+
+	*pcount = dp - data;
+	*pdata = data;
+	return Nfs3Ok;
+}
+
+/*
+ * NFS read: return up to count bytes of the file named by h starting
+ * at offset, in a freshly malloc'ed buffer stored in *pdata (the
+ * caller owns it).  A single call never crosses a file system block
+ * boundary, so fewer bytes than requested may be returned.
+ * *peof is set when the returned data ends exactly at the file size.
+ */
+static Nfs3Status
+ffsreadfile(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count,
+	u64int offset, uchar **pdata, u32int *pcount, u1int *peof)
+{
+	uchar *data;
+	Block *b;
+	Ffs *fs;
+	int off, want, fragcount;
+	Inode ino;
+	Nfs3Status ok;
+
+	fs = fsys->priv;
+	if((ok = handle2ino(fs, h, nil, &ino)) != Nfs3Ok)
+		return ok;
+
+	if((ok = inoperm(&ino, au, AREAD)) != Nfs3Ok)
+		return ok;
+
+	if(offset >= ino.size){
+		*pdata = 0;
+		*pcount = 0;
+		*peof = 1;
+		return Nfs3Ok;
+	}
+	/* clamp the request to the end of the file ... */
+	if(offset+count > ino.size)
+		count = ino.size-offset;
+	/* ... and to the end of the block containing offset */
+	if(offset/fs->blocksize != (offset+count-1)/fs->blocksize)
+		count = fs->blocksize - offset%fs->blocksize;
+
+	data = malloc(count);
+	if(data == nil)
+		return Nfs3ErrNoMem;
+
+	/* bytes needed from the start of the block, rounded up to whole fragments */
+	want = offset%fs->blocksize+count;
+	if(want%fs->fragsize)
+		want += fs->fragsize - want%fs->fragsize;
+
+	b = ffsfileblock(fs, &ino, offset/fs->blocksize, want);
+	if(b == nil){
+		/* BUG: distinguish sparse file from I/O error */
+		memset(data, 0, count);
+	}else{
+		off = offset%fs->blocksize;
+		fragcount = count;	/* need signed variable */
+		/* copy no more than the block actually holds past off */
+		if(off+fragcount > b->len){
+			fragcount = b->len - off;
+			if(fragcount < 0)
+				fragcount = 0;
+		}
+		if(fragcount > 0)
+			memmove(data, b->data+off, fragcount);
+		count = fragcount;
+		blockput(b);
+	}
+	*peof = (offset+count == ino.size);
+	*pcount = count;
+	*pdata = data;
+	return Nfs3Ok;
+}
+
+/*
+ * NFS readlink: return the target of the symbolic link named by h
+ * as a freshly malloc'ed NUL-terminated string in *link.
+ * If the inode has data blocks the target is read from block 0;
+ * otherwise it is stored in the inode itself, in the space of the
+ * db and ib arrays.  Targets longer than 1024 bytes, or block-stored
+ * targets containing a NUL byte, are rejected with Nfs3ErrIo.
+ */
+static Nfs3Status
+ffsreadlink(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char **link)
+{
+	Ffs *fs;
+	Nfs3Status st;
+	int len;
+	Inode ino;
+	Block *blk;
+	void *src;
+
+	fs = fsys->priv;
+	st = handle2ino(fs, h, nil, &ino);
+	if(st != Nfs3Ok)
+		return st;
+	st = inoperm(&ino, au, AREAD);
+	if(st != Nfs3Ok)
+		return st;
+
+	if(ino.size > 1024)
+		return Nfs3ErrIo;
+	len = ino.size;
+
+	blk = nil;
+	if(ino.nblock != 0){
+		/* BUG: assumes symlink fits in one block */
+		blk = ffsfileblock(fs, &ino, 0, len);
+		if(blk == nil)
+			return Nfs3ErrIo;
+		if(memchr(blk->data, 0, len) != nil){
+			blockput(blk);
+			return Nfs3ErrIo;
+		}
+		src = blk->data;
+	}else{
+		if(len > sizeof ino.db + sizeof ino.ib)
+			return Nfs3ErrIo;
+		src = ino.db;
+	}
+
+	*link = malloc(len+1);
+	if(*link == 0){
+		if(blk != nil)
+			blockput(blk);
+		return Nfs3ErrNoMem;
+	}
+	memmove(*link, src, len);
+	(*link)[len] = 0;
+	if(blk != nil)
+		blockput(blk);
+	return Nfs3Ok;
+}
blob - /dev/null
blob + 479ff65f71bd28b3bcf35e83edde26c111893d3a (mode 644)
--- /dev/null
+++ src/libdiskfs/ffs.h
@@ -0,0 +1,281 @@
+/*
+ * An FFS file system is a sequence of cylinder groups.
+ *
+ * Each cylinder group is laid out as follows:
+ *
+ *	fs superblock (Fsblk)
+ *	cylinder group block (Cgblk)
+ *	inodes
+ *	data
+ *
+ * The location of the fs superblock in the first cylinder
+ * group is known.  The rest of the info about cylinder group
+ * layout can be derived from the super block.
+ */
+
+#define daddr_t u32int
+#define time_t u32int
+
+typedef struct Cgblk Cgblk;
+typedef struct Cylgrp Cylgrp;
+typedef struct Cylsum Cylsum;
+typedef struct Ffs Ffs;
+typedef struct Fsblk Fsblk;
+typedef struct Inode Inode;
+typedef struct Dirent Dirent;
+
+enum
+{
+	BYTESPERSEC = 512,
+
+	/* constants for Fsblk */
+	FSMAXMNTLEN = 512,
+	/*
+	 * Sized so that Fsblk.ocsp plus the three pointers declared
+	 * right after it occupy 128 bytes whatever the pointer size.
+	 */
+	FSNOCSPTRS = 128 / sizeof(void*) - 3,
+	FSMAXSNAP = 20,
+	FSMAGIC = 0x011954,
+	FSCHECKSUM = 0x7c269d38,
+	
+	/* Fsblk.inodefmt */
+	FS42INODEFMT = -1,
+	FS44INODEFMT = 2,
+
+	/* offset and size of first boot block */
+	BBOFF = 0,
+	BBSIZE = 8192,
+
+	/* offset and size of first super block */
+	/* (it starts right where the boot block ends) */
+	SBOFF = BBOFF+BBSIZE,
+	SBSIZE = 8192,
+
+	/* minimum block size */
+	MINBSIZE = 4096,
+
+	/* maximum fragments per block */
+	MAXFRAG = 8,
+
+	/* constants for Cgblk */
+	CGMAGIC = 0x090255,
+
+	/* inode-related */
+	ROOTINODE = 2,
+	WHITEOUT = 1,
+
+	/* number of direct (db) and indirect (ib) block addresses in an Inode */
+	NDADDR = 12,
+	NIADDR = 3,
+
+	/* permissions in Inode.mode */
+	IEXEC = 00100,
+	IWRITE = 0200,
+	IREAD = 0400,
+	ISVTX = 01000,
+	ISGID = 02000,
+	ISUID = 04000,
+
+	/* type in Inode.mode */
+	IFMT = 0170000,	/* mask selecting the type bits */
+	IFIFO = 0010000,
+	IFCHR = 0020000,
+	IFDIR = 0040000,
+	IFBLK = 0060000,
+	IFREG = 0100000,
+	IFLNK = 0120000,
+	IFSOCK = 0140000,
+	IFWHT = 0160000,
+
+	/* type in Dirent.type */
+	DTUNKNOWN = 0,
+	DTFIFO = 1,
+	DTCHR = 2,
+	DTDIR = 4,
+	DTBLK = 6,
+	DTREG = 8,
+	DTLNK = 10,
+	DTSOCK = 12,
+	DTWHT = 14,
+};
+
+/*
+ * Summary counters embedded in Fsblk (cstotal) and Cgblk (csum).
+ * NOTE(review): field meanings below are inferred from the names -- confirm.
+ */
+struct Cylsum
+{
+	u32int	ndir;		/* presumably number of directories */
+	u32int	nbfree;	/* presumably number of free blocks */
+	u32int	nifree;	/* presumably number of free inodes */
+	u32int	nffree;	/* presumably number of free fragments */
+};
+
+/*
+ * Super block.  ffs.c overlays this struct directly on the bytes
+ * read from disk, so field order and sizes must not change.
+ */
+struct Fsblk
+{
+	u32int	unused0;
+	u32int	unused1;
+	daddr_t	sfragno;		/* fragment address of super block in file system */
+	daddr_t	cfragno;		/* fragment address of cylinder block in file system */
+	daddr_t	ifragno;		/* fragment offset of inode blocks in file system */
+	daddr_t	dfragno;		/* fragment offset of data blocks in cg */
+	u32int	cgoffset;		/* block (maybe fragment?) offset of Cgblk in cylinder */
+	u32int	cgmask;
+	time_t	time;
+	u32int	nfrag;		/* number of blocks in fs * fragsperblock */
+	u32int	ndfrag;
+	u32int	ncg;			/* number of cylinder groups in fs */
+	u32int	blocksize;		/* block size in fs */
+	u32int	fragsize;		/* frag size in fs */
+	u32int	fragsperblock;	/* fragments per block: blocksize / fragsize */
+	u32int	minfree;		/* ignored by us */
+	u32int	rotdelay;		/* ... */
+	u32int	rps;
+	u32int	bmask;
+	u32int	fmask;
+	u32int	bshift;
+	u32int	fshift;
+	u32int	maxcontig;
+	u32int	maxbpg;
+	u32int	fragshift;
+	u32int	fsbtodbshift;
+	u32int	sbsize;		/* size of super block */
+	u32int	unused2;		/* more stuff we don't use ... */
+	u32int	unused3;
+	u32int	nindir;
+	u32int	inosperblock;	/* inodes per block */
+	u32int	nspf;
+	u32int	optim;
+	u32int	npsect;
+	u32int	interleave;
+	u32int	trackskew;
+	u32int	id[2];
+	daddr_t	csaddr;		/* blk addr of cyl grp summary area */
+	u32int	cssize;		/* size of cyl grp summary area */
+	u32int	cgsize;		/* cylinder group size */
+	u32int	trackspercyl;	/* tracks per cylinder */
+	u32int	secspertrack;	/* sectors per track */
+	u32int	secspercyl;	/* sectors per cylinder */
+	u32int	ncyl;			/* cylinders in fs */
+	u32int	cylspergroup;	/* cylinders per group */
+	u32int	inospergroup;	/* inodes per group */
+	u32int	fragspergroup;	/* data blocks per group * fragperblock */
+	Cylsum	cstotal;		/* more unused... */
+	u8int	fmod;
+	u8int	clean;
+	u8int	ronly;
+	u8int	flags;
+	char		fsmnt[FSMAXMNTLEN];
+	u32int	cgrotor;
+	/* ocsp and the next three pointers fill 128 bytes in total; see FSNOCSPTRS */
+	void*	ocsp[FSNOCSPTRS];
+	u8int*	contigdirs;
+	Cylsum*	csp;
+	u32int*	maxcluster;
+	u32int	cpc;
+	u16int	opostbl[16][8];
+	u32int	snapinum[FSMAXSNAP];
+	u32int	avgfilesize;
+	u32int	avgfpdir;
+	u32int	sparecon[26];
+	u32int	pendingblocks;
+	u32int	pendinginodes;
+	u32int	contigsumsize;
+	u32int	maxsymlinklen;
+	u32int	inodefmt;		/* format of on-disk inodes */
+	u64int	maxfilesize;	/* maximum representable file size */
+	u64int	qbmask;
+	u64int	qfmask;
+	u32int	state;
+	u32int	postblformat;
+	u32int	nrpos;
+	u32int	postbloff;
+	u32int	rotbloff;
+	u32int	magic;		/* FSMAGIC */
+};
+
+/*
+ * Cylinder group block for a file system.
+ */
+struct Cgblk
+{
+	u32int	unused0;
+	u32int	magic;		/* CGMAGIC */
+	u32int	time;			/* time last written */
+	u32int	num;		/* we are cg #cgnum */
+	u16int	ncyl;			/* number of cylinders in gp */
+	u16int	nino;		/* number of inodes */
+	u32int	nfrag;		/* number of fragments  */
+	Cylsum	csum;
+	u32int	rotor;
+	u32int	frotor;
+	u32int	irotor;
+	u32int	frsum[MAXFRAG];	/* counts of available frags */
+	u32int	btotoff;
+	u32int	boff;
+	/* the *off fields are byte offsets from the start of this struct (ffs.c adds fmapoff to the Cgblk address) */
+	u32int	imapoff;		/* offset to used inode map */
+	u32int	fmapoff;		/* offset to free fragment map */
+	u32int	nextfrag;		/* next free fragment */
+	u32int	csumoff;
+	u32int	clusteroff;
+	u32int	ncluster;
+	u32int	sparecon[13];
+};
+
+/*
+ * In-memory location of one cylinder group, computed from the
+ * super block by ffssync in ffs.c.
+ */
+struct Cylgrp
+{
+	/* these are block numbers not fragment numbers */
+	u32int	bno;			/* disk block address of start of cg */
+	u32int	ibno;			/* disk block address of first inode */
+	u32int	dbno;		/* disk block address of first data */
+	u32int	cgblkno;		/* disk block address of the group's Cgblk */
+};
+
+/*
+ * this is the on-disk structure
+ */
+struct Inode
+{
+	u16int	mode;		/* type (IFMT bits) and permission bits */
+	u16int	nlink;
+	u32int	unused;
+	u64int	size;		/* file size in bytes */
+	u32int	atime;
+	u32int	atimensec;
+	u32int	mtime;
+	u32int	mtimensec;
+	u32int	ctime;
+	u32int	ctimensec;
+	/* rdev is db[0] */
+	/* ffs.c also reads short symlink targets from db/ib when nblock is 0 */
+	u32int	db[NDADDR];	/* direct block addresses, in fragments; 0 means no block */
+	u32int	ib[NIADDR];	/* indirect block addresses, in fragments */
+	u32int	flags;
+	u32int	nblock;
+	u32int	gen;
+	u32int	uid;
+	u32int	gid;
+	u32int	spare[2];
+};
+
+/*
+ * On-disk directory entry header; ffs.c overlays it on directory
+ * block data and steps from one entry to the next by reclen.
+ */
+struct Dirent
+{
+	u32int	ino;		/* inode number; ffs.c skips entries where this is 0 */
+	u16int	reclen;	/* total length of this entry in bytes */
+	u8int	type;		/* one of the DT* constants */
+	u8int	namlen;	/* length of name, not counting the NUL that follows it */
+	char		name[1];	/* actually namlen bytes plus a terminating NUL */
+};
+
+/*
+ * main file system structure
+ */
+struct Ffs
+{
+	/* geometry, copied or derived from the super block by ffssync */
+	int		blocksize;	/* bytes per block */
+	int		nblock;	/* nfrag rounded up to whole blocks */
+	int		fragsize;	/* bytes per fragment */
+	int		fragsperblock;
+	int		inosperblock;
+	int		blockspergroup;
+	int		fragspergroup;
+	int		inospergroup;
+
+	u32int	nfrag;	/* fragment addresses at or above this are rejected */
+	u32int	ndfrag;
+
+	int		ncg;		/* number of entries in cg */
+	Cylgrp	*cg;		/* per-group addresses; allocated by ffssync, freed by ffsclose */
+
+	Disk		*disk;	/* underlying disk; not closed by ffsclose */
+};
+
blob - /dev/null
blob + b72f053daa70f6ff5170c198403c2ca088262384 (mode 644)
--- /dev/null
+++ src/libdiskfs/file.c
@@ -0,0 +1,99 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+
+/*
+ * A Disk backed by an open file descriptor.
+ * The Disk must stay the first member: the methods below cast the
+ * Disk* they are given back to a DiskFile*.
+ */
+typedef struct DiskFile DiskFile;
+struct DiskFile
+{
+	Disk disk;
+	int fd;	/* opened read-only by diskopenfile */
+};
+
+int nfilereads;
+
+/*
+ * Read up to ulen bytes from fd at offset into vdata, repeating
+ * short reads until the request is satisfied, end of file is hit
+ * or pread fails.  Returns the number of bytes actually read.
+ * Each call is counted in nfilereads.
+ */
+static long
+preadn(int fd, void *vdata, u32int ulen, u64int offset)
+{
+	uchar *p, *start;
+	long left, got;
+
+	nfilereads++;
+	start = vdata;
+	p = start;
+	for(left = ulen; left > 0; left -= got){
+		got = pread(fd, p, left, offset);
+		if(got <= 0)
+			break;
+		p += got;
+		offset += got;
+	}
+	return p - start;
+}
+
+/*
+ * Release a block made by diskfileread.  The data lives in the same
+ * allocation as the Block header, so a single free suffices.
+ */
+static void
+diskfileblockput(Block *b)
+{
+	free(b);
+}
+
+uvlong nreadx;
+/*
+ * Disk read method for file-backed disks: read up to len bytes at
+ * offset into a new Block whose data follows the header in the same
+ * allocation.  Returns nil if allocation fails or nothing could be
+ * read; a short read yields a Block with a smaller len.
+ * The requested length is accumulated in nreadx.
+ */
+static Block*
+diskfileread(Disk *dd, u32int len, u64int offset)
+{
+	DiskFile *df;
+	Block *blk;
+	int got;
+
+	df = (DiskFile*)dd;
+	blk = mallocz(sizeof(Block)+len, 1);
+	if(blk == nil)
+		return nil;
+	blk->data = (uchar*)(blk+1);
+	nreadx += len;
+	got = preadn(df->fd, blk->data, len, offset);
+	if(got > 0){
+		blk->_close = diskfileblockput;
+		blk->len = got;
+		return blk;
+	}
+	free(blk);
+	return nil;
+}
+
+/*
+ * Disk sync method for file-backed disks: does nothing and
+ * always reports success.
+ */
+static int
+diskfilesync(Disk *dd)
+{
+	USED(dd);
+	return 0;
+}
+
+/*
+ * Disk close method for file-backed disks: close the file
+ * descriptor and free the DiskFile.
+ */
+static void
+diskfileclose(Disk *dd)
+{
+	DiskFile *d = (DiskFile*)dd;
+
+	close(d->fd);
+	free(d);
+}
+
+/*
+ * Open file read-only and wrap it in a Disk.
+ * Returns nil if the file cannot be opened or memory runs out.
+ */
+Disk*
+diskopenfile(char *file)
+{
+	DiskFile *df;
+	int fd;
+
+	fd = open(file, OREAD);
+	if(fd < 0)
+		return nil;
+
+	df = mallocz(sizeof *df, 1);
+	if(df != nil){
+		df->fd = fd;
+		df->disk._read = diskfileread;
+		df->disk._sync = diskfilesync;
+		df->disk._close = diskfileclose;
+		return &df->disk;
+	}
+
+	close(fd);
+	return nil;
+}
blob - /dev/null
blob + 3875e50b9a3e08567dfad8ba3564bce34a54ec05 (mode 644)
--- /dev/null
+++ src/libdiskfs/fsys.c
@@ -0,0 +1,114 @@
+#include <u.h>
+#include <libc.h>
+#include <thread.h>
+#include <sunrpc.h>
+#include <nfs3.h>
+#include <diskfs.h>
+
+int allowall;
+
+/*
+ * File system openers, tried by fsysopen in this order;
+ * the first one that returns non-nil wins.
+ */
+static Fsys *(*opentab[])(Disk*) =
+{
+	fsysopenffs,
+	fsysopenkfs,
+	fsysopenext2,
+	fsysopenfat,
+};
+
+/*
+ * Identify the file system on disk by trying each opener in
+ * opentab in turn.  Returns the first Fsys obtained, or nil if no
+ * opener recognizes the disk.
+ */
+Fsys*
+fsysopen(Disk *disk)
+{
+	int i;
+	Fsys *fsys;
+
+	i = 0;
+	while(i < nelem(opentab)){
+		fsys = (*opentab[i])(disk);
+		if(fsys != nil)
+			return fsys;
+		i++;
+	}
+	return nil;
+}
+
+/*
+ * Read block blockno through the file system's _readblock method.
+ * Returns nil with the error string set if no method is installed.
+ */
+Block*
+fsysreadblock(Fsys *fsys, u64int blockno)
+{
+	if(fsys->_readblock != nil)
+		return (*fsys->_readblock)(fsys, blockno);
+	werrstr("no read dispatch function");
+	return nil;
+}
+
+/*
+ * Sync the underlying disk, then the file system's own state if it
+ * has a _sync method.  Returns -1 if the disk sync fails, otherwise
+ * the method's result (0 when there is no method).
+ */
+int
+fsyssync(Fsys *fsys)
+{
+	if(disksync(fsys->disk) < 0)
+		return -1;
+	return fsys->_sync ? (*fsys->_sync)(fsys) : 0;
+}
+
+/*
+ * Close the file system through its _close method.
+ * A missing method is treated as a programming error: a message is
+ * printed and the program aborts.
+ */
+void
+fsysclose(Fsys *fsys)
+{
+	if(fsys->_close){
+		(*fsys->_close)(fsys);
+		return;
+	}
+	fprint(2, "no fsysClose\n");
+	abort();
+}
+
+/*
+ * Fetch the root handle via the _root method;
+ * Nfs3ErrNxio if the file system does not provide one.
+ */
+Nfs3Status
+fsysroot(Fsys *fsys, Nfs3Handle *h)
+{
+	if(fsys->_root)
+		return (*fsys->_root)(fsys, h);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Look up name in directory h via the _lookup method;
+ * Nfs3ErrNxio if the file system does not provide one.
+ */
+Nfs3Status
+fsyslookup(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char *name, Nfs3Handle *nh)
+{
+	if(fsys->_lookup)
+		return (*fsys->_lookup)(fsys, au, h, name, nh);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Fetch attributes of h via the _getattr method;
+ * Nfs3ErrNxio if the file system does not provide one.
+ */
+Nfs3Status
+fsysgetattr(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, Nfs3Attr *attr)
+{
+	if(fsys->_getattr)
+		return (*fsys->_getattr)(fsys, au, h, attr);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Read directory entries of h via the _readdir method;
+ * Nfs3ErrNxio if the file system does not provide one.
+ */
+Nfs3Status
+fsysreaddir(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count, u64int cookie, uchar **e, u32int *ne, u1int *peof)
+{
+	if(fsys->_readdir)
+		return (*fsys->_readdir)(fsys, au, h, count, cookie, e, ne, peof);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Read file data of h via the _readfile method;
+ * Nfs3ErrNxio if the file system does not provide one.
+ */
+Nfs3Status
+fsysreadfile(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int count, u64int offset, uchar **data, u32int *pcount, uchar *peof)
+{
+	if(fsys->_readfile)
+		return (*fsys->_readfile)(fsys, au, h, count, offset, data, pcount, peof);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Read the target of symlink h via the _readlink method;
+ * Nfs3ErrNxio if the file system does not provide one.
+ */
+Nfs3Status
+fsysreadlink(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, char **plink)
+{
+	if(fsys->_readlink)
+		return (*fsys->_readlink)(fsys, au, h, plink);
+	return Nfs3ErrNxio;
+}
+
+/*
+ * Check access rights on h via the _access method;
+ * Nfs3ErrNxio if the file system does not provide one.
+ */
+Nfs3Status
+fsysaccess(Fsys *fsys, SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr)
+{
+	if(fsys->_access)
+		return (*fsys->_access)(fsys, au, h, want, got, attr);
+	return Nfs3ErrNxio;
+}
blob - /dev/null
blob + 51d7c5e30b3ed61e35e17a56bceee1e1747a6953 (mode 644)
--- /dev/null
+++ src/libdiskfs/kfs.c
@@ -0,0 +1,11 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+
+/*
+ * Placeholder for kfs support: ignores disk and always returns nil,
+ * i.e. it never recognizes a file system.
+ */
+Fsys*
+fsysopenkfs(Disk *disk)
+{
+	USED(disk);
+	return nil;
+}
+
blob - /dev/null
blob + 61538da1bcc56a69bc02c7aa2e81eae96c220842 (mode 644)
--- /dev/null
+++ src/libdiskfs/mkfile
@@ -0,0 +1,31 @@
+<$PLAN9/src/mkhdr
+
+LIB=libdiskfs.a
+
+FSOFILES=\
+	ext2.$O\
+	fat.$O\
+	ffs.$O\
+	kfs.$O\
+
+DISKOFILES=\
+	cache.$O\
+	file.$O\
+	venti.$O\
+
+OFILES=\
+	block.$O\
+	disk.$O\
+	fsys.$O\
+	$DISKOFILES\
+	$FSOFILES\
+
+HFILES=\
+	fs.h
+
+<$PLAN9/src/mksyslib
+
+CFLAGS=$CFLAGS
+
+%.acid: %.$O %.c
+	$CC $CFLAGS -a $stem.c >$stem.acid
blob - /dev/null
blob + ba314388d8ab6a7fb4030b91d87a9ecf6d48e758 (mode 644)
--- /dev/null
+++ src/libdiskfs/venti.c
@@ -0,0 +1,163 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+#include <venti.h>
+
+extern void vtlibthread(void);
+
+/*
+ * A Disk whose contents are a single venti file.
+ * The Disk must stay the first member: the methods below cast the
+ * Disk* they are given back to a DiskVenti*.
+ */
+typedef struct DiskVenti DiskVenti;
+struct DiskVenti
+{
+	Disk disk;
+	VtEntry e;	/* entry describing the file; copied in by diskopenventi */
+	VtCache *c;	/* cache used for all block fetches; not freed by diskventiclose */
+};
+
+/* read counter; the one definition is in file.c, a second here breaks -fno-common links */
+extern int nfilereads;
+
+/*
+ * This part is like file.c but doesn't require storing the root block
+ * in the cache permanently and doesn't care about locking since
+ * all the blocks are read-only.  Perhaps at some point this functionality
+ * should go into libvac in some form.
+ */
+/*
+ * Split block number bn into per-level pointer indices.
+ * index[0] receives the index within the lowest-level pointer block,
+ * index[1] the index one level up, and so on; unused levels are
+ * zero.  index must have room for at least VtPointerDepth entries.
+ * Returns the number of levels needed to address bn (0 for bn == 0),
+ * or -1 with the error string set if bn cannot be addressed.
+ */
+static int
+vtfileindices(VtEntry *e, u32int bn, int *index)
+{
+	int i, np;
+
+	/* index is a pointer here, so its size must be spelled out */
+	memset(index, 0, VtPointerDepth*sizeof(int));
+
+	np = e->psize/VtScoreSize;
+	if(np <= 0){
+		/* a pointer block too small for one score would divide by zero below */
+		werrstr("bad pointer block size %d", e->psize);
+		return -1;
+	}
+	for(i=0; bn > 0; i++){
+		if(i >= VtPointerDepth){
+			werrstr("bad block number %lud", (ulong)bn);
+			return -1;
+		}
+		index[i] = bn % np;
+		bn /= np;
+	}
+	return i;
+}
+
+/*
+ * Fetch data block number bn of the venti file described by e,
+ * walking down from the file's root pointer block.
+ * Returns the block (the caller must vtblockput it) or nil on error.
+ */
+static VtBlock*
+_vtfileblock(VtCache *c, VtEntry *e, u32int bn)
+{
+	VtBlock *b, *bb;
+	int i, d, index[VtPointerDepth+1], t;
+
+	i = vtfileindices(e, bn, index);
+	if(i < 0)
+		return nil;
+	/* d is the depth of the file's pointer tree; bn must not need more levels than that */
+	d = (e->type&VtTypeDepthMask);
+	if(i > d){
+		werrstr("bad address %d > %d (%x %x)", i, d, e->type, e->flags);
+		return nil;
+	}
+
+//fprint(2, "vtread %V\n", e->score);
+	b = vtcacheglobal(c, e->score, e->type);
+	if(b == nil)
+		return nil;
+
+	/* descend one level per iteration; each parent is released once its child has been fetched */
+	for(i=d-1; i>=0; i--){
+		t = VtDataType+i;
+//fprint(2, "vtread %V\n", b->data+index[i]*VtScoreSize);
+		bb = vtcacheglobal(c, b->data+index[i]*VtScoreSize, t);
+		vtblockput(b);
+		if(bb == nil)
+			return nil;
+		b = bb;
+	}
+	return b;
+}
+
+/*
+ * Release a block made by diskventiread: drop the reference to the
+ * underlying venti block (kept in b->priv), then free the header.
+ */
+static void
+diskventiblockput(Block *b)
+{
+	vtblockput(b->priv);
+	free(b);
+}
+
+/*
+ * Disk read method for venti-backed disks: return up to len bytes
+ * starting at offset.  The result points into the venti block that
+ * contains offset, so a read never crosses a venti block boundary
+ * and may be shorter than len.  Returns nil if the block cannot be
+ * fetched or memory runs out.  Each call is counted in nfilereads.
+ */
+static Block*
+diskventiread(Disk *dd, u32int len, u64int offset)
+{
+	DiskVenti *dv;
+	VtBlock *vb;
+	Block *blk;
+	int skip;
+
+	dv = (DiskVenti*)dd;
+	nfilereads++;
+	vb = _vtfileblock(dv->c, &dv->e, offset/dv->e.dsize);
+	if(vb == nil)
+		return nil;
+
+	blk = mallocz(sizeof(Block), 1);
+	if(blk == nil){
+		vtblockput(vb);
+		return nil;
+	}
+
+	/* skip is where offset falls inside the venti block */
+	skip = offset%dv->e.dsize;
+	blk->priv = vb;
+	blk->_close = diskventiblockput;
+	blk->data = (uchar*)vb->data + skip;
+	blk->len = dv->e.dsize - skip;
+	if(blk->len > len)
+		blk->len = len;
+	return blk;
+}
+
+/*
+ * Disk close method for venti-backed disks: free the DiskVenti.
+ * The VtCache it points to is left alone.
+ */
+static void
+diskventiclose(Disk *dd)
+{
+	DiskVenti *d = (DiskVenti*)dd;
+	free(d);
+}
+
+/*
+ * Open the venti archive rooted at score as a Disk.
+ * The root block is fetched and validated, then the first entry of
+ * the directory block it points to is taken as the disk image; that
+ * entry must describe a plain data file.
+ * Returns nil on any failure.  No _sync method is installed.
+ */
+Disk*
+diskopenventi(VtCache *c, uchar score[VtScoreSize])
+{
+	DiskVenti *d;
+	VtEntry e;
+	VtRoot root;
+	VtBlock *b;
+
+	if((b = vtcacheglobal(c, score, VtRootType)) == nil)
+		goto Err;
+	if(vtrootunpack(&root, b->data) < 0)
+		goto Err;
+	/* block size must be at least 512 and a power of two */
+	if(root.blocksize < 512 || (root.blocksize&(root.blocksize-1))){
+		werrstr("bad blocksize %d", root.blocksize);
+		goto Err;
+	}
+	vtblockput(b);
+
+	if((b = vtcacheglobal(c, root.score, VtDirType)) == nil)
+		goto Err;
+	if(vtentryunpack(&e, b->data, 0) < 0)
+		goto Err;
+	vtblockput(b);
+	/* b has been released; clear it so the Err path does not put it again */
+	b = nil;
+	if((e.type&VtTypeBaseMask) != VtDataType){
+		werrstr("not a single file");
+		goto Err;
+	}
+
+	d = mallocz(sizeof(DiskVenti), 1);
+	if(d == nil)
+		goto Err;
+
+	d->disk._read = diskventiread;
+	d->disk._close = diskventiclose;
+	d->e = e;
+	d->c = c;
+	return &d->disk;
+
+Err:
+	if(b)
+		vtblockput(b);
+	return nil;
+}
+
blob - /dev/null
blob + 72ca0446c71c73aff19c60236211b6e2613321ec (mode 644)
--- /dev/null
+++ src/libdiskfs/vfile.c
@@ -0,0 +1,35 @@
+#include <u.h>
+#include <libc.h>
+#include <diskfs.h>
+#include <venti.h>
+
+extern void vtLibThread(void);
+
+typedef struct DiskVenti DiskVenti;
+struct DiskVenti
+{
+	TvCache *c;
+	Entry e;
+};
+
+/*
+ * Unfinished sketch: the body below is an outline in plain words,
+ * not C, and does not compile.
+ */
+Disk*
+diskOpenVenti(TvCache *c, uchar score[VtScoreSize])
+{
+	vtLibThread();
+
+	fetch vtroot
+	fetch dir block
+	copy e
+}
+
+/*
+ * Unfinished sketch: the body below is an outline in plain words,
+ * not C, and does not compile.
+ */
+Block*
+diskVentiRead(Disk *dd, u32int len, u64int offset)
+{
+	DiskVenti *d = (DiskVenti*)dd;
+
+	make offset list
+	walk down blocks
+	return the one
+}
+
+