Blob


1 /*
2 * TO DO:
3 * - gc of file systems (not going to do just yet?)
4 * - statistics file
5 * - configure on amsterdam
6 */
8 #include <u.h>
9 #include <libc.h>
10 #include <bio.h>
11 #include <ip.h>
12 #include <thread.h>
13 #include <libsec.h>
14 #include <sunrpc.h>
15 #include <nfs3.h>
16 #include <diskfs.h>
17 #include <venti.h>
18 #include "nfs3srv.h"
/*
 * trace(fmt, ...) formats like print() but only produces output when
 * tracecalls is nonzero.  The if/else shape lets it be used as an
 * ordinary statement followed by a semicolon.
 */
#define trace if(!tracecalls){}else print

typedef struct Cnode Cnode;
typedef struct Ctree Ctree;
typedef struct Config Config;
typedef struct Ipokay Ipokay;
27 struct Ipokay
28 {
29 int okay;
30 uchar ip[IPaddrlen];
31 uchar mask[IPaddrlen];
32 };
34 struct Config
35 {
36 Ipokay *ok;
37 uint nok;
38 ulong mtime;
39 Ctree *ctree;
40 };
42 char *addr;
43 int blocksize;
44 int cachesize;
45 Config config;
46 char *configfile;
47 int encryptedhandles = 1;
48 Channel *nfschan;
49 Channel *mountchan;
50 Channel *timerchan;
51 Nfs3Handle root;
52 SunSrv *srv;
53 int tracecalls;
54 VtCache *vcache;
55 VtConn *z;
57 void cryptinit(void);
58 void timerthread(void*);
59 void timerproc(void*);
61 extern void handleunparse(Fsys*, Nfs3Handle*, Nfs3Handle*, int);
62 extern Nfs3Status handleparse(Nfs3Handle*, Fsys**, Nfs3Handle*, int);
64 Nfs3Status logread(Cnode*, u32int, u64int, uchar**, u32int*, u1int*);
65 Nfs3Status refreshdiskread(Cnode*, u32int, u64int, uchar**, u32int*, u1int*);
66 Nfs3Status refreshconfigread(Cnode*, u32int, u64int, uchar**, u32int*, u1int*);
68 int readconfigfile(Config *cp);
69 void setrootfid(void);
70 int ipokay(uchar *ip, ushort port);
72 u64int unittoull(char*);
/*
 * Print the command-line synopsis on standard error and terminate
 * all threads with exit status "usage".
 *
 * The flag list names every option threadmain accepts, including
 * -E (disable handle encryption) and -T (trace calls).
 */
void
usage(void)
{
	fprint(2, "usage: vnfs [-ELLRTVir] [-a addr] [-b blocksize] [-c cachesize] configfile\n");
	threadexitsall("usage");
}
81 void
82 threadmain(int argc, char **argv)
83 {
84 fmtinstall('B', sunrpcfmt);
85 fmtinstall('C', suncallfmt);
86 fmtinstall('F', vtfcallfmt);
87 fmtinstall('H', encodefmt);
88 fmtinstall('I', eipfmt);
89 fmtinstall('V', vtscorefmt);
90 sunfmtinstall(&nfs3prog);
91 sunfmtinstall(&nfsmount3prog);
93 addr = "udp!*!2049";
94 blocksize = 8192;
95 cachesize = 400;
96 srv = sunsrv();
97 srv->ipokay = ipokay;
98 cryptinit();
100 ARGBEGIN{
101 default:
102 usage();
103 case 'E':
104 encryptedhandles = 0;
105 break;
106 case 'L':
107 if(srv->localonly == 0)
108 srv->localonly = 1;
109 else
110 srv->localparanoia = 1;
111 break;
112 case 'R':
113 srv->chatty++;
114 break;
115 case 'T':
116 tracecalls = 1;
117 break;
118 case 'V':
119 if(chattyventi++)
120 vttracelevel++;
121 break;
122 case 'a':
123 addr = EARGF(usage());
124 break;
125 case 'b':
126 blocksize = unittoull(EARGF(usage()));
127 break;
128 case 'c':
129 cachesize = unittoull(EARGF(usage()));
130 break;
131 case 'i':
132 insecure = 1;
133 break;
134 case 'r':
135 srv->alwaysreject++;
136 break;
137 }ARGEND
139 if(argc != 1)
140 usage();
142 if((z = vtdial(nil)) == nil)
143 sysfatal("vtdial: %r");
144 if(vtconnect(z) < 0)
145 sysfatal("vtconnect: %r");
146 if((vcache = vtcachealloc(z, blocksize*cachesize)) == nil)
147 sysfatal("vtcache: %r");
149 configfile = argv[0];
150 if(readconfigfile(&config) < 0)
151 sysfatal("readConfig: %r");
152 setrootfid();
154 nfschan = chancreate(sizeof(SunMsg*), 0);
155 mountchan = chancreate(sizeof(SunMsg*), 0);
156 timerchan = chancreate(sizeof(void*), 0);
158 if(sunsrvudp(srv, addr) < 0)
159 sysfatal("starting server: %r");
161 sunsrvthreadcreate(srv, nfs3proc, nfschan);
162 sunsrvthreadcreate(srv, mount3proc, mountchan);
163 sunsrvthreadcreate(srv, timerthread, nil);
164 proccreate(timerproc, nil, 32768);
166 sunsrvprog(srv, &nfs3prog, nfschan);
167 sunsrvprog(srv, &nfsmount3prog, mountchan);
169 threadexits(nil);
172 #define TWID64 ((u64int)~(u64int)0)
174 u64int
175 unittoull(char *s)
177 char *es;
178 u64int n;
180 if(s == nil)
181 return TWID64;
182 n = strtoul(s, &es, 0);
183 if(*es == 'k' || *es == 'K'){
184 n *= 1024;
185 es++;
186 }else if(*es == 'm' || *es == 'M'){
187 n *= 1024*1024;
188 es++;
189 }else if(*es == 'g' || *es == 'G'){
190 n *= 1024*1024*1024;
191 es++;
192 }else if(*es == 't' || *es == 'T'){
193 n *= 1024*1024;
194 n *= 1024*1024;
196 if(*es != '\0')
197 return TWID64;
198 return n;
201 /*
202 * Handles.
204 * We store all the state about which file a client is accessing in
205 * the handle, so that we don't have to maintain any per-client state
206 * ourselves. In order to avoid leaking handles or letting clients
207 * create arbitrary handles, we sign and encrypt each handle with
208 * AES using a key selected randomly when the server starts.
209 * Thus, handles cannot be used across sessions.
211 * The decrypted handles begin with the following header:
213 * sessid[8] random session id chosen at start time
214 * len[4] length of handle that follows
216 * If we're pressed for space in the rest of the handle, we could
217 * probably reduce the amount of sessid bytes. Note that the sessid
218 * bytes must be consistent during a run of vnfs, or else some
219 * clients (e.g., Linux 2.4) eventually notice that successive TLookups
220 * return different handles, and they return "Stale NFS file handle"
221 * errors to system calls in response (even though we never sent
222 * that error!).
224 * Security woes aside, the fact that we have to shove everything
225 * into the handles is quite annoying. We have to encode, in 40 bytes:
227 * - position in the synthesized config tree
228 * - enough of the path to do glob matching
229 * - position in an archived file system image
231 * and the handles need to be stable across changes in the config file
232 * (though not across server restarts since encryption screws
233 * that up nicely).
235 * We encode each of the first two as a 10-byte hash that is
236 * the first half of a SHA1 hash.
237 */
239 enum
241 SessidSize = 8,
242 HeaderSize = SessidSize+4,
243 MaxHandleSize = Nfs3MaxHandleSize - HeaderSize
244 };
246 AESstate aesstate;
247 uchar sessid[SessidSize];
249 static void
250 hencrypt(Nfs3Handle *h)
252 uchar *p;
253 AESstate aes;
255 /*
256 * root handle has special encryption - a single 0 byte - so that it
257 * never goes stale.
258 */
259 if(h->len == root.len && memcmp(h->h, root.h, root.len) == 0){
260 h->h[0] = 0;
261 h->len = 1;
262 return;
265 if(!encryptedhandles)
266 return;
268 if(h->len > MaxHandleSize){
269 /* oops */
270 fprint(2, "handle too long: %.*lH\n", h->len, h->h);
271 memset(h->h, 'X', Nfs3MaxHandleSize);
272 h->len = Nfs3MaxHandleSize;
273 return;
276 p = h->h;
277 memmove(p+HeaderSize, p, h->len);
278 memmove(p, sessid, SessidSize);
279 *(u32int*)(p+SessidSize) = h->len;
280 h->len += HeaderSize;
282 if(encryptedhandles){
283 while(h->len < MaxHandleSize)
284 h->h[h->len++] = 0;
285 aes = aesstate;
286 aesCBCencrypt(h->h, MaxHandleSize, &aes);
290 static Nfs3Status
291 hdecrypt(Nfs3Handle *h)
293 AESstate aes;
295 if(h->len == 1 && h->h[0] == 0){ /* single 0 byte is root */
296 *h = root;
297 return Nfs3Ok;
300 if(!encryptedhandles)
301 return Nfs3Ok;
303 if(h->len <= HeaderSize)
304 return Nfs3ErrBadHandle;
305 if(encryptedhandles){
306 if(h->len != MaxHandleSize)
307 return Nfs3ErrBadHandle;
308 aes = aesstate;
309 aesCBCdecrypt(h->h, h->len, &aes);
311 if(memcmp(h->h, sessid, SessidSize) != 0)
312 return Nfs3ErrStale; /* give benefit of doubt */
313 h->len = *(u32int*)(h->h+SessidSize);
314 if(h->len >= MaxHandleSize-HeaderSize)
315 return Nfs3ErrBadHandle;
316 memmove(h->h, h->h+HeaderSize, h->len);
317 return Nfs3Ok;
320 void
321 cryptinit(void)
323 uchar key[32], ivec[AESbsize];
324 int i;
325 u32int u32;
327 u32 = truerand();
328 memmove(sessid, &u32, 4);
329 for(i=0; i<nelem(key); i+=4) {
330 u32 = truerand();
331 memmove(key+i, &u32, 4);
333 for(i=0; i<nelem(ivec); i++)
334 ivec[i] = fastrand();
335 setupAESstate(&aesstate, key, sizeof key, ivec);
338 /*
339 * Config file.
341 * The main purpose of the configuration file is to define a tree
342 * in which the archived file system images are mounted.
343 * The tree is stored as Entry structures, defined below.
345 * The configuration file also allows one to define shell-like
346 * glob expressions matching paths that are not to be displayed.
347 * The matched files or directories are shown in directory listings
348 * (could suppress these if we cared) but they cannot be opened,
349 * read, or written, and getattr returns zeroed data.
350 */
enum
{
	/* sizes used in handles; see nfs server below */
	CnodeHandleSize = 8,			/* bytes of Cnode.handle stored in a handle */
	FsysHandleOffset = CnodeHandleSize	/* where the fsys handle starts */
};
358 /*
359 * Config file tree.
360 */
361 struct Ctree
363 Cnode *root;
364 Cnode *hash[1024];
365 };
367 struct Cnode
369 char *name; /* path element */
370 Cnode *parent; /* in tree */
371 Cnode *nextsib; /* in tree */
372 Cnode *kidlist; /* in tree */
373 Cnode *nexthash; /* in hash list */
375 Nfs3Status (*read)(Cnode*, u32int, u64int, uchar**, u32int*, u1int*); /* synthesized read fn */
377 uchar handle[VtScoreSize]; /* sha1(path to here) */
378 ulong mtime; /* mtime for this directory entry */
380 /* fsys overlay on this node */
381 Fsys *fsys; /* cache of memory structure */
382 Nfs3Handle fsyshandle;
383 int isblackhole; /* walking down keeps you here */
385 /*
386 * mount point info.
387 * if a mount point is inside another file system,
388 * the fsys and fsyshandle above have the old fs info,
389 * the mfsys and mfsyshandle below have the new one.
390 * getattrs must use the old info for consistency.
391 */
392 int ismtpt; /* whether there is an fsys mounted here */
393 uchar fsysscore[VtScoreSize]; /* score of fsys image on venti */
394 char *fsysimage; /* raw disk image */
395 Fsys *mfsys; /* mounted file system (nil until walked) */
396 Nfs3Handle mfsyshandle; /* handle to root of mounted fsys */
398 int mark; /* gc */
399 };
401 static uint
402 dumbhash(uchar *s)
404 return (s[0]<<2)|(s[1]>>6); /* first 10 bits */
407 static Cnode*
408 mkcnode(Ctree *t, Cnode *parent, char *elem, uint elen, char *path, uint plen)
410 uint h;
411 Cnode *n;
413 n = emalloc(sizeof *n + elen+1);
414 n->name = (char*)(n+1);
415 memmove(n->name, elem, elen);
416 n->name[elen] = 0;
417 n->parent = parent;
418 if(parent){
419 n->nextsib = parent->kidlist;
420 parent->kidlist = n;
422 n->kidlist = nil;
423 sha1((uchar*)path, plen, n->handle, nil);
424 h = dumbhash(n->handle);
425 n->nexthash = t->hash[h];
426 t->hash[h] = n;
428 return n;
431 void
432 markctree(Ctree *t)
434 int i;
435 Cnode *n;
437 for(i=0; i<nelem(t->hash); i++)
438 for(n=t->hash[i]; n; n=n->nexthash)
439 if(n->name[0] != '+')
440 n->mark = 1;
443 int
444 refreshdisk(void)
446 int i;
447 Cnode *n;
448 Ctree *t;
450 t = config.ctree;
451 for(i=0; i<nelem(t->hash); i++)
452 for(n=t->hash[i]; n; n=n->nexthash){
453 if(n->mfsys)
454 disksync(n->mfsys->disk);
455 if(n->fsys)
456 disksync(n->fsys->disk);
458 return 0;
461 void
462 sweepctree(Ctree *t)
464 int i;
465 Cnode *n;
467 /* just zero all the garbage and leave it linked into the tree */
468 for(i=0; i<nelem(t->hash); i++){
469 for(n=t->hash[i]; n; n=n->nexthash){
470 if(!n->mark)
471 continue;
472 n->fsys = nil;
473 free(n->fsysimage);
474 n->fsysimage = nil;
475 memset(n->fsysscore, 0, sizeof n->fsysscore);
476 n->mfsys = nil;
477 n->ismtpt = 0;
478 memset(&n->fsyshandle, 0, sizeof n->fsyshandle);
479 memset(&n->mfsyshandle, 0, sizeof n->mfsyshandle);
484 static Cnode*
485 cnodewalk(Cnode *n, char *name, uint len, int markokay)
487 Cnode *nn;
489 for(nn=n->kidlist; nn; nn=nn->nextsib)
490 if(strncmp(nn->name, name, len) == 0 && nn->name[len] == 0)
491 if(!nn->mark || markokay)
492 return nn;
493 return nil;
496 Cnode*
497 ctreewalkpath(Ctree *t, char *name, ulong createmtime)
499 Cnode *n, *nn;
500 char *p, *nextp;
502 n = t->root;
503 p = name;
504 for(; *p; p=nextp){
505 n->mark = 0;
506 assert(*p == '/');
507 p++;
508 nextp = strchr(p, '/');
509 if(nextp == nil)
510 nextp = p+strlen(p);
511 if((nn = cnodewalk(n, p, nextp-p, 1)) == nil){
512 if(createmtime == 0)
513 return nil;
514 nn = mkcnode(t, n, p, nextp-p, name, nextp-name);
515 nn->mtime = createmtime;
517 if(nn->mark)
518 nn->mark = 0;
519 n = nn;
521 n->mark = 0;
522 return n;
525 Ctree*
526 mkctree(void)
528 Ctree *t;
530 t = emalloc(sizeof *t);
531 t->root = mkcnode(t, nil, "", 0, "", 0);
533 ctreewalkpath(t, "/+log", time(0))->read = logread;
534 ctreewalkpath(t, "/+refreshdisk", time(0))->read = refreshdiskread;
535 ctreewalkpath(t, "/+refreshconfig", time(0))->read = refreshconfigread;
537 return t;
540 Cnode*
541 ctreemountfsys(Ctree *t, char *path, ulong time, uchar *score, char *file)
543 Cnode *n;
545 if(time == 0)
546 time = 1;
547 n = ctreewalkpath(t, path, time);
548 if(score){
549 if(n->ismtpt && (n->fsysimage || memcmp(n->fsysscore, score, VtScoreSize) != 0)){
550 free(n->fsysimage);
551 n->fsysimage = nil;
552 n->fsys = nil; /* leak (might be other refs) */
554 memmove(n->fsysscore, score, VtScoreSize);
555 }else{
556 if(n->ismtpt && (n->fsysimage==nil || strcmp(n->fsysimage, file) != 0)){
557 free(n->fsysimage);
558 n->fsysimage = nil;
559 n->fsys = nil; /* leak (might be other refs) */
561 n->fsysimage = emalloc(strlen(file)+1);
562 strcpy(n->fsysimage, file);
564 n->ismtpt = 1;
565 return n;
568 Cnode*
569 cnodebyhandle(Ctree *t, uchar *p)
571 int h;
572 Cnode *n;
574 h = dumbhash(p);
575 for(n=t->hash[h]; n; n=n->nexthash)
576 if(memcmp(n->handle, p, CnodeHandleSize) == 0)
577 return n;
578 return nil;
581 static int
582 parseipandmask(char *s, uchar *ip, uchar *mask)
584 char *p, *q;
586 p = strchr(s, '/');
587 if(p)
588 *p++ = 0;
589 if(parseip(ip, s) == ~0UL)
590 return -1;
591 if(p == nil)
592 memset(mask, 0xFF, IPaddrlen);
593 else{
594 if(isdigit((uchar)*p) && strtol(p, &q, 10)>=0 && *q==0)
595 *--p = '/';
596 if(parseipmask(mask, p) == ~0UL)
597 return -1;
598 if(*p != '/')
599 *--p = '/';
601 /*fprint(2, "parseipandmask %s => %I %I\n", s, ip, mask); */
602 return 0;
605 static int
606 parsetime(char *s, ulong *time)
608 ulong x;
609 char *p;
610 int i;
611 Tm tm;
613 /* decimal integer is seconds since 1970 */
614 x = strtoul(s, &p, 10);
615 if(x > 0 && *p == 0){
616 *time = x;
617 return 0;
620 /* otherwise expect yyyy/mmdd/hhmm */
621 if(strlen(s) != 14 || s[4] != '/' || s[9] != '/')
622 return -1;
623 for(i=0; i<4; i++)
624 if(!isdigit((uchar)s[i]) || !isdigit((uchar)s[i+5]) || !isdigit((uchar)s[i+10]))
625 return -1;
626 memset(&tm, 0, sizeof tm);
627 tm.year = atoi(s)-1900;
628 if(tm.year < 0 || tm.year > 200)
629 return -1;
630 tm.mon = (s[5]-'0')*10+s[6]-'0' - 1;
631 if(tm.mon < 0 || tm.mon > 11)
632 return -1;
633 tm.mday = (s[7]-'0')*10+s[8]-'0';
634 if(tm.mday < 0 || tm.mday > 31)
635 return -1;
636 tm.hour = (s[10]-'0')*10+s[11]-'0';
637 if(tm.hour < 0 || tm.hour > 23)
638 return -1;
639 tm.min = (s[12]-'0')*10+s[13]-'0';
640 if(tm.min < 0 || tm.min > 59)
641 return -1;
642 strcpy(tm.zone, "XXX"); /* anything but GMT */
643 if(0){
644 print("tm2sec %d/%d/%d/%d/%d\n",
645 tm.year, tm.mon, tm.mday, tm.hour, tm.min);
647 *time = tm2sec(&tm);
648 if(0) print("time %lud\n", *time);
649 return 0;
653 int
654 readconfigfile(Config *cp)
656 char *f[10], *image, *p, *pref, *q, *name;
657 int nf, line;
658 uchar scorebuf[VtScoreSize], *score;
659 ulong time;
660 Biobuf *b;
661 Config c;
662 Dir *dir;
664 name = configfile;
665 c = *cp;
666 if((dir = dirstat(name)) == nil)
667 return -1;
668 if(c.mtime == dir->mtime){
669 free(dir);
670 return 0;
672 c.mtime = dir->mtime;
673 free(dir);
674 if((b = Bopen(name, OREAD)) == nil)
675 return -1;
677 /*
678 * Reuse old tree, garbage collecting entries that
679 * are not mentioned in the new config file.
680 */
681 if(c.ctree == nil)
682 c.ctree = mkctree();
684 markctree(c.ctree);
685 c.ok = nil;
686 c.nok = 0;
688 line = 0;
689 for(; (p=Brdstr(b, '\n', 1)) != nil; free(p)){
690 line++;
691 if((q = strchr(p, '#')) != nil)
692 *q = 0;
693 nf = tokenize(p, f, nelem(f));
694 if(nf == 0)
695 continue;
696 if(strcmp(f[0], "mount") == 0){
697 if(nf != 4){
698 werrstr("syntax error: mount /path /dev|score mtime");
699 goto badline;
701 if(f[1][0] != '/'){
702 werrstr("unrooted path %s", f[1]);
703 goto badline;
705 score = nil;
706 image = nil;
707 if(f[2][0] == '/'){
708 if(access(f[2], AEXIST) < 0){
709 werrstr("image %s does not exist", f[2]);
710 goto badline;
712 image = f[2];
713 }else{
714 if(vtparsescore(f[2], &pref, scorebuf) < 0){
715 werrstr("bad score %s", f[2]);
716 goto badline;
718 score = scorebuf;
720 if(parsetime(f[3], &time) < 0){
721 fprint(2, "%s:%d: bad time %s\n", name, line, f[3]);
722 time = 1;
724 ctreemountfsys(c.ctree, f[1], time, score, image);
725 continue;
727 if(strcmp(f[0], "allow") == 0 || strcmp(f[0], "deny") == 0){
728 if(nf != 2){
729 werrstr("syntax error: allow|deny ip[/mask]");
730 goto badline;
732 c.ok = erealloc(c.ok, (c.nok+1)*sizeof(c.ok[0]));
733 if(parseipandmask(f[1], c.ok[c.nok].ip, c.ok[c.nok].mask) < 0){
734 werrstr("bad ip[/mask]: %s", f[1]);
735 goto badline;
737 c.ok[c.nok].okay = (strcmp(f[0], "allow") == 0);
738 c.nok++;
739 continue;
741 werrstr("unknown verb '%s'", f[0]);
742 badline:
743 fprint(2, "%s:%d: %r\n", name, line);
745 Bterm(b);
747 sweepctree(c.ctree);
748 free(cp->ok);
749 *cp = c;
750 return 0;
753 int
754 ipokay(uchar *ip, ushort port)
756 int i;
757 uchar ipx[IPaddrlen];
758 Ipokay *ok;
760 for(i=0; i<config.nok; i++){
761 ok = &config.ok[i];
762 maskip(ip, ok->mask, ipx);
763 if(0) fprint(2, "%I & %I = %I (== %I?)\n",
764 ip, ok->mask, ipx, ok->ip);
765 if(memcmp(ipx, ok->ip, IPaddrlen) == 0)
766 return ok->okay;
768 if(config.nok == 0) /* all is permitted */
769 return 1;
770 /* otherwise default is none allowed */
771 return 0;
774 Nfs3Status
775 cnodelookup(Ctree *t, Cnode **np, char *name)
777 Cnode *n, *nn;
779 n = *np;
780 if(n->isblackhole)
781 return Nfs3Ok;
782 if((nn = cnodewalk(n, name, strlen(name), 0)) == nil){
783 if(n->ismtpt || n->fsys){
784 if((nn = cnodewalk(n, "", 0, 1)) == nil){
785 nn = mkcnode(t, n, "", 0, (char*)n->handle, SHA1dlen);
786 nn->isblackhole = 1;
788 nn->mark = 0;
791 if(nn == nil)
792 return Nfs3ErrNoEnt;
793 *np = nn;
794 return Nfs3Ok;
797 Nfs3Status
798 cnodegetattr(Cnode *n, Nfs3Attr *attr)
800 uint64 u64;
802 memset(attr, 0, sizeof *attr);
803 if(n->read){
804 attr->type = Nfs3FileReg;
805 attr->mode = 0444;
806 attr->size = 512;
807 attr->nlink = 1;
808 }else{
809 attr->type = Nfs3FileDir;
810 attr->mode = 0555;
811 attr->size = 1024;
812 attr->nlink = 10;
814 memmove(&u64, n->handle, 8);
815 attr->fileid = u64;
816 attr->atime.sec = n->mtime;
817 attr->mtime.sec = n->mtime;
818 attr->ctime.sec = n->mtime;
819 return Nfs3Ok;
822 Nfs3Status
823 cnodereaddir(Cnode *n, u32int count, u64int cookie, uchar **pdata, u32int *pcount, u1int *peof)
825 uchar *data, *p, *ep, *np;
826 u64int c;
827 u64int u64;
828 Nfs3Entry ne;
830 n = n->kidlist;
831 c = cookie;
832 for(; c && n; c--)
833 n = n->nextsib;
834 if(n == nil){
835 *pdata = 0;
836 *pcount = 0;
837 *peof = 1;
838 return Nfs3Ok;
841 data = emalloc(count);
842 p = data;
843 ep = data+count;
844 while(n && p < ep){
845 if(n->mark || n->name[0] == '+'){
846 n = n->nextsib;
847 ++cookie;
848 continue;
850 ne.name = n->name;
851 ne.namelen = strlen(n->name);
852 ne.cookie = ++cookie;
853 memmove(&u64, n->handle, 8);
854 ne.fileid = u64;
855 if(nfs3entrypack(p, ep, &np, &ne) < 0)
856 break;
857 p = np;
858 n = n->nextsib;
860 *pdata = data;
861 *pcount = p - data;
862 *peof = n==nil;
863 return Nfs3Ok;
866 void
867 timerproc(void *v)
869 for(;;){
870 sleep(60*1000);
871 sendp(timerchan, 0);
875 void
876 timerthread(void *v)
878 for(;;){
879 recvp(timerchan);
880 /* refreshconfig(); */
884 /*
885 * Actually serve the NFS requests. Called from nfs3srv.c.
886 * Each request runs in its own thread (coroutine).
888 * Decrypted handles have the form:
890 * config[20] - SHA1 hash identifying a config tree node
891 * glob[10] - SHA1 hash prefix identifying a glob state
892 * fsyshandle[<=10] - disk file system handle (usually 4 bytes)
893 */
895 /*
896 * A fid represents a point in the file tree.
897 * There are three components, all derived from the handle:
899 * - config tree position (also used to find fsys)
900 * - glob state for exclusions
901 * - file system position
902 */
/* modes passed to handletofid, naming the request being served */
enum
{
	HAccess,
	HAttr,
	HWalk,
	HDotdot,
	HRead
};
911 typedef struct Fid Fid;
912 struct Fid
914 Cnode *cnode;
915 Fsys *fsys;
916 Nfs3Handle fsyshandle;
917 };
919 int
920 handlecmp(Nfs3Handle *h, Nfs3Handle *h1)
922 if(h->len != h1->len)
923 return h->len - h1->len;
924 return memcmp(h->h, h1->h, h->len);
927 Nfs3Status
928 handletofid(Nfs3Handle *eh, Fid *fid, int mode)
930 int domount;
931 Cnode *n;
932 Disk *disk, *cdisk;
933 Fsys *fsys;
934 Nfs3Status ok;
935 Nfs3Handle h2, *h, *fh;
937 memset(fid, 0, sizeof *fid);
939 domount = 1;
940 if(mode == HDotdot)
941 domount = 0;
942 /*
943 * Not necessary, but speeds up ls -l /dump/2005
944 * HAttr and HAccess must be handled the same way
945 * because both can be used to fetch attributes.
946 * Acting differently yields inconsistencies at mount points,
947 * and causes FreeBSD ls -l to fail.
948 */
949 if(mode == HAttr || mode == HAccess)
950 domount = 0;
952 /*
953 * Decrypt handle.
954 */
955 h2 = *eh;
956 h = &h2;
957 if((ok = hdecrypt(h)) != Nfs3Ok)
958 return ok;
959 trace("handletofid: decrypted %.*lH\n", h->len, h->h);
960 if(h->len < FsysHandleOffset)
961 return Nfs3ErrBadHandle;
963 /*
964 * Find place in config tree.
965 */
966 if((n = cnodebyhandle(config.ctree, h->h)) == nil)
967 return Nfs3ErrStale;
968 fid->cnode = n;
970 if(n->ismtpt && domount){
971 /*
972 * Open fsys for mount point if needed.
973 */
974 if(n->mfsys == nil){
975 trace("handletofid: mounting %V/%s\n", n->fsysscore, n->fsysimage);
976 if(n->fsysimage){
977 if(strcmp(n->fsysimage, "/dev/null") == 0)
978 return Nfs3ErrAcces;
979 if((disk = diskopenfile(n->fsysimage)) == nil){
980 fprint(2, "cannot open disk %s: %r\n", n->fsysimage);
981 return Nfs3ErrIo;
983 if((cdisk = diskcache(disk, blocksize, 64)) == nil){
984 fprint(2, "cannot cache disk %s: %r\n", n->fsysimage);
985 diskclose(disk);
987 disk = cdisk;
988 }else{
989 if((disk = diskopenventi(vcache, n->fsysscore)) == nil){
990 fprint(2, "cannot open venti disk %V: %r\n", n->fsysscore);
991 return Nfs3ErrIo;
994 if((fsys = fsysopen(disk)) == nil){
995 fprint(2, "cannot open fsys on %V: %r\n", n->fsysscore);
996 diskclose(disk);
997 return Nfs3ErrIo;
999 n->mfsys = fsys;
1000 fsysroot(fsys, &n->mfsyshandle);
1004 * Use inner handle.
1006 fid->fsys = n->mfsys;
1007 fid->fsyshandle = n->mfsyshandle;
1008 }else{
1010 * Use fsys handle from tree or from handle.
1011 * This assumes that fsyshandle was set by fidtohandle
1012 * earlier, so it's not okay to reuse handles (except the root)
1013 * across sessions. The encryption above makes and
1014 * enforces the same restriction, so this is okay.
1016 fid->fsys = n->fsys;
1017 fh = &fid->fsyshandle;
1018 if(n->isblackhole){
1019 fh->len = h->len-FsysHandleOffset;
1020 memmove(fh->h, h->h+FsysHandleOffset, fh->len);
1021 }else
1022 *fh = n->fsyshandle;
1023 trace("handletofid: fsyshandle %.*lH\n", fh->len, fh->h);
1027 * TO DO (maybe): some sort of path restriction here.
1029 trace("handletofid: cnode %s fsys %p fsyshandle %.*lH\n",
1030 n->name, fid->fsys, fid->fsyshandle.len, fid->fsyshandle.h);
1031 return Nfs3Ok;
1034 void
1035 _fidtohandle(Fid *fid, Nfs3Handle *h)
1037 Cnode *n;
1039 n = fid->cnode;
1041 * Record fsys handle in n, don't bother sending it to client
1042 * for black holes.
1044 n->fsys = fid->fsys;
1045 if(!n->isblackhole){
1046 n->fsyshandle = fid->fsyshandle;
1047 fid->fsyshandle.len = 0;
1049 memmove(h->h, n->handle, CnodeHandleSize);
1050 memmove(h->h+FsysHandleOffset, fid->fsyshandle.h, fid->fsyshandle.len);
1051 h->len = FsysHandleOffset+fid->fsyshandle.len;
1054 void
1055 fidtohandle(Fid *fid, Nfs3Handle *h)
1057 _fidtohandle(fid, h);
1058 hencrypt(h);
1061 void
1062 setrootfid(void)
1064 Fid fid;
1066 memset(&fid, 0, sizeof fid);
1067 fid.cnode = config.ctree->root;
1068 _fidtohandle(&fid, &root);
1071 void
1072 fsgetroot(Nfs3Handle *h)
1074 *h = root;
1075 hencrypt(h);
1078 Nfs3Status
1079 fsgetattr(SunAuthUnix *au, Nfs3Handle *h, Nfs3Attr *attr)
1081 Fid fid;
1082 Nfs3Status ok;
1084 trace("getattr %.*lH\n", h->len, h->h);
1085 if((ok = handletofid(h, &fid, HAttr)) != Nfs3Ok)
1086 return ok;
1087 if(fid.fsys)
1088 return fsysgetattr(fid.fsys, au, &fid.fsyshandle, attr);
1089 else
1090 return cnodegetattr(fid.cnode, attr);
1094 * Lookup is always the hard part.
1096 Nfs3Status
1097 fslookup(SunAuthUnix *au, Nfs3Handle *h, char *name, Nfs3Handle *nh)
1099 Fid fid;
1100 Cnode *n;
1101 Nfs3Status ok;
1102 Nfs3Handle xh;
1103 int mode;
1105 trace("lookup %.*lH %s\n", h->len, h->h, name);
1107 mode = HWalk;
1108 if(strcmp(name, "..") == 0 || strcmp(name, ".") == 0)
1109 mode = HDotdot;
1110 if((ok = handletofid(h, &fid, mode)) != Nfs3Ok){
1111 nfs3errstr(ok);
1112 trace("lookup: handletofid %r\n");
1113 return ok;
1116 if(strcmp(name, ".") == 0){
1117 fidtohandle(&fid, nh);
1118 return Nfs3Ok;
1122 * Walk down file system and cnode simultaneously.
1123 * If dotdot and file system doesn't move, need to walk
1124 * up cnode. Save the corresponding fsys handles in
1125 * the cnode as we walk down so that we'll have them
1126 * for dotdotting back up.
1128 n = fid.cnode;
1129 if(mode == HWalk){
1131 * Walk down config tree and file system simultaneously.
1133 if((ok = cnodelookup(config.ctree, &n, name)) != Nfs3Ok){
1134 nfs3errstr(ok);
1135 trace("lookup: cnodelookup: %r\n");
1136 return ok;
1138 fid.cnode = n;
1139 if(fid.fsys){
1140 if((ok = fsyslookup(fid.fsys, au, &fid.fsyshandle, name, &xh)) != Nfs3Ok){
1141 nfs3errstr(ok);
1142 trace("lookup: fsyslookup: %r\n");
1143 return ok;
1145 fid.fsyshandle = xh;
1147 }else{
1149 * Walking dotdot. Ick.
1151 trace("lookup dotdot fsys=%p\n", fid.fsys);
1152 if(fid.fsys){
1154 * Walk up file system, then try up config tree.
1156 if((ok = fsyslookup(fid.fsys, au, &fid.fsyshandle, "..", &xh)) != Nfs3Ok){
1157 nfs3errstr(ok);
1158 trace("lookup fsyslookup: %r\n");
1159 return ok;
1161 fid.fsyshandle = xh;
1164 * Usually just go to n->parent.
1166 * If we're in a subtree of the mounted file system that
1167 * isn't represented explicitly by the config tree (instead
1168 * the black hole node represents the entire file tree),
1169 * then we only go to n->parent when we've dotdotted back
1170 * to the right handle.
1172 if(n->parent == nil)
1173 trace("lookup dotdot: no parent\n");
1174 else{
1175 trace("lookup dotdot: parent %.*lH, have %.*lH\n",
1176 n->parent->fsyshandle.len, n->parent->fsyshandle.h,
1177 xh.len, xh.h);
1180 if(n->isblackhole){
1181 if(handlecmp(&n->parent->mfsyshandle, &xh) == 0)
1182 n = n->parent;
1183 }else{
1184 if(n->parent)
1185 n = n->parent;
1187 }else{
1189 * No file system, just walk up.
1191 if(n->parent)
1192 n = n->parent;
1194 fid.fsys = n->fsys;
1195 if(!n->isblackhole)
1196 fid.fsyshandle = n->fsyshandle;
1197 fid.cnode = n;
1199 fidtohandle(&fid, nh);
1200 return Nfs3Ok;
1203 Nfs3Status
1204 fsaccess(SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr)
1206 Fid fid;
1207 Nfs3Status ok;
1209 trace("access %.*lH 0x%ux\n", h->len, h->h, want);
1210 if((ok = handletofid(h, &fid, HAccess)) != Nfs3Ok)
1211 return ok;
1212 if(fid.fsys)
1213 return fsysaccess(fid.fsys, au, &fid.fsyshandle, want, got, attr);
1214 *got = want & (Nfs3AccessRead|Nfs3AccessLookup|Nfs3AccessExecute);
1215 return cnodegetattr(fid.cnode, attr);
1218 Nfs3Status
1219 fsreadlink(SunAuthUnix *au, Nfs3Handle *h, char **link)
1221 Fid fid;
1222 Nfs3Status ok;
1224 trace("readlink %.*lH\n", h->len, h->h);
1225 if((ok = handletofid(h, &fid, HRead)) != Nfs3Ok)
1226 return ok;
1227 if(fid.fsys)
1228 return fsysreadlink(fid.fsys, au, &fid.fsyshandle, link);
1229 *link = 0;
1230 return Nfs3ErrNotSupp;
1233 Nfs3Status
1234 fsreadfile(SunAuthUnix *au, Nfs3Handle *h, u32int count, u64int offset, uchar **data, u32int *pcount, u1int *peof)
1236 Fid fid;
1237 Nfs3Status ok;
1239 trace("readfile %.*lH\n", h->len, h->h);
1240 if((ok = handletofid(h, &fid, HRead)) != Nfs3Ok)
1241 return ok;
1242 if(fid.cnode->read)
1243 return fid.cnode->read(fid.cnode, count, offset, data, pcount, peof);
1244 if(fid.fsys)
1245 return fsysreadfile(fid.fsys, au, &fid.fsyshandle, count, offset, data, pcount, peof);
1246 return Nfs3ErrNotSupp;
1249 Nfs3Status
1250 fsreaddir(SunAuthUnix *au, Nfs3Handle *h, u32int len, u64int cookie, uchar **pdata, u32int *pcount, u1int *peof)
1252 Fid fid;
1253 Nfs3Status ok;
1255 trace("readdir %.*lH\n", h->len, h->h);
1256 if((ok = handletofid(h, &fid, HRead)) != Nfs3Ok)
1257 return ok;
1258 if(fid.fsys)
1259 return fsysreaddir(fid.fsys, au, &fid.fsyshandle, len, cookie, pdata, pcount, peof);
1260 return cnodereaddir(fid.cnode, len, cookie, pdata, pcount, peof);
1263 Nfs3Status
1264 logread(Cnode *n, u32int count, u64int offset, uchar **data, u32int *pcount, u1int *peof)
1266 *pcount = 0;
1267 *peof = 1;
1268 return Nfs3Ok;
1271 Nfs3Status
1272 refreshdiskread(Cnode *n, u32int count, u64int offset, uchar **data, u32int *pcount, u1int *peof)
1274 char buf[128];
1276 if(offset != 0){
1277 *pcount = 0;
1278 *peof = 1;
1279 return Nfs3Ok;
1281 if(refreshdisk() < 0)
1282 snprint(buf, sizeof buf, "refreshdisk: %r\n");
1283 else
1284 strcpy(buf, "ok\n");
1285 *data = emalloc(strlen(buf));
1286 strcpy((char*)*data, buf);
1287 *pcount = strlen(buf);
1288 *peof = 1;
1289 return Nfs3Ok;
1292 Nfs3Status
1293 refreshconfigread(Cnode *n, u32int count, u64int offset, uchar **data, u32int *pcount, u1int *peof)
1295 char buf[128];
1297 if(offset != 0){
1298 *pcount = 0;
1299 *peof = 1;
1300 return Nfs3Ok;
1302 if(readconfigfile(&config) < 0)
1303 snprint(buf, sizeof buf, "readconfig: %r\n");
1304 else
1305 strcpy(buf, "ok\n");
1306 *data = emalloc(strlen(buf));
1307 strcpy((char*)*data, buf);
1308 *pcount = strlen(buf);
1309 *peof = 1;
1310 return Nfs3Ok;