Blob


1 /*
2 * TO DO:
3 * - gc of file systems (not going to do just yet?)
4 * - statistics file
5 * - configure on amsterdam
6 */
8 #include <u.h>
9 #include <libc.h>
10 #include <bio.h>
11 #include <ip.h>
12 #include <thread.h>
13 #include <libsec.h>
14 #include <sunrpc.h>
15 #include <nfs3.h>
16 #include <diskfs.h>
17 #include <venti.h>
18 #include "nfs3srv.h"
20 #define trace if(!tracecalls){}else print
22 typedef struct Ipokay Ipokay;
23 typedef struct Config Config;
24 typedef struct Ctree Ctree;
25 typedef struct Cnode Cnode;
27 struct Ipokay
28 {
29 int okay;
30 uchar ip[IPaddrlen];
31 uchar mask[IPaddrlen];
32 };
34 struct Config
35 {
36 Ipokay *ok;
37 uint nok;
38 ulong mtime;
39 Ctree *ctree;
40 };
42 char *addr;
43 int blocksize;
44 int cachesize;
45 Config config;
46 char *configfile;
47 int encryptedhandles = 1;
48 Channel *nfschan;
49 Channel *mountchan;
50 Channel *timerchan;
51 Nfs3Handle root;
52 SunSrv *srv;
53 int tracecalls;
54 VtCache *vcache;
55 VtConn *z;
57 void cryptinit(void);
58 void timerthread(void*);
59 void timerproc(void*);
61 extern void handleunparse(Fsys*, Nfs3Handle*, Nfs3Handle*, int);
62 extern Nfs3Status handleparse(Nfs3Handle*, Fsys**, Nfs3Handle*, int);
64 Nfs3Status logread(Cnode*, u32int, u64int, uchar**, u32int*, u1int*);
65 Nfs3Status refreshdiskread(Cnode*, u32int, u64int, uchar**, u32int*, u1int*);
66 Nfs3Status refreshconfigread(Cnode*, u32int, u64int, uchar**, u32int*, u1int*);
68 int readconfigfile(Config *cp);
69 void setrootfid(void);
70 int ipokay(uchar *ip, ushort port);
72 u64int unittoull(char*);
/*
 * Print the command-line synopsis and exit.
 * The flag list includes -E and -T, which threadmain accepts
 * but the old message omitted.
 */
void
usage(void)
{
	fprint(2, "usage: vnfs [-ELLRTVr] [-a addr] [-b blocksize] [-c cachesize] configfile\n");
	threadexitsall("usage");
}
81 void
82 threadmain(int argc, char **argv)
83 {
84 fmtinstall('B', sunrpcfmt);
85 fmtinstall('C', suncallfmt);
86 fmtinstall('F', vtfcallfmt);
87 fmtinstall('H', encodefmt);
88 fmtinstall('I', eipfmt);
89 fmtinstall('V', vtscorefmt);
90 sunfmtinstall(&nfs3prog);
91 sunfmtinstall(&nfsmount3prog);
93 addr = "udp!*!2049";
94 blocksize = 8192;
95 cachesize = 400;
96 srv = sunsrv();
97 srv->ipokay = ipokay;
98 cryptinit();
100 ARGBEGIN{
101 default:
102 usage();
103 case 'E':
104 encryptedhandles = 0;
105 break;
106 case 'L':
107 if(srv->localonly == 0)
108 srv->localonly = 1;
109 else
110 srv->localparanoia = 1;
111 break;
112 case 'R':
113 srv->chatty++;
114 break;
115 case 'T':
116 tracecalls = 1;
117 break;
118 case 'V':
119 if(chattyventi++)
120 vttracelevel++;
121 break;
122 case 'a':
123 addr = EARGF(usage());
124 break;
125 case 'b':
126 blocksize = unittoull(EARGF(usage()));
127 break;
128 case 'c':
129 cachesize = unittoull(EARGF(usage()));
130 break;
131 case 'r':
132 srv->alwaysreject++;
133 break;
134 }ARGEND
136 if(argc != 1)
137 usage();
139 if((z = vtdial(nil)) == nil)
140 sysfatal("vtdial: %r");
141 if(vtconnect(z) < 0)
142 sysfatal("vtconnect: %r");
143 if((vcache = vtcachealloc(z, blocksize, cachesize)) == nil)
144 sysfatal("vtcache: %r");
146 configfile = argv[0];
147 if(readconfigfile(&config) < 0)
148 sysfatal("readConfig: %r");
149 setrootfid();
151 nfschan = chancreate(sizeof(SunMsg*), 0);
152 mountchan = chancreate(sizeof(SunMsg*), 0);
153 timerchan = chancreate(sizeof(void*), 0);
155 if(sunsrvudp(srv, addr) < 0)
156 sysfatal("starting server: %r");
158 sunsrvthreadcreate(srv, nfs3proc, nfschan);
159 sunsrvthreadcreate(srv, mount3proc, mountchan);
160 sunsrvthreadcreate(srv, timerthread, nil);
161 proccreate(timerproc, nil, 32768);
163 sunsrvprog(srv, &nfs3prog, nfschan);
164 sunsrvprog(srv, &nfsmount3prog, mountchan);
166 threadexits(nil);
169 #define TWID64 ((u64int)~(u64int)0)
171 u64int
172 unittoull(char *s)
174 char *es;
175 u64int n;
177 if(s == nil)
178 return TWID64;
179 n = strtoul(s, &es, 0);
180 if(*es == 'k' || *es == 'K'){
181 n *= 1024;
182 es++;
183 }else if(*es == 'm' || *es == 'M'){
184 n *= 1024*1024;
185 es++;
186 }else if(*es == 'g' || *es == 'G'){
187 n *= 1024*1024*1024;
188 es++;
189 }else if(*es == 't' || *es == 'T'){
190 n *= 1024*1024;
191 n *= 1024*1024;
193 if(*es != '\0')
194 return TWID64;
195 return n;
198 /*
199 * Handles.
201 * We store all the state about which file a client is accessing in
202 * the handle, so that we don't have to maintain any per-client state
203 * ourselves. In order to avoid leaking handles or letting clients
204 * create arbitrary handles, we sign and encrypt each handle with
205 * AES using a key selected randomly when the server starts.
206 * Thus, handles cannot be used across sessions.
208 * The decrypted handles begin with the following header:
210 * sessid[8] random session id chosen at start time
211 * len[4] length of handle that follows
213 * If we're pressed for space in the rest of the handle, we could
214 * probably reduce the amount of sessid bytes. Note that the sessid
215 * bytes must be consistent during a run of vnfs, or else some
216 * clients (e.g., Linux 2.4) eventually notice that successive TLookups
217 * return different handles, and they return "Stale NFS file handle"
218 * errors to system calls in response (even though we never sent
219 * that error!).
221 * Security woes aside, the fact that we have to shove everything
222 * into the handles is quite annoying. We have to encode, in 40 bytes:
224 * - position in the synthesized config tree
225 * - enough of the path to do glob matching
226 * - position in an archived file system image
228 * and the handles need to be stable across changes in the config file
229 * (though not across server restarts since encryption screws
230 * that up nicely).
232 * We encode each of the first two as a 10-byte hash that is
233 * the first half of a SHA1 hash.
234 */
236 enum
238 SessidSize = 8,
239 HeaderSize = SessidSize+4,
240 MaxHandleSize = Nfs3MaxHandleSize - HeaderSize
241 };
243 AESstate aesstate;
244 uchar sessid[SessidSize];
246 static void
247 hencrypt(Nfs3Handle *h)
249 uchar *p;
250 AESstate aes;
252 /*
253 * root handle has special encryption - a single 0 byte - so that it
254 * never goes stale.
255 */
256 if(h->len == root.len && memcmp(h->h, root.h, root.len) == 0){
257 h->h[0] = 0;
258 h->len = 1;
259 return;
262 if(!encryptedhandles)
263 return;
265 if(h->len > MaxHandleSize){
266 /* oops */
267 fprint(2, "handle too long: %.*lH\n", h->len, h->h);
268 memset(h->h, 'X', Nfs3MaxHandleSize);
269 h->len = Nfs3MaxHandleSize;
270 return;
273 p = h->h;
274 memmove(p+HeaderSize, p, h->len);
275 memmove(p, sessid, SessidSize);
276 *(u32int*)(p+SessidSize) = h->len;
277 h->len += HeaderSize;
279 if(encryptedhandles){
280 while(h->len < MaxHandleSize)
281 h->h[h->len++] = 0;
282 aes = aesstate;
283 aesCBCencrypt(h->h, MaxHandleSize, &aes);
287 static Nfs3Status
288 hdecrypt(Nfs3Handle *h)
290 AESstate aes;
292 if(h->len == 1 && h->h[0] == 0){ /* single 0 byte is root */
293 *h = root;
294 return Nfs3Ok;
297 if(!encryptedhandles)
298 return Nfs3Ok;
300 if(h->len <= HeaderSize)
301 return Nfs3ErrBadHandle;
302 if(encryptedhandles){
303 if(h->len != MaxHandleSize)
304 return Nfs3ErrBadHandle;
305 aes = aesstate;
306 aesCBCdecrypt(h->h, h->len, &aes);
308 if(memcmp(h->h, sessid, SessidSize) != 0)
309 return Nfs3ErrStale; /* give benefit of doubt */
310 h->len = *(u32int*)(h->h+SessidSize);
311 if(h->len >= MaxHandleSize-HeaderSize)
312 return Nfs3ErrBadHandle;
313 memmove(h->h, h->h+HeaderSize, h->len);
314 return Nfs3Ok;
317 void
318 cryptinit(void)
320 uchar key[32], ivec[AESbsize];
321 int i;
323 *(u32int*)sessid = truerand();
324 for(i=0; i<nelem(key); i+=4)
325 *(u32int*)&key[i] = truerand();
326 for(i=0; i<nelem(ivec); i++)
327 ivec[i] = fastrand();
328 setupAESstate(&aesstate, key, sizeof key, ivec);
331 /*
332 * Config file.
334 * The main purpose of the configuration file is to define a tree
335 * in which the archived file system images are mounted.
 * The tree is stored as Cnode structures, defined below.
338 * The configuration file also allows one to define shell-like
339 * glob expressions matching paths that are not to be displayed.
340 * The matched files or directories are shown in directory listings
341 * (could suppress these if we cared) but they cannot be opened,
342 * read, or written, and getattr returns zeroed data.
343 */
enum
{
	/* sizes used in handles; see nfs server below */
	CnodeHandleSize = 8,			/* bytes of Cnode.handle stored in a handle */
	FsysHandleOffset = CnodeHandleSize	/* where the fsys handle bytes start */
};
351 /*
352 * Config file tree.
353 */
354 struct Ctree
356 Cnode *root;
357 Cnode *hash[1024];
358 };
360 struct Cnode
362 char *name; /* path element */
363 Cnode *parent; /* in tree */
364 Cnode *nextsib; /* in tree */
365 Cnode *kidlist; /* in tree */
366 Cnode *nexthash; /* in hash list */
368 Nfs3Status (*read)(Cnode*, u32int, u64int, uchar**, u32int*, u1int*); /* synthesized read fn */
370 uchar handle[VtScoreSize]; /* sha1(path to here) */
371 ulong mtime; /* mtime for this directory entry */
373 /* fsys overlay on this node */
374 Fsys *fsys; /* cache of memory structure */
375 Nfs3Handle fsyshandle;
376 int isblackhole; /* walking down keeps you here */
378 /*
379 * mount point info.
380 * if a mount point is inside another file system,
381 * the fsys and fsyshandle above have the old fs info,
382 * the mfsys and mfsyshandle below have the new one.
383 * getattrs must use the old info for consistency.
384 */
385 int ismtpt; /* whether there is an fsys mounted here */
386 uchar fsysscore[VtScoreSize]; /* score of fsys image on venti */
387 char *fsysimage; /* raw disk image */
388 Fsys *mfsys; /* mounted file system (nil until walked) */
389 Nfs3Handle mfsyshandle; /* handle to root of mounted fsys */
391 int mark; /* gc */
392 };
394 static uint
395 dumbhash(uchar *s)
397 return (s[0]<<2)|(s[1]>>6); /* first 10 bits */
400 static Cnode*
401 mkcnode(Ctree *t, Cnode *parent, char *elem, uint elen, char *path, uint plen)
403 uint h;
404 Cnode *n;
406 n = emalloc(sizeof *n + elen+1);
407 n->name = (char*)(n+1);
408 memmove(n->name, elem, elen);
409 n->name[elen] = 0;
410 n->parent = parent;
411 if(parent){
412 n->nextsib = parent->kidlist;
413 parent->kidlist = n;
415 n->kidlist = nil;
416 sha1((uchar*)path, plen, n->handle, nil);
417 h = dumbhash(n->handle);
418 n->nexthash = t->hash[h];
419 t->hash[h] = n;
421 return n;
424 void
425 markctree(Ctree *t)
427 int i;
428 Cnode *n;
430 for(i=0; i<nelem(t->hash); i++)
431 for(n=t->hash[i]; n; n=n->nexthash)
432 if(n->name[0] != '+')
433 n->mark = 1;
436 int
437 refreshdisk(void)
439 int i;
440 Cnode *n;
441 Ctree *t;
443 t = config.ctree;
444 for(i=0; i<nelem(t->hash); i++)
445 for(n=t->hash[i]; n; n=n->nexthash){
446 if(n->mfsys)
447 disksync(n->mfsys->disk);
448 if(n->fsys)
449 disksync(n->fsys->disk);
451 return 0;
454 void
455 sweepctree(Ctree *t)
457 int i;
458 Cnode *n;
460 /* just zero all the garbage and leave it linked into the tree */
461 for(i=0; i<nelem(t->hash); i++){
462 for(n=t->hash[i]; n; n=n->nexthash){
463 if(!n->mark)
464 continue;
465 n->fsys = nil;
466 free(n->fsysimage);
467 n->fsysimage = nil;
468 memset(n->fsysscore, 0, sizeof n->fsysscore);
469 n->mfsys = nil;
470 n->ismtpt = 0;
471 memset(&n->fsyshandle, 0, sizeof n->fsyshandle);
472 memset(&n->mfsyshandle, 0, sizeof n->mfsyshandle);
477 static Cnode*
478 cnodewalk(Cnode *n, char *name, uint len, int markokay)
480 Cnode *nn;
482 for(nn=n->kidlist; nn; nn=nn->nextsib)
483 if(strncmp(nn->name, name, len) == 0 && nn->name[len] == 0)
484 if(!nn->mark || markokay)
485 return nn;
486 return nil;
489 Cnode*
490 ctreewalkpath(Ctree *t, char *name, ulong createmtime)
492 Cnode *n, *nn;
493 char *p, *nextp;
495 n = t->root;
496 p = name;
497 for(; *p; p=nextp){
498 n->mark = 0;
499 assert(*p == '/');
500 p++;
501 nextp = strchr(p, '/');
502 if(nextp == nil)
503 nextp = p+strlen(p);
504 if((nn = cnodewalk(n, p, nextp-p, 1)) == nil){
505 if(createmtime == 0)
506 return nil;
507 nn = mkcnode(t, n, p, nextp-p, name, nextp-name);
508 nn->mtime = createmtime;
510 if(nn->mark)
511 nn->mark = 0;
512 n = nn;
514 n->mark = 0;
515 return n;
518 Ctree*
519 mkctree(void)
521 Ctree *t;
523 t = emalloc(sizeof *t);
524 t->root = mkcnode(t, nil, "", 0, "", 0);
526 ctreewalkpath(t, "/+log", time(0))->read = logread;
527 ctreewalkpath(t, "/+refreshdisk", time(0))->read = refreshdiskread;
528 ctreewalkpath(t, "/+refreshconfig", time(0))->read = refreshconfigread;
530 return t;
533 Cnode*
534 ctreemountfsys(Ctree *t, char *path, ulong time, uchar *score, char *file)
536 Cnode *n;
538 if(time == 0)
539 time = 1;
540 n = ctreewalkpath(t, path, time);
541 if(score){
542 if(n->ismtpt && (n->fsysimage || memcmp(n->fsysscore, score, VtScoreSize) != 0)){
543 free(n->fsysimage);
544 n->fsysimage = nil;
545 n->fsys = nil; /* leak (might be other refs) */
547 memmove(n->fsysscore, score, VtScoreSize);
548 }else{
549 if(n->ismtpt && (n->fsysimage==nil || strcmp(n->fsysimage, file) != 0)){
550 free(n->fsysimage);
551 n->fsysimage = nil;
552 n->fsys = nil; /* leak (might be other refs) */
554 n->fsysimage = emalloc(strlen(file)+1);
555 strcpy(n->fsysimage, file);
557 n->ismtpt = 1;
558 return n;
561 Cnode*
562 cnodebyhandle(Ctree *t, uchar *p)
564 int h;
565 Cnode *n;
567 h = dumbhash(p);
568 for(n=t->hash[h]; n; n=n->nexthash)
569 if(memcmp(n->handle, p, CnodeHandleSize) == 0)
570 return n;
571 return nil;
574 static int
575 parseipandmask(char *s, uchar *ip, uchar *mask)
577 char *p, *q;
579 p = strchr(s, '/');
580 if(p)
581 *p++ = 0;
582 if(parseip(ip, s) == ~0UL)
583 return -1;
584 if(p == nil)
585 memset(mask, 0xFF, IPaddrlen);
586 else{
587 if(isdigit((uchar)*p) && strtol(p, &q, 10)>=0 && *q==0)
588 *--p = '/';
589 if(parseipmask(mask, p) == ~0UL)
590 return -1;
591 if(*p != '/')
592 *--p = '/';
594 /*fprint(2, "parseipandmask %s => %I %I\n", s, ip, mask); */
595 return 0;
598 static int
599 parsetime(char *s, ulong *time)
601 ulong x;
602 char *p;
603 int i;
604 Tm tm;
606 /* decimal integer is seconds since 1970 */
607 x = strtoul(s, &p, 10);
608 if(x > 0 && *p == 0){
609 *time = x;
610 return 0;
613 /* otherwise expect yyyy/mmdd/hhmm */
614 if(strlen(s) != 14 || s[4] != '/' || s[9] != '/')
615 return -1;
616 for(i=0; i<4; i++)
617 if(!isdigit((uchar)s[i]) || !isdigit((uchar)s[i+5]) || !isdigit((uchar)s[i+10]))
618 return -1;
619 memset(&tm, 0, sizeof tm);
620 tm.year = atoi(s)-1900;
621 if(tm.year < 0 || tm.year > 200)
622 return -1;
623 tm.mon = (s[5]-'0')*10+s[6]-'0' - 1;
624 if(tm.mon < 0 || tm.mon > 11)
625 return -1;
626 tm.mday = (s[7]-'0')*10+s[8]-'0';
627 if(tm.mday < 0 || tm.mday > 31)
628 return -1;
629 tm.hour = (s[10]-'0')*10+s[11]-'0';
630 if(tm.hour < 0 || tm.hour > 23)
631 return -1;
632 tm.min = (s[12]-'0')*10+s[13]-'0';
633 if(tm.min < 0 || tm.min > 59)
634 return -1;
635 strcpy(tm.zone, "XXX"); /* anything but GMT */
636 if(0){
637 print("tm2sec %d/%d/%d/%d/%d\n",
638 tm.year, tm.mon, tm.mday, tm.hour, tm.min);
640 *time = tm2sec(&tm);
641 if(0) print("time %lud\n", *time);
642 return 0;
646 int
647 readconfigfile(Config *cp)
649 char *f[10], *image, *p, *pref, *q, *name;
650 int nf, line;
651 uchar scorebuf[VtScoreSize], *score;
652 ulong time;
653 Biobuf *b;
654 Config c;
655 Dir *dir;
657 name = configfile;
658 c = *cp;
659 if((dir = dirstat(name)) == nil)
660 return -1;
661 if(c.mtime == dir->mtime){
662 free(dir);
663 return 0;
665 c.mtime = dir->mtime;
666 free(dir);
667 if((b = Bopen(name, OREAD)) == nil)
668 return -1;
670 /*
671 * Reuse old tree, garbage collecting entries that
672 * are not mentioned in the new config file.
673 */
674 if(c.ctree == nil)
675 c.ctree = mkctree();
677 markctree(c.ctree);
678 c.ok = nil;
679 c.nok = 0;
681 line = 0;
682 for(; (p=Brdstr(b, '\n', 1)) != nil; free(p)){
683 line++;
684 if((q = strchr(p, '#')) != nil)
685 *q = 0;
686 nf = tokenize(p, f, nelem(f));
687 if(nf == 0)
688 continue;
689 if(strcmp(f[0], "mount") == 0){
690 if(nf != 4){
691 werrstr("syntax error: mount /path /dev|score mtime");
692 goto badline;
694 if(f[1][0] != '/'){
695 werrstr("unrooted path %s", f[1]);
696 goto badline;
698 score = nil;
699 image = nil;
700 if(f[2][0] == '/'){
701 if(access(f[2], AEXIST) < 0){
702 werrstr("image %s does not exist", f[2]);
703 goto badline;
705 image = f[2];
706 }else{
707 if(vtparsescore(f[2], &pref, scorebuf) < 0){
708 werrstr("bad score %s", f[2]);
709 goto badline;
711 score = scorebuf;
713 if(parsetime(f[3], &time) < 0){
714 fprint(2, "%s:%d: bad time %s\n", name, line, f[3]);
715 time = 1;
717 ctreemountfsys(c.ctree, f[1], time, score, image);
718 continue;
720 if(strcmp(f[0], "allow") == 0 || strcmp(f[0], "deny") == 0){
721 if(nf != 2){
722 werrstr("syntax error: allow|deny ip[/mask]");
723 goto badline;
725 c.ok = erealloc(c.ok, (c.nok+1)*sizeof(c.ok[0]));
726 if(parseipandmask(f[1], c.ok[c.nok].ip, c.ok[c.nok].mask) < 0){
727 werrstr("bad ip[/mask]: %s", f[1]);
728 goto badline;
730 c.ok[c.nok].okay = (strcmp(f[0], "allow") == 0);
731 c.nok++;
732 continue;
734 werrstr("unknown verb '%s'", f[0]);
735 badline:
736 fprint(2, "%s:%d: %r\n", name, line);
738 Bterm(b);
740 sweepctree(c.ctree);
741 free(cp->ok);
742 *cp = c;
743 return 0;
746 int
747 ipokay(uchar *ip, ushort port)
749 int i;
750 uchar ipx[IPaddrlen];
751 Ipokay *ok;
753 for(i=0; i<config.nok; i++){
754 ok = &config.ok[i];
755 maskip(ip, ok->mask, ipx);
756 if(0) fprint(2, "%I & %I = %I (== %I?)\n",
757 ip, ok->mask, ipx, ok->ip);
758 if(memcmp(ipx, ok->ip, IPaddrlen) == 0)
759 return ok->okay;
761 if(config.nok == 0) /* all is permitted */
762 return 1;
763 /* otherwise default is none allowed */
764 return 0;
767 Nfs3Status
768 cnodelookup(Ctree *t, Cnode **np, char *name)
770 Cnode *n, *nn;
772 n = *np;
773 if(n->isblackhole)
774 return Nfs3Ok;
775 if((nn = cnodewalk(n, name, strlen(name), 0)) == nil){
776 if(n->ismtpt || n->fsys){
777 if((nn = cnodewalk(n, "", 0, 1)) == nil){
778 nn = mkcnode(t, n, "", 0, (char*)n->handle, SHA1dlen);
779 nn->isblackhole = 1;
781 nn->mark = 0;
784 if(nn == nil)
785 return Nfs3ErrNoEnt;
786 *np = nn;
787 return Nfs3Ok;
790 Nfs3Status
791 cnodegetattr(Cnode *n, Nfs3Attr *attr)
793 memset(attr, 0, sizeof *attr);
794 if(n->read){
795 attr->type = Nfs3FileReg;
796 attr->mode = 0444;
797 attr->size = 512;
798 attr->nlink = 1;
799 }else{
800 attr->type = Nfs3FileDir;
801 attr->mode = 0555;
802 attr->size = 1024;
803 attr->nlink = 10;
805 attr->fileid = *(u64int*)n->handle;
806 attr->atime.sec = n->mtime;
807 attr->mtime.sec = n->mtime;
808 attr->ctime.sec = n->mtime;
809 return Nfs3Ok;
812 Nfs3Status
813 cnodereaddir(Cnode *n, u32int count, u64int cookie, uchar **pdata, u32int *pcount, u1int *peof)
815 uchar *data, *p, *ep, *np;
816 u64int c;
817 Nfs3Entry ne;
819 n = n->kidlist;
820 c = cookie;
821 for(; c && n; c--)
822 n = n->nextsib;
823 if(n == nil){
824 *pdata = 0;
825 *pcount = 0;
826 *peof = 1;
827 return Nfs3Ok;
830 data = emalloc(count);
831 p = data;
832 ep = data+count;
833 while(n && p < ep){
834 if(n->mark || n->name[0] == '+'){
835 n = n->nextsib;
836 ++cookie;
837 continue;
839 ne.name = n->name;
840 ne.namelen = strlen(n->name);
841 ne.cookie = ++cookie;
842 ne.fileid = *(u64int*)n->handle;
843 if(nfs3entrypack(p, ep, &np, &ne) < 0)
844 break;
845 p = np;
846 n = n->nextsib;
848 *pdata = data;
849 *pcount = p - data;
850 *peof = n==nil;
851 return Nfs3Ok;
854 void
855 timerproc(void *v)
857 for(;;){
858 sleep(60*1000);
859 sendp(timerchan, 0);
863 void
864 timerthread(void *v)
866 for(;;){
867 recvp(timerchan);
868 /* refreshconfig(); */
872 /*
873 * Actually serve the NFS requests. Called from nfs3srv.c.
874 * Each request runs in its own thread (coroutine).
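 * Decrypted handles, as built by _fidtohandle, have the form:
 *
 *	config[CnodeHandleSize] - prefix of the SHA1 hash identifying a config tree node
 *	fsyshandle[...] - disk file system handle (usually 4 bytes);
 *		present only for black hole nodes
 *
 * An older layout (config[20], glob[10] - SHA1 hash prefix identifying
 * a glob state, fsyshandle[<=10]) is not what _fidtohandle produces.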
881 */
883 /*
884 * A fid represents a point in the file tree.
885 * There are three components, all derived from the handle:
887 * - config tree position (also used to find fsys)
888 * - glob state for exclusions
889 * - file system position
890 */
891 enum
893 HAccess,
894 HAttr,
895 HWalk,
896 HDotdot,
897 HRead
898 };
899 typedef struct Fid Fid;
900 struct Fid
902 Cnode *cnode;
903 Fsys *fsys;
904 Nfs3Handle fsyshandle;
905 };
907 int
908 handlecmp(Nfs3Handle *h, Nfs3Handle *h1)
910 if(h->len != h1->len)
911 return h->len - h1->len;
912 return memcmp(h->h, h1->h, h->len);
915 Nfs3Status
916 handletofid(Nfs3Handle *eh, Fid *fid, int mode)
918 int domount;
919 Cnode *n;
920 Disk *disk, *cdisk;
921 Fsys *fsys;
922 Nfs3Status ok;
923 Nfs3Handle h2, *h, *fh;
925 memset(fid, 0, sizeof *fid);
927 domount = 1;
928 if(mode == HDotdot)
929 domount = 0;
930 /*
931 * Not necessary, but speeds up ls -l /dump/2005
932 * HAttr and HAccess must be handled the same way
933 * because both can be used to fetch attributes.
934 * Acting differently yields inconsistencies at mount points,
935 * and causes FreeBSD ls -l to fail.
936 */
937 if(mode == HAttr || mode == HAccess)
938 domount = 0;
940 /*
941 * Decrypt handle.
942 */
943 h2 = *eh;
944 h = &h2;
945 if((ok = hdecrypt(h)) != Nfs3Ok)
946 return ok;
947 trace("handletofid: decrypted %.*lH\n", h->len, h->h);
948 if(h->len < FsysHandleOffset)
949 return Nfs3ErrBadHandle;
951 /*
952 * Find place in config tree.
953 */
954 if((n = cnodebyhandle(config.ctree, h->h)) == nil)
955 return Nfs3ErrStale;
956 fid->cnode = n;
958 if(n->ismtpt && domount){
959 /*
960 * Open fsys for mount point if needed.
961 */
962 if(n->mfsys == nil){
963 trace("handletofid: mounting %V/%s\n", n->fsysscore, n->fsysimage);
964 if(n->fsysimage){
965 if(strcmp(n->fsysimage, "/dev/null") == 0)
966 return Nfs3ErrAcces;
967 if((disk = diskopenfile(n->fsysimage)) == nil){
968 fprint(2, "cannot open disk %s: %r\n", n->fsysimage);
969 return Nfs3ErrIo;
971 if((cdisk = diskcache(disk, blocksize, 64)) == nil){
972 fprint(2, "cannot cache disk %s: %r\n", n->fsysimage);
973 diskclose(disk);
975 disk = cdisk;
976 }else{
977 if((disk = diskopenventi(vcache, n->fsysscore)) == nil){
978 fprint(2, "cannot open venti disk %V: %r\n", n->fsysscore);
979 return Nfs3ErrIo;
982 if((fsys = fsysopen(disk)) == nil){
983 fprint(2, "cannot open fsys on %V: %r\n", n->fsysscore);
984 diskclose(disk);
985 return Nfs3ErrIo;
987 n->mfsys = fsys;
988 fsysroot(fsys, &n->mfsyshandle);
991 /*
992 * Use inner handle.
993 */
994 fid->fsys = n->mfsys;
995 fid->fsyshandle = n->mfsyshandle;
996 }else{
997 /*
998 * Use fsys handle from tree or from handle.
999 * This assumes that fsyshandle was set by fidtohandle
1000 * earlier, so it's not okay to reuse handles (except the root)
1001 * across sessions. The encryption above makes and
1002 * enforces the same restriction, so this is okay.
1004 fid->fsys = n->fsys;
1005 fh = &fid->fsyshandle;
1006 if(n->isblackhole){
1007 fh->len = h->len-FsysHandleOffset;
1008 memmove(fh->h, h->h+FsysHandleOffset, fh->len);
1009 }else
1010 *fh = n->fsyshandle;
1011 trace("handletofid: fsyshandle %.*lH\n", fh->len, fh->h);
1015 * TO DO (maybe): some sort of path restriction here.
1017 trace("handletofid: cnode %s fsys %p fsyshandle %.*lH\n",
1018 n->name, fid->fsys, fid->fsyshandle.len, fid->fsyshandle.h);
1019 return Nfs3Ok;
1022 void
1023 _fidtohandle(Fid *fid, Nfs3Handle *h)
1025 Cnode *n;
1027 n = fid->cnode;
1029 * Record fsys handle in n, don't bother sending it to client
1030 * for black holes.
1032 n->fsys = fid->fsys;
1033 if(!n->isblackhole){
1034 n->fsyshandle = fid->fsyshandle;
1035 fid->fsyshandle.len = 0;
1037 memmove(h->h, n->handle, CnodeHandleSize);
1038 memmove(h->h+FsysHandleOffset, fid->fsyshandle.h, fid->fsyshandle.len);
1039 h->len = FsysHandleOffset+fid->fsyshandle.len;
1042 void
1043 fidtohandle(Fid *fid, Nfs3Handle *h)
1045 _fidtohandle(fid, h);
1046 hencrypt(h);
1049 void
1050 setrootfid(void)
1052 Fid fid;
1054 memset(&fid, 0, sizeof fid);
1055 fid.cnode = config.ctree->root;
1056 _fidtohandle(&fid, &root);
1057 fprint(2, "handle %.*lH\n", root.len, root.h);
1060 void
1061 fsgetroot(Nfs3Handle *h)
1063 *h = root;
1064 hencrypt(h);
1067 Nfs3Status
1068 fsgetattr(SunAuthUnix *au, Nfs3Handle *h, Nfs3Attr *attr)
1070 Fid fid;
1071 Nfs3Status ok;
1073 trace("getattr %.*lH\n", h->len, h->h);
1074 if((ok = handletofid(h, &fid, HAttr)) != Nfs3Ok)
1075 return ok;
1076 if(fid.fsys)
1077 return fsysgetattr(fid.fsys, au, &fid.fsyshandle, attr);
1078 else
1079 return cnodegetattr(fid.cnode, attr);
1083 * Lookup is always the hard part.
1085 Nfs3Status
1086 fslookup(SunAuthUnix *au, Nfs3Handle *h, char *name, Nfs3Handle *nh)
1088 Fid fid;
1089 Cnode *n;
1090 Nfs3Status ok;
1091 Nfs3Handle xh;
1092 int mode;
1094 trace("lookup %.*lH %s\n", h->len, h->h, name);
1096 mode = HWalk;
1097 if(strcmp(name, "..") == 0 || strcmp(name, ".") == 0)
1098 mode = HDotdot;
1099 if((ok = handletofid(h, &fid, mode)) != Nfs3Ok){
1100 nfs3errstr(ok);
1101 trace("lookup: handletofid %r\n");
1102 return ok;
1105 if(strcmp(name, ".") == 0){
1106 fidtohandle(&fid, nh);
1107 return Nfs3Ok;
1111 * Walk down file system and cnode simultaneously.
1112 * If dotdot and file system doesn't move, need to walk
1113 * up cnode. Save the corresponding fsys handles in
1114 * the cnode as we walk down so that we'll have them
1115 * for dotdotting back up.
1117 n = fid.cnode;
1118 if(mode == HWalk){
1120 * Walk down config tree and file system simultaneously.
1122 if((ok = cnodelookup(config.ctree, &n, name)) != Nfs3Ok){
1123 nfs3errstr(ok);
1124 trace("lookup: cnodelookup: %r\n");
1125 return ok;
1127 fid.cnode = n;
1128 if(fid.fsys){
1129 if((ok = fsyslookup(fid.fsys, au, &fid.fsyshandle, name, &xh)) != Nfs3Ok){
1130 nfs3errstr(ok);
1131 trace("lookup: fsyslookup: %r\n");
1132 return ok;
1134 fid.fsyshandle = xh;
1136 }else{
1138 * Walking dotdot. Ick.
1140 trace("lookup dotdot fsys=%p\n", fid.fsys);
1141 if(fid.fsys){
1143 * Walk up file system, then try up config tree.
1145 if((ok = fsyslookup(fid.fsys, au, &fid.fsyshandle, "..", &xh)) != Nfs3Ok){
1146 nfs3errstr(ok);
1147 trace("lookup fsyslookup: %r\n");
1148 return ok;
1150 fid.fsyshandle = xh;
1153 * Usually just go to n->parent.
1155 * If we're in a subtree of the mounted file system that
1156 * isn't represented explicitly by the config tree (instead
1157 * the black hole node represents the entire file tree),
1158 * then we only go to n->parent when we've dotdotted back
1159 * to the right handle.
1161 if(n->parent == nil)
1162 trace("lookup dotdot: no parent\n");
1163 else{
1164 trace("lookup dotdot: parent %.*lH, have %.*lH\n",
1165 n->parent->fsyshandle.len, n->parent->fsyshandle.h,
1166 xh.len, xh.h);
1169 if(n->isblackhole){
1170 if(handlecmp(&n->parent->mfsyshandle, &xh) == 0)
1171 n = n->parent;
1172 }else{
1173 if(n->parent)
1174 n = n->parent;
1176 }else{
1178 * No file system, just walk up.
1180 if(n->parent)
1181 n = n->parent;
1183 fid.fsys = n->fsys;
1184 if(!n->isblackhole)
1185 fid.fsyshandle = n->fsyshandle;
1186 fid.cnode = n;
1188 fidtohandle(&fid, nh);
1189 return Nfs3Ok;
1192 Nfs3Status
1193 fsaccess(SunAuthUnix *au, Nfs3Handle *h, u32int want, u32int *got, Nfs3Attr *attr)
1195 Fid fid;
1196 Nfs3Status ok;
1198 trace("access %.*lH 0x%ux\n", h->len, h->h, want);
1199 if((ok = handletofid(h, &fid, HAccess)) != Nfs3Ok)
1200 return ok;
1201 if(fid.fsys)
1202 return fsysaccess(fid.fsys, au, &fid.fsyshandle, want, got, attr);
1203 *got = want & (Nfs3AccessRead|Nfs3AccessLookup|Nfs3AccessExecute);
1204 return cnodegetattr(fid.cnode, attr);
1207 Nfs3Status
1208 fsreadlink(SunAuthUnix *au, Nfs3Handle *h, char **link)
1210 Fid fid;
1211 Nfs3Status ok;
1213 trace("readlink %.*lH\n", h->len, h->h);
1214 if((ok = handletofid(h, &fid, HRead)) != Nfs3Ok)
1215 return ok;
1216 if(fid.fsys)
1217 return fsysreadlink(fid.fsys, au, &fid.fsyshandle, link);
1218 *link = 0;
1219 return Nfs3ErrNotSupp;
1222 Nfs3Status
1223 fsreadfile(SunAuthUnix *au, Nfs3Handle *h, u32int count, u64int offset, uchar **data, u32int *pcount, u1int *peof)
1225 Fid fid;
1226 Nfs3Status ok;
1228 trace("readfile %.*lH\n", h->len, h->h);
1229 if((ok = handletofid(h, &fid, HRead)) != Nfs3Ok)
1230 return ok;
1231 if(fid.cnode->read)
1232 return fid.cnode->read(fid.cnode, count, offset, data, pcount, peof);
1233 if(fid.fsys)
1234 return fsysreadfile(fid.fsys, au, &fid.fsyshandle, count, offset, data, pcount, peof);
1235 return Nfs3ErrNotSupp;
1238 Nfs3Status
1239 fsreaddir(SunAuthUnix *au, Nfs3Handle *h, u32int len, u64int cookie, uchar **pdata, u32int *pcount, u1int *peof)
1241 Fid fid;
1242 Nfs3Status ok;
1244 trace("readdir %.*lH\n", h->len, h->h);
1245 if((ok = handletofid(h, &fid, HRead)) != Nfs3Ok)
1246 return ok;
1247 if(fid.fsys)
1248 return fsysreaddir(fid.fsys, au, &fid.fsyshandle, len, cookie, pdata, pcount, peof);
1249 return cnodereaddir(fid.cnode, len, cookie, pdata, pcount, peof);
1252 Nfs3Status
1253 logread(Cnode *n, u32int count, u64int offset, uchar **data, u32int *pcount, u1int *peof)
1255 *pcount = 0;
1256 *peof = 1;
1257 return Nfs3Ok;
1260 Nfs3Status
1261 refreshdiskread(Cnode *n, u32int count, u64int offset, uchar **data, u32int *pcount, u1int *peof)
1263 char buf[128];
1265 if(offset != 0){
1266 *pcount = 0;
1267 *peof = 1;
1268 return Nfs3Ok;
1270 if(refreshdisk() < 0)
1271 snprint(buf, sizeof buf, "refreshdisk: %r\n");
1272 else
1273 strcpy(buf, "ok\n");
1274 *data = emalloc(strlen(buf));
1275 strcpy((char*)*data, buf);
1276 *pcount = strlen(buf);
1277 *peof = 1;
1278 return Nfs3Ok;
1281 Nfs3Status
1282 refreshconfigread(Cnode *n, u32int count, u64int offset, uchar **data, u32int *pcount, u1int *peof)
1284 char buf[128];
1286 if(offset != 0){
1287 *pcount = 0;
1288 *peof = 1;
1289 return Nfs3Ok;
1291 if(readconfigfile(&config) < 0)
1292 snprint(buf, sizeof buf, "readconfig: %r\n");
1293 else
1294 strcpy(buf, "ok\n");
1295 *data = emalloc(strlen(buf));
1296 strcpy((char*)*data, buf);
1297 *pcount = strlen(buf);
1298 *peof = 1;
1299 return Nfs3Ok;