All of the above --- fs/Kconfig | 49 + fs/Makefile | 1 fs/exec.c | 12 fs/lockd/clntlock.c | 113 ++-- fs/lockd/clntproc.c | 40 + fs/lockd/host.c | 8 fs/lockd/mon.c | 7 fs/locks.c | 6 fs/namei.c | 21 fs/namespace.c | 38 + fs/nfs/Makefile | 4 fs/nfs/callback.c | 1 fs/nfs/callback_proc.c | 1 fs/nfs/callback_xdr.c | 2 fs/nfs/delegation.c | 26 + fs/nfs/delegation.h | 1 fs/nfs/dir.c | 230 ++++++-- fs/nfs/direct.c | 12 fs/nfs/file.c | 70 ++ fs/nfs/idmap.c | 2 fs/nfs/inode.c | 1039 ++++++++++++++++++++++++++++++++-------- fs/nfs/mount_clnt.c | 4 fs/nfs/namespace.c | 111 ++++ fs/nfs/nfs2xdr.c | 3 fs/nfs/nfs3acl.c | 403 +++++++++++++++ fs/nfs/nfs3proc.c | 45 + fs/nfs/nfs3xdr.c | 150 +++++ fs/nfs/nfs4_fs.h | 270 ++++++++++ fs/nfs/nfs4proc.c | 810 +++++++++++++++++++++---------- fs/nfs/nfs4renewd.c | 1 fs/nfs/nfs4state.c | 254 +++++---- fs/nfs/nfs4xdr.c | 419 +++++++++++++++- fs/nfs/nfs_iostat.h | 87 +++ fs/nfs/nfsroot.c | 9 fs/nfs/pagelist.c | 147 ++++- fs/nfs/proc.c | 3 fs/nfs/read.c | 12 fs/nfs/write.c | 119 ++-- fs/nfs_common/Makefile | 7 fs/nfs_common/nfsacl.c | 257 +++++++++ fs/nfsd/Makefile | 2 fs/nfsd/nfs2acl.c | 336 ++++++++++++ fs/nfsd/nfs3acl.c | 267 ++++++++++ fs/nfsd/nfs3xdr.c | 13 fs/nfsd/nfs4callback.c | 4 fs/nfsd/nfsproc.c | 1 fs/nfsd/nfssvc.c | 28 + fs/nfsd/nfsxdr.c | 11 fs/nfsd/vfs.c | 107 ++++ fs/open.c | 14 fs/proc/base.c | 40 + fs/super.c | 22 include/linux/fs.h | 2 include/linux/iosem.h | 110 ++++ include/linux/lockd/lockd.h | 7 include/linux/mount.h | 5 include/linux/namei.h | 10 include/linux/nfs4.h | 3 include/linux/nfs_fs.h | 324 ++---------- include/linux/nfs_fs_i.h | 5 include/linux/nfs_fs_sb.h | 8 include/linux/nfs_mount.h | 1 include/linux/nfs_page.h | 31 - include/linux/nfs_xdr.h | 98 +++ include/linux/nfsacl.h | 58 ++ include/linux/nfsd/nfsd.h | 16 include/linux/nfsd/xdr.h | 4 include/linux/nfsd/xdr3.h | 26 + include/linux/sunrpc/auth_gss.h | 3 include/linux/sunrpc/clnt.h | 17 include/linux/sunrpc/sched.h | 3 include/linux/sunrpc/svc.h | 14 include/linux/sunrpc/xdr.h | 22 include/linux/sunrpc/xprt.h | 2 lib/Makefile | 2 lib/iosem.c | 177 ++++++ net/sunrpc/auth.c | 6 net/sunrpc/auth_gss/auth_gss.c | 341 ++++++++++++- net/sunrpc/clnt.c | 268 ++++++++-- net/sunrpc/pmap_clnt.c | 11 net/sunrpc/rpc_pipe.c | 6 net/sunrpc/sched.c | 86 +-- net/sunrpc/sunrpc_syms.c | 6 net/sunrpc/svc.c | 36 - net/sunrpc/xdr.c | 326 ++++++++++++ net/sunrpc/xprt.c | 83 ++- 86 files changed, 6454 insertions(+), 1302 deletions(-) Index: linux-2.6.12/fs/Kconfig =================================================================== --- linux-2.6.12.orig/fs/Kconfig +++ linux-2.6.12/fs/Kconfig @@ -1318,6 +1318,7 @@ config NFS_FS depends on INET select LOCKD select SUNRPC + select NFS_ACL_SUPPORT if NFS_V3_ACL help If you are connected to some other (usually local) Unix computer (using SLIP, PLIP, PPP or Ethernet) and want to mount files residing @@ -1360,6 +1361,16 @@ config NFS_V3 If unsure, say Y. +config NFS_V3_ACL + bool "Provide client support for the NFSv3 ACL protocol extension" + depends on NFS_V3 + help + Implement the NFSv3 ACL protocol extension for manipulating POSIX + Access Control Lists. The server should also be compiled with + the NFSv3 ACL protocol extension; see the CONFIG_NFSD_V3_ACL option. + + If unsure, say N. + config NFS_V4 bool "Provide NFSv4 client support (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL @@ -1403,6 +1414,7 @@ config NFSD select LOCKD select SUNRPC select EXPORTFS + select NFS_ACL_SUPPORT if NFSD_V3_ACL || NFSD_V2_ACL help If you want your Linux box to act as an NFS *server*, so that other computers on your local network which support NFS can access certain @@ -1426,6 +1438,10 @@ config NFSD To compile the NFS server support as a module, choose M here: the module will be called nfsd. If unsure, say N. +config NFSD_V2_ACL + bool + depends on NFSD + config NFSD_V3 bool "Provide NFSv3 server support" depends on NFSD @@ -1433,6 +1449,16 @@ config NFSD_V3 If you would like to include the NFSv3 server as well as the NFSv2 server, say Y here. If unsure, say Y. +config NFSD_V3_ACL + bool "Provide server support for the NFSv3 ACL protocol extension" + depends on NFSD_V3 + select NFSD_V2_ACL + help + Implement the NFSv3 ACL protocol extension for manipulating POSIX + Access Control Lists on exported file systems. NFS clients should + be compiled with the NFSv3 ACL protocol extension; see the + CONFIG_NFS_V3_ACL option. If unsure, say N. + config NFSD_V4 bool "Provide NFSv4 server support (EXPERIMENTAL)" depends on NFSD_V3 && EXPERIMENTAL @@ -1477,6 +1503,15 @@ config LOCKD_V4 config EXPORTFS tristate +config NFS_ACL_SUPPORT + tristate + select FS_POSIX_ACL + +config NFS_COMMON + bool + depends on NFSD || NFS_FS + default y + config SUNRPC tristate @@ -1516,6 +1551,20 @@ config RPCSEC_GSS_SPKM3 If unsure, say N. +config RPCSEC_GSS_KEYRING + bool "Secure RPC: keyring support (EXPERIMENTAL)" + depends on SUNRPC_GSS && KEYS && EXPERIMENTAL + help + Use the new RPCSEC_GSS upcall mechanism based on keyrings. + This allows individual threads, processes or groups of + processes to specify their own authentication tokens, + providing much the same functionality that AFS pags used to. + + Note: requires the new helper program /sbin/request-key, as + well as an updated rpc.gssd daemon in order to work. + + If unsure, say N + config SMB_FS tristate "SMB file system support (to mount Windows shares etc.)" depends on INET Index: linux-2.6.12/fs/Makefile =================================================================== --- linux-2.6.12.orig/fs/Makefile +++ linux-2.6.12/fs/Makefile @@ -31,6 +31,7 @@ obj-$(CONFIG_BINFMT_FLAT) += binfmt_flat obj-$(CONFIG_FS_MBCACHE) += mbcache.o obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o +obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_QUOTA) += dquot.o obj-$(CONFIG_QFMT_V1) += quota_v1.o Index: linux-2.6.12/fs/exec.c =================================================================== --- linux-2.6.12.orig/fs/exec.c +++ linux-2.6.12/fs/exec.c @@ -123,7 +123,7 @@ asmlinkage long sys_uselib(const char __ struct nameidata nd; int error; - nd.intent.open.flags = FMODE_READ; + nd_init_open_intent(&nd, FMODE_READ, 0); error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); if (error) goto out; @@ -136,7 +136,7 @@ asmlinkage long sys_uselib(const char __ if (error) goto exit; - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nd_open_file(&nd, O_RDONLY); error = PTR_ERR(file); if (IS_ERR(file)) goto out; @@ -164,7 +164,7 @@ asmlinkage long sys_uselib(const char __ out: return error; exit: - path_release(&nd); + path_release_open_intent(&nd); goto out; } @@ -492,7 +492,7 @@ struct file *open_exec(const char *name) int err; struct file *file; - nd.intent.open.flags = FMODE_READ; + nd_init_open_intent(&nd, FMODE_READ, 0); err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); file = ERR_PTR(err); @@ -506,7 +506,7 @@ struct file *open_exec(const char *name) err = -EACCES; file = ERR_PTR(err); if (!err) { - file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); + file = nd_open_file(&nd, O_RDONLY); if (!IS_ERR(file)) { err = deny_write_access(file); if (err) { @@ -518,7 +518,7 @@ out: return file; } } - path_release(&nd); + path_release_open_intent(&nd); } goto out; } Index: linux-2.6.12/fs/lockd/clntlock.c =================================================================== --- linux-2.6.12.orig/fs/lockd/clntlock.c +++ linux-2.6.12/fs/lockd/clntlock.c @@ -31,7 +31,7 @@ static int reclaimer(void *ptr); * This is the representation of a blocked client lock. */ struct nlm_wait { - struct nlm_wait * b_next; /* linked list */ + struct list_head b_list; /* linked list */ wait_queue_head_t b_wait; /* where to wait on */ struct nlm_host * b_host; struct file_lock * b_lock; /* local file lock */ @@ -39,27 +39,54 @@ struct nlm_wait { u32 b_status; /* grant callback status */ }; -static struct nlm_wait * nlm_blocked; +static LIST_HEAD(nlm_blocked); + +/* + * Queue up a lock for blocking so that the GRANTED request can see it + */ +int nlmclnt_prepare_block(struct nlm_rqst *req, struct nlm_host *host, struct file_lock *fl) +{ + struct nlm_wait *block; + + BUG_ON(req->a_block != NULL); + block = kmalloc(sizeof(*block), GFP_KERNEL); + if (block == NULL) + return -ENOMEM; + block->b_host = host; + block->b_lock = fl; + init_waitqueue_head(&block->b_wait); + block->b_status = NLM_LCK_BLOCKED; + + list_add(&block->b_list, &nlm_blocked); + req->a_block = block; + + return 0; +} + +void nlmclnt_finish_block(struct nlm_rqst *req) +{ + struct nlm_wait *block = req->a_block; + + if (block == NULL) + return; + req->a_block = NULL; + list_del(&block->b_list); + kfree(block); +} /* * Block on a lock */ -int -nlmclnt_block(struct nlm_host *host, struct file_lock *fl, u32 *statp) +long nlmclnt_block(struct nlm_rqst *req, long timeout) { - struct nlm_wait block, **head; - int err; - u32 pstate; - - block.b_host = host; - block.b_lock = fl; - init_waitqueue_head(&block.b_wait); - block.b_status = NLM_LCK_BLOCKED; - block.b_next = nlm_blocked; - nlm_blocked = █ + struct nlm_wait *block = req->a_block; + long ret; - /* Remember pseudo nsm state */ - pstate = host->h_state; + /* A borken server might ask us to block even if we didn't + * request it. Just say no! + */ + if (!req->a_args.block) + return -EAGAIN; /* Go to sleep waiting for GRANT callback. Some servers seem * to lose callbacks, however, so we're going to poll from @@ -69,28 +96,16 @@ nlmclnt_block(struct nlm_host *host, str * a 1 minute timeout would do. See the comment before * nlmclnt_lock for an explanation. */ - sleep_on_timeout(&block.b_wait, 30*HZ); + ret = wait_event_interruptible_timeout(block->b_wait, + block->b_status != NLM_LCK_BLOCKED, + timeout); - for (head = &nlm_blocked; *head; head = &(*head)->b_next) { - if (*head == &block) { - *head = block.b_next; - break; - } - } - - if (!signalled()) { - *statp = block.b_status; - return 0; + if (block->b_status != NLM_LCK_BLOCKED) { + req->a_res.status = block->b_status; + block->b_status = NLM_LCK_BLOCKED; } - /* Okay, we were interrupted. Cancel the pending request - * unless the server has rebooted. - */ - if (pstate == host->h_state && (err = nlmclnt_cancel(host, fl)) < 0) - printk(KERN_NOTICE - "lockd: CANCEL call failed (errno %d)\n", -err); - - return -ERESTARTSYS; + return ret; } /* @@ -100,27 +115,23 @@ u32 nlmclnt_grant(struct nlm_lock *lock) { struct nlm_wait *block; + u32 res = nlm_lck_denied; /* * Look up blocked request based on arguments. * Warning: must not use cookie to match it! */ - for (block = nlm_blocked; block; block = block->b_next) { - if (nlm_compare_locks(block->b_lock, &lock->fl)) - break; + list_for_each_entry(block, &nlm_blocked, b_list) { + if (nlm_compare_locks(block->b_lock, &lock->fl)) { + /* Alright, we found a lock. Set the return status + * and wake up the caller + */ + block->b_status = NLM_LCK_GRANTED; + wake_up(&block->b_wait); + res = nlm_granted; + } } - - /* Ooops, no blocked request found. */ - if (block == NULL) - return nlm_lck_denied; - - /* Alright, we found the lock. Set the return status and - * wake up the caller. - */ - block->b_status = NLM_LCK_GRANTED; - wake_up(&block->b_wait); - - return nlm_granted; + return res; } /* @@ -230,7 +241,7 @@ restart: host->h_reclaiming = 0; /* Now, wake up all processes that sleep on a blocked lock */ - for (block = nlm_blocked; block; block = block->b_next) { + list_for_each_entry(block, &nlm_blocked, b_list) { if (block->b_host == host) { block->b_status = NLM_LCK_DENIED_GRACE_PERIOD; wake_up(&block->b_wait); Index: linux-2.6.12/fs/lockd/clntproc.c =================================================================== --- linux-2.6.12.orig/fs/lockd/clntproc.c +++ linux-2.6.12/fs/lockd/clntproc.c @@ -21,6 +21,7 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT #define NLMCLNT_GRACE_WAIT (5*HZ) +#define NLMCLNT_POLL_TIMEOUT (30*HZ) static int nlmclnt_test(struct nlm_rqst *, struct file_lock *); static int nlmclnt_lock(struct nlm_rqst *, struct file_lock *); @@ -553,7 +554,8 @@ nlmclnt_lock(struct nlm_rqst *req, struc { struct nlm_host *host = req->a_host; struct nlm_res *resp = &req->a_res; - int status; + long timeout; + int status; if (!host->h_monitored && nsm_monitor(host) < 0) { printk(KERN_NOTICE "lockd: failed to monitor %s\n", @@ -562,15 +564,32 @@ nlmclnt_lock(struct nlm_rqst *req, struc goto out; } - do { - if ((status = nlmclnt_call(req, NLMPROC_LOCK)) >= 0) { - if (resp->status != NLM_LCK_BLOCKED) - break; - status = nlmclnt_block(host, fl, &resp->status); - } + if (req->a_args.block) { + status = nlmclnt_prepare_block(req, host, fl); if (status < 0) goto out; - } while (resp->status == NLM_LCK_BLOCKED && req->a_args.block); + } + for(;;) { + status = nlmclnt_call(req, NLMPROC_LOCK); + if (status < 0) + goto out_unblock; + if (resp->status != NLM_LCK_BLOCKED) + break; + /* Wait on an NLM blocking lock */ + timeout = nlmclnt_block(req, NLMCLNT_POLL_TIMEOUT); + /* Did a reclaimer thread notify us of a server reboot? */ + if (resp->status == NLM_LCK_DENIED_GRACE_PERIOD) + continue; + if (resp->status != NLM_LCK_BLOCKED) + break; + if (timeout >= 0) + continue; + /* We were interrupted. Send a CANCEL request to the server + * and exit + */ + status = (int)timeout; + goto out_unblock; + } if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; @@ -579,6 +598,11 @@ nlmclnt_lock(struct nlm_rqst *req, struc do_vfs_lock(fl); } status = nlm_stat_to_errno(resp->status); +out_unblock: + nlmclnt_finish_block(req); + /* Cancel the blocked request if it is still pending */ + if (resp->status == NLM_LCK_BLOCKED) + nlmclnt_cancel(host, fl); out: nlmclnt_release_lockargs(req); return status; Index: linux-2.6.12/fs/lockd/host.c =================================================================== --- linux-2.6.12.orig/fs/lockd/host.c +++ linux-2.6.12/fs/lockd/host.c @@ -189,17 +189,15 @@ nlm_bind_host(struct nlm_host *host) goto forgetit; xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); + xprt->nocong = 1; /* No congestion control for NLM */ + xprt->resvport = 1; /* NLM requires a reserved port */ /* Existing NLM servers accept AUTH_UNIX only */ clnt = rpc_create_client(xprt, host->h_name, &nlm_program, host->h_version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); + if (IS_ERR(clnt)) goto forgetit; - } clnt->cl_autobind = 1; /* turn on pmap queries */ - xprt->nocong = 1; /* No congestion control for NLM */ - xprt->resvport = 1; /* NLM requires a reserved port */ host->h_rpcclnt = clnt; } Index: linux-2.6.12/fs/lockd/mon.c =================================================================== --- linux-2.6.12.orig/fs/lockd/mon.c +++ linux-2.6.12/fs/lockd/mon.c @@ -115,20 +115,19 @@ nsm_create(void) xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL); if (IS_ERR(xprt)) return (struct rpc_clnt *)xprt; + xprt->resvport = 1; /* NSM requires a reserved port */ clnt = rpc_create_client(xprt, "localhost", &nsm_program, SM_VERSION, RPC_AUTH_NULL); if (IS_ERR(clnt)) - goto out_destroy; + goto out_err; clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; - xprt->resvport = 1; /* NSM requires a reserved port */ return clnt; -out_destroy: - xprt_destroy(xprt); +out_err: return clnt; } Index: linux-2.6.12/fs/locks.c =================================================================== --- linux-2.6.12.orig/fs/locks.c +++ linux-2.6.12/fs/locks.c @@ -1548,6 +1548,8 @@ int fcntl_getlk(struct file *filp, struc if (filp->f_op && filp->f_op->lock) { error = filp->f_op->lock(filp, F_GETLK, &file_lock); + if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) + file_lock.fl_ops->fl_release_private(&file_lock); if (error < 0) goto out; else @@ -1690,6 +1692,8 @@ int fcntl_getlk64(struct file *filp, str if (filp->f_op && filp->f_op->lock) { error = filp->f_op->lock(filp, F_GETLK, &file_lock); + if (file_lock.fl_ops && file_lock.fl_ops->fl_release_private) + file_lock.fl_ops->fl_release_private(&file_lock); if (error < 0) goto out; else @@ -1873,6 +1877,8 @@ void locks_remove_flock(struct file *fil .fl_end = OFFSET_MAX, }; filp->f_op->flock(filp, F_SETLKW, &fl); + if (fl.fl_ops && fl.fl_ops->fl_release_private) + fl.fl_ops->fl_release_private(&fl); } lock_kernel(); Index: linux-2.6.12/fs/namei.c =================================================================== --- linux-2.6.12.orig/fs/namei.c +++ linux-2.6.12/fs/namei.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -318,6 +319,19 @@ void path_release_on_umount(struct namei } /* + * Open intents have to release any file pointer that was allocated + * but not used by the VFS. + */ +void path_release_open_intent(struct nameidata *nd) +{ + if ((nd->flags & LOOKUP_OPEN) && nd->intent.open.file != NULL) { + fput(nd->intent.open.file); + nd->intent.open.file = NULL; + } + path_release(nd); +} + +/* * Internal lookup() using the new generic dcache. * SMP-safe */ @@ -733,6 +747,7 @@ static fastcall int __link_path_walk(con struct qstr this; unsigned int c; + nd->flags |= LOOKUP_CONTINUE; err = exec_permission_lite(inode, nd); if (err == -EAGAIN) { err = permission(inode, MAY_EXEC, nd); @@ -785,7 +800,6 @@ static fastcall int __link_path_walk(con if (err < 0) break; } - nd->flags |= LOOKUP_CONTINUE; /* This does the actual lookups.. */ err = do_lookup(nd, &this, &next); if (err) @@ -1426,8 +1440,7 @@ int open_namei(const char * pathname, in acc_mode |= MAY_APPEND; /* Fill in the open() intent data */ - nd->intent.open.flags = flag; - nd->intent.open.create_mode = mode; + nd_init_open_intent(nd, flag, mode); /* * The simplest case - just a plain lookup. @@ -1523,7 +1536,7 @@ exit_dput: if (nd->mnt != path.mnt) mntput(path.mnt); exit: - path_release(nd); + path_release_open_intent(nd); return error; do_link: Index: linux-2.6.12/fs/namespace.c =================================================================== --- linux-2.6.12.orig/fs/namespace.c +++ linux-2.6.12/fs/namespace.c @@ -265,6 +265,44 @@ struct seq_operations mounts_op = { .show = show_vfsmnt }; +static int show_vfsstat(struct seq_file *m, void *v) +{ + struct vfsmount *mnt = v; + int err = 0; + + /* device */ + if (mnt->mnt_devname) { + seq_puts(m, "device "); + mangle(m, mnt->mnt_devname); + } else + seq_puts(m, "no device"); + + /* mount point */ + seq_puts(m, " mounted on "); + seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + seq_putc(m, ' '); + + /* file system type */ + seq_puts(m, "with fstype "); + mangle(m, mnt->mnt_sb->s_type->name); + + /* optional statistics */ + if (mnt->mnt_sb->s_op->show_stats) { + seq_putc(m, ' '); + err = mnt->mnt_sb->s_op->show_stats(m, mnt); + } + + seq_putc(m, '\n'); + return err; +} + +struct seq_operations mountstats_op = { + .start = m_start, + .next = m_next, + .stop = m_stop, + .show = show_vfsstat, +}; + /** * may_umount_tree - check if a mount tree is busy * @mnt: root of mount tree Index: linux-2.6.12/fs/nfs/Makefile =================================================================== --- linux-2.6.12.orig/fs/nfs/Makefile +++ linux-2.6.12/fs/nfs/Makefile @@ -5,9 +5,11 @@ obj-$(CONFIG_NFS_FS) += nfs.o nfs-y := dir.o file.o inode.o nfs2xdr.o pagelist.o \ - proc.o read.o symlink.o unlink.o write.o + proc.o read.o symlink.o unlink.o write.o \ + namespace.o nfs-$(CONFIG_ROOT_NFS) += nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o +nfs-$(CONFIG_NFS_V3_ACL) += nfs3acl.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ delegation.o idmap.o \ callback.o callback_xdr.o callback_proc.o Index: linux-2.6.12/fs/nfs/callback.c =================================================================== --- linux-2.6.12.orig/fs/nfs/callback.c +++ linux-2.6.12/fs/nfs/callback.c @@ -14,6 +14,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #define NFSDBG_FACILITY NFSDBG_CALLBACK Index: linux-2.6.12/fs/nfs/callback_proc.c =================================================================== --- linux-2.6.12.orig/fs/nfs/callback_proc.c +++ linux-2.6.12/fs/nfs/callback_proc.c @@ -8,6 +8,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #include "delegation.h" Index: linux-2.6.12/fs/nfs/callback_xdr.c =================================================================== --- linux-2.6.12.orig/fs/nfs/callback_xdr.c +++ linux-2.6.12/fs/nfs/callback_xdr.c @@ -10,6 +10,7 @@ #include #include #include +#include "nfs4_fs.h" #include "callback.h" #define CB_OP_TAGLEN_MAXSZ (512) @@ -410,7 +411,6 @@ static int nfs4_callback_compound(struct xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); - rqstp->rq_res.head[0].iov_len = PAGE_SIZE; xdr_init_encode(&xdr_out, &rqstp->rq_res, p); decode_compound_hdr_arg(&xdr_in, &hdr_arg); Index: linux-2.6.12/fs/nfs/delegation.c =================================================================== --- linux-2.6.12.orig/fs/nfs/delegation.c +++ linux-2.6.12/fs/nfs/delegation.c @@ -16,6 +16,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" static struct nfs_delegation *nfs_alloc_delegation(void) @@ -194,6 +195,31 @@ restart: } /* + * Return a delegation for clear_inode() + */ +void nfs_inode_clear_delegation(struct inode *inode) +{ + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + + down_read(&clp->cl_sem); + spin_lock(&clp->cl_lock); + delegation = nfsi->delegation; + if (delegation != NULL) { + list_del_init(&delegation->super_list); + nfsi->delegation = NULL; + nfsi->delegation_state = 0; + } + spin_unlock(&clp->cl_lock); + up_read(&clp->cl_sem); + if (delegation != NULL) { + nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid); + nfs_free_delegation(delegation); + } +} + +/* * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. */ void nfs_handle_cb_pathdown(struct nfs4_client *clp) Index: linux-2.6.12/fs/nfs/delegation.h =================================================================== --- linux-2.6.12.orig/fs/nfs/delegation.h +++ linux-2.6.12/fs/nfs/delegation.h @@ -27,6 +27,7 @@ int nfs_inode_set_delegation(struct inod void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); int nfs_inode_return_delegation(struct inode *inode); int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); +void nfs_inode_clear_delegation(struct inode *inode); struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); void nfs_return_all_delegations(struct super_block *sb); Index: linux-2.6.12/fs/nfs/dir.c =================================================================== --- linux-2.6.12.orig/fs/nfs/dir.c +++ linux-2.6.12/fs/nfs/dir.c @@ -27,11 +27,13 @@ #include #include #include +#include "nfs_iostat.h" #include #include #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFS_PARANOIA 1 @@ -50,8 +52,10 @@ static int nfs_mknod(struct inode *, str static int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); static int nfs_fsync_dir(struct file *, struct dentry *, int); +static loff_t nfs_llseek_dir(struct file *, loff_t, int); struct file_operations nfs_dir_operations = { + .llseek = nfs_llseek_dir, .read = generic_read_dir, .readdir = nfs_readdir, .open = nfs_opendir, @@ -74,6 +78,27 @@ struct inode_operations nfs_dir_inode_op .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_V3 */ + #ifdef CONFIG_NFS_V4 static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); @@ -90,6 +115,9 @@ struct inode_operations nfs4_dir_inode_o .permission = nfs_permission, .getattr = nfs_getattr, .setattr = nfs_setattr, + .getxattr = nfs4_getxattr, + .setxattr = nfs4_setxattr, + .listxattr = nfs4_listxattr, }; #endif /* CONFIG_NFS_V4 */ @@ -116,7 +144,8 @@ typedef struct { struct page *page; unsigned long page_index; u32 *ptr; - u64 target; + u64 *dir_cookie; + loff_t current_index; struct nfs_entry *entry; decode_dirent_t decode; int plus; @@ -164,12 +193,10 @@ int nfs_readdir_filler(nfs_readdir_descr NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either - * throught inode->i_sem or some other mechanism. + * through inode->i_sem or some other mechanism. */ - if (page->index == 0) { - invalidate_inode_pages(inode->i_mapping); - NFS_I(inode)->readdir_timestamp = timestamp; - } + if (page->index == 0) + invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1); unlock_page(page); return 0; error: @@ -202,22 +229,22 @@ void dir_page_release(nfs_readdir_descri /* * Given a pointer to a buffer that has already been filled by a call - * to readdir, find the next entry. + * to readdir, find the next entry with cookie '*desc->dir_cookie'. * * If the end of the buffer has been reached, return -EAGAIN, if not, * return the offset within the buffer of the next entry to be * read. */ static inline -int find_dirent(nfs_readdir_descriptor_t *desc, struct page *page) +int find_dirent(nfs_readdir_descriptor_t *desc) { struct nfs_entry *entry = desc->entry; int loop_count = 0, status; while((status = dir_decode(desc)) == 0) { - dfprintk(VFS, "NFS: found cookie %Lu\n", (long long)entry->cookie); - if (entry->prev_cookie == desc->target) + dfprintk(VFS, "NFS: found cookie %Lu\n", (unsigned long long)entry->cookie); + if (entry->prev_cookie == *desc->dir_cookie) break; if (loop_count++ > 200) { loop_count = 0; @@ -229,8 +256,44 @@ int find_dirent(nfs_readdir_descriptor_t } /* - * Find the given page, and call find_dirent() in order to try to - * return the next entry. + * Given a pointer to a buffer that has already been filled by a call + * to readdir, find the entry at offset 'desc->file->f_pos'. + * + * If the end of the buffer has been reached, return -EAGAIN, if not, + * return the offset within the buffer of the next entry to be + * read. + */ +static inline +int find_dirent_index(nfs_readdir_descriptor_t *desc) +{ + struct nfs_entry *entry = desc->entry; + int loop_count = 0, + status; + + for(;;) { + status = dir_decode(desc); + if (status) + break; + + dfprintk(VFS, "NFS: found cookie %Lu at index %Ld\n", (unsigned long long)entry->cookie, desc->current_index); + + if (desc->file->f_pos == desc->current_index) { + *desc->dir_cookie = entry->cookie; + break; + } + desc->current_index++; + if (loop_count++ > 200) { + loop_count = 0; + schedule(); + } + } + dfprintk(VFS, "NFS: find_dirent_index() returns %d\n", status); + return status; +} + +/* + * Find the given page, and call find_dirent() or find_dirent_index in + * order to try to return the next entry. */ static inline int find_dirent_page(nfs_readdir_descriptor_t *desc) @@ -253,7 +316,10 @@ int find_dirent_page(nfs_readdir_descrip /* NOTE: Someone else may have changed the READDIRPLUS flag */ desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ - status = find_dirent(desc, page); + if (*desc->dir_cookie != 0) + status = find_dirent(desc); + else + status = find_dirent_index(desc); if (status < 0) dir_page_release(desc); out: @@ -268,7 +334,8 @@ int find_dirent_page(nfs_readdir_descrip * Recurse through the page cache pages, and return a * filled nfs_entry structure of the next directory entry if possible. * - * The target for the search is 'desc->target'. + * The target for the search is '*desc->dir_cookie' if non-0, + * 'desc->file->f_pos' otherwise */ static inline int readdir_search_pagecache(nfs_readdir_descriptor_t *desc) @@ -276,7 +343,16 @@ int readdir_search_pagecache(nfs_readdir int loop_count = 0; int res; - dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (long long)desc->target); + /* Always search-by-index from the beginning of the cache */ + if (*desc->dir_cookie == 0) { + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for offset %Ld\n", (long long)desc->file->f_pos); + desc->page_index = 0; + desc->entry->cookie = desc->entry->prev_cookie = 0; + desc->entry->eof = 0; + desc->current_index = 0; + } else + dfprintk(VFS, "NFS: readdir_search_pagecache() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); + for (;;) { res = find_dirent_page(desc); if (res != -EAGAIN) @@ -313,7 +389,7 @@ int nfs_do_filldir(nfs_readdir_descripto int loop_count = 0, res; - dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: nfs_do_filldir() filling starting @ cookie %Lu\n", (long long)entry->cookie); for(;;) { unsigned d_type = DT_UNKNOWN; @@ -333,10 +409,11 @@ int nfs_do_filldir(nfs_readdir_descripto } res = filldir(dirent, entry->name, entry->len, - entry->prev_cookie, fileid, d_type); + file->f_pos, fileid, d_type); if (res < 0) break; - file->f_pos = desc->target = entry->cookie; + file->f_pos++; + *desc->dir_cookie = entry->cookie; if (dir_decode(desc) != 0) { desc->page_index ++; break; @@ -349,7 +426,7 @@ int nfs_do_filldir(nfs_readdir_descripto dir_page_release(desc); if (dentry != NULL) dput(dentry); - dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (long long)desc->target, res); + dfprintk(VFS, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", (unsigned long long)*desc->dir_cookie, res); return res; } @@ -375,14 +452,14 @@ int uncached_readdir(nfs_readdir_descrip struct page *page = NULL; int status; - dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (long long)desc->target); + dfprintk(VFS, "NFS: uncached_readdir() searching for cookie %Lu\n", (unsigned long long)*desc->dir_cookie); page = alloc_page(GFP_HIGHUSER); if (!page) { status = -ENOMEM; goto out; } - desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->target, + desc->error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, *desc->dir_cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); @@ -391,7 +468,7 @@ int uncached_readdir(nfs_readdir_descrip desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { if ((status = dir_decode(desc)) == 0) - desc->entry->prev_cookie = desc->target; + desc->entry->prev_cookie = *desc->dir_cookie; } else status = -EIO; if (status < 0) @@ -412,8 +489,9 @@ int uncached_readdir(nfs_readdir_descrip goto out; } -/* The file offset position is now represented as a true offset into the - * page cache as is the case in most of the other filesystems. +/* The file offset position represents the dirent entry number. A + last cookie cache takes care of the common case of reading the + whole directory. */ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir) { @@ -426,6 +504,8 @@ static int nfs_readdir(struct file *filp struct nfs_fattr fattr; long res; + nfs_inc_stats(inode, NFSIOS_VFSGETDENTS); + lock_kernel(); res = nfs_revalidate_inode(NFS_SERVER(inode), inode); @@ -435,15 +515,15 @@ static int nfs_readdir(struct file *filp } /* - * filp->f_pos points to the file offset in the page cache. - * but if the cache has meanwhile been zapped, we need to - * read from the last dirent to revalidate f_pos - * itself. + * filp->f_pos points to the dirent entry number. + * *desc->dir_cookie has the cookie for the next entry. We have + * to either find the entry with the appropriate number or + * revalidate the cookie. */ memset(desc, 0, sizeof(*desc)); desc->file = filp; - desc->target = filp->f_pos; + desc->dir_cookie = &((struct nfs_open_context *)filp->private_data)->dir_cookie; desc->decode = NFS_PROTO(inode)->decode_dirent; desc->plus = NFS_USE_READDIRPLUS(inode); @@ -455,9 +535,10 @@ static int nfs_readdir(struct file *filp while(!desc->entry->eof) { res = readdir_search_pagecache(desc); + if (res == -EBADCOOKIE) { /* This means either end of directory */ - if (desc->entry->cookie != desc->target) { + if (*desc->dir_cookie && desc->entry->cookie != *desc->dir_cookie) { /* Or that the server has 'lost' a cookie */ res = uncached_readdir(desc, dirent, filldir); if (res >= 0) @@ -490,6 +571,28 @@ static int nfs_readdir(struct file *filp return 0; } +loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin) +{ + down(&filp->f_dentry->d_inode->i_sem); + switch (origin) { + case 1: + offset += filp->f_pos; + case 0: + if (offset >= 0) + break; + default: + offset = -EINVAL; + goto out; + } + if (offset != filp->f_pos) { + filp->f_pos = offset; + ((struct nfs_open_context *)filp->private_data)->dir_cookie = 0; + } +out: + up(&filp->f_dentry->d_inode->i_sem); + return offset; +} + /* * All directory operations under NFS are synchronous, so fsync() * is a dummy operation. @@ -610,6 +713,7 @@ static int nfs_lookup_revalidate(struct parent = dget_parent(dentry); lock_kernel(); dir = parent->d_inode; + nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE); inode = dentry->d_inode; if (!inode) { @@ -730,6 +834,17 @@ int nfs_is_exclusive_create(struct inode return (nd->intent.open.flags & O_EXCL) != 0; } +static inline int nfs_reval_fsid(struct inode *dir, + struct nfs_fh *fh, struct nfs_fattr *fattr) +{ + struct nfs_server *server = NFS_SERVER(dir); + + if (!nfs_fsid_equal(&server->fsid, &fattr->fsid)) + /* Revalidate fsid on root dir */ + return __nfs_revalidate_inode(server, dir->i_sb->s_root->d_inode); + return 0; +} + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) { struct dentry *res; @@ -740,6 +855,7 @@ static struct dentry *nfs_lookup(struct dfprintk(VFS, "NFS: lookup(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); + nfs_inc_stats(dir, NFSIOS_VFSLOOKUP); res = ERR_PTR(-ENAMETOOLONG); if (dentry->d_name.len > NFS_SERVER(dir)->namelen) @@ -751,10 +867,8 @@ static struct dentry *nfs_lookup(struct lock_kernel(); /* Revalidate parent directory attribute cache */ error = nfs_revalidate_inode(NFS_SERVER(dir), dir); - if (error < 0) { - res = ERR_PTR(error); - goto out_unlock; - } + if (error < 0) + goto out_err; /* If we're doing an exclusive create, optimize away the lookup */ if (nfs_is_exclusive_create(dir, nd)) @@ -763,10 +877,11 @@ static struct dentry *nfs_lookup(struct error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error == -ENOENT) goto no_entry; - if (error < 0) { - res = ERR_PTR(error); - goto out_unlock; - } + if (error < 0) + goto out_err; + error = nfs_reval_fsid(dir, &fhandle, &fattr); + if (error < 0) + goto out_err; res = ERR_PTR(-EACCES); inode = nfs_fhget(dentry->d_sb, &fhandle, &fattr); if (!inode) @@ -781,6 +896,9 @@ out_unlock: unlock_kernel(); out: return res; +out_err: + res = ERR_PTR(error); + goto out_unlock; } #ifdef CONFIG_NFS_V4 @@ -812,7 +930,6 @@ static int is_atomic_open(struct inode * static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { struct dentry *res = NULL; - struct inode *inode = NULL; int error; /* Check that we are indeed trying to open this file */ @@ -826,8 +943,10 @@ static struct dentry *nfs_atomic_lookup( dentry->d_op = NFS_PROTO(dir)->dentry_ops; /* Let vfs_create() deal with O_EXCL */ - if (nd->intent.open.flags & O_EXCL) - goto no_entry; + if (nd->intent.open.flags & O_EXCL) { + d_add(dentry, NULL); + goto out; + } /* Open the file on the server */ lock_kernel(); @@ -840,32 +959,30 @@ static struct dentry *nfs_atomic_lookup( if (nd->intent.open.flags & O_CREAT) { nfs_begin_data_update(dir); - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); nfs_end_data_update(dir); } else - inode = nfs4_atomic_open(dir, dentry, nd); + res = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); - if (IS_ERR(inode)) { - error = PTR_ERR(inode); + if (IS_ERR(res)) { + error = PTR_ERR(res); switch (error) { /* Make a negative dentry */ case -ENOENT: - inode = NULL; - break; + res = NULL; + goto out; /* This turned out not to be a regular file */ + case -EISDIR: + case -ENOTDIR: + goto no_open; case -ELOOP: if (!(nd->intent.open.flags & O_NOFOLLOW)) goto no_open; - /* case -EISDIR: */ /* case -EINVAL: */ default: - res = ERR_PTR(error); goto out; } - } -no_entry: - res = d_add_unique(dentry, inode); - if (res != NULL) + } else if (res != NULL) dentry = res; nfs_renew_times(dentry); nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); @@ -909,7 +1026,7 @@ static int nfs_open_revalidate(struct de */ lock_kernel(); verifier = nfs_save_change_attribute(dir); - ret = nfs4_open_revalidate(dir, dentry, openflags); + ret = nfs4_open_revalidate(dir, dentry, openflags, nd); if (!ret) nfs_set_verifier(dentry, verifier); unlock_kernel(); @@ -1032,7 +1149,7 @@ static int nfs_create(struct inode *dir, lock_kernel(); nfs_begin_data_update(dir); - error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags); + error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags, nd); nfs_end_data_update(dir); if (error != 0) goto out_err; @@ -1143,6 +1260,7 @@ static int nfs_sillyrename(struct inode dfprintk(VFS, "NFS: silly-rename(%s/%s, ct=%d)\n", dentry->d_parent->d_name.name, dentry->d_name.name, atomic_read(&dentry->d_count)); + nfs_inc_stats(dir, NFSIOS_SILLYRENAME); #ifdef NFS_PARANOIA if (!dentry->d_inode) @@ -1527,6 +1645,8 @@ int nfs_permission(struct inode *inode, struct rpc_cred *cred; int res = 0; + nfs_inc_stats(inode, NFSIOS_VFSACCESS); + if (mask == 0) goto out; /* Is this sys_access() ? */ Index: linux-2.6.12/fs/nfs/direct.c =================================================================== --- linux-2.6.12.orig/fs/nfs/direct.c +++ linux-2.6.12/fs/nfs/direct.c @@ -47,6 +47,7 @@ #include #include +#include "nfs_iostat.h" #include #include @@ -66,6 +67,7 @@ struct nfs_direct_req { struct kref kref; /* release manager */ struct list_head list; /* nfs_read_data structs */ wait_queue_head_t wait; /* wait for i/o completion */ + struct inode * inode; /* target file of I/O */ struct page ** pages; /* pages in our buffer */ unsigned int npages; /* count of pages */ atomic_t complete, /* i/os we're waiting for */ @@ -207,6 +209,8 @@ static void nfs_direct_read_result(struc { struct nfs_direct_req *dreq = (struct nfs_direct_req *) data->req; + nfs_add_stats(dreq->inode, NFSIOS_SERVERREADBYTES, data->res.count); + if (likely(status >= 0)) atomic_add(data->res.count, &dreq->count); else @@ -347,6 +351,7 @@ static ssize_t nfs_direct_read_seg(struc dreq->pages = pages; dreq->npages = nr_pages; + dreq->inode = inode; rpc_clnt_sigmask(clnt, &oldset); nfs_direct_read_schedule(dreq, inode, ctx, user_addr, count, @@ -354,6 +359,8 @@ static ssize_t nfs_direct_read_seg(struc result = nfs_direct_read_wait(dreq, clnt->cl_intr); rpc_clnt_sigunmask(clnt, &oldset); + if (result > 0) + nfs_add_stats(inode, NFSIOS_DIRECTREADBYTES, result); return result; } @@ -517,7 +524,7 @@ retry: result = tot_bytes; out: - nfs_end_data_update_defer(inode); + nfs_end_data_update(inode); nfs_writedata_free(wdata); return result; @@ -571,11 +578,14 @@ static ssize_t nfs_direct_write(struct i break; return result; } + nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result); tot_bytes += result; file_offset += result; if (result < size) break; } + if (tot_bytes > 0) + nfs_add_stats(inode, NFSIOS_DIRECTWRITTENBYTES, tot_bytes); return tot_bytes; } Index: linux-2.6.12/fs/nfs/file.c =================================================================== --- linux-2.6.12.orig/fs/nfs/file.c +++ linux-2.6.12/fs/nfs/file.c @@ -22,6 +22,7 @@ #include #include #include +#include "nfs_iostat.h" #include #include #include @@ -71,6 +72,18 @@ struct inode_operations nfs_file_inode_o .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V3 +struct inode_operations nfs3_file_inode_operations = { + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, + .listxattr = nfs3_listxattr, + .getxattr = nfs3_getxattr, + .setxattr = nfs3_setxattr, + .removexattr = nfs3_removexattr, +}; +#endif /* CONFIG_NFS_v3 */ + /* Hack for future NFS swap support */ #ifndef IS_SWAPFILE # define IS_SWAPFILE(inode) (0) @@ -90,18 +103,15 @@ static int nfs_check_flags(int flags) static int nfs_file_open(struct inode *inode, struct file *filp) { - struct nfs_server *server = NFS_SERVER(inode); - int (*open)(struct inode *, struct file *); int res; res = nfs_check_flags(filp->f_flags); if (res) return res; + nfs_inc_stats(inode, NFSIOS_VFSOPEN); lock_kernel(); - /* Do NFSv4 open() call */ - if ((open = server->rpc_ops->file_open) != NULL) - res = open(inode, filp); + res = NFS_SERVER(inode)->rpc_ops->file_open(inode, filp); unlock_kernel(); return res; } @@ -112,10 +122,26 @@ nfs_file_release(struct inode *inode, st /* Ensure that dirty pages are flushed out with the right creds */ if (filp->f_mode & FMODE_WRITE) filemap_fdatawrite(filp->f_mapping); + nfs_inc_stats(inode, NFSIOS_VFSRELEASE); return NFS_PROTO(inode)->file_release(inode, filp); } /** + * nfs_revalidate_file - Revalidate the page cache & related metadata + * @inode - pointer to inode struct + * @file - pointer to file + */ +static int nfs_revalidate_file(struct inode *inode, struct file *filp) +{ + int retval = 0; + + if ((NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) + retval = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + nfs_revalidate_mapping(inode, filp->f_mapping); + return 0; +} + +/** * nfs_revalidate_size - Revalidate the file size * @inode - pointer to inode struct * @file - pointer to struct file @@ -137,7 +163,8 @@ static int nfs_revalidate_file_size(stru goto force_reval; if (nfsi->npages != 0) return 0; - return nfs_revalidate_inode(server, inode); + if (!(NFS_FLAGS(inode) & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) + return 0; force_reval: return __nfs_revalidate_inode(server, inode); } @@ -169,6 +196,7 @@ nfs_file_flush(struct file *file) if ((file->f_mode & FMODE_WRITE) == 0) return 0; + nfs_inc_stats(inode, NFSIOS_VFSFLUSH); lock_kernel(); /* Ensure that data+attribute caches are up to date after close() */ status = nfs_wb_all(inode); @@ -198,9 +226,11 @@ nfs_file_read(struct kiocb *iocb, char _ dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long) pos); - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); + result = nfs_revalidate_file(inode, iocb->ki_filp); if (!result) result = generic_file_aio_read(iocb, buf, count, pos); + if (result > 0) + nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); return result; } @@ -216,7 +246,7 @@ nfs_file_sendfile(struct file *filp, lof dentry->d_parent->d_name.name, dentry->d_name.name, (unsigned long) count, (unsigned long long) *ppos); - res = nfs_revalidate_inode(NFS_SERVER(inode), inode); + res = nfs_revalidate_file(inode, filp); if (!res) res = generic_file_sendfile(filp, ppos, count, actor, target); return res; @@ -232,7 +262,7 @@ nfs_file_mmap(struct file * file, struct dfprintk(VFS, "nfs: mmap(%s/%s)\n", dentry->d_parent->d_name.name, dentry->d_name.name); - status = nfs_revalidate_inode(NFS_SERVER(inode), inode); + status = nfs_revalidate_file(inode, file); if (!status) status = generic_file_mmap(file, vma); return status; @@ -252,6 +282,7 @@ nfs_fsync(struct file *file, struct dent dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + nfs_inc_stats(inode, NFSIOS_VFSFSYNC); lock_kernel(); status = nfs_wb_all(inode); if (!status) { @@ -321,15 +352,23 @@ nfs_file_write(struct kiocb *iocb, const result = -EBUSY; if (IS_SWAPFILE(inode)) goto out_swapfile; - result = nfs_revalidate_inode(NFS_SERVER(inode), inode); - if (result) - goto out; + /* + * O_APPEND implies that we must revalidate the file length. + */ + if (iocb->ki_filp->f_flags & O_APPEND) { + result = nfs_revalidate_file_size(inode, iocb->ki_filp); + if (result) + goto out; + } + nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); result = count; if (!count) goto out; result = generic_file_aio_write(iocb, buf, count, pos); + if (result > 0) + nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, result); out: return result; @@ -470,13 +509,14 @@ static int nfs_lock(struct file *filp, i { struct inode * inode = filp->f_mapping->host; + if (!inode) + return -EINVAL; + dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", inode->i_sb->s_id, inode->i_ino, fl->fl_type, fl->fl_flags, (long long)fl->fl_start, (long long)fl->fl_end); - - if (!inode) - return -EINVAL; + nfs_inc_stats(inode, NFSIOS_VFSLOCK); /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) Index: linux-2.6.12/fs/nfs/idmap.c =================================================================== --- linux-2.6.12.orig/fs/nfs/idmap.c +++ linux-2.6.12/fs/nfs/idmap.c @@ -46,10 +46,10 @@ #include #include -#include #include #include +#include "nfs4_fs.h" #define IDMAP_HASH_SZ 128 Index: linux-2.6.12/fs/nfs/inode.c =================================================================== --- linux-2.6.12.orig/fs/nfs/inode.c +++ linux-2.6.12/fs/nfs/inode.c @@ -27,11 +27,13 @@ #include #include #include +#include "nfs_iostat.h" #include #include #include #include #include +#include #include #include #include @@ -39,6 +41,7 @@ #include #include +#include "nfs4_fs.h" #include "delegation.h" #define NFSDBG_FACILITY NFSDBG_VFS @@ -63,6 +66,8 @@ static void nfs_clear_inode(struct inode static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); static int nfs_show_options(struct seq_file *, struct vfsmount *); +static int nfs_show_stats(struct seq_file *, struct vfsmount *); +static void nfs_zap_acl_cache(struct inode *); static struct rpc_program nfs_program; @@ -75,6 +80,7 @@ static struct super_operations nfs_sops .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -106,6 +112,75 @@ static struct rpc_program nfs_program = .pipe_dir_name = "/nfs", }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_stat nfsacl_rpcstat = { &nfsacl_program }; +static struct rpc_version * nfsacl_version[] = { + [3] = &nfsacl_version3, +}; + +struct rpc_program nfsacl_program = { + .name = "nfsacl", + .number = NFS_ACL_PROGRAM, + .nrvers = sizeof(nfsacl_version) / sizeof(nfsacl_version[0]), + .version = nfsacl_version, + .stats = &nfsacl_rpcstat, +}; +#endif /* CONFIG_NFS_V3_ACL */ + +#ifdef CONFIG_SYSCTL +/* Follow the established convention in NLM */ +#define CTL_UNNUMBERED -2 + +static ctl_table nfs_sysctls[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs_mountpoint_timeout", + .data = &nfs_mountpoint_expiry_timeout, + .maxlen = sizeof(nfs_mountpoint_expiry_timeout), + .mode = 0644, + .proc_handler = &proc_dointvec_jiffies, + .strategy = &sysctl_jiffies, + }, + { .ctl_name = 0 } +}; + +static ctl_table nfs_sysctl_dir[] = { + { + .ctl_name = CTL_UNNUMBERED, + .procname = "nfs", + .mode = 0555, + .child = nfs_sysctls, + }, + { .ctl_name = 0 } +}; + +static ctl_table nfs_sysctl_root[] = { + { + .ctl_name = CTL_FS, + .procname = "fs", + .mode = 0555, + .child = nfs_sysctl_dir, + }, + { .ctl_name = 0 } +}; + +static struct ctl_table_header *nfs_sysctl_table; + +static inline int nfs_init_sysctl(void) +{ + nfs_sysctl_table = register_sysctl_table(nfs_sysctl_root, 0); + return nfs_sysctl_table != NULL ? 0 : -ENOMEM; +} + +static inline void nfs_destroy_sysctl(void) +{ + unregister_sysctl_table(nfs_sysctl_table); +} +#else +#define nfs_init_sysctl() (0) +#define nfs_destroy_sysctl() do { } while(0) +#endif /* CONFIG_SYSCTL */ + static inline unsigned long nfs_fattr_to_ino_t(struct nfs_fattr *fattr) { @@ -118,7 +193,7 @@ nfs_write_inode(struct inode *inode, int int flags = sync ? FLUSH_WAIT : 0; int ret; - ret = nfs_commit_inode(inode, 0, 0, flags); + ret = nfs_commit_inode(inode, flags); if (ret < 0) return ret; return 0; @@ -140,10 +215,6 @@ nfs_delete_inode(struct inode * inode) clear_inode(inode); } -/* - * For the moment, the only task for the NFS clear_inode method is to - * release the mmap credential - */ static void nfs_clear_inode(struct inode *inode) { @@ -152,6 +223,7 @@ nfs_clear_inode(struct inode *inode) nfs_wb_all(inode); BUG_ON (!list_empty(&nfsi->open_files)); + nfs_zap_acl_cache(inode); cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); @@ -161,11 +233,13 @@ nfs_clear_inode(struct inode *inode) void nfs_umount_begin(struct super_block *sb) { - struct nfs_server *server = NFS_SB(sb); - struct rpc_clnt *rpc; + struct rpc_clnt *rpc = NFS_SB(sb)->client; /* -EIO all pending I/O */ - if ((rpc = server->client) != NULL) + if (!IS_ERR(rpc)) + rpc_killall_tasks(rpc); + rpc = NFS_SB(sb)->client_acl; + if (!IS_ERR(rpc)) rpc_killall_tasks(rpc); } @@ -211,6 +285,14 @@ nfs_block_size(unsigned long bsize, unsi return nfs_block_bits(bsize, nrbitsp); } +static inline void +nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) +{ + sb->s_maxbytes = (loff_t)maxfilesize; + if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0) + sb->s_maxbytes = MAX_LFS_FILESIZE; +} + /* * Obtain the root inode of the file system. */ @@ -227,6 +309,7 @@ nfs_get_root(struct super_block *sb, str return ERR_PTR(error); } + server->fsid = fsinfo->fattr->fsid; rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); if (!rooti) return ERR_PTR(-ENOMEM); @@ -271,6 +354,12 @@ nfs_sb_init(struct super_block *sb, rpc_ } sb->s_root->d_op = server->rpc_ops->dentry_ops; + server->io_stats = nfs_alloc_iostats(); + if (!server->io_stats) { + no_root_error = -ENOMEM; + goto out_no_root; + } + /* Get some general file system info */ if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) @@ -322,9 +411,7 @@ nfs_sb_init(struct super_block *sb, rpc_ } server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD; - sb->s_maxbytes = fsinfo.maxfilesize; - if (sb->s_maxbytes > MAX_LFS_FILESIZE) - sb->s_maxbytes = MAX_LFS_FILESIZE; + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); server->client->cl_intr = (server->flags & NFS_MOUNT_INTR) ? 1 : 0; server->client->cl_softrtry = (server->flags & NFS_MOUNT_SOFT) ? 1 : 0; @@ -362,17 +449,22 @@ nfs_create_client(struct nfs_server *ser if (!timeparms.to_retries) timeparms.to_retries = 5; + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + /* create transport and client */ xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP, &server->addr, &timeparms); if (IS_ERR(xprt)) { - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + dprintk("%s: cannot create RPC transport. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, data->pseudoflavor); if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + dprintk("%s: cannot create RPC client. Error = %ld\n", + __FUNCTION__, PTR_ERR(xprt)); goto out_fail; } @@ -383,7 +475,6 @@ nfs_create_client(struct nfs_server *ser return clnt; out_fail: - xprt_destroy(xprt); return clnt; } @@ -427,21 +518,16 @@ nfs_fill_super(struct super_block *sb, s /* Check NFS protocol revision and initialize RPC op vector * and file handle pool. */ - if (server->flags & NFS_MOUNT_VER3) { #ifdef CONFIG_NFS_V3 + if (server->flags & NFS_MOUNT_VER3) { server->rpc_ops = &nfs_v3_clientops; server->caps |= NFS_CAP_READDIRPLUS; - if (data->version < 4) { - printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); - return -EIO; - } -#else - printk(KERN_NOTICE "NFS: NFSv3 not supported.\n"); - return -EIO; -#endif } else { server->rpc_ops = &nfs_v2_clientops; } +#else + server->rpc_ops = &nfs_v2_clientops; +#endif /* Fill in pseudoflavor for mount version < 5 */ if (!(data->flags & NFS_MOUNT_SECFLAVOUR)) @@ -455,17 +541,34 @@ nfs_fill_super(struct super_block *sb, s return PTR_ERR(server->client); /* RFC 2623, sec 2.3.2 */ if (authflavor != RPC_AUTH_UNIX) { + struct rpc_auth *auth; + server->client_sys = rpc_clone_client(server->client); if (IS_ERR(server->client_sys)) return PTR_ERR(server->client_sys); - if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) - return -ENOMEM; + auth = rpcauth_create(RPC_AUTH_UNIX, server->client_sys); + if (IS_ERR(auth)) + return PTR_ERR(auth); } else { atomic_inc(&server->client->cl_count); server->client_sys = server->client; } - if (server->flags & NFS_MOUNT_VER3) { +#ifdef CONFIG_NFS_V3_ACL + if (!(server->flags & NFS_MOUNT_NOACL)) { + server->client_acl = rpc_bind_new_program(server->client, &nfsacl_program, 3); + /* No errors! Assume that Sun nfsacls are supported */ + if (!IS_ERR(server->client_acl)) + server->caps |= NFS_CAP_ACLS; + } +#else + server->flags &= ~NFS_MOUNT_NOACL; +#endif /* CONFIG_NFS_V3_ACL */ + /* + * The VFS shouldn't apply the umask to mode bits. We will + * do so ourselves when necessary. + */ + sb->s_flags |= MS_POSIXACL; if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; sb->s_time_gran = 1; @@ -535,7 +638,7 @@ nfs_statfs(struct super_block *sb, struc } -static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, int showdefaults) { static struct proc_nfs_info { int flag; @@ -543,36 +646,120 @@ static int nfs_show_options(struct seq_f char *nostr; } nfs_info[] = { { NFS_MOUNT_SOFT, ",soft", ",hard" }, - { NFS_MOUNT_INTR, ",intr", "" }, - { NFS_MOUNT_POSIX, ",posix", "" }, + { NFS_MOUNT_INTR, ",intr", ",nointr" }, { NFS_MOUNT_TCP, ",tcp", ",udp" }, { NFS_MOUNT_NOCTO, ",nocto", "" }, { NFS_MOUNT_NOAC, ",noac", "" }, - { NFS_MOUNT_NONLM, ",nolock", ",lock" }, + { NFS_MOUNT_NONLM, ",nolock", "" }, + { NFS_MOUNT_NOACL, ",noacl", "" }, { 0, NULL, NULL } }; struct proc_nfs_info *nfs_infop; - struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); - seq_printf(m, ",v%d", nfss->rpc_ops->version); + seq_printf(m, ",vers=%d", nfss->rpc_ops->version); seq_printf(m, ",rsize=%d", nfss->rsize); seq_printf(m, ",wsize=%d", nfss->wsize); - if (nfss->acregmin != 3*HZ) + if (nfss->acregmin != 3*HZ || showdefaults) seq_printf(m, ",acregmin=%d", nfss->acregmin/HZ); - if (nfss->acregmax != 60*HZ) + if (nfss->acregmax != 60*HZ || showdefaults) seq_printf(m, ",acregmax=%d", nfss->acregmax/HZ); - if (nfss->acdirmin != 30*HZ) + if (nfss->acdirmin != 30*HZ || showdefaults) seq_printf(m, ",acdirmin=%d", nfss->acdirmin/HZ); - if (nfss->acdirmax != 60*HZ) + if (nfss->acdirmax != 60*HZ || showdefaults) seq_printf(m, ",acdirmax=%d", nfss->acdirmax/HZ); + if (nfss->flags & NFS_MOUNT_TCP) + seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); + seq_printf(m, ",retrans=%u", nfss->retrans_count); + for (nfs_infop = nfs_info; nfs_infop->flag; nfs_infop++) { if (nfss->flags & nfs_infop->flag) seq_puts(m, nfs_infop->str); else seq_puts(m, nfs_infop->nostr); } +} + +static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt) +{ + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + + nfs_show_mount_options(m, nfss, 0); + seq_puts(m, ",addr="); seq_escape(m, nfss->hostname, " \t\n\\"); + + return 0; +} + +static int nfs_show_stats(struct seq_file *m, struct vfsmount *mnt) +{ + int i, cpu; + struct nfs_server *nfss = NFS_SB(mnt->mnt_sb); + struct rpc_auth *auth = nfss->client->cl_auth; + struct nfs_iostats totals = { }; + + seq_printf(m, "statvers=%s", NFS_IOSTAT_VERS); + + /* + * Display all mount option settings + * need ro/rw, sync/async + */ + seq_printf(m, "\n\topts:\t"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? "ro" : "rw"); + seq_puts(m, mnt->mnt_sb->s_flags & MS_SYNCHRONOUS ? ",sync" : ""); + seq_puts(m, mnt->mnt_sb->s_flags & MS_NOATIME ? ",noatime" : ""); + nfs_show_mount_options(m, nfss, 1); + + seq_printf(m, "\n\tcaps:\t"); + seq_printf(m, "caps=0x%x", nfss->caps); + seq_printf(m, ",wtmult=%d", nfss->wtmult); + seq_printf(m, ",dtsize=%d", nfss->dtsize); + seq_printf(m, ",bsize=%d", nfss->bsize); + seq_printf(m, ",namelen=%d", nfss->namelen); + +#ifdef CONFIG_NFS_V4 + if (nfss->rpc_ops->version == 4) { + seq_printf(m, "\n\tnfsv4:\t"); + seq_printf(m, "bm0=0x%x", nfss->attr_bitmask[0]); + seq_printf(m, ",bm1=0x%x", nfss->attr_bitmask[1]); + seq_printf(m, ",acl=0x%x", nfss->acl_bitmask); + } +#endif + + /* + * Display security flavor in effect for this mount + */ + seq_printf(m, "\n\tsec:\tflavor=%d", auth->au_ops->au_flavor); + if (auth->au_flavor) + seq_printf(m, ",pseudoflavor=%d", auth->au_flavor); + + /* + * Display superblock I/O counters + */ + for (cpu = 0; cpu < NR_CPUS; cpu++) { + struct nfs_iostats *stats; + + if (!cpu_possible(cpu)) + continue; + + preempt_disable(); + stats = per_cpu_ptr(nfss->io_stats, cpu); + + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + totals.events[i] += stats->events[i]; + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + totals.bytes[i] += stats->bytes[i]; + + preempt_enable(); + } + + seq_printf(m, "\n\tevents:\t"); + for (i = 0; i < __NFSIOS_COUNTSMAX; i++) + seq_printf(m, "%lu ", totals.events[i]); + seq_printf(m, "\n\tbytes:\t"); + for (i = 0; i < __NFSIOS_BYTESMAX; i++) + seq_printf(m, "%Lu ", totals.bytes[i]); + return 0; } @@ -585,14 +772,26 @@ nfs_zap_caches(struct inode *inode) struct nfs_inode *nfsi = NFS_I(inode); int mode = inode->i_mode; + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; else - nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL|NFS_INO_REVAL_PAGECACHE; +} + +static void nfs_zap_acl_cache(struct inode *inode) +{ + void (*clear_acl_cache)(struct inode *); + + clear_acl_cache = NFS_PROTO(inode)->clear_acl_cache; + if (clear_acl_cache != NULL) + clear_acl_cache(inode); + NFS_I(inode)->flags &= ~NFS_INO_INVALID_ACL; } /* @@ -689,7 +888,7 @@ nfs_fhget(struct super_block *sb, struct /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. */ - inode->i_op = &nfs_file_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; if (S_ISREG(inode->i_mode)) { inode->i_fop = &nfs_file_operations; inode->i_data.a_ops = &nfs_file_aops; @@ -700,6 +899,11 @@ nfs_fhget(struct super_block *sb, struct if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) NFS_FLAGS(inode) |= NFS_INO_ADVISE_RDPLUS; + /* Deal with crossing mountpoints */ + if (!nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { + inode->i_op = &nfs_mountpoint_inode_operations; + inode->i_fop = NULL; + } } else if (S_ISLNK(inode->i_mode)) inode->i_op = &nfs_symlink_inode_operations; else @@ -787,12 +991,13 @@ nfs_setattr(struct dentry *dentry, struc if ((attr->ia_valid & ATTR_GID) != 0) inode->i_gid = attr->ia_gid; if ((attr->ia_valid & ATTR_SIZE) != 0) { + nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); inode->i_size = attr->ia_size; vmtruncate(inode, attr->ia_size); } } if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) - NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; nfs_end_data_update(inode); unlock_kernel(); return error; @@ -807,14 +1012,17 @@ nfs_wait_on_inode(struct inode *inode, i { struct rpc_clnt *clnt = NFS_CLIENT(inode); struct nfs_inode *nfsi = NFS_I(inode); + unsigned long start = jiffies; + int error = 0; - int error; - if (!(NFS_FLAGS(inode) & flag)) - return 0; - atomic_inc(&inode->i_count); - error = nfs_wait_event(clnt, nfsi->nfs_i_wait, + if ((NFS_FLAGS(inode) & flag)) { + atomic_inc(&inode->i_count); + error = nfs_wait_event(clnt, nfsi->nfs_i_wait, !(NFS_FLAGS(inode) & flag)); - iput(inode); + nfs_add_stats(inode, NFSIOS_WAITEVENTJIFFIES, (jiffies - start)); + nfs_inc_stats(inode, NFSIOS_WAITEVENT); + iput(inode); + } return error; } @@ -851,7 +1059,7 @@ struct nfs_open_context *alloc_nfs_open_ ctx->state = NULL; ctx->lockowner = current->files; ctx->error = 0; - init_waitqueue_head(&ctx->waitq); + ctx->dir_cookie = 0; } return ctx; } @@ -1015,6 +1223,7 @@ __nfs_revalidate_inode(struct nfs_server goto out; } flags = nfsi->flags; + nfsi->flags &= ~NFS_INO_REVAL_PAGECACHE; /* * We may need to keep the attributes marked as invalid if * we raced with nfs_end_attr_update(). @@ -1022,21 +1231,9 @@ __nfs_revalidate_inode(struct nfs_server if (verifier == nfsi->cache_change_attribute) nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); /* Do the page cache invalidation */ - if (flags & NFS_INO_INVALID_DATA) { - if (S_ISREG(inode->i_mode)) { - if (filemap_fdatawrite(inode->i_mapping) == 0) - filemap_fdatawait(inode->i_mapping); - nfs_wb_all(inode); - } - nfsi->flags &= ~NFS_INO_INVALID_DATA; - invalidate_inode_pages2(inode->i_mapping); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", - inode->i_sb->s_id, - (long long)NFS_FILEID(inode)); - /* This ensures we revalidate dentries */ - nfsi->cache_change_attribute++; - } + nfs_revalidate_mapping(inode, inode->i_mapping); + if (flags & NFS_INO_INVALID_ACL) + nfs_zap_acl_cache(inode); dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1067,6 +1264,7 @@ int nfs_attribute_timeout(struct inode * */ int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) { + nfs_inc_stats(inode, NFSIOS_INODEREVALIDATE); if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) && !nfs_attribute_timeout(inode)) return NFS_STALE(inode) ? -ESTALE : 0; @@ -1074,6 +1272,35 @@ int nfs_revalidate_inode(struct nfs_serv } /** + * nfs_revalidate_mapping - Revalidate the pagecache + * @inode - pointer to host inode + * @mapping - pointer to mapping + */ +void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfsi->flags & NFS_INO_INVALID_DATA) { + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(mapping) == 0) + filemap_fdatawait(mapping); + nfs_wb_all(inode); + } + invalidate_inode_pages2(mapping); + nfsi->flags &= ~NFS_INO_INVALID_DATA; + if (S_ISDIR(inode->i_mode)) { + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + /* This ensures we revalidate child dentries */ + nfsi->cache_change_attribute++; + } + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + } +} + +/** * nfs_begin_data_update * @inode - pointer to inode * Declare that a set of operations will update file data on the server @@ -1106,27 +1333,6 @@ void nfs_end_data_update(struct inode *i } /** - * nfs_end_data_update_defer - * @inode - pointer to inode - * Declare end of the operations that will update file data - * This will defer marking the inode as needing revalidation - * unless there are no other pending updates. - */ -void nfs_end_data_update_defer(struct inode *inode) -{ - struct nfs_inode *nfsi = NFS_I(inode); - - if (atomic_dec_and_test(&nfsi->data_updates)) { - /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; - nfsi->cache_change_attribute ++; - } -} - -/** * nfs_refresh_inode - verify consistency of the inode attribute cache * @inode - pointer to inode * @fattr - updated attributes @@ -1152,8 +1358,11 @@ int nfs_refresh_inode(struct inode *inod if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 && nfsi->change_attr == fattr->pre_change_attr) nfsi->change_attr = fattr->change_attr; - if (!data_unstable && nfsi->change_attr != fattr->change_attr) + if (nfsi->change_attr != fattr->change_attr) { nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } } if ((fattr->valid & NFS_ATTR_FATTR) == 0) @@ -1176,18 +1385,22 @@ int nfs_refresh_inode(struct inode *inod } /* Verify a few of the more important attributes */ - if (!data_unstable) { - if (!timespec_equal(&inode->i_mtime, &fattr->mtime) - || cur_size != new_isize) - nfsi->flags |= NFS_INO_INVALID_ATTR; - } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) - nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + nfsi->flags |= NFS_INO_INVALID_ATTR; + if (!data_unstable) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } + if (cur_size != new_isize) { + nfsi->flags |= NFS_INO_INVALID_ATTR; + if (nfsi->npages == 0) + nfsi->flags |= NFS_INO_REVAL_PAGECACHE; + } /* Have any file permissions changed? */ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS; + nfsi->flags |= NFS_INO_INVALID_ATTR | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL; /* Has the link count changed? */ if (inode->i_nlink != fattr->nlink) @@ -1214,11 +1427,10 @@ int nfs_refresh_inode(struct inode *inod */ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { + struct nfs_server *server; struct nfs_inode *nfsi = NFS_I(inode); - __u64 new_size; - loff_t new_isize; + loff_t cur_isize, new_isize; unsigned int invalid = 0; - loff_t cur_isize; int data_unstable; dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", @@ -1243,6 +1455,12 @@ static int nfs_update_inode(struct inode if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; + server = NFS_SERVER(inode); + /* Update the fsid if and only if this is the root directory */ + if (inode == inode->i_sb->s_root->d_inode + && !nfs_fsid_equal(&server->fsid, &fattr->fsid)) + server->fsid = fattr->fsid; + /* * Update the read time so we don't revalidate too often. */ @@ -1251,61 +1469,56 @@ static int nfs_update_inode(struct inode /* Are we racing with known updates of the metadata on the server? */ data_unstable = ! nfs_verify_change_attribute(inode, verifier); - /* Check if the file size agrees */ - new_size = fattr->size; + /* Check if our cached file size is stale */ new_isize = nfs_size_to_loff_t(fattr->size); cur_isize = i_size_read(inode); - if (cur_isize != new_size) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif - /* - * If we have pending writebacks, things can get - * messy. - */ - if (S_ISREG(inode->i_mode) && data_unstable) { - if (new_isize > cur_isize) { + if (new_isize != cur_isize) { + /* Do we perhaps have any outstanding writes? */ + if (nfsi->npages == 0) { + /* No, but did we race with nfs_end_data_update()? */ + if (verifier == nfsi->cache_change_attribute) { inode->i_size = new_isize; - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + invalid |= NFS_INO_INVALID_DATA; } - } else { + invalid |= NFS_INO_INVALID_ATTR; + } else if (new_isize > cur_isize) { inode->i_size = new_isize; invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } + dprintk("NFS: isize change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); } - /* - * Note: we don't check inode->i_mtime since pipes etc. - * can change this value in VFS without requiring a - * cache revalidation. - */ + /* Check if the mtime agrees */ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif + dprintk("NFS: mtime change on server for file %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); if (!data_unstable) invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } if ((fattr->valid & NFS_ATTR_FATTR_V4) && nfsi->change_attr != fattr->change_attr) { -#ifdef NFS_DEBUG_VERBOSE - printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", + dprintk("NFS: change_attr change on server for file %s/%ld\n", inode->i_sb->s_id, inode->i_ino); -#endif nfsi->change_attr = fattr->change_attr; if (!data_unstable) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; } - memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + /* If ctime has changed we should definitely clear access+acl caches */ + if (!timespec_equal(&inode->i_ctime, &fattr->ctime)) { + if (!data_unstable) + invalid |= NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + } memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) - invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS; + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL; inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; @@ -1325,6 +1538,7 @@ static int nfs_update_inode(struct inode /* Update attrtimeo value if we're out of the unstable period */ if (invalid & NFS_INO_INVALID_ATTR) { + nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { @@ -1360,9 +1574,170 @@ static int nfs_update_inode(struct inode } /* + * nfs_try_migrate_filehandle - Check if we can migrate the inode filehandle + * @inode - pointer to inode + * @fh - the filehandle resulting from lookup() + * @fattr - attributes associated with the new filehandle + * + * Do our very best to update existing inodes when the user wants to migrate + * this filesystem to a replica server. + * + * Note that here be HUGE dragons, with endless possibilities for causing + * trouble... + */ +int nfs_try_migrate_filehandle(struct inode *inode, struct nfs_fh *fh, struct nfs_fattr *fattr, uint32_t generation) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + /* Argh! The basic file type has changed */ + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + goto out_bad; + /* Fileid + filehandle are the same. Good! */ + if (nfs_compare_fh(&nfsi->fh, fh) == 0 && nfsi->fileid == fattr->fileid) + goto out_good; + if (fattr->valid && NFS_ATTR_FATTR_V4) { + /* Do the NFSv4 change attributes match our cached value? */ + if (nfsi->change_attr != fattr->change_attr) + goto out_bad; + } else { + /* Does the ctime match? */ + if (!timespec_equal(&fattr->ctime, &inode->i_ctime)) + goto out_bad; + } + /* Does the file size match? */ + if (nfs_size_to_loff_t(fattr->size) != inode->i_size) + goto out_bad; + /* FIXME: Here lie the biggest dragons: + * Try considering all possible races w.r.t. iget5_locked() + */ + nfs_copy_fh(&nfsi->fh, fh); + if (nfsi->fileid != fattr->fileid) { + /* The very concept of migrating to a new hash bucket + * is so full of holes and races that it defies belief! + */ + remove_inode_hash(inode); + nfsi->fileid = fattr->fileid; + inode->i_ino = nfs_fattr_to_ino_t(fattr); + insert_inode_hash(inode); + } +out_good: + inode->i_generation = generation; + return 0; +out_bad: + return -EIO; +} + +/* + * nfs_try_migrate_inode - Update an inode's filehandle after migration + * @inode - pointer to inode to migrate + * @dentry - pointer to dentry + */ +int nfs_try_migrate_inode(struct inode *inode, struct dentry *dentry) +{ + struct nfs_fh fh; + struct nfs_fattr fattr; + struct dentry *next, *next_parent; + uint32_t generation; + int status; + + if (dentry == NULL) { + status = -ENOENT; + dentry = d_find_alias(inode); + if (dentry == NULL) + goto out; + } else + dget(dentry); +repeat: + /* Has this inode already been revalidated? */ + status = 0; + generation = NFS_SERVER(inode)->generation; + if ((long)generation - (long)inode->i_generation <= 0) + goto out; + /* No. Search for a previously revalidated path element */ + next = dget(dentry); + next_parent = dget_parent(dentry); + while((long)generation - (long)next_parent->d_inode->i_generation > 0) { + BUG_ON(IS_ROOT(next_parent)); + dput(next); + next = next_parent; + next_parent = dget_parent(next); + } + status = NFS_PROTO(inode)->lookup(next_parent->d_inode, &next->d_name, + &fh, &fattr); + if (status == 0) + status = nfs_try_migrate_filehandle(next->d_inode, &fh, &fattr, generation); + switch (status) { + case -ESTALE: + if (IS_ROOT(next_parent)) + break; + case 0: + if (dentry->d_inode == inode) + break; + dput(next_parent); + dput(next); + goto repeat; + default: + d_drop(next); + } + dput(next_parent); + dput(next); +out: + dput(dentry); + dprintk("%s: returned error %d\n", __FUNCTION__, status); + return status; +} + +/* * File system information */ +/* + * nfs_path - reconstruct the path given an arbitrary dentry + * @base - arbitrary string to prepend to the path + * @dentry - pointer to dentry + * @buffer - result buffer + * @buflen - length of buffer + * + * Helper function for constructing the path from the + * root dentry to an arbitrary hashed dentry. + * + * This is mainly for use in figuring out the path on the + * server side when automounting on top of an existing partition. + */ +static char *nfs_path(const char *base, const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + char *end = buffer+buflen; + int namelen; + + *--end = '\0'; + buflen--; + spin_lock(&dcache_lock); + while (!IS_ROOT(dentry)) { + namelen = dentry->d_name.len; + buflen -= namelen + 1; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, dentry->d_name.name, namelen); + *--end = '/'; + dentry = dentry->d_parent; + } + spin_unlock(&dcache_lock); + namelen = strlen(base); + /* Strip off excess slashes in base string */ + while (namelen > 0 && base[namelen - 1] == '/') + namelen--; + buflen -= namelen; + if (buflen < 0) + goto Elong; + end -= namelen; + memcpy(end, base, namelen); + return end; +Elong: + return ERR_PTR(-ENAMETOOLONG); +} + static int nfs_set_super(struct super_block *s, void *data) { s->s_fs_info = data; @@ -1385,74 +1760,95 @@ static struct super_block *nfs_get_sb(st int flags, const char *dev_name, void *raw_data) { int error; - struct nfs_server *server; + struct nfs_server *server = NULL; struct super_block *s; struct nfs_fh *root; struct nfs_mount_data *data = raw_data; - if (!data) { - printk("nfs_read_super: missing data argument\n"); - return ERR_PTR(-EINVAL); + s = ERR_PTR(-EINVAL); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + goto out_err; } + if (data->version <= 0 || data->version > NFS_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); + goto out_err; + } + switch (data->version) { + case 1: + data->namlen = 0; + case 2: + data->bsize = 0; + case 3: + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: mount structure version %d does not support NFSv3\n", + __FUNCTION__, + data->version); + goto out_err; + } + data->root.size = NFS2_FHSIZE; + memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); + case 4: + if (data->flags & NFS_MOUNT_SECFLAVOUR) { + dprintk("%s: mount structure version %d does not support strong security\n", + __FUNCTION__, + data->version); + goto out_err; + } + case 5: + memset(data->context, 0, sizeof(data->context)); + } +#ifndef CONFIG_NFS_V3 + /* If NFSv3 is not compiled in, return -EPROTONOSUPPORT */ + s = ERR_PTR(-EPROTONOSUPPORT); + if (data->flags & NFS_MOUNT_VER3) { + dprintk("%s: NFSv3 not compiled into kernel\n", __FUNCTION__); + goto out_err; + } +#endif /* CONFIG_NFS_V3 */ + s = ERR_PTR(-ENOMEM); server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); if (!server) - return ERR_PTR(-ENOMEM); + goto out_err; memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); - - if (data->version != NFS_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS_MOUNT_VERSION ? "older" : "newer"); - if (data->version < 2) - data->namlen = 0; - if (data->version < 3) - data->bsize = 0; - if (data->version < 4) { - data->flags &= ~NFS_MOUNT_VER3; - data->root.size = NFS2_FHSIZE; - memcpy(data->root.data, data->old_root.data, NFS2_FHSIZE); - } - if (data->version < 5) - data->flags &= ~NFS_MOUNT_SECFLAVOUR; - } + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); root = &server->fh; if (data->flags & NFS_MOUNT_VER3) root->size = data->root.size; else root->size = NFS2_FHSIZE; + s = ERR_PTR(-EINVAL); if (root->size > sizeof(root->data)) { - printk("nfs_get_sb: invalid root filehandle\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: invalid root filehandle\n", __FUNCTION__); + goto out_err; } memcpy(root->data, data->root.data, root->size); /* We now require that the mount process passes the remote address */ memcpy(&server->addr, &data->addr, sizeof(server->addr)); if (server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote address!\n"); - kfree(server); - return ERR_PTR(-EINVAL); + dprintk("%s: mount program didn't pass remote address!\n", + __FUNCTION__); + goto out_err; } - s = sget(fs_type, nfs_compare_super, nfs_set_super, server); - - if (IS_ERR(s) || s->s_root) { - kfree(server); - return s; + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_err; } - s->s_flags = flags; + s = sget(fs_type, nfs_compare_super, nfs_set_super, server); + if (IS_ERR(s) || s->s_root) + goto out_rpciod_down; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - kfree(server); - return ERR_PTR(-EIO); - } + s->s_flags = flags; error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { @@ -1462,6 +1858,11 @@ static struct super_block *nfs_get_sb(st } s->s_flags |= MS_ACTIVE; return s; +out_rpciod_down: + rpciod_down(); +out_err: + kfree(server); + return s; } static void nfs_kill_super(struct super_block *s) @@ -1470,10 +1871,12 @@ static void nfs_kill_super(struct super_ kill_anon_super(s); - if (server->client != NULL && !IS_ERR(server->client)) + if (!IS_ERR(server->client)) rpc_shutdown_client(server->client); - if (server->client_sys != NULL && !IS_ERR(server->client_sys)) + if (!IS_ERR(server->client_sys)) rpc_shutdown_client(server->client_sys); + if (!IS_ERR(server->client_acl)) + rpc_shutdown_client(server->client_acl); if (!(server->flags & NFS_MOUNT_NONLM)) lockd_down(); /* release rpc.lockd */ @@ -1483,6 +1886,7 @@ static void nfs_kill_super(struct super_ if (server->hostname != NULL) kfree(server->hostname); kfree(server); + nfs_release_automount_timer(); } static struct file_system_type nfs_fs_type = { @@ -1507,6 +1911,7 @@ static struct super_operations nfs4_sops .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, .show_options = nfs_show_options, + .show_stats = nfs_show_stats, }; /* @@ -1518,9 +1923,6 @@ static void nfs4_clear_inode(struct inod { struct nfs_inode *nfsi = NFS_I(inode); - /* If we are holding a delegation, return it! */ - if (nfsi->delegation != NULL) - nfs_inode_return_delegation(inode); /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); /* Now clear out any remaining state */ @@ -1538,6 +1940,9 @@ static void nfs4_clear_inode(struct inod BUG_ON(atomic_read(&state->count) != 1); nfs4_close_state(state, state->state); } + /* If we are holding a delegation, return it! */ + if (nfsi->delegation != NULL) + nfs_inode_clear_delegation(inode); } @@ -1592,17 +1997,24 @@ static int nfs4_fill_super(struct super_ return -EINVAL; } + server->retrans_timeo = timeparms.to_initval; + server->retrans_count = timeparms.to_retries; + clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { - printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); + dprintk("%s: failed to create NFS4 client.\n", __FUNCTION__); return -EIO; } /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { - if (data->auth_flavourlen > 1) - printk(KERN_INFO "NFS: cannot yet deal with multiple auth flavours.\n"); + if (data->auth_flavourlen != 1) { + dprintk("%s: Invalid number of RPC auth flavours %d.\n", + __FUNCTION__, data->auth_flavourlen); + err = -EINVAL; + goto out_fail; + } if (copy_from_user(&authflavour, data->auth_flavours, sizeof(authflavour))) { err = -EFAULT; goto out_fail; @@ -1610,21 +2022,22 @@ static int nfs4_fill_super(struct super_ } down_write(&clp->cl_sem); - if (clp->cl_rpcclient == NULL) { + if (IS_ERR(clp->cl_rpcclient)) { xprt = xprt_create_proto(proto, &server->addr, &timeparms); if (IS_ERR(xprt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); err = PTR_ERR(xprt); + dprintk("%s: cannot create RPC transport. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, authflavour); if (IS_ERR(clnt)) { up_write(&clp->cl_sem); - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); goto out_fail; } clnt->cl_intr = 1; @@ -1656,21 +2069,26 @@ static int nfs4_fill_super(struct super_ clp = NULL; if (IS_ERR(clnt)) { - printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - return PTR_ERR(clnt); + err = PTR_ERR(clnt); + dprintk("%s: cannot create RPC client. Error = %d\n", + __FUNCTION__, err); + return err; } server->client = clnt; if (server->nfs4_state->cl_idmap == NULL) { - printk(KERN_WARNING "NFS: failed to create idmapper.\n"); + dprintk("%s: failed to create idmapper.\n", __FUNCTION__); return -ENOMEM; } if (clnt->cl_auth->au_flavor != authflavour) { - if (rpcauth_create(authflavour, clnt) == NULL) { - printk(KERN_WARNING "NFS: couldn't create credcache!\n"); - return -ENOMEM; + struct rpc_auth *auth; + + auth = rpcauth_create(authflavour, clnt); + if (IS_ERR(auth)) { + dprintk("%s: couldn't create credcache!\n", __FUNCTION__); + return PTR_ERR(auth); } } @@ -1730,8 +2148,12 @@ static struct super_block *nfs4_get_sb(s struct nfs4_mount_data *data = raw_data; void *p; - if (!data) { - printk("nfs_read_super: missing data argument\n"); + if (data == NULL) { + dprintk("%s: missing data argument\n", __FUNCTION__); + return ERR_PTR(-EINVAL); + } + if (data->version <= 0 || data->version > NFS4_MOUNT_VERSION) { + dprintk("%s: bad mount version\n", __FUNCTION__); return ERR_PTR(-EINVAL); } @@ -1741,11 +2163,7 @@ static struct super_block *nfs4_get_sb(s memset(server, 0, sizeof(struct nfs_server)); /* Zero out the NFS state stuff */ init_nfsv4_state(server); - - if (data->version != NFS4_MOUNT_VERSION) { - printk("nfs warning: mount version %s than kernel\n", - data->version < NFS4_MOUNT_VERSION ? "older" : "newer"); - } + server->client = server->client_sys = server->client_acl = ERR_PTR(-EINVAL); p = nfs_copy_user_string(NULL, &data->hostname, 256); if (IS_ERR(p)) @@ -1773,11 +2191,20 @@ static struct super_block *nfs4_get_sb(s } if (server->addr.sin_family != AF_INET || server->addr.sin_addr.s_addr == INADDR_ANY) { - printk("NFS: mount program didn't pass remote IP address!\n"); + dprintk("%s: mount program didn't pass remote IP address!\n", + __FUNCTION__); s = ERR_PTR(-EINVAL); goto out_free; } + /* Fire up rpciod if not yet running */ + s = ERR_PTR(rpciod_up()); + if (IS_ERR(s)) { + dprintk("%s: couldn't start rpciod! Error = %ld\n", + __FUNCTION__, PTR_ERR(s)); + goto out_free; + } + s = sget(fs_type, nfs4_compare_super, nfs_set_super, server); if (IS_ERR(s) || s->s_root) @@ -1785,13 +2212,6 @@ static struct super_block *nfs4_get_sb(s s->s_flags = flags; - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - s = ERR_PTR(-EIO); - goto out_free; - } - error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 1 : 0); if (error) { up_write(&s->s_umount); @@ -1807,6 +2227,7 @@ out_free: kfree(server->mnt_path); if (server->hostname) kfree(server->hostname); + nfs_free_iostats(server->io_stats); kfree(server); return s; } @@ -1829,6 +2250,7 @@ static void nfs4_kill_super(struct super if (server->hostname != NULL) kfree(server->hostname); kfree(server); + nfs_release_automount_timer(); } static struct file_system_type nfs4_fs_type = { @@ -1839,6 +2261,59 @@ static struct file_system_type nfs4_fs_t .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; +/* Constructs the SERVER-side path */ +static inline char *nfs4_path(const struct dentry *dentry, char *buffer, ssize_t buflen) +{ + return nfs_path(NFS_SB(dentry->d_sb)->mnt_path, dentry, buffer, buflen); +} + +static inline char *nfs4_dup_path(const struct dentry *dentry) +{ + char *page = (char *) __get_free_page(GFP_USER); + char *path; + + path = nfs4_path(dentry, page, PAGE_SIZE); + if (!IS_ERR(path)) { + int len = PAGE_SIZE + page - path; + char *tmp = path; + + path = kmalloc(len, GFP_KERNEL); + if (path) + memcpy(path, tmp, len); + else + path = ERR_PTR(-ENOMEM); + } + free_page((unsigned long)page); + return path; +} + +static struct super_block *nfs4_clone_client(struct nfs_server *server, const struct dentry *dentry) +{ + struct nfs4_client *clp = server->nfs4_state; + struct super_block *sb; + + server->mnt_path = nfs4_dup_path(dentry); + if (IS_ERR(server->mnt_path)) { + sb = (struct super_block *)server->mnt_path; + goto err; + } + sb = sget(&nfs4_fs_type, nfs4_compare_super, nfs_set_super, server); + if (IS_ERR(sb) || sb->s_root) + goto free_path; + nfs4_server_capabilities(server, &server->fh); + + down_write(&clp->cl_sem); + atomic_inc(&clp->cl_count); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + up_write(&clp->cl_sem); + return sb; +free_path: + kfree(server->mnt_path); +err: + server->mnt_path = NULL; + return sb; +} + #define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ @@ -1849,12 +2324,174 @@ static struct file_system_type nfs4_fs_t #define register_nfs4fs() register_filesystem(&nfs4_fs_type) #define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) #else +#define nfs4_clone_client(a,b) ERR_PTR(-EINVAL) #define nfs4_init_once(nfsi) \ do { } while (0) #define register_nfs4fs() (0) #define unregister_nfs4fs() #endif +static inline struct super_block *nfs_clone_client(struct nfs_server *server) +{ + struct super_block *sb; + + sb = sget(&nfs_fs_type, nfs_compare_super, nfs_set_super, server); + if (!IS_ERR(sb) && sb->s_root == NULL && !(server->flags & NFS_MOUNT_NONLM)) + lockd_up(); + return sb; +} + +struct nfs_clone_mount { + const struct super_block *sb; + const struct dentry *dentry; + struct nfs_fh *fh; + struct nfs_fattr *fattr; +}; + +static struct super_block *clone_nfs_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *raw_data) +{ + struct nfs_clone_mount *data = raw_data; + struct nfs_server *server; + struct nfs_server *parent = NFS_SB(data->sb); + struct super_block *sb = ERR_PTR(-EINVAL); + void *err = ERR_PTR(-ENOMEM); + struct inode *root_inode; + struct nfs_fsinfo fsinfo; + int len; + + server = kmalloc(sizeof(struct nfs_server), GFP_KERNEL); + if (server == NULL) + goto out_err; + memcpy(server, parent, sizeof(*server)); + len = strlen(parent->hostname) + 1; + server->hostname = kmalloc(len, GFP_KERNEL); + if (server->hostname == NULL) + goto free_server; + memcpy(server->hostname, parent->hostname, len); + server->fsid = data->fattr->fsid; + nfs_copy_fh(&server->fh, data->fh); + server->io_stats = nfs_alloc_iostats(); + if (server->io_stats == NULL) + goto free_hostname; + if (rpciod_up() != 0) + goto free_iostats; + + switch (parent->rpc_ops->version) { + case 2: + case 3: + sb = nfs_clone_client(server); + break; + case 4: + sb = nfs4_clone_client(server, data->dentry); + break; + default: + BUG(); + } + if (IS_ERR((err = sb)) || sb->s_root) + goto kill_rpciod; + sb->s_op = data->sb->s_op; + sb->s_blocksize = data->sb->s_blocksize; + sb->s_blocksize_bits = data->sb->s_blocksize_bits; + sb->s_maxbytes = data->sb->s_maxbytes; + + server->client_sys = server->client_acl = ERR_PTR(-EINVAL); + server->client = rpc_clone_client(parent->client); + if (IS_ERR((err = server->client))) + goto out_deactivate; + if (!IS_ERR(parent->client_sys)) { + server->client_sys = rpc_clone_client(parent->client_sys); + if (IS_ERR((err = server->client_sys))) + goto out_deactivate; + } + if (!IS_ERR(parent->client_acl)) { + server->client_acl = rpc_clone_client(parent->client_acl); + if (IS_ERR((err = server->client_acl))) + goto out_deactivate; + } + root_inode = nfs_fhget(sb, data->fh, data->fattr); + if (!root_inode) + goto out_deactivate; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) + goto out_put_root; + fsinfo.fattr = data->fattr; + if (NFS_PROTO(root_inode)->fsinfo(server, data->fh, &fsinfo) == 0) + nfs_super_set_maxbytes(sb, fsinfo.maxfilesize); + sb->s_root->d_op = server->rpc_ops->dentry_ops; + sb->s_flags |= MS_ACTIVE; + return sb; +out_put_root: + iput(root_inode); +out_deactivate: + up_write(&sb->s_umount); + deactivate_super(sb); + return (struct super_block *)err; +kill_rpciod: + rpciod_down(); +free_iostats: + nfs_free_iostats(server->io_stats); +free_hostname: + kfree(server->hostname); +free_server: + kfree(server); +out_err: + return (struct super_block *)err; +} + +static struct file_system_type clone_nfs_fs_type = { + .owner = THIS_MODULE, + .name = "nfs", + .get_sb = clone_nfs_sb, + .kill_sb = nfs_kill_super, + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, +}; + +static inline char *nfs_devname(const struct vfsmount *mnt_parent, + const struct dentry *dentry, + char *buffer, ssize_t buflen) +{ + return nfs_path(mnt_parent->mnt_devname, dentry, buffer, buflen); +} + +/** + * nfs_do_submount - set up mountpoint when crossing a filesystem boundary + * @mnt_parent - mountpoint of parent directory + * @dentry - parent directory + * @fh - filehandle for new root dentry + * @fattr - attributes for new root inode + * + */ +struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, + const struct dentry *dentry, struct nfs_fh *fh, + struct nfs_fattr *fattr) +{ + struct nfs_clone_mount mountdata = { + .sb = mnt_parent->mnt_sb, + .dentry = dentry, + .fh = fh, + .fattr = fattr, + }; + struct vfsmount *mnt = ERR_PTR(-ENOMEM); + char *page = (char *) __get_free_page(GFP_USER); + char *devname; + + dprintk("%s: submounting on %s/%s\n", __FUNCTION__, + dentry->d_parent->d_name.name, + dentry->d_name.name); + if (page == NULL) + goto out; + devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); + if (!IS_ERR(devname)) + mnt = vfs_kern_mount(&clone_nfs_fs_type, 0, devname, &mountdata); + else + mnt = (struct vfsmount *)devname; + free_page((unsigned long)page); +out: + dprintk("%s: done\n", __FUNCTION__); + return mnt; +} + extern int nfs_init_nfspagecache(void); extern void nfs_destroy_nfspagecache(void); extern int nfs_init_readpagecache(void); @@ -1875,6 +2512,13 @@ static struct inode *nfs_alloc_inode(str if (!nfsi) return NULL; nfsi->flags = 0; +#ifdef CONFIG_NFS_V3_ACL + nfsi->acl_access = ERR_PTR(-EAGAIN); + nfsi->acl_default = ERR_PTR(-EAGAIN); +#endif +#ifdef CONFIG_NFS_V4 + nfsi->nfs4_acl = NULL; +#endif /* CONFIG_NFS_V4 */ return &nfsi->vfs_inode; } @@ -1929,6 +2573,10 @@ static int __init init_nfs_fs(void) { int err; + err = nfs_init_sysctl(); + if (err) + goto out5; + err = nfs_init_nfspagecache(); if (err) goto out4; @@ -1976,6 +2624,8 @@ out2: out3: nfs_destroy_nfspagecache(); out4: + nfs_destroy_sysctl(); +out5: return err; } @@ -1991,6 +2641,7 @@ static void __exit exit_nfs_fs(void) #ifdef CONFIG_PROC_FS rpc_proc_unregister("nfs"); #endif + nfs_destroy_sysctl(); unregister_filesystem(&nfs_fs_type); unregister_nfs4fs(); } Index: linux-2.6.12/fs/nfs/mount_clnt.c =================================================================== --- linux-2.6.12.orig/fs/nfs/mount_clnt.c +++ linux-2.6.12/fs/nfs/mount_clnt.c @@ -80,9 +80,7 @@ mnt_create(char *hostname, struct sockad clnt = rpc_create_client(xprt, hostname, &mnt_program, version, RPC_AUTH_UNIX); - if (IS_ERR(clnt)) { - xprt_destroy(xprt); - } else { + if (!IS_ERR(clnt)) { clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; Index: linux-2.6.12/fs/nfs/namespace.c =================================================================== --- /dev/null +++ linux-2.6.12/fs/nfs/namespace.c @@ -0,0 +1,111 @@ +/* + * linux/fs/nfs/namespace.c + * + * Copyright (C) 2005 Trond Myklebust + * + * NFS namespace + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#define NFSDBG_FACILITY NFSDBG_VFS + +static LIST_HEAD(nfs_automount_list); +static void nfs_expire_automounts(void *list); +static DECLARE_WORK(nfs_automount_task, nfs_expire_automounts, &nfs_automount_list); +int nfs_mountpoint_expiry_timeout = 500 * HZ; + +/* + * nfs_follow_mountpoint - handle crossing a mountpoint on the server + * @dentry - dentry of mountpoint + * @nd - nameidata info + * + * When we encounter a mountpoint on the server, we want to set up + * a mountpoint on the client too, to prevent inode numbers from + * colliding, and to allow "df" to work properly. + * On NFSv4, we also want to allow for the fact that different + * filesystems may be migrated to different servers in a failover + * situation, and that different filesystems may want to use + * different security flavours. + */ +static int nfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd) +{ + struct vfsmount *mnt; + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + struct dentry *parent; + struct nfs_fh fh; + struct nfs_fattr fattr; + int err; + + BUG_ON(IS_ROOT(dentry)); + dprintk("%s: enter\n", __FUNCTION__); + dput(nd->dentry); + nd->dentry = dget(dentry); + if (d_mountpoint(nd->dentry)) + goto out_follow; + /* Look it up again */ + parent = dget_parent(nd->dentry); + err = server->rpc_ops->lookup(parent->d_inode, &nd->dentry->d_name, &fh, &fattr); + dput(parent); + if (err != 0) + goto out_err; + mnt = nfs_do_submount(nd->mnt, nd->dentry, &fh, &fattr); + if (IS_ERR(mnt)) { + err = PTR_ERR(mnt); + goto out_err; + } + mntget(mnt); + err = do_add_mount(mnt, nd, nd->mnt->mnt_flags, &nfs_automount_list); + if (err < 0) { + mntput(mnt); + if (err == -EBUSY) + goto out_follow; + goto out_err; + } + mntput(nd->mnt); + dput(nd->dentry); + nd->mnt = mnt; + nd->dentry = dget(mnt->mnt_root); + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +out: + dprintk("%s: done, returned %d\n", __FUNCTION__, err); + return err; +out_err: + path_release(nd); + goto out; +out_follow: + while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) + ; + err = 0; + goto out; +} + +struct inode_operations nfs_mountpoint_inode_operations = { + .follow_link = nfs_follow_mountpoint, + .getattr = nfs_getattr, +}; + +static void nfs_expire_automounts(void *data) +{ + struct list_head *list = (struct list_head *)data; + + mark_mounts_for_expiry(list); + if (!list_empty(list)) + schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); +} + +void nfs_release_automount_timer(void) +{ + if (list_empty(&nfs_automount_list)) { + cancel_delayed_work(&nfs_automount_task); + flush_scheduled_work(); + } +} Index: linux-2.6.12/fs/nfs/nfs2xdr.c =================================================================== --- linux-2.6.12.orig/fs/nfs/nfs2xdr.c +++ linux-2.6.12/fs/nfs/nfs2xdr.c @@ -131,7 +131,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt fattr->du.nfs2.blocksize = ntohl(*p++); rdev = ntohl(*p++); fattr->du.nfs2.blocks = ntohl(*p++); - fattr->fsid_u.nfs3 = ntohl(*p++); + fattr->fsid.major = ntohl(*p++); + fattr->fsid.minor = 0; fattr->fileid = ntohl(*p++); p = xdr_decode_time(p, &fattr->atime); p = xdr_decode_time(p, &fattr->mtime); Index: linux-2.6.12/fs/nfs/nfs3acl.c =================================================================== --- /dev/null +++ linux-2.6.12/fs/nfs/nfs3acl.c @@ -0,0 +1,403 @@ +#include +#include +#include +#include +#include +#include + +#define NFSDBG_FACILITY NFSDBG_PROC + +ssize_t nfs3_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int pos=0, len=0; + +# define output(s) do { \ + if (pos + sizeof(s) <= size) { \ + memcpy(buffer + pos, s, sizeof(s)); \ + pos += sizeof(s); \ + } \ + len += sizeof(s); \ + } while(0) + + acl = nfs3_proc_getacl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_access"); + posix_acl_release(acl); + } + + if (S_ISDIR(inode->i_mode)) { + acl = nfs3_proc_getacl(inode, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + output("system.posix_acl_default"); + posix_acl_release(acl); + } + } + +# undef output + + if (!buffer || len <= size) + return len; + return -ERANGE; +} + +ssize_t nfs3_getxattr(struct dentry *dentry, const char *name, + void *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error = 0; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = nfs3_proc_getacl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + if (type == ACL_TYPE_ACCESS && acl->a_count == 0) + error = -ENODATA; + else + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + } else + error = -ENODATA; + + return error; +} + +int nfs3_setxattr(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + struct posix_acl *acl; + int type, error; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + error = nfs3_proc_setacl(inode, type, acl); + posix_acl_release(acl); + + return error; +} + +int nfs3_removexattr(struct dentry *dentry, const char *name) +{ + struct inode *inode = dentry->d_inode; + int type; + + if (strcmp(name, XATTR_NAME_ACL_ACCESS) == 0) + type = ACL_TYPE_ACCESS; + else if (strcmp(name, XATTR_NAME_ACL_DEFAULT) == 0) + type = ACL_TYPE_DEFAULT; + else + return -EOPNOTSUPP; + + return nfs3_proc_setacl(inode, type, NULL); +} + +static void __nfs3_forget_cached_acls(struct nfs_inode *nfsi) +{ + if (!IS_ERR(nfsi->acl_access)) { + posix_acl_release(nfsi->acl_access); + nfsi->acl_access = ERR_PTR(-EAGAIN); + } + if (!IS_ERR(nfsi->acl_default)) { + posix_acl_release(nfsi->acl_default); + nfsi->acl_default = ERR_PTR(-EAGAIN); + } +} + +void nfs3_forget_cached_acls(struct inode *inode) +{ + dprintk("NFS: nfs3_forget_cached_acls(%s/%ld)\n", inode->i_sb->s_id, + inode->i_ino); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + spin_unlock(&inode->i_lock); +} + +static struct posix_acl *nfs3_get_cached_acl(struct inode *inode, int type) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct posix_acl *acl = ERR_PTR(-EINVAL); + + spin_lock(&inode->i_lock); + switch(type) { + case ACL_TYPE_ACCESS: + acl = nfsi->acl_access; + break; + + case ACL_TYPE_DEFAULT: + acl = nfsi->acl_default; + break; + + default: + goto out; + } + if (IS_ERR(acl)) + acl = ERR_PTR(-EAGAIN); + else + acl = posix_acl_dup(acl); +out: + spin_unlock(&inode->i_lock); + dprintk("NFS: nfs3_get_cached_acl(%s/%ld, %d) = %p\n", inode->i_sb->s_id, + inode->i_ino, type, acl); + return acl; +} + +static void nfs3_cache_acls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + dprintk("nfs3_cache_acls(%s/%ld, %p, %p)\n", inode->i_sb->s_id, + inode->i_ino, acl, dfacl); + spin_lock(&inode->i_lock); + __nfs3_forget_cached_acls(NFS_I(inode)); + nfsi->acl_access = posix_acl_dup(acl); + nfsi->acl_default = posix_acl_dup(dfacl); + spin_unlock(&inode->i_lock); +} + +struct posix_acl *nfs3_proc_getacl(struct inode *inode, int type) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_getaclargs args = { + .fh = NFS_FH(inode), + /* The xdr layer may allocate pages here. */ + .pages = pages, + }; + struct nfs3_getaclres res = { + .fattr = &fattr, + }; + struct posix_acl *acl; + int status, count; + + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + return ERR_PTR(-EOPNOTSUPP); + + status = nfs_revalidate_inode(server, inode); + if (status < 0) + return ERR_PTR(status); + acl = nfs3_get_cached_acl(inode, type); + if (acl != ERR_PTR(-EAGAIN)) + return acl; + acl = NULL; + + /* + * Only get the access acl when explicitly requested: We don't + * need it for access decisions, and only some applications use + * it. Applications which request the access acl first are not + * penalized from this optimization. + */ + if (type == ACL_TYPE_ACCESS) + args.mask |= NFS_ACLCNT|NFS_ACL; + if (S_ISDIR(inode->i_mode)) + args.mask |= NFS_DFACLCNT|NFS_DFACL; + if (args.mask == 0) + return NULL; + + dprintk("NFS call getacl\n"); + status = rpc_call(server->client_acl, ACLPROC3_GETACL, + &args, &res, 0); + dprintk("NFS reply getacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. */ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL extension not supported; disabling\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + default: + goto getout; + } + if ((args.mask & res.mask) != args.mask) { + status = -EIO; + goto getout; + } + + if (res.acl_access != NULL) { + if (posix_acl_equiv_mode(res.acl_access, NULL) == 0) { + posix_acl_release(res.acl_access); + res.acl_access = NULL; + } + } + nfs3_cache_acls(inode, res.acl_access, res.acl_default); + + switch(type) { + case ACL_TYPE_ACCESS: + acl = res.acl_access; + res.acl_access = NULL; + break; + + case ACL_TYPE_DEFAULT: + acl = res.acl_default; + res.acl_default = NULL; + } + +getout: + posix_acl_release(res.acl_access); + posix_acl_release(res.acl_default); + + if (status != 0) { + posix_acl_release(acl); + acl = ERR_PTR(status); + } + return acl; +} + +static int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr; + struct page *pages[NFSACL_MAXPAGES] = { }; + struct nfs3_setaclargs args = { + .inode = inode, + .mask = NFS_ACL, + .acl_access = acl, + .pages = pages, + }; + int status, count; + + status = -EOPNOTSUPP; + if (!nfs_server_capable(inode, NFS_CAP_ACLS)) + goto out; + + /* We are doing this here, because XDR marshalling can only + return -ENOMEM. */ + status = -ENOSPC; + if (acl != NULL && acl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (dfacl != NULL && dfacl->a_count > NFS_ACL_MAX_ENTRIES) + goto out; + if (S_ISDIR(inode->i_mode)) { + args.mask |= NFS_DFACL; + args.acl_default = dfacl; + } + + dprintk("NFS call setacl\n"); + nfs_begin_data_update(inode); + status = rpc_call(server->client_acl, ACLPROC3_SETACL, + &args, &fattr, 0); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ACCESS; + nfs_end_data_update(inode); + dprintk("NFS reply setacl: %d\n", status); + + /* pages may have been allocated at the xdr layer. */ + for (count = 0; count < NFSACL_MAXPAGES && args.pages[count]; count++) + __free_page(args.pages[count]); + + switch (status) { + case 0: + status = nfs_refresh_inode(inode, &fattr); + break; + case -EPFNOSUPPORT: + case -EPROTONOSUPPORT: + dprintk("NFS_V3_ACL SETACL RPC not supported" + "(will not retry)\n"); + server->caps &= ~NFS_CAP_ACLS; + case -ENOTSUPP: + status = -EOPNOTSUPP; + } +out: + return status; +} + +int nfs3_proc_setacl(struct inode *inode, int type, struct posix_acl *acl) +{ + struct posix_acl *alloc = NULL, *dfacl = NULL; + int status; + + if (S_ISDIR(inode->i_mode)) { + switch(type) { + case ACL_TYPE_ACCESS: + alloc = dfacl = nfs3_proc_getacl(inode, + ACL_TYPE_DEFAULT); + if (IS_ERR(alloc)) + goto fail; + break; + + case ACL_TYPE_DEFAULT: + dfacl = acl; + alloc = acl = nfs3_proc_getacl(inode, + ACL_TYPE_ACCESS); + if (IS_ERR(alloc)) + goto fail; + break; + + default: + return -EINVAL; + } + } else if (type != ACL_TYPE_ACCESS) + return -EINVAL; + + if (acl == NULL) { + alloc = acl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); + if (IS_ERR(alloc)) + goto fail; + } + status = nfs3_proc_setacls(inode, acl, dfacl); + posix_acl_release(alloc); + return status; + +fail: + return PTR_ERR(alloc); +} + +int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, + mode_t mode) +{ + struct posix_acl *dfacl, *acl; + int error = 0; + + dfacl = nfs3_proc_getacl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(dfacl)) { + error = PTR_ERR(dfacl); + return (error == -EOPNOTSUPP) ? 0 : error; + } + if (!dfacl) + return 0; + acl = posix_acl_clone(dfacl, GFP_KERNEL); + error = -ENOMEM; + if (!acl) + goto out_release_dfacl; + error = posix_acl_create_masq(acl, &mode); + if (error < 0) + goto out_release_acl; + error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? + dfacl : NULL); +out_release_acl: + posix_acl_release(acl); +out_release_dfacl: + posix_acl_release(dfacl); + return error; +} Index: linux-2.6.12/fs/nfs/nfs3proc.c =================================================================== --- linux-2.6.12.orig/fs/nfs/nfs3proc.c +++ linux-2.6.12/fs/nfs/nfs3proc.c @@ -17,6 +17,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_PROC @@ -45,7 +46,7 @@ static inline int nfs3_rpc_call_wrapper(struct rpc_clnt *clnt, u32 proc, void *argp, void *resp, int flags) { struct rpc_message msg = { - .rpc_proc = &nfs3_procedures[proc], + .rpc_proc = &clnt->cl_procinfo[proc], .rpc_argp = argp, .rpc_resp = resp, }; @@ -297,7 +298,7 @@ static int nfs3_proc_commit(struct nfs_w */ static int nfs3_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr, - int flags) + int flags, struct nameidata *nd) { struct nfs_fh fhandle; struct nfs_fattr fattr; @@ -313,7 +314,8 @@ nfs3_proc_create(struct inode *dir, stru .fh = &fhandle, .fattr = &fattr }; - int status; + mode_t mode = sattr->ia_mode; + int status; dprintk("NFS call create %s\n", dentry->d_name.name); arg.createmode = NFS3_CREATE_UNCHECKED; @@ -323,6 +325,8 @@ nfs3_proc_create(struct inode *dir, stru arg.verifier[1] = current->pid; } + sattr->ia_mode &= ~current->fs->umask; + again: dir_attr.valid = 0; fattr.valid = 0; @@ -369,6 +373,9 @@ again: nfs_refresh_inode(dentry->d_inode, &fattr); dprintk("NFS reply setattr (post-create): %d\n", status); } + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); out: dprintk("NFS reply create: %d\n", status); return status; @@ -538,15 +545,24 @@ nfs3_proc_mkdir(struct inode *dir, struc .fh = &fhandle, .fattr = &fattr }; - int status; + int mode = sattr->ia_mode; + int status; dprintk("NFS call mkdir %s\n", dentry->d_name.name); dir_attr.valid = 0; fattr.valid = 0; + + sattr->ia_mode &= ~current->fs->umask; + status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKDIR, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fhandle, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mkdir: %d\n", status); return status; } @@ -641,6 +657,7 @@ nfs3_proc_mknod(struct inode *dir, struc .fh = &fh, .fattr = &fattr }; + mode_t mode = sattr->ia_mode; int status; switch (sattr->ia_mode & S_IFMT) { @@ -653,12 +670,20 @@ nfs3_proc_mknod(struct inode *dir, struc dprintk("NFS call mknod %s %u:%u\n", dentry->d_name.name, MAJOR(rdev), MINOR(rdev)); + + sattr->ia_mode &= ~current->fs->umask; + dir_attr.valid = 0; fattr.valid = 0; status = rpc_call(NFS_CLIENT(dir), NFS3PROC_MKNOD, &arg, &res, 0); nfs_refresh_inode(dir, &dir_attr); - if (status == 0) - status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs_instantiate(dentry, &fh, &fattr); + if (status != 0) + goto out; + status = nfs3_proc_set_default_acl(dir, dentry->d_inode, mode); +out: dprintk("NFS reply mknod: %d\n", status); return status; } @@ -825,7 +850,8 @@ nfs3_proc_lock(struct file *filp, int cm struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ .dentry_ops = &nfs_dentry_operations, - .dir_inode_ops = &nfs_dir_inode_operations, + .dir_inode_ops = &nfs3_dir_inode_operations, + .file_inode_ops = &nfs3_file_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, @@ -856,4 +882,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .file_open = nfs_open, .file_release = nfs_release, .lock = nfs3_proc_lock, + .clear_acl_cache = nfs3_forget_cached_acls, }; Index: linux-2.6.12/fs/nfs/nfs3xdr.c =================================================================== --- linux-2.6.12.orig/fs/nfs/nfs3xdr.c +++ linux-2.6.12/fs/nfs/nfs3xdr.c @@ -21,6 +21,7 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_XDR @@ -79,6 +80,11 @@ extern int nfs_stat_to_errno(int); #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) #define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2) +#define ACL3_getaclargs_sz (NFS3_fh_sz+1) +#define ACL3_setaclargs_sz (NFS3_fh_sz+1+2*(2+5*3)) +#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+2*(2+5*3)) +#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz) + /* * Map file type to S_IFMT bits */ @@ -160,7 +166,8 @@ xdr_decode_fattr(u32 *p, struct nfs_fatt if (MAJOR(fattr->rdev) != major || MINOR(fattr->rdev) != minor) fattr->rdev = 0; - p = xdr_decode_hyper(p, &fattr->fsid_u.nfs3); + p = xdr_decode_hyper(p, &fattr->fsid.major); + fattr->fsid.minor = 0; p = xdr_decode_hyper(p, &fattr->fileid); p = xdr_decode_time3(p, &fattr->atime); p = xdr_decode_time3(p, &fattr->mtime); @@ -627,6 +634,74 @@ nfs3_xdr_commitargs(struct rpc_rqst *req return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Encode GETACL arguments + */ +static int +nfs3_xdr_getaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclargs *args) +{ + struct rpc_auth *auth = req->rq_task->tk_auth; + unsigned int replen; + + p = xdr_encode_fhandle(p, args->fh); + *p++ = htonl(args->mask); + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p); + + if (args->mask & (NFS_ACL | NFS_DFACL)) { + /* Inline the page array */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + + ACL3_getaclres_sz) << 2; + xdr_inline_pages(&req->rq_rcv_buf, replen, args->pages, 0, + NFSACL_MAXPAGES << PAGE_SHIFT); + } + return 0; +} + +/* + * Encode SETACL arguments + */ +static int +nfs3_xdr_setaclargs(struct rpc_rqst *req, u32 *p, + struct nfs3_setaclargs *args) +{ + struct xdr_buf *buf = &req->rq_snd_buf; + unsigned int base, len_in_head, len = nfsacl_size( + (args->mask & NFS_ACL) ? args->acl_access : NULL, + (args->mask & NFS_DFACL) ? args->acl_default : NULL); + int count, err; + + p = xdr_encode_fhandle(p, NFS_FH(args->inode)); + *p++ = htonl(args->mask); + base = (char *)p - (char *)buf->head->iov_base; + /* put as much of the acls into head as possible. */ + len_in_head = min_t(unsigned int, buf->head->iov_len - base, len); + len -= len_in_head; + req->rq_slen = xdr_adjust_iovec(req->rq_svec, p + (len_in_head >> 2)); + + for (count = 0; (count << PAGE_SHIFT) < len; count++) { + args->pages[count] = alloc_page(GFP_KERNEL); + if (!args->pages[count]) { + while (count) + __free_page(args->pages[--count]); + return -ENOMEM; + } + } + xdr_encode_pages(buf, args->pages, 0, len); + + err = nfsacl_encode(buf, base, args->inode, + (args->mask & NFS_ACL) ? + args->acl_access : NULL, 1, 0); + if (err > 0) + err = nfsacl_encode(buf, base + err, args->inode, + (args->mask & NFS_DFACL) ? + args->acl_default : NULL, 1, + NFS_ACL_DEFAULT); + return (err > 0) ? 0 : err; +} +#endif /* CONFIG_NFS_V3_ACL */ + /* * NFS XDR decode functions */ @@ -978,6 +1053,54 @@ nfs3_xdr_commitres(struct rpc_rqst *req, return 0; } +#ifdef CONFIG_NFS_V3_ACL +/* + * Decode GETACL reply + */ +static int +nfs3_xdr_getaclres(struct rpc_rqst *req, u32 *p, + struct nfs3_getaclres *res) +{ + struct xdr_buf *buf = &req->rq_rcv_buf; + int status = ntohl(*p++); + struct posix_acl **acl; + unsigned int *aclcnt; + int err, base; + + if (status != 0) + return -nfs_stat_to_errno(status); + p = xdr_decode_post_op_attr(p, res->fattr); + res->mask = ntohl(*p++); + if (res->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT)) + return -EINVAL; + base = (char *)p - (char *)req->rq_rcv_buf.head->iov_base; + + acl = (res->mask & NFS_ACL) ? &res->acl_access : NULL; + aclcnt = (res->mask & NFS_ACLCNT) ? &res->acl_access_count : NULL; + err = nfsacl_decode(buf, base, aclcnt, acl); + + acl = (res->mask & NFS_DFACL) ? &res->acl_default : NULL; + aclcnt = (res->mask & NFS_DFACLCNT) ? &res->acl_default_count : NULL; + if (err > 0) + err = nfsacl_decode(buf, base + err, aclcnt, acl); + return (err > 0) ? 0 : err; +} + +/* + * Decode setacl reply. + */ +static int +nfs3_xdr_setaclres(struct rpc_rqst *req, u32 *p, struct nfs_fattr *fattr) +{ + int status = ntohl(*p++); + + if (status) + return -nfs_stat_to_errno(status); + xdr_decode_post_op_attr(p, fattr); + return 0; +} +#endif /* CONFIG_NFS_V3_ACL */ + #ifndef MAX # define MAX(a, b) (((a) > (b))? (a) : (b)) #endif @@ -1021,3 +1144,28 @@ struct rpc_version nfs_version3 = { .procs = nfs3_procedures }; +#ifdef CONFIG_NFS_V3_ACL +static struct rpc_procinfo nfs3_acl_procedures[] = { + [ACLPROC3_GETACL] = { + .p_proc = ACLPROC3_GETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_getaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_getaclres, + .p_bufsiz = MAX(ACL3_getaclargs_sz, ACL3_getaclres_sz) << 2, + .p_timer = 1, + }, + [ACLPROC3_SETACL] = { + .p_proc = ACLPROC3_SETACL, + .p_encode = (kxdrproc_t) nfs3_xdr_setaclargs, + .p_decode = (kxdrproc_t) nfs3_xdr_setaclres, + .p_bufsiz = MAX(ACL3_setaclargs_sz, ACL3_setaclres_sz) << 2, + .p_timer = 0, + }, +}; + +struct rpc_version nfsacl_version3 = { + .number = 3, + .nrprocs = sizeof(nfs3_acl_procedures)/ + sizeof(nfs3_acl_procedures[0]), + .procs = nfs3_acl_procedures, +}; +#endif /* CONFIG_NFS_V3_ACL */ Index: linux-2.6.12/fs/nfs/nfs4_fs.h =================================================================== --- /dev/null +++ linux-2.6.12/fs/nfs/nfs4_fs.h @@ -0,0 +1,270 @@ +/* + * linux/fs/nfs/nfs4_fs.h + * + * Copyright (C) 2005 Trond Myklebust + * + * NFSv4-specific filesystem definitions and declarations + */ + +#ifndef __LINUX_FS_NFS_NFS4_FS_H +#define __LINUX_FS_NFS_NFS4_FS_H + +#ifdef CONFIG_NFS_V4 + +#include + +struct idmap; + +/* + * In a seqid-mutating op, this macro controls which error return + * values trigger incrementation of the seqid. + * + * from rfc 3010: + * The client MUST monotonically increment the sequence number for the + * CLOSE, LOCK, LOCKU, OPEN, OPEN_CONFIRM, and OPEN_DOWNGRADE + * operations. This is true even in the event that the previous + * operation that used the sequence number received an error. The only + * exception to this rule is if the previous operation received one of + * the following errors: NFSERR_STALE_CLIENTID, NFSERR_STALE_STATEID, + * NFSERR_BAD_STATEID, NFSERR_BAD_SEQID, NFSERR_BADXDR, + * NFSERR_RESOURCE, NFSERR_NOFILEHANDLE. + * + */ +#define seqid_mutating_err(err) \ +(((err) != NFSERR_STALE_CLIENTID) && \ + ((err) != NFSERR_STALE_STATEID) && \ + ((err) != NFSERR_BAD_STATEID) && \ + ((err) != NFSERR_BAD_SEQID) && \ + ((err) != NFSERR_BAD_XDR) && \ + ((err) != NFSERR_RESOURCE) && \ + ((err) != NFSERR_NOFILEHANDLE)) + +enum nfs4_client_state { + NFS4CLNT_OK = 0, +}; + +/* + * The nfs4_client identifies our client state to the server. + */ +struct nfs4_client { + struct list_head cl_servers; /* Global list of servers */ + struct in_addr cl_addr; /* Server identifier */ + u64 cl_clientid; /* constant */ + nfs4_verifier cl_confirm; + unsigned long cl_state; + + u32 cl_lockowner_id; + + /* + * The following rwsem ensures exclusive access to the server + * while we recover the state following a lease expiration. + */ + struct rw_semaphore cl_sem; + + struct list_head cl_delegations; + struct list_head cl_state_owners; + struct list_head cl_unused; + int cl_nunused; + spinlock_t cl_lock; + atomic_t cl_count; + + struct rpc_clnt * cl_rpcclient; + struct rpc_cred * cl_cred; + + struct list_head cl_superblocks; /* List of nfs_server structs */ + + unsigned long cl_lease_time; + unsigned long cl_last_renewal; + struct work_struct cl_renewd; + struct work_struct cl_recoverd; + + wait_queue_head_t cl_waitq; + struct rpc_wait_queue cl_rpcwaitq; + + /* used for the setclientid verifier */ + struct timespec cl_boot_time; + + /* idmapper */ + struct idmap * cl_idmap; + + /* Our own IP address, as a null-terminated string. + * This is used to generate the clientid, and the callback address. + */ + char cl_ipaddr[16]; + unsigned char cl_id_uniquifier; +}; + +/* + * NFS4 state_owners and lock_owners are simply labels for ordered + * sequences of RPC calls. Their sole purpose is to provide once-only + * semantics by allowing the server to identify replayed requests. + * + * The ->so_iosem is held during all state_owner seqid-mutating operations: + * OPEN, OPEN_DOWNGRADE, and CLOSE. Its purpose is to properly serialize + * so_seqid. + */ +struct nfs4_state_owner { + struct list_head so_list; /* per-clientid list of state_owners */ + struct nfs4_client *so_client; + u32 so_id; /* 32-bit identifier, unique */ + struct iosem so_iosem; + u32 so_seqid; /* protected by so_iosem */ + atomic_t so_count; + + struct rpc_cred *so_cred; /* Associated cred */ + struct list_head so_states; + struct list_head so_delegations; +}; + +/* + * struct nfs4_state maintains the client-side state for a given + * (state_owner,inode) tuple (OPEN) or state_owner (LOCK). + * + * OPEN: + * In order to know when to OPEN_DOWNGRADE or CLOSE the state on the server, + * we need to know how many files are open for reading or writing on a + * given inode. This information too is stored here. + * + * LOCK: one nfs4_state (LOCK) to hold the lock stateid nfs4_state(OPEN) + */ + +struct nfs4_lock_state { + struct list_head ls_locks; /* Other lock stateids */ + struct nfs4_state * ls_state; /* Pointer to open state */ + fl_owner_t ls_owner; /* POSIX lock owner */ +#define NFS_LOCK_INITIALIZED 1 + int ls_flags; + u32 ls_seqid; + u32 ls_id; + nfs4_stateid ls_stateid; + atomic_t ls_count; +}; + +/* bits for nfs4_state->flags */ +enum { + LK_STATE_IN_USE, + NFS_DELEGATED_STATE, +}; + +struct nfs4_state { + struct list_head open_states; /* List of states for the same state_owner */ + struct list_head inode_states; /* List of states for the same inode */ + struct list_head lock_states; /* List of subservient lock stateids */ + + struct nfs4_state_owner *owner; /* Pointer to the open owner */ + struct inode *inode; /* Pointer to the inode */ + + unsigned long flags; /* Do we hold any locks? */ + struct iosem lock_iosem; /* Serializes file locking operations */ + spinlock_t state_lock; /* Protects the lock_states list */ + + nfs4_stateid stateid; + + unsigned int nreaders; + unsigned int nwriters; + int state; /* State on the server (R,W, or RW) */ + atomic_t count; +}; + + +struct nfs4_exception { + long timeout; + int retry; +}; + +struct nfs4_state_recovery_ops { + int (*recover_open)(struct nfs4_state_owner *, struct nfs4_state *); + int (*recover_lock)(struct nfs4_state *, struct file_lock *); +}; + +extern struct dentry_operations nfs4_dentry_operations; +extern struct inode_operations nfs4_dir_inode_operations; + +/* inode.c */ +extern ssize_t nfs4_getxattr(struct dentry *, const char *, void *, size_t); +extern int nfs4_setxattr(struct dentry *, const char *, const void *, size_t, int); +extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t); + + +/* nfs4proc.c */ +extern int nfs4_map_errors(int err); +extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +extern int nfs4_proc_setclientid_confirm(struct nfs4_client *); +extern int nfs4_proc_async_renew(struct nfs4_client *); +extern int nfs4_proc_renew(struct nfs4_client *); +extern int nfs4_do_close(struct inode *inode, struct nfs4_state *state, mode_t mode); +extern struct dentry *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); +extern int nfs4_open_revalidate(struct inode *, struct dentry *, int, struct nameidata *); +extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); +extern int nfs4_proc_fs_locations(struct inode *dir, struct dentry *dentry, + struct nfs4_fs_locations *fs_locations, struct page *page); + +extern struct nfs4_state_recovery_ops nfs4_reboot_recovery_ops; +extern struct nfs4_state_recovery_ops nfs4_network_partition_recovery_ops; + +extern const u32 nfs4_fattr_bitmap[2]; +extern const u32 nfs4_statfs_bitmap[2]; +extern const u32 nfs4_pathconf_bitmap[2]; +extern const u32 nfs4_fsinfo_bitmap[2]; + +/* nfs4renewd.c */ +extern void nfs4_schedule_state_renewal(struct nfs4_client *); +extern void nfs4_renewd_prepare_shutdown(struct nfs_server *); +extern void nfs4_kill_renewd(struct nfs4_client *); +extern void nfs4_renew_state(void *); + +/* nfs4state.c */ +extern void init_nfsv4_state(struct nfs_server *); +extern void destroy_nfsv4_state(struct nfs_server *); +extern struct nfs4_client *nfs4_get_client(struct in_addr *); +extern void nfs4_put_client(struct nfs4_client *clp); +extern int nfs4_init_client(struct nfs4_client *clp); +extern struct nfs4_client *nfs4_find_client(struct in_addr *); +extern u32 nfs4_alloc_lockowner_id(struct nfs4_client *); + +extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); +extern void nfs4_put_state_owner(struct nfs4_state_owner *); +extern void nfs4_drop_state_owner(struct nfs4_state_owner *); +extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); +extern void nfs4_put_open_state(struct nfs4_state *); +extern void nfs4_close_state(struct nfs4_state *, mode_t); +extern struct nfs4_state *nfs4_find_state(struct inode *, struct rpc_cred *, mode_t mode); +extern void nfs4_increment_seqid(int status, struct nfs4_state_owner *sp); +extern void nfs4_schedule_state_recovery(struct nf