Documentation/filesystems/Locking | 27 fs/Kconfig | 20 fs/cifs/file.c | 2 fs/lockd/clntlock.c | 4 fs/lockd/clntproc.c | 176 +++- fs/lockd/host.c | 18 fs/lockd/svc4proc.c | 1 fs/lockd/svclock.c | 22 fs/lockd/svcproc.c | 1 fs/lockd/svcsubs.c | 2 fs/locks.c | 115 +- fs/nfs/Makefile | 3 fs/nfs/callback.c | 325 ++++++++ fs/nfs/callback.h | 70 + fs/nfs/callback_proc.c | 85 ++ fs/nfs/callback_xdr.c | 481 ++++++++++++ fs/nfs/delegation.c | 342 ++++++++ fs/nfs/delegation.h | 57 + fs/nfs/dir.c | 133 +-- fs/nfs/direct.c | 47 - fs/nfs/file.c | 147 ++- fs/nfs/inode.c | 358 +++++---- fs/nfs/mount_clnt.c | 2 fs/nfs/nfs2xdr.c | 27 fs/nfs/nfs3proc.c | 81 -- fs/nfs/nfs3xdr.c | 4 fs/nfs/nfs4proc.c | 1269 +++++++++++++++++++++++--------- fs/nfs/nfs4state.c | 371 +++++---- fs/nfs/nfs4xdr.c | 517 +++++++++---- fs/nfs/nfsroot.c | 6 fs/nfs/pagelist.c | 62 - fs/nfs/proc.c | 55 - fs/nfs/read.c | 62 - fs/nfs/unlink.c | 3 fs/nfs/write.c | 170 ++-- fs/nfsd/nfs4state.c | 13 include/linux/fs.h | 19 include/linux/lockd/lockd.h | 19 include/linux/nfs.h | 17 include/linux/nfs4.h | 5 include/linux/nfs_fs.h | 116 +- include/linux/nfs_fs_i.h | 4 include/linux/nfs_fs_sb.h | 1 include/linux/nfs_page.h | 37 include/linux/nfs_xdr.h | 64 - include/linux/sunrpc/gss_asn1.h | 1 include/linux/sunrpc/gss_spkm3.h | 61 + include/linux/sunrpc/sched.h | 67 + include/linux/sunrpc/svc.h | 10 net/sunrpc/auth_gss/Makefile | 4 net/sunrpc/auth_gss/auth_gss.c | 2 net/sunrpc/auth_gss/gss_generic_token.c | 2 net/sunrpc/auth_gss/gss_krb5_unseal.c | 2 net/sunrpc/auth_gss/gss_spkm3_mech.c | 296 +++++++ net/sunrpc/auth_gss/gss_spkm3_seal.c | 132 +++ net/sunrpc/auth_gss/gss_spkm3_token.c | 266 ++++++ net/sunrpc/auth_gss/gss_spkm3_unseal.c | 128 +++ net/sunrpc/clnt.c | 41 - net/sunrpc/sched.c | 534 ++++--------- net/sunrpc/sunrpc_syms.c | 1 net/sunrpc/svc.c | 9 net/sunrpc/xprt.c | 2 62 files changed, 5131 insertions(+), 1787 deletions(-) diff -u --recursive --new-file --show-c-function 
linux-2.6.8.1/Documentation/filesystems/Locking linux-2.6.8.1-50-rpc_queue_lock/Documentation/filesystems/Locking --- linux-2.6.8.1/Documentation/filesystems/Locking 2004-08-14 14:27:32.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/Documentation/filesystems/Locking 2004-08-22 21:45:18.000000000 -0400 @@ -276,21 +276,34 @@ foo_get_block(). It's an overkill, since internal fs locking and real critical areas are much smaller than the areas filesystems protect now. ---------------------------- file_lock ------------------------------------ +----------------------- file_lock_operations ------------------------------ prototypes: - void (*fl_notify)(struct file_lock *); /* unblock callback */ void (*fl_insert)(struct file_lock *); /* lock insertion callback */ void (*fl_remove)(struct file_lock *); /* lock removal callback */ + void (*fl_copy_lock)(struct file_lock *, struct file_lock *); + void (*fl_release_private)(struct file_lock *); + locking rules: - BKL may block -fl_notify: yes no -fl_insert: yes no -fl_remove: yes no + BKL may block +fl_insert: yes no +fl_remove: yes no +fl_copy_lock: yes no +fl_release_private: yes yes + +----------------------- lock_manager_operations --------------------------- +prototypes: + int (*fl_compare_owner)(struct file_lock *, struct file_lock *); + void (*fl_notify)(struct file_lock *); /* unblock callback */ + +locking rules: + BKL may block +fl_compare_owner: yes no +fl_notify: yes no + Currently only NLM provides instances of this class. None of the them block. If you have out-of-tree instances - please, show up. Locking in that area will change. 
- --------------------------- buffer_head ----------------------------------- prototypes: void (*b_end_io)(struct buffer_head *bh, int uptodate); diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/cifs/file.c linux-2.6.8.1-50-rpc_queue_lock/fs/cifs/file.c --- linux-2.6.8.1/fs/cifs/file.c 2004-08-14 14:25:48.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/cifs/file.c 2004-08-22 21:45:11.000000000 -0400 @@ -569,6 +569,8 @@ cifs_lock(struct file *file, int cmd, st netfid, length, pfLock->fl_start, numUnlock, numLock, lockType, wait_flag); + if (rc == 0 && (pfLock->fl_flags & FL_POSIX)) + posix_lock_file(file, pfLock); FreeXid(xid); return rc; } diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/Kconfig linux-2.6.8.1-50-rpc_queue_lock/fs/Kconfig --- linux-2.6.8.1/fs/Kconfig 2004-08-14 14:26:47.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/Kconfig 2004-08-22 21:46:56.000000000 -0400 @@ -1415,8 +1415,8 @@ config NFS_V3 bool "Provide NFSv3 client support" depends on NFS_FS help - Say Y here if you want your NFS client to be able to speak the newer - version 3 of the NFS protocol. + Say Y here if you want your NFS client to be able to speak version + 3 of the NFS protocol. If unsure, say Y. @@ -1559,6 +1559,22 @@ config RPCSEC_GSS_KRB5 If unsure, say N. +config RPCSEC_GSS_SPKM3 + tristate "Secure RPC: SPKM3 mechanism (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL + select SUNRPC_GSS + select CRYPTO + select CRYPTO_MD5 + select CRYPTO_DES + help + Provides for secure RPC calls by means of a gss-api + mechanism based on the SPKM3 public-key mechanism. + + Note: Requires an auxiliary userspace daemon which may be found on + http://www.citi.umich.edu/projects/nfsv4/ + + If unsure, say N. 
+ config SMB_FS tristate "SMB file system support (to mount Windows shares etc.)" depends on INET diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/lockd/clntlock.c linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/clntlock.c --- linux-2.6.8.1/fs/lockd/clntlock.c 2004-08-14 14:27:44.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/clntlock.c 2004-08-22 21:44:58.000000000 -0400 @@ -146,7 +146,7 @@ void nlmclnt_mark_reclaim(struct nlm_hos inode = fl->fl_file->f_dentry->d_inode; if (inode->i_sb->s_magic != NFS_SUPER_MAGIC) continue; - if (fl->fl_u.nfs_fl.host != host) + if (fl->fl_u.nfs_fl.owner->host != host) continue; if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_GRANTED)) continue; @@ -215,7 +215,7 @@ restart: inode = fl->fl_file->f_dentry->d_inode; if (inode->i_sb->s_magic != NFS_SUPER_MAGIC) continue; - if (fl->fl_u.nfs_fl.host != host) + if (fl->fl_u.nfs_fl.owner->host != host) continue; if (!(fl->fl_u.nfs_fl.flags & NFS_LCK_RECLAIM)) continue; diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/lockd/clntproc.c linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/clntproc.c --- linux-2.6.8.1/fs/lockd/clntproc.c 2004-08-14 14:25:49.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/clntproc.c 2004-08-22 21:45:11.000000000 -0400 @@ -27,6 +27,7 @@ static int nlmclnt_unlock(struct nlm_rqs static void nlmclnt_unlock_callback(struct rpc_task *); static void nlmclnt_cancel_callback(struct rpc_task *); static int nlm_stat_to_errno(u32 stat); +static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host); /* * Cookie counter for NLM requests @@ -41,11 +42,83 @@ static inline void nlmclnt_next_cookie(s nlm_cookie++; } +static struct nlm_lockowner *nlm_get_lockowner(struct nlm_lockowner *lockowner) +{ + atomic_inc(&lockowner->count); + return lockowner; +} + +static void nlm_put_lockowner(struct nlm_lockowner *lockowner) +{ + if (!atomic_dec_and_lock(&lockowner->count, &lockowner->host->h_lock)) + return; + 
list_del(&lockowner->list); + spin_unlock(&lockowner->host->h_lock); + nlm_release_host(lockowner->host); + kfree(lockowner); +} + +static inline int nlm_pidbusy(struct nlm_host *host, uint32_t pid) +{ + struct nlm_lockowner *lockowner; + list_for_each_entry(lockowner, &host->h_lockowners, list) { + if (lockowner->pid == pid) + return -EBUSY; + } + return 0; +} + +static inline uint32_t __nlm_alloc_pid(struct nlm_host *host) +{ + uint32_t res; + do { + res = host->h_pidcount++; + } while (nlm_pidbusy(host, res) < 0); + return res; +} + +static struct nlm_lockowner *__nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner) +{ + struct nlm_lockowner *lockowner; + list_for_each_entry(lockowner, &host->h_lockowners, list) { + if (lockowner->owner != owner) + continue; + return nlm_get_lockowner(lockowner); + } + return NULL; +} + +static struct nlm_lockowner *nlm_find_lockowner(struct nlm_host *host, fl_owner_t owner) +{ + struct nlm_lockowner *res, *new = NULL; + + spin_lock(&host->h_lock); + res = __nlm_find_lockowner(host, owner); + if (res == NULL) { + spin_unlock(&host->h_lock); + new = (struct nlm_lockowner *)kmalloc(sizeof(*new), GFP_KERNEL); + spin_lock(&host->h_lock); + res = __nlm_find_lockowner(host, owner); + if (res == NULL && new != NULL) { + res = new; + atomic_set(&new->count, 1); + new->owner = owner; + new->pid = __nlm_alloc_pid(host); + new->host = nlm_get_host(host); + list_add(&new->list, &host->h_lockowners); + new = NULL; + } + } + spin_unlock(&host->h_lock); + if (new != NULL) + kfree(new); + return res; +} + /* * Initialize arguments for TEST/LOCK/UNLOCK/CANCEL calls */ -static inline void -nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) +static void nlmclnt_setlockargs(struct nlm_rqst *req, struct file_lock *fl) { struct nlm_args *argp = &req->a_args; struct nlm_lock *lock = &argp->lock; @@ -60,6 +133,14 @@ nlmclnt_setlockargs(struct nlm_rqst *req locks_copy_lock(&lock->fl, fl); } +static void 
nlmclnt_release_lockargs(struct nlm_rqst *req) +{ + struct file_lock *fl = &req->a_args.lock.fl; + + if (fl->fl_ops && fl->fl_ops->fl_release_private) + fl->fl_ops->fl_release_private(fl); +} + /* * Initialize arguments for GRANTED call. The nlm_rqst structure * has been cleared already. @@ -77,8 +158,10 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca if (lock->oh.len > NLMCLNT_OHSIZE) { void *data = kmalloc(lock->oh.len, GFP_KERNEL); - if (!data) + if (!data) { + nlmclnt_freegrantargs(call); return 0; + } call->a_args.lock.oh.data = (u8 *) data; } @@ -89,12 +172,15 @@ nlmclnt_setgrantargs(struct nlm_rqst *ca void nlmclnt_freegrantargs(struct nlm_rqst *call) { + struct file_lock *fl = &call->a_args.lock.fl; /* * Check whether we allocated memory for the owner. */ if (call->a_args.lock.oh.data != (u8 *) call->a_owner) { kfree(call->a_args.lock.oh.data); } + if (fl->fl_ops && fl->fl_ops->fl_release_private) + fl->fl_ops->fl_release_private(fl); } /* @@ -165,6 +251,8 @@ nlmclnt_proc(struct inode *inode, int cm } call->a_host = host; + nlmclnt_locks_init_private(fl, host); + /* Set up the argument struct */ nlmclnt_setlockargs(call, fl); @@ -179,9 +267,6 @@ nlmclnt_proc(struct inode *inode, int cm else status = -EINVAL; - if (status < 0 && (call->a_flags & RPC_TASK_ASYNC)) - kfree(call); - out_restore: spin_lock_irqsave(&current->sighand->siglock, flags); current->blocked = oldset; @@ -382,7 +467,9 @@ nlmclnt_test(struct nlm_rqst *req, struc { int status; - if ((status = nlmclnt_call(req, NLMPROC_TEST)) < 0) + status = nlmclnt_call(req, NLMPROC_TEST); + nlmclnt_release_lockargs(req); + if (status < 0) return status; status = req->a_res.status; @@ -391,10 +478,9 @@ nlmclnt_test(struct nlm_rqst *req, struc } if (status == NLM_LCK_DENIED) { /* * Report the conflicting lock back to the application. - * FIXME: Is it OK to report the pid back as well? 
*/ locks_copy_lock(fl, &req->a_res.lock.fl); - /* fl->fl_pid = 0; */ + fl->fl_pid = 0; } else { return nlm_stat_to_errno(req->a_res.status); } @@ -402,18 +488,30 @@ nlmclnt_test(struct nlm_rqst *req, struc return 0; } -static -void nlmclnt_insert_lock_callback(struct file_lock *fl) +static void nlmclnt_locks_copy_lock(struct file_lock *new, struct file_lock *fl) { - nlm_get_host(fl->fl_u.nfs_fl.host); + memcpy(&new->fl_u.nfs_fl, &fl->fl_u.nfs_fl, sizeof(new->fl_u.nfs_fl)); + nlm_get_lockowner(new->fl_u.nfs_fl.owner); } -static -void nlmclnt_remove_lock_callback(struct file_lock *fl) + +static void nlmclnt_locks_release_private(struct file_lock *fl) { - if (fl->fl_u.nfs_fl.host) { - nlm_release_host(fl->fl_u.nfs_fl.host); - fl->fl_u.nfs_fl.host = NULL; - } + nlm_put_lockowner(fl->fl_u.nfs_fl.owner); + fl->fl_ops = NULL; +} + +static struct file_lock_operations nlmclnt_lock_ops = { + .fl_copy_lock = nlmclnt_locks_copy_lock, + .fl_release_private = nlmclnt_locks_release_private, +}; + +static void nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host) +{ + BUG_ON(fl->fl_ops != NULL); + fl->fl_u.nfs_fl.state = 0; + fl->fl_u.nfs_fl.flags = 0; + fl->fl_u.nfs_fl.owner = nlm_find_lockowner(host, fl->fl_owner); + fl->fl_ops = &nlmclnt_lock_ops; } /* @@ -446,7 +544,8 @@ nlmclnt_lock(struct nlm_rqst *req, struc if (!host->h_monitored && nsm_monitor(host) < 0) { printk(KERN_NOTICE "lockd: failed to monitor %s\n", host->h_name); - return -ENOLCK; + status = -ENOLCK; + goto out; } do { @@ -456,18 +555,21 @@ nlmclnt_lock(struct nlm_rqst *req, struc status = nlmclnt_block(host, fl, &resp->status); } if (status < 0) - return status; + goto out; } while (resp->status == NLM_LCK_BLOCKED && req->a_args.block); if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; fl->fl_u.nfs_fl.flags |= NFS_LCK_GRANTED; - fl->fl_u.nfs_fl.host = host; - fl->fl_insert = nlmclnt_insert_lock_callback; - fl->fl_remove = nlmclnt_remove_lock_callback; - } - - return 
nlm_stat_to_errno(resp->status); + fl->fl_flags |= FL_SLEEP; + if (posix_lock_file_wait(fl->fl_file, fl) < 0) + printk(KERN_WARNING "%s: VFS is out of sync with lock manager!\n", + __FUNCTION__); + } + status = nlm_stat_to_errno(resp->status); +out: + nlmclnt_release_lockargs(req); + return status; } /* @@ -527,13 +629,24 @@ nlmclnt_unlock(struct nlm_rqst *req, str fl->fl_u.nfs_fl.flags &= ~NFS_LCK_GRANTED; if (req->a_flags & RPC_TASK_ASYNC) { - return nlmclnt_async_call(req, NLMPROC_UNLOCK, + status = nlmclnt_async_call(req, NLMPROC_UNLOCK, nlmclnt_unlock_callback); + /* Hrmf... Do the unlock early since locks_remove_posix() + * really expects us to free the lock synchronously */ + posix_lock_file(fl->fl_file, fl); + if (status < 0) { + nlmclnt_release_lockargs(req); + kfree(req); + } + return status; } - if ((status = nlmclnt_call(req, NLMPROC_UNLOCK)) < 0) + status = nlmclnt_call(req, NLMPROC_UNLOCK); + nlmclnt_release_lockargs(req); + if (status < 0) return status; + posix_lock_file(fl->fl_file, fl); if (resp->status == NLM_LCK_GRANTED) return 0; @@ -564,9 +677,9 @@ nlmclnt_unlock_callback(struct rpc_task } if (status != NLM_LCK_GRANTED) printk(KERN_WARNING "lockd: unexpected unlock status: %d\n", status); - die: nlm_release_host(req->a_host); + nlmclnt_release_lockargs(req); kfree(req); return; retry_rebind: @@ -605,8 +718,10 @@ nlmclnt_cancel(struct nlm_host *host, st status = nlmclnt_async_call(req, NLMPROC_CANCEL, nlmclnt_cancel_callback); - if (status < 0) + if (status < 0) { + nlmclnt_release_lockargs(req); kfree(req); + } spin_lock_irqsave(&current->sighand->siglock, flags); current->blocked = oldset; @@ -648,6 +763,7 @@ nlmclnt_cancel_callback(struct rpc_task die: nlm_release_host(req->a_host); + nlmclnt_release_lockargs(req); kfree(req); return; diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/lockd/host.c linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/host.c --- linux-2.6.8.1/fs/lockd/host.c 2004-08-14 14:26:44.000000000 -0400 +++ 
linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/host.c 2004-08-22 21:44:58.000000000 -0400 @@ -119,13 +119,15 @@ nlm_lookup_host(int server, struct socka init_MUTEX(&host->h_sema); host->h_nextrebind = jiffies + NLM_HOST_REBIND; host->h_expires = jiffies + NLM_HOST_EXPIRE; - host->h_count = 1; + atomic_set(&host->h_count, 1); init_waitqueue_head(&host->h_gracewait); host->h_state = 0; /* pseudo NSM state */ host->h_nsmstate = 0; /* real NSM state */ host->h_server = server; host->h_next = nlm_hosts[hash]; nlm_hosts[hash] = host; + INIT_LIST_HEAD(&host->h_lockowners); + spin_lock_init(&host->h_lock); if (++nrhosts > NLM_HOST_MAX) next_gc = 0; @@ -235,7 +237,7 @@ struct nlm_host * nlm_get_host(struct nl { if (host) { dprintk("lockd: get host %s\n", host->h_name); - host->h_count ++; + atomic_inc(&host->h_count); host->h_expires = jiffies + NLM_HOST_EXPIRE; } return host; @@ -246,9 +248,10 @@ struct nlm_host * nlm_get_host(struct nl */ void nlm_release_host(struct nlm_host *host) { - if (host && host->h_count) { + if (host != NULL) { dprintk("lockd: release host %s\n", host->h_name); - host->h_count --; + atomic_dec(&host->h_count); + BUG_ON(atomic_read(&host->h_count) < 0); } } @@ -283,7 +286,7 @@ nlm_shutdown_hosts(void) for (i = 0; i < NLM_HOST_NRHASH; i++) { for (host = nlm_hosts[i]; host; host = host->h_next) { dprintk(" %s (cnt %d use %d exp %ld)\n", - host->h_name, host->h_count, + host->h_name, atomic_read(&host->h_count), host->h_inuse, host->h_expires); } } @@ -314,10 +317,10 @@ nlm_gc_hosts(void) for (i = 0; i < NLM_HOST_NRHASH; i++) { q = &nlm_hosts[i]; while ((host = *q) != NULL) { - if (host->h_count || host->h_inuse + if (atomic_read(&host->h_count) || host->h_inuse || time_before(jiffies, host->h_expires)) { dprintk("nlm_gc_hosts skipping %s (cnt %d use %d exp %ld)\n", - host->h_name, host->h_count, + host->h_name, atomic_read(&host->h_count), host->h_inuse, host->h_expires); q = &host->h_next; continue; @@ -336,6 +339,7 @@ nlm_gc_hosts(void) 
rpc_destroy_client(host->h_rpcclnt); } } + BUG_ON(!list_empty(&host->h_lockowners)); kfree(host); nrhosts--; } diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/lockd/svc4proc.c linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/svc4proc.c --- linux-2.6.8.1/fs/lockd/svc4proc.c 2004-08-14 14:26:13.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/svc4proc.c 2004-08-22 21:44:53.000000000 -0400 @@ -55,6 +55,7 @@ nlm4svc_retrieve_args(struct svc_rqst *r /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = &file->f_file; lock->fl.fl_owner = (fl_owner_t) host; + lock->fl.fl_lmops = &nlmsvc_lock_operations; } return 0; diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/lockd/svclock.c linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/svclock.c --- linux-2.6.8.1/fs/lockd/svclock.c 2004-08-14 14:25:56.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/svclock.c 2004-08-22 21:45:26.000000000 -0400 @@ -42,7 +42,6 @@ static void nlmsvc_insert_block(struct nlm_block *block, unsigned long); static int nlmsvc_remove_block(struct nlm_block *block); static void nlmsvc_grant_callback(struct rpc_task *task); -static void nlmsvc_notify_blocked(struct file_lock *); /* * The list of blocked locks to retry @@ -193,7 +192,7 @@ nlmsvc_create_block(struct svc_rqst *rqs goto failed_free; /* Set notifier function for VFS, and init args */ - block->b_call.a_args.lock.fl.fl_notify = nlmsvc_notify_blocked; + block->b_call.a_args.lock.fl.fl_lmops = &nlmsvc_lock_operations; block->b_call.a_args.cookie = *cookie; /* see above */ dprintk("lockd: created block %p...\n", block); @@ -238,8 +237,13 @@ nlmsvc_delete_block(struct nlm_block *bl /* Remove block from list */ nlmsvc_remove_block(block); - posix_unblock_lock(&file->f_file, fl); - block->b_granted = 0; + if (fl->fl_next) + posix_unblock_lock(&file->f_file, fl); + if (unlock) { + fl->fl_type = F_UNLCK; + posix_lock_file(&file->f_file, fl); + block->b_granted = 0; + } /* If the block is in 
the middle of a GRANT callback, * don't kill it yet. */ @@ -479,6 +483,16 @@ nlmsvc_notify_blocked(struct file_lock * printk(KERN_WARNING "lockd: notification for unknown block!\n"); } +static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2) +{ + return fl1->fl_owner == fl2->fl_owner && fl1->fl_pid == fl2->fl_pid; +} + +struct lock_manager_operations nlmsvc_lock_operations = { + .fl_compare_owner = nlmsvc_same_owner, + .fl_notify = nlmsvc_notify_blocked, +}; + /* * Try to claim a lock that was previously blocked. * diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/lockd/svcproc.c linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/svcproc.c --- linux-2.6.8.1/fs/lockd/svcproc.c 2004-08-14 14:27:02.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/svcproc.c 2004-08-22 21:44:54.000000000 -0400 @@ -84,6 +84,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rq /* Set up the missing parts of the file_lock structure */ lock->fl.fl_file = &file->f_file; lock->fl.fl_owner = (fl_owner_t) host; + lock->fl.fl_lmops = &nlmsvc_lock_operations; } return 0; diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/lockd/svcsubs.c linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/svcsubs.c --- linux-2.6.8.1/fs/lockd/svcsubs.c 2004-08-14 14:26:56.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/lockd/svcsubs.c 2004-08-22 21:46:38.000000000 -0400 @@ -67,7 +67,7 @@ nlm_lookup_file(struct svc_rqst *rqstp, down(&nlm_file_sema); for (file = nlm_files[hash]; file; file = file->f_next) - if (!memcmp(&file->f_handle, f, sizeof(*f))) + if (!nfs_compare_fh(&file->f_handle, f)) goto found; dprintk("lockd: creating file for (%08x %08x %08x %08x %08x %08x)\n", diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/locks.c linux-2.6.8.1-50-rpc_queue_lock/fs/locks.c --- linux-2.6.8.1/fs/locks.c 2004-08-14 14:27:44.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/locks.c 2004-08-22 21:45:11.000000000 -0400 @@ -167,6 +167,13 @@ static inline void 
locks_free_lock(struc if (!list_empty(&fl->fl_link)) panic("Attempting to free lock on active lock list"); + if (fl->fl_ops) { + if (fl->fl_ops->fl_release_private) + fl->fl_ops->fl_release_private(fl); + fl->fl_ops = NULL; + } + fl->fl_lmops = NULL; + kmem_cache_free(filelock_cache, fl); } @@ -183,9 +190,8 @@ void locks_init_lock(struct file_lock *f fl->fl_flags = 0; fl->fl_type = 0; fl->fl_start = fl->fl_end = 0; - fl->fl_notify = NULL; - fl->fl_insert = NULL; - fl->fl_remove = NULL; + fl->fl_ops = NULL; + fl->fl_lmops = NULL; } EXPORT_SYMBOL(locks_init_lock); @@ -217,10 +223,10 @@ void locks_copy_lock(struct file_lock *n new->fl_type = fl->fl_type; new->fl_start = fl->fl_start; new->fl_end = fl->fl_end; - new->fl_notify = fl->fl_notify; - new->fl_insert = fl->fl_insert; - new->fl_remove = fl->fl_remove; - new->fl_u = fl->fl_u; + new->fl_ops = fl->fl_ops; + new->fl_lmops = fl->fl_lmops; + if (fl->fl_ops && fl->fl_ops->fl_copy_lock) + fl->fl_ops->fl_copy_lock(new, fl); } EXPORT_SYMBOL(locks_copy_lock); @@ -321,9 +327,8 @@ static int flock_to_posix_lock(struct fi fl->fl_pid = current->tgid; fl->fl_file = filp; fl->fl_flags = FL_POSIX; - fl->fl_notify = NULL; - fl->fl_insert = NULL; - fl->fl_remove = NULL; + fl->fl_ops = NULL; + fl->fl_lmops = NULL; return assign_type(fl, l->l_type); } @@ -361,9 +366,8 @@ static int flock64_to_posix_lock(struct fl->fl_pid = current->tgid; fl->fl_file = filp; fl->fl_flags = FL_POSIX; - fl->fl_notify = NULL; - fl->fl_insert = NULL; - fl->fl_remove = NULL; + fl->fl_ops = NULL; + fl->fl_lmops = NULL; switch (l->l_type) { case F_RDLCK: @@ -397,9 +401,8 @@ static int lease_alloc(struct file *filp } fl->fl_start = 0; fl->fl_end = OFFSET_MAX; - fl->fl_notify = NULL; - fl->fl_insert = NULL; - fl->fl_remove = NULL; + fl->fl_ops = NULL; + fl->fl_lmops = NULL; *flp = fl; return 0; @@ -414,14 +417,15 @@ static inline int locks_overlap(struct f } /* - * Check whether two locks have the same owner. 
The apparently superfluous - * check for fl_pid enables us to distinguish between locks set by lockd. + * Check whether two locks have the same owner. */ static inline int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) { - return (fl1->fl_owner == fl2->fl_owner) && - (fl1->fl_pid == fl2->fl_pid); + if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner) + return fl2->fl_lmops == fl1->fl_lmops && + fl1->fl_lmops->fl_compare_owner(fl1, fl2); + return fl1->fl_owner == fl2->fl_owner; } /* Remove waiter from blocker's block list. @@ -472,8 +476,8 @@ static void locks_wake_up_blocks(struct struct file_lock *waiter = list_entry(blocker->fl_block.next, struct file_lock, fl_block); __locks_delete_block(waiter); - if (waiter->fl_notify) - waiter->fl_notify(waiter); + if (waiter->fl_lmops && waiter->fl_lmops->fl_notify) + waiter->fl_lmops->fl_notify(waiter); else wake_up(&waiter->fl_wait); } @@ -490,8 +494,8 @@ static void locks_insert_lock(struct fil fl->fl_next = *pos; *pos = fl; - if (fl->fl_insert) - fl->fl_insert(fl); + if (fl->fl_ops && fl->fl_ops->fl_insert) + fl->fl_ops->fl_insert(fl); } /* @@ -514,8 +518,8 @@ static void locks_delete_lock(struct fil fl->fl_fasync = NULL; } - if (fl->fl_remove) - fl->fl_remove(fl); + if (fl->fl_ops && fl->fl_ops->fl_remove) + fl->fl_ops->fl_remove(fl); locks_wake_up_blocks(fl); locks_free_lock(fl); @@ -631,24 +635,15 @@ int posix_locks_deadlock(struct file_loc struct file_lock *block_fl) { struct list_head *tmp; - fl_owner_t caller_owner, blocked_owner; - unsigned int caller_pid, blocked_pid; - - caller_owner = caller_fl->fl_owner; - caller_pid = caller_fl->fl_pid; - blocked_owner = block_fl->fl_owner; - blocked_pid = block_fl->fl_pid; next_task: - if (caller_owner == blocked_owner && caller_pid == blocked_pid) + if (posix_same_owner(caller_fl, block_fl)) return 1; list_for_each(tmp, &blocked_list) { struct file_lock *fl = list_entry(tmp, struct file_lock, fl_link); - if ((fl->fl_owner == blocked_owner) - && (fl->fl_pid 
== blocked_pid)) { + if (posix_same_owner(fl, block_fl)) { fl = fl->fl_next; - blocked_owner = fl->fl_owner; - blocked_pid = fl->fl_pid; + block_fl = fl; goto next_task; } } @@ -912,6 +907,34 @@ int posix_lock_file(struct file *filp, s } /** + * posix_lock_file_wait - Apply a POSIX-style lock to a file + * @filp: The file to apply the lock to + * @fl: The lock to be applied + * + * Add a POSIX style lock to a file. + * We merge adjacent & overlapping locks whenever possible. + * POSIX locks are sorted by owner task, then by starting address + */ +int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + int error; + might_sleep (); + for (;;) { + error = __posix_lock_file(filp->f_dentry->d_inode, fl); + if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) + break; + error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); + if (!error) + continue; + + locks_delete_block(fl); + break; + } + return error; +} +EXPORT_SYMBOL(posix_lock_file_wait); + +/** * locks_mandatory_locked - Check for an active lock * @inode: the file to check * @@ -1422,7 +1445,6 @@ int fcntl_getlk(struct file *filp, struc error = -EFAULT; if (!copy_to_user(l, &flock, sizeof(flock))) error = 0; - out: return error; } @@ -1489,8 +1511,7 @@ int fcntl_setlk(struct file *filp, unsig if (filp->f_op && filp->f_op->lock != NULL) { error = filp->f_op->lock(filp, cmd, file_lock); - if (error < 0) - goto out; + goto out; } for (;;) { @@ -1624,8 +1645,7 @@ int fcntl_setlk64(struct file *filp, uns if (filp->f_op && filp->f_op->lock != NULL) { error = filp->f_op->lock(filp, cmd, file_lock); - if (error < 0) - goto out; + goto out; } for (;;) { @@ -1672,10 +1692,12 @@ void locks_remove_posix(struct file *fil lock.fl_owner = owner; lock.fl_pid = current->tgid; lock.fl_file = filp; + lock.fl_ops = NULL; + lock.fl_lmops = NULL; if (filp->f_op && filp->f_op->lock != NULL) { filp->f_op->lock(filp, F_SETLK, &lock); - /* Ignore any error -- we must remove the locks anyway */ + goto out; } /* Can't 
use posix_lock_file here; we need to remove it no matter @@ -1684,13 +1706,16 @@ void locks_remove_posix(struct file *fil lock_kernel(); while (*before != NULL) { struct file_lock *fl = *before; - if (IS_POSIX(fl) && (fl->fl_owner == owner)) { + if (IS_POSIX(fl) && posix_same_owner(fl, &lock)) { locks_delete_lock(before); continue; } before = &fl->fl_next; } unlock_kernel(); +out: + if (lock.fl_ops && lock.fl_ops->fl_release_private) + lock.fl_ops->fl_release_private(&lock); } EXPORT_SYMBOL(locks_remove_posix); diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/callback.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/callback.c --- linux-2.6.8.1/fs/nfs/callback.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/callback.c 2004-08-22 21:47:55.000000000 -0400 @@ -0,0 +1,325 @@ +/* + * linux/fs/nfs/callback.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback handling + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "callback.h" + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +struct nfs_callback_data { + unsigned int users; + struct svc_serv *serv; + pid_t pid; + struct completion started; + struct completion stopped; +}; + +static struct nfs_callback_data nfs_callback_info; +static DECLARE_MUTEX(nfs_callback_sema); +static struct svc_program nfs4_callback_program; + +unsigned short nfs_callback_tcpport; + +/* + * This is the callback kernel thread. + */ +static void nfs_callback_svc(struct svc_rqst *rqstp) +{ + struct svc_serv *serv = rqstp->rq_server; + int err; + + __module_get(THIS_MODULE); + lock_kernel(); + + nfs_callback_info.pid = current->pid; + daemonize("nfsv4-svc"); + /* Process request with signals blocked, but allow SIGKILL. 
*/ + allow_signal(SIGKILL); + + complete(&nfs_callback_info.started); + + while (nfs_callback_info.users != 0 || !signalled()) { + /* + * Listen for a request on the socket + */ + err = svc_recv(serv, rqstp, MAX_SCHEDULE_TIMEOUT); + if (err == -EAGAIN || err == -EINTR) + continue; + if (err < 0) { + printk(KERN_WARNING + "%s: terminating on error %d\n", + __FUNCTION__, -err); + break; + } + dprintk("%s: request from %u.%u.%u.%u\n", __FUNCTION__, + NIPQUAD(rqstp->rq_addr.sin_addr.s_addr)); + svc_process(serv, rqstp); + } + + nfs_callback_info.pid = 0; + complete(&nfs_callback_info.stopped); + unlock_kernel(); + module_put_and_exit(0); +} + +/* + * Bring up the server process if it is not already up. + */ +int nfs_callback_up(void) +{ + struct svc_serv *serv; + struct svc_sock *svsk; + int ret = 0; + + lock_kernel(); + down(&nfs_callback_sema); + if (nfs_callback_info.users++ || nfs_callback_info.pid != 0) + goto out; + init_completion(&nfs_callback_info.started); + init_completion(&nfs_callback_info.stopped); + serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE); + ret = -ENOMEM; + if (!serv) + goto out_err; + /* FIXME: We don't want to register this socket with the portmapper */ + ret = svc_makesock(serv, IPPROTO_TCP, 0); + if (ret < 0) + goto out_destroy; + if (!list_empty(&serv->sv_permsocks)) { + svsk = list_entry(serv->sv_permsocks.next, + struct svc_sock, sk_list); + nfs_callback_tcpport = ntohs(inet_sk(svsk->sk_sk)->sport); + dprintk ("Callback port = 0x%x\n", nfs_callback_tcpport); + } else + BUG(); + ret = svc_create_thread(nfs_callback_svc, serv); + if (ret < 0) + goto out_destroy; + nfs_callback_info.serv = serv; + wait_for_completion(&nfs_callback_info.started); +out: + up(&nfs_callback_sema); + unlock_kernel(); + return ret; +out_destroy: + svc_destroy(serv); +out_err: + nfs_callback_info.users--; + goto out; +} + +/* + * Kill the server process if it is not already up. 
+ */ +int nfs_callback_down(void) +{ + int ret = 0; + + lock_kernel(); + down(&nfs_callback_sema); + if (--nfs_callback_info.users || nfs_callback_info.pid == 0) + goto out; + kill_proc(nfs_callback_info.pid, SIGKILL, 1); + wait_for_completion(&nfs_callback_info.stopped); +out: + up(&nfs_callback_sema); + unlock_kernel(); + return ret; +} + +/* + * AUTH_NULL authentication + */ +static int nfs_callback_null_accept(struct svc_rqst *rqstp, u32 *authp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + + if (argv->iov_len < 3*4) + return SVC_GARBAGE; + + if (svc_getu32(argv) != 0) { + dprintk("svc: bad null cred\n"); + *authp = rpc_autherr_badcred; + return SVC_DENIED; + } + if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + dprintk("svc: bad null verf\n"); + *authp = rpc_autherr_badverf; + return SVC_DENIED; + } + + /* Signal that mapping to nobody uid/gid is required */ + rqstp->rq_cred.cr_uid = (uid_t) -1; + rqstp->rq_cred.cr_gid = (gid_t) -1; + rqstp->rq_cred.cr_group_info = groups_alloc(0); + if (rqstp->rq_cred.cr_group_info == NULL) + return SVC_DROP; /* kmalloc failure - client must retry */ + + /* Put NULL verifier */ + svc_putu32(resv, RPC_AUTH_NULL); + svc_putu32(resv, 0); + dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK); + return SVC_OK; +} + +static int nfs_callback_null_release(struct svc_rqst *rqstp) +{ + if (rqstp->rq_cred.cr_group_info) + put_group_info(rqstp->rq_cred.cr_group_info); + rqstp->rq_cred.cr_group_info = NULL; + return 0; /* don't drop */ +} + +static struct auth_ops nfs_callback_auth_null = { + .name = "null", + .flavour = RPC_AUTH_NULL, + .accept = nfs_callback_null_accept, + .release = nfs_callback_null_release, +}; + +/* + * AUTH_SYS authentication + */ +static int nfs_callback_unix_accept(struct svc_rqst *rqstp, u32 *authp) +{ + struct kvec *argv = &rqstp->rq_arg.head[0]; + struct kvec *resv = &rqstp->rq_res.head[0]; + struct svc_cred *cred = &rqstp->rq_cred; 
+ u32 slen, i; + int len = argv->iov_len; + + dprintk("%s: start\n", __FUNCTION__); + cred->cr_group_info = NULL; + rqstp->rq_client = NULL; + if ((len -= 3*4) < 0) + return SVC_GARBAGE; + + /* Get length, time stamp and machine name */ + svc_getu32(argv); + svc_getu32(argv); + slen = XDR_QUADLEN(ntohl(svc_getu32(argv))); + if (slen > 64 || (len -= (slen + 3)*4) < 0) + goto badcred; + argv->iov_base = (void*)((u32*)argv->iov_base + slen); + argv->iov_len -= slen*4; + + cred->cr_uid = ntohl(svc_getu32(argv)); + cred->cr_gid = ntohl(svc_getu32(argv)); + slen = ntohl(svc_getu32(argv)); + if (slen > 16 || (len -= (slen + 2)*4) < 0) + goto badcred; + cred->cr_group_info = groups_alloc(slen); + if (cred->cr_group_info == NULL) + return SVC_DROP; + for (i = 0; i < slen; i++) + GROUP_AT(cred->cr_group_info, i) = ntohl(svc_getu32(argv)); + + if (svc_getu32(argv) != RPC_AUTH_NULL || svc_getu32(argv) != 0) { + *authp = rpc_autherr_badverf; + return SVC_DENIED; + } + /* Put NULL verifier */ + svc_putu32(resv, RPC_AUTH_NULL); + svc_putu32(resv, 0); + dprintk("%s: success, returning %d!\n", __FUNCTION__, SVC_OK); + return SVC_OK; +badcred: + *authp = rpc_autherr_badcred; + return SVC_DENIED; +} + +static int nfs_callback_unix_release(struct svc_rqst *rqstp) +{ + if (rqstp->rq_cred.cr_group_info) + put_group_info(rqstp->rq_cred.cr_group_info); + rqstp->rq_cred.cr_group_info = NULL; + return 0; +} + +static struct auth_ops nfs_callback_auth_unix = { + .name = "unix", + .flavour = RPC_AUTH_UNIX, + .accept = nfs_callback_unix_accept, + .release = nfs_callback_unix_release, +}; + +/* + * Hook the authentication protocol + */ +static int nfs_callback_auth(struct svc_rqst *rqstp, u32 *authp) +{ + struct in_addr *addr = &rqstp->rq_addr.sin_addr; + struct nfs4_client *clp; + struct kvec *argv = &rqstp->rq_arg.head[0]; + int flavour; + int retval; + + /* Don't talk to strangers */ + clp = nfs4_find_client(addr); + if (clp == NULL) + return SVC_DROP; + dprintk("%s: %u.%u.%u.%u NFSv4 
callback!\n", __FUNCTION__, NIPQUAD(addr)); + nfs4_put_client(clp); + flavour = ntohl(svc_getu32(argv)); + switch(flavour) { + case RPC_AUTH_NULL: + if (rqstp->rq_proc != CB_NULL) { + *authp = rpc_autherr_tooweak; + retval = SVC_DENIED; + break; + } + rqstp->rq_authop = &nfs_callback_auth_null; + retval = nfs_callback_null_accept(rqstp, authp); + break; + case RPC_AUTH_UNIX: + /* Eat the authentication flavour */ + rqstp->rq_authop = &nfs_callback_auth_unix; + retval = nfs_callback_unix_accept(rqstp, authp); + break; + default: + /* FIXME: need to add RPCSEC_GSS upcalls */ +#if 0 + svc_ungetu32(argv); + retval = svc_authenticate(rqstp, authp); +#else + *authp = rpc_autherr_rejectedcred; + retval = SVC_DENIED; +#endif + } + dprintk("%s: flavour %d returning error %d\n", __FUNCTION__, flavour, retval); + return retval; +} + +/* + * Define NFS4 callback program + */ +extern struct svc_version nfs4_callback_version1; + +static struct svc_version *nfs4_callback_version[] = { + [1] = &nfs4_callback_version1, +}; + +static struct svc_stat nfs4_callback_stats; + +static struct svc_program nfs4_callback_program = { + .pg_prog = NFS4_CALLBACK, /* RPC service number */ + .pg_nvers = ARRAY_SIZE(nfs4_callback_version), /* Number of entries */ + .pg_vers = nfs4_callback_version, /* version table */ + .pg_name = "NFSv4 callback", /* service name */ + .pg_class = "nfs", /* authentication class */ + .pg_stats = &nfs4_callback_stats, + .pg_authenticate = nfs_callback_auth, +}; diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/callback.h linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/callback.h --- linux-2.6.8.1/fs/nfs/callback.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/callback.h 2004-08-22 21:47:55.000000000 -0400 @@ -0,0 +1,70 @@ +/* + * linux/fs/nfs/callback.h + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback definitions + */ +#ifndef __LINUX_FS_NFS_CALLBACK_H +#define __LINUX_FS_NFS_CALLBACK_H + +#define 
NFS4_CALLBACK 0x40000000 +#define NFS4_CALLBACK_XDRSIZE 2048 +#define NFS4_CALLBACK_BUFSIZE (1024 + NFS4_CALLBACK_XDRSIZE) + +enum nfs4_callback_procnum { + CB_NULL = 0, + CB_COMPOUND = 1, +}; + +enum nfs4_callback_opnum { + OP_CB_GETATTR = 3, + OP_CB_RECALL = 4, + OP_CB_ILLEGAL = 10044, +}; + +struct cb_compound_hdr_arg { + int taglen; + const char *tag; + unsigned int callback_ident; + unsigned nops; +}; + +struct cb_compound_hdr_res { + uint32_t *status; + int taglen; + const char *tag; + uint32_t *nops; +}; + +struct cb_getattrargs { + struct sockaddr_in *addr; + struct nfs_fh fh; + uint32_t bitmap[2]; +}; + +struct cb_getattrres { + uint32_t status; + uint32_t bitmap[2]; + uint64_t size; + uint64_t change_attr; + struct timespec ctime; + struct timespec mtime; +}; + +struct cb_recallargs { + struct sockaddr_in *addr; + struct nfs_fh fh; + nfs4_stateid stateid; + uint32_t truncate; +}; + +extern unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res); +extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy); + +extern int nfs_callback_up(void); +extern int nfs_callback_down(void); + +extern unsigned short nfs_callback_tcpport; + +#endif /* __LINUX_FS_NFS_CALLBACK_H */ diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/callback_proc.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/callback_proc.c --- linux-2.6.8.1/fs/nfs/callback_proc.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/callback_proc.c 2004-08-22 21:47:55.000000000 -0400 @@ -0,0 +1,85 @@ +/* + * linux/fs/nfs/callback_proc.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback procedures + */ +#include +#include +#include +#include "callback.h" +#include "delegation.h" + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +unsigned nfs4_callback_getattr(struct cb_getattrargs *args, struct cb_getattrres *res) +{ + struct nfs4_client *clp; + struct nfs_delegation *delegation; + struct nfs_inode *nfsi; + 
struct inode *inode; + + res->bitmap[0] = res->bitmap[1] = 0; + res->status = htonl(NFS4ERR_BADHANDLE); + clp = nfs4_find_client(&args->addr->sin_addr); + if (clp == NULL) + goto out; + inode = nfs_delegation_find_inode(clp, &args->fh); + if (inode == NULL) + goto out_putclient; + nfsi = NFS_I(inode); + down_read(&nfsi->rwsem); + delegation = nfsi->delegation; + if (delegation == NULL || (delegation->type & FMODE_WRITE) == 0) + goto out_iput; + res->size = i_size_read(inode); + res->change_attr = NFS_CHANGE_ATTR(inode); + res->ctime = inode->i_ctime; + res->mtime = inode->i_mtime; + res->bitmap[0] = (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE) & + args->bitmap[0]; + res->bitmap[1] = (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY) & + args->bitmap[1]; + res->status = 0; +out_iput: + up_read(&nfsi->rwsem); + iput(inode); +out_putclient: + nfs4_put_client(clp); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res->status)); + return res->status; +} + +unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy) +{ + struct nfs4_client *clp; + struct inode *inode; + unsigned res; + + res = htonl(NFS4ERR_BADHANDLE); + clp = nfs4_find_client(&args->addr->sin_addr); + if (clp == NULL) + goto out; + inode = nfs_delegation_find_inode(clp, &args->fh); + if (inode == NULL) + goto out_putclient; + /* Set up a helper thread to actually return the delegation */ + switch(nfs_async_inode_return_delegation(inode, &args->stateid)) { + case 0: + res = 0; + break; + case -ENOENT: + res = htonl(NFS4ERR_BAD_STATEID); + break; + default: + res = htonl(NFS4ERR_RESOURCE); + } + iput(inode); +out_putclient: + nfs4_put_client(clp); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, ntohl(res)); + return res; +} diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/callback_xdr.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/callback_xdr.c --- linux-2.6.8.1/fs/nfs/callback_xdr.c 1969-12-31 19:00:00.000000000 -0500 +++ 
linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/callback_xdr.c 2004-08-22 21:47:55.000000000 -0400 @@ -0,0 +1,481 @@ +/* + * linux/fs/nfs/callback_xdr.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFSv4 callback encode/decode procedures + */ +#include +#include +#include +#include +#include +#include "callback.h" + +#define CB_OP_TAGLEN_MAXSZ (512) +#define CB_OP_HDR_RES_MAXSZ (2 + CB_OP_TAGLEN_MAXSZ) +#define CB_OP_GETATTR_BITMAP_MAXSZ (4) +#define CB_OP_GETATTR_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ + \ + CB_OP_GETATTR_BITMAP_MAXSZ + \ + 2 + 2 + 3 + 3) +#define CB_OP_RECALL_RES_MAXSZ (CB_OP_HDR_RES_MAXSZ) + +#define NFSDBG_FACILITY NFSDBG_CALLBACK + +typedef unsigned (*callback_process_op_t)(void *, void *); +typedef unsigned (*callback_decode_arg_t)(struct svc_rqst *, struct xdr_stream *, void *); +typedef unsigned (*callback_encode_res_t)(struct svc_rqst *, struct xdr_stream *, void *); + + +struct callback_op { + callback_process_op_t process_op; + callback_decode_arg_t decode_args; + callback_encode_res_t encode_res; + long res_maxsize; +}; + +static struct callback_op callback_ops[]; + +static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp) +{ + return htonl(NFS4_OK); +} + +static int nfs4_decode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy) +{ + return xdr_argsize_check(rqstp, p); +} + +static int nfs4_encode_void(struct svc_rqst *rqstp, uint32_t *p, void *dummy) +{ + return xdr_ressize_check(rqstp, p); +} + +static uint32_t *read_buf(struct xdr_stream *xdr, int nbytes) +{ + uint32_t *p; + + p = xdr_inline_decode(xdr, nbytes); + if (unlikely(p == NULL)) + printk(KERN_WARNING "NFSv4 callback reply buffer overflowed!\n"); + return p; +} + +static unsigned decode_string(struct xdr_stream *xdr, unsigned int *len, const char **str) +{ + uint32_t *p; + + p = read_buf(xdr, 4); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *len = ntohl(*p); + + if (*len != 0) { + p = read_buf(xdr, *len); + if (unlikely(p == NULL)) + return 
htonl(NFS4ERR_RESOURCE);
+		*str = (const char *)p;
+	} else
+		*str = NULL;
+
+	return 0;
+}
+
+/*
+ * Decode a file handle: a 4-byte length followed by that many bytes of
+ * opaque data. The unused tail of fh->data is zeroed.
+ *
+ * Returns 0 on success, htonl(NFS4ERR_BADHANDLE) if the length exceeds
+ * NFS4_FHSIZE, or htonl(NFS4ERR_RESOURCE) if the buffer is too short.
+ */
+static unsigned decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	fh->size = ntohl(*p);
+	if (fh->size > NFS4_FHSIZE)
+		return htonl(NFS4ERR_BADHANDLE);
+	p = read_buf(xdr, fh->size);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	memcpy(&fh->data[0], p, fh->size);
+	memset(&fh->data[fh->size], 0, sizeof(fh->data) - fh->size);
+	return 0;
+}
+
+/*
+ * Decode an attribute bitmap: a 4-byte word count followed by that many
+ * 32-bit words. Only the first two words are stored; words the sender
+ * did not supply are reported as zero.
+ *
+ * Returns 0 on success or htonl(NFS4ERR_RESOURCE) if the buffer is too
+ * short or the word count is absurd.
+ */
+static unsigned decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
+{
+	uint32_t *p;
+	unsigned int attrlen;
+
+	/* attrlen may be 0 or 1: never hand stale words back to the caller */
+	bitmap[0] = bitmap[1] = 0;
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	attrlen = ntohl(*p);
+	/*
+	 * Refuse counts whose byte length (attrlen << 2) would wrap or turn
+	 * negative in read_buf()'s int argument; a wrapped length could pass
+	 * the bounds check while the words below are still dereferenced.
+	 */
+	if (unlikely(attrlen > (~0U >> 3)))
+		return htonl(NFS4ERR_RESOURCE);
+	p = read_buf(xdr, attrlen << 2);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	if (likely(attrlen > 0))
+		bitmap[0] = ntohl(*p++);
+	if (attrlen > 1)
+		bitmap[1] = ntohl(*p);
+	return 0;
+}
+
+/*
+ * Decode a stateid: 16 bytes copied verbatim into stateid->data.
+ */
+static unsigned decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
+{
+	uint32_t *p;
+
+	p = read_buf(xdr, 16);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	memcpy(stateid->data, p, 16);
+	return 0;
+}
+
+/*
+ * Decode the CB_COMPOUND argument header: tag, minor version,
+ * callback_ident and operation count.
+ *
+ * Returns 0 on success, htonl(NFS4ERR_RESOURCE) for a short buffer or an
+ * over-long tag, or htonl(NFS4ERR_MINOR_VERS_MISMATCH) for a non-zero
+ * minor version. In the latter case callback_ident and nops are not set.
+ */
+static unsigned decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound_hdr_arg *hdr)
+{
+	uint32_t *p;
+	unsigned int minor_version;
+	unsigned status;
+
+	status = decode_string(xdr, &hdr->taglen, &hdr->tag);
+	if (unlikely(status != 0))
+		return status;
+	/* We do not like overly long tags! */
+	if (hdr->taglen > CB_OP_TAGLEN_MAXSZ-12 || hdr->taglen < 0) {
+		printk(KERN_WARNING "NFSv4 CALLBACK %s: client sent tag of length %u\n",
+				__FUNCTION__, hdr->taglen);
+		return htonl(NFS4ERR_RESOURCE);
+	}
+	p = read_buf(xdr, 12);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	minor_version = ntohl(*p++);
+	/* Check minor version is zero.
*/
+	if (minor_version != 0) {
+		printk(KERN_WARNING "%s: NFSv4 server callback with illegal minor version %u!\n",
+				__FUNCTION__, minor_version);
+		return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
+	}
+	hdr->callback_ident = ntohl(*p++);
+	hdr->nops = ntohl(*p);
+	return 0;
+}
+
+/*
+ * Decode the 4-byte operation number that starts each op in a COMPOUND.
+ */
+static unsigned decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
+{
+	uint32_t *p;
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	*op = ntohl(*p);
+	return 0;
+}
+
+/*
+ * Decode CB_GETATTR arguments: file handle, then attribute bitmap.
+ * args->addr is pointed at the request's source address (rqstp->rq_addr).
+ */
+static unsigned decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_getattrargs *args)
+{
+	unsigned status;
+
+	status = decode_fh(xdr, &args->fh);
+	if (unlikely(status != 0))
+		goto out;
+	args->addr = &rqstp->rq_addr;
+	status = decode_bitmap(xdr, args->bitmap);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+/*
+ * Decode CB_RECALL arguments: stateid, truncate flag, then file handle.
+ * args->addr is pointed at the request's source address (rqstp->rq_addr).
+ *
+ * Returns 0 on success or the first decode error, so that the caller
+ * does not act on a partially decoded argument.
+ */
+static unsigned decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
+{
+	uint32_t *p;
+	unsigned status;
+
+	args->addr = &rqstp->rq_addr;
+	status = decode_stateid(xdr, &args->stateid);
+	if (unlikely(status != 0))
+		goto out;
+	p = read_buf(xdr, 4);
+	if (unlikely(p == NULL)) {
+		status = htonl(NFS4ERR_RESOURCE);
+		goto out;
+	}
+	args->truncate = ntohl(*p);
+	status = decode_fh(xdr, &args->fh);
+out:
+	dprintk("%s: exit with status = %d\n", __FUNCTION__, status);
+	/* Propagate the decode result; this used to return 0 unconditionally */
+	return status;
+}
+
+/*
+ * Encode @len bytes of @str as an XDR opaque (length word plus data)
+ * using xdr_encode_opaque().
+ */
+static unsigned encode_string(struct xdr_stream *xdr, unsigned int len, const char *str)
+{
+	uint32_t *p;
+
+	p = xdr_reserve_space(xdr, 4 + len);
+	if (unlikely(p == NULL))
+		return htonl(NFS4ERR_RESOURCE);
+	xdr_encode_opaque(p, str, len);
+	return 0;
+}
+
+#define CB_SUPPORTED_ATTR0 (FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE)
+#define CB_SUPPORTED_ATTR1 (FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY)
+static unsigned encode_attr_bitmap(struct xdr_stream *xdr, const uint32_t *bitmap, uint32_t **savep)
+{
+	uint32_t bm[2];
+	uint32_t *p;
+
+	bm[0] = htonl(bitmap[0] &
CB_SUPPORTED_ATTR0); + bm[1] = htonl(bitmap[1] & CB_SUPPORTED_ATTR1); + if (bm[1] != 0) { + p = xdr_reserve_space(xdr, 16); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(2); + *p++ = bm[0]; + *p++ = bm[1]; + } else if (bm[0] != 0) { + p = xdr_reserve_space(xdr, 12); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(1); + *p++ = bm[0]; + } else { + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(0); + } + *savep = p; + return 0; +} + +static unsigned encode_attr_change(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t change) +{ + uint32_t *p; + + if (!(bitmap[0] & FATTR4_WORD0_CHANGE)) + return 0; + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == 0)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, change); + return 0; +} + +static unsigned encode_attr_size(struct xdr_stream *xdr, const uint32_t *bitmap, uint64_t size) +{ + uint32_t *p; + + if (!(bitmap[0] & FATTR4_WORD0_SIZE)) + return 0; + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == 0)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, size); + return 0; +} + +static unsigned encode_attr_time(struct xdr_stream *xdr, const struct timespec *time) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 12); + if (unlikely(p == 0)) + return htonl(NFS4ERR_RESOURCE); + p = xdr_encode_hyper(p, time->tv_sec); + *p = htonl(time->tv_nsec); + return 0; +} + +static unsigned encode_attr_ctime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_METADATA)) + return 0; + return encode_attr_time(xdr,time); +} + +static unsigned encode_attr_mtime(struct xdr_stream *xdr, const uint32_t *bitmap, const struct timespec *time) +{ + if (!(bitmap[1] & FATTR4_WORD1_TIME_MODIFY)) + return 0; + return encode_attr_time(xdr,time); +} + +static unsigned encode_compound_hdr_res(struct xdr_stream *xdr, struct cb_compound_hdr_res *hdr) 
+{ + unsigned status; + + hdr->status = xdr_reserve_space(xdr, 4); + if (unlikely(hdr->status == NULL)) + return htonl(NFS4ERR_RESOURCE); + status = encode_string(xdr, hdr->taglen, hdr->tag); + if (unlikely(status != 0)) + return status; + hdr->nops = xdr_reserve_space(xdr, 4); + if (unlikely(hdr->nops == NULL)) + return htonl(NFS4ERR_RESOURCE); + return 0; +} + +static unsigned encode_op_hdr(struct xdr_stream *xdr, uint32_t op, uint32_t res) +{ + uint32_t *p; + + p = xdr_reserve_space(xdr, 8); + if (unlikely(p == NULL)) + return htonl(NFS4ERR_RESOURCE); + *p++ = htonl(op); + *p = res; + return 0; +} + +static unsigned encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr, const struct cb_getattrres *res) +{ + uint32_t *savep; + unsigned status = res->status; + + if (unlikely(status != 0)) + goto out; + status = encode_attr_bitmap(xdr, res->bitmap, &savep); + if (unlikely(status != 0)) + goto out; + status = encode_attr_change(xdr, res->bitmap, res->change_attr); + if (unlikely(status != 0)) + goto out; + status = encode_attr_size(xdr, res->bitmap, res->size); + if (unlikely(status != 0)) + goto out; + status = encode_attr_ctime(xdr, res->bitmap, &res->ctime); + if (unlikely(status != 0)) + goto out; + status = encode_attr_mtime(xdr, res->bitmap, &res->mtime); + *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1))); +out: + dprintk("%s: exit with status = %d\n", __FUNCTION__, status); + return status; +} + +static unsigned process_op(struct svc_rqst *rqstp, + struct xdr_stream *xdr_in, void *argp, + struct xdr_stream *xdr_out, void *resp) +{ + struct callback_op *op; + unsigned int op_nr; + unsigned int status = 0; + long maxlen; + unsigned res; + + dprintk("%s: start\n", __FUNCTION__); + status = decode_op_hdr(xdr_in, &op_nr); + if (unlikely(status != 0)) { + op_nr = OP_CB_ILLEGAL; + op = &callback_ops[0]; + } else if (unlikely(op_nr != OP_CB_GETATTR && op_nr != OP_CB_RECALL)) { + op_nr = OP_CB_ILLEGAL; + op = &callback_ops[0]; + status = 
htonl(NFS4ERR_OP_ILLEGAL);
+	} else
+		op = &callback_ops[op_nr];
+
+	/* Only decode and run the op while there is room left for a reply */
+	maxlen = xdr_out->end - xdr_out->p;
+	if (maxlen > 0 && maxlen < PAGE_SIZE) {
+		if (likely(status == 0 && op->decode_args != NULL))
+			status = op->decode_args(rqstp, xdr_in, argp);
+		if (likely(status == 0 && op->process_op != NULL))
+			status = op->process_op(argp, resp);
+	} else
+		status = htonl(NFS4ERR_RESOURCE);
+
+	/* The op header (opcode + status) is emitted even on failure */
+	res = encode_op_hdr(xdr_out, op_nr, status);
+	if (status == 0)
+		status = res;
+	if (op->encode_res != NULL && status == 0)
+		status = op->encode_res(rqstp, xdr_out, resp);
+	dprintk("%s: done, status = %d\n", __FUNCTION__, status);
+	return status;
+}
+
+/*
+ * Decode, process and encode a COMPOUND
+ *
+ * The NFSv4 status of the compound and the number of operations handled
+ * are written into the reply through hdr_res.status / hdr_res.nops; the
+ * function's own return value is the RPC accept status.
+ *
+ * Returns rpc_garbage_args if the argument header cannot be decoded,
+ * rpc_system_err if the reply header cannot be encoded, and rpc_success
+ * otherwise (including when an individual operation failed).
+ */
+static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp)
+{
+	struct cb_compound_hdr_arg hdr_arg;
+	struct cb_compound_hdr_res hdr_res;
+	struct xdr_stream xdr_in, xdr_out;
+	uint32_t *p;
+	unsigned int status;
+	unsigned int nops = 0;
+
+	dprintk("%s: start\n", __FUNCTION__);
+
+	xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+
+	p = (uint32_t*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
+	rqstp->rq_res.head[0].iov_len = PAGE_SIZE;
+	xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
+
+	/*
+	 * NFS4ERR_RESOURCE here means the header was truncated or the tag
+	 * was rejected; hdr_arg may then be only partly filled in, so do not
+	 * echo it back. Other errors (minor version mismatch) still get a
+	 * proper COMPOUND reply carrying that status and zero operations.
+	 */
+	status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
+	if (status == htonl(NFS4ERR_RESOURCE))
+		return rpc_garbage_args;
+	hdr_res.taglen = hdr_arg.taglen;
+	hdr_res.tag = hdr_arg.tag;
+	/* On failure hdr_res.status / hdr_res.nops may be NULL: bail out */
+	if (encode_compound_hdr_res(&xdr_out, &hdr_res) != 0)
+		return rpc_system_err;
+
+	/* Stop at the first failing op; nops counts that op as well */
+	while (status == 0 && nops != hdr_arg.nops) {
+		status = process_op(rqstp, &xdr_in, argp, &xdr_out, resp);
+		nops++;
+	}
+	*hdr_res.status = status;
+	*hdr_res.nops = htonl(nops);
+	dprintk("%s: done, status = %u\n", __FUNCTION__, status);
+	return rpc_success;
+}
+
+/*
+ * Define NFS4 callback COMPOUND ops.
+ */ +static struct callback_op callback_ops[] = { + [0] = { + .res_maxsize = CB_OP_HDR_RES_MAXSZ, + }, + [OP_CB_GETATTR] = { + .process_op = (callback_process_op_t)nfs4_callback_getattr, + .decode_args = (callback_decode_arg_t)decode_getattr_args, + .encode_res = (callback_encode_res_t)encode_getattr_res, + .res_maxsize = CB_OP_GETATTR_RES_MAXSZ, + }, + [OP_CB_RECALL] = { + .process_op = (callback_process_op_t)nfs4_callback_recall, + .decode_args = (callback_decode_arg_t)decode_recall_args, + .res_maxsize = CB_OP_RECALL_RES_MAXSZ, + } +}; + +/* + * Define NFS4 callback procedures + */ +static struct svc_procedure nfs4_callback_procedures1[] = { + [CB_NULL] = { + .pc_func = nfs4_callback_null, + .pc_decode = (kxdrproc_t)nfs4_decode_void, + .pc_encode = (kxdrproc_t)nfs4_encode_void, + .pc_xdrressize = 1, + }, + [CB_COMPOUND] = { + .pc_func = nfs4_callback_compound, + .pc_encode = (kxdrproc_t)nfs4_encode_void, + .pc_argsize = 256, + .pc_ressize = 256, + .pc_xdrressize = NFS4_CALLBACK_BUFSIZE, + } +}; + +struct svc_version nfs4_callback_version1 = { + .vs_vers = 1, + .vs_nproc = ARRAY_SIZE(nfs4_callback_procedures1), + .vs_proc = nfs4_callback_procedures1, + .vs_xdrsize = NFS4_CALLBACK_XDRSIZE, + .vs_dispatch = NULL, +}; + diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/delegation.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/delegation.c --- linux-2.6.8.1/fs/nfs/delegation.c 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/delegation.c 2004-08-22 21:48:49.000000000 -0400 @@ -0,0 +1,342 @@ +/* + * linux/fs/nfs/delegation.c + * + * Copyright (C) 2004 Trond Myklebust + * + * NFS file delegation management + * + */ +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "delegation.h" + +static struct nfs_delegation *nfs_alloc_delegation(void) +{ + return (struct nfs_delegation *)kmalloc(sizeof(struct nfs_delegation), GFP_KERNEL); +} + +static void nfs_free_delegation(struct 
nfs_delegation *delegation)
+{
+	if (delegation->cred)
+		put_rpccred(delegation->cred);
+	kfree(delegation);
+}
+
+/*
+ * Walk the inode's open contexts and recall every open state that is
+ * still flagged NFS_DELEGATED_STATE. i_lock is dropped around each
+ * recall, so the list walk is restarted from the top afterwards.
+ * Gives up on the first recall that returns an error.
+ */
+static void nfs_delegation_claim_opens(struct inode *inode)
+{
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_open_context *ctx;
+	struct nfs4_state *state;
+	int err;
+
+again:
+	spin_lock(&inode->i_lock);
+	list_for_each_entry(ctx, &nfsi->open_files, list) {
+		state = ctx->state;
+		if (state == NULL)
+			continue;
+		if (!test_bit(NFS_DELEGATED_STATE, &state->flags))
+			continue;
+		/* Pin the context so it survives while i_lock is dropped */
+		get_nfs_open_context(ctx);
+		spin_unlock(&inode->i_lock);
+		err = nfs4_open_delegation_recall(ctx->dentry, state);
+		/* Drop the pin on every path, including the error return */
+		put_nfs_open_context(ctx);
+		if (err < 0)
+			return;
+		goto again;
+	}
+	spin_unlock(&inode->i_lock);
+}
+
+/*
+ * Refresh the inode's existing delegation from a new OPEN result
+ * (stateid, type, maxsize, credential) and clear its
+ * NFS_DELEGATION_NEED_RECLAIM flag. Does nothing if the inode holds no
+ * delegation.
+ */
+void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+{
+	struct nfs_delegation *delegation = NFS_I(inode)->delegation;
+	struct rpc_cred *oldcred;
+
+	if (delegation == NULL)
+		return;
+	memcpy(delegation->stateid.data, res->delegation.data,
+			sizeof(delegation->stateid.data));
+	delegation->type = res->delegation_type;
+	delegation->maxsize = res->maxsize;
+	/*
+	 * Take the new reference before dropping the old one: the two may be
+	 * the same credential. The caller's own reference is left untouched.
+	 */
+	oldcred = delegation->cred;
+	delegation->cred = get_rpccred(cred);
+	delegation->flags &= ~NFS_DELEGATION_NEED_RECLAIM;
+	NFS_I(inode)->delegation_state = delegation->type;
+	smp_wmb();
+	if (oldcred != NULL)
+		put_rpccred(oldcred);
+}
+
+/*
+ * Set up a delegation on an inode
+ *
+ * Returns 0 on success (also when an identical delegation is already
+ * attached), -ENOMEM if allocation fails, or -EIO if the inode already
+ * holds a delegation with a different stateid or type.
+ */
+int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int status = 0;
+
+	delegation = nfs_alloc_delegation();
+	if (delegation == NULL)
+		return -ENOMEM;
+	memcpy(delegation->stateid.data, res->delegation.data,
+			sizeof(delegation->stateid.data));
+	delegation->type = res->delegation_type;
+	delegation->maxsize = res->maxsize;
+	/* kmalloc() does not zero: start with no flags set */
+	delegation->flags = 0;
+	delegation->cred = get_rpccred(cred);
+	delegation->inode = inode;
+
+	spin_lock(&clp->cl_lock);
+	if (nfsi->delegation == NULL) {
+		list_add(&delegation->super_list, &clp->cl_delegations);
+		nfsi->delegation = delegation;
+		nfsi->delegation_state = delegation->type;
+		/* Ownership passed to the inode: nothing left to free here */
+		delegation = NULL;
+	} else {
+		if (memcmp(&delegation->stateid, &nfsi->delegation->stateid,
+					sizeof(delegation->stateid)) != 0 ||
+				delegation->type != nfsi->delegation->type) {
+			printk("%s: server %u.%u.%u.%u, handed out a duplicate delegation!\n",
+					__FUNCTION__, NIPQUAD(clp->cl_addr));
+			status = -EIO;
+		}
+	}
+	spin_unlock(&clp->cl_lock);
+	/* Unused copy: release its credential reference along with it */
+	if (delegation != NULL)
+		nfs_free_delegation(delegation);
+	return status;
+}
+
+/*
+ * Revalidate the inode, send DELEGRETURN for @delegation and free it.
+ * The delegation is freed whatever nfs4_proc_delegreturn() returns;
+ * that return value is passed back to the caller.
+ */
+static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *delegation)
+{
+	int res = 0;
+
+	__nfs_revalidate_inode(NFS_SERVER(inode), inode);
+
+	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
+	nfs_free_delegation(delegation);
+	return res;
+}
+
+/* Sync all data to disk upon delegation return */
+static void nfs_msync_inode(struct inode *inode)
+{
+	filemap_fdatawrite(inode->i_mapping);
+	nfs_wb_all(inode);
+	filemap_fdatawait(inode->i_mapping);
+}
+
+/*
+ * Basic procedure for returning a delegation to the server
+ *
+ * Data is flushed before and after the delegation is detached from the
+ * inode. Returns 0 if the inode held no delegation, otherwise the result
+ * of nfs_do_return_delegation().
+ */
+int nfs_inode_return_delegation(struct inode *inode)
+{
+	struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state;
+	struct nfs_inode *nfsi = NFS_I(inode);
+	struct nfs_delegation *delegation;
+	int res = 0;
+
+	nfs_msync_inode(inode);
+	down_read(&clp->cl_sem);
+	/* Guard against new delegated open calls */
+	down_write(&nfsi->rwsem);
+	spin_lock(&clp->cl_lock);
+	delegation = nfsi->delegation;
+	if (delegation != NULL) {
+		/* Detach under cl_lock; the actual return happens unlocked */
+		list_del_init(&delegation->super_list);
+		nfsi->delegation = NULL;
+		nfsi->delegation_state = 0;
+	}
+	spin_unlock(&clp->cl_lock);
+	nfs_delegation_claim_opens(inode);
+	up_write(&nfsi->rwsem);
+	up_read(&clp->cl_sem);
+	nfs_msync_inode(inode);
+
+	if (delegation != NULL)
+		res = nfs_do_return_delegation(inode, delegation);
+	return res;
+}
+
+/*
+ * Return all delegations associated to a super block
*/ +void nfs_return_all_delegations(struct super_block *sb) +{ + struct nfs4_client *clp = NFS_SB(sb)->nfs4_state; + struct nfs_delegation *delegation; + struct inode *inode; + + if (clp == NULL) + return; +restart: + spin_lock(&clp->cl_lock); + list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + if (delegation->inode->i_sb != sb) + continue; + inode = igrab(delegation->inode); + if (inode == NULL) + continue; + spin_unlock(&clp->cl_lock); + nfs_inode_return_delegation(inode); + iput(inode); + goto restart; + } + spin_unlock(&clp->cl_lock); +} + +/* + * Return all delegations following an NFS4ERR_CB_PATH_DOWN error. + */ +void nfs_handle_cb_pathdown(struct nfs4_client *clp) +{ + struct nfs_delegation *delegation; + struct inode *inode; + + if (clp == NULL) + return; +restart: + spin_lock(&clp->cl_lock); + list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + inode = igrab(delegation->inode); + if (inode == NULL) + continue; + spin_unlock(&clp->cl_lock); + nfs_inode_return_delegation(inode); + iput(inode); + goto restart; + } + spin_unlock(&clp->cl_lock); +} + +struct recall_threadargs { + struct inode *inode; + struct nfs4_client *clp; + const nfs4_stateid *stateid; + + struct completion started; + int result; +}; + +static int recall_thread(void *data) +{ + struct recall_threadargs *args = (struct recall_threadargs *)data; + struct inode *inode = igrab(args->inode); + struct nfs4_client *clp = NFS_SERVER(inode)->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_delegation *delegation; + + daemonize("nfsv4-delegreturn"); + + nfs_msync_inode(inode); + down_read(&clp->cl_sem); + down_write(&nfsi->rwsem); + spin_lock(&clp->cl_lock); + delegation = nfsi->delegation; + if (delegation != NULL && memcmp(delegation->stateid.data, + args->stateid->data, + sizeof(delegation->stateid.data)) == 0) { + list_del_init(&delegation->super_list); + nfsi->delegation = NULL; + nfsi->delegation_state = 0; + args->result = 0; + } else { 
+ delegation = NULL; + args->result = -ENOENT; + } + spin_unlock(&clp->cl_lock); + complete(&args->started); + nfs_delegation_claim_opens(inode); + up_write(&nfsi->rwsem); + up_read(&clp->cl_sem); + nfs_msync_inode(inode); + + if (delegation != NULL) + nfs_do_return_delegation(inode, delegation); + iput(inode); + module_put_and_exit(0); +} + +/* + * Asynchronous delegation recall! + */ +int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) +{ + struct recall_threadargs data = { + .inode = inode, + .stateid = stateid, + }; + int status; + + init_completion(&data.started); + __module_get(THIS_MODULE); + status = kernel_thread(recall_thread, &data, CLONE_KERNEL); + if (status < 0) + goto out_module_put; + wait_for_completion(&data.started); + return data.result; +out_module_put: + module_put(THIS_MODULE); + return status; +} + +/* + * Retrieve the inode associated with a delegation + */ +struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle) +{ + struct nfs_delegation *delegation; + struct inode *res = NULL; + spin_lock(&clp->cl_lock); + list_for_each_entry(delegation, &clp->cl_delegations, super_list) { + if (nfs_compare_fh(fhandle, &NFS_I(delegation->inode)->fh) == 0) { + res = igrab(delegation->inode); + break; + } + } + spin_unlock(&clp->cl_lock); + return res; +} + +/* + * Mark all delegations as needing to be reclaimed + */ +void nfs_delegation_mark_reclaim(struct nfs4_client *clp) +{ + struct nfs_delegation *delegation; + spin_lock(&clp->cl_lock); + list_for_each_entry(delegation, &clp->cl_delegations, super_list) + delegation->flags |= NFS_DELEGATION_NEED_RECLAIM; + spin_unlock(&clp->cl_lock); +} + +/* + * Reap all unclaimed delegations after reboot recovery is done + */ +void nfs_delegation_reap_unclaimed(struct nfs4_client *clp) +{ + struct nfs_delegation *delegation, *n; + LIST_HEAD(head); + spin_lock(&clp->cl_lock); + list_for_each_entry_safe(delegation, n, &clp->cl_delegations, 
super_list) { + if ((delegation->flags & NFS_DELEGATION_NEED_RECLAIM) == 0) + continue; + list_move(&delegation->super_list, &head); + NFS_I(delegation->inode)->delegation = NULL; + NFS_I(delegation->inode)->delegation_state = 0; + } + spin_unlock(&clp->cl_lock); + while(!list_empty(&head)) { + delegation = list_entry(head.next, struct nfs_delegation, super_list); + list_del(&delegation->super_list); + nfs_free_delegation(delegation); + } +} diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/delegation.h linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/delegation.h --- linux-2.6.8.1/fs/nfs/delegation.h 1969-12-31 19:00:00.000000000 -0500 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/delegation.h 2004-08-22 21:48:49.000000000 -0400 @@ -0,0 +1,57 @@ +/* + * linux/fs/nfs/delegation.h + * + * Copyright (c) Trond Myklebust + * + * Definitions pertaining to NFS delegated files + */ +#ifndef FS_NFS_DELEGATION_H +#define FS_NFS_DELEGATION_H + +#if defined(CONFIG_NFS_V4) +/* + * NFSv4 delegation + */ +struct nfs_delegation { + struct list_head super_list; + struct rpc_cred *cred; + struct inode *inode; + nfs4_stateid stateid; + int type; +#define NFS_DELEGATION_NEED_RECLAIM 1 + long flags; + loff_t maxsize; +}; + +int nfs_inode_set_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); +void nfs_inode_reclaim_delegation(struct inode *inode, struct rpc_cred *cred, struct nfs_openres *res); +int nfs_inode_return_delegation(struct inode *inode); +int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid); + +struct inode *nfs_delegation_find_inode(struct nfs4_client *clp, const struct nfs_fh *fhandle); +void nfs_return_all_delegations(struct super_block *sb); +void nfs_handle_cb_pathdown(struct nfs4_client *clp); + +void nfs_delegation_mark_reclaim(struct nfs4_client *clp); +void nfs_delegation_reap_unclaimed(struct nfs4_client *clp); + +/* NFSv4 delegation-related procedures */ +int nfs4_proc_delegreturn(struct 
inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid); +int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state); + +static inline int nfs_have_delegation(struct inode *inode, int flags) +{ + flags &= FMODE_READ|FMODE_WRITE; + smp_rmb(); + if ((NFS_I(inode)->delegation_state & flags) == flags) + return 1; + return 0; +} +#else +static inline int nfs_have_delegation(struct inode *inode, int flags) +{ + return 0; +} +#endif + +#endif diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/dir.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/dir.c --- linux-2.6.8.1/fs/nfs/dir.c 2004-08-14 14:26:35.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/dir.c 2004-08-22 21:48:42.000000000 -0400 @@ -32,6 +32,8 @@ #include #include +#include "delegation.h" + #define NFS_PARANOIA 1 /* #define NFS_DEBUG_VERBOSE 1 */ @@ -610,7 +612,7 @@ static int nfs_lookup_revalidate(struct verifier = nfs_save_change_attribute(dir); error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); if (!error) { - if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) + if (nfs_compare_fh(NFS_FH(inode), &fhandle)) goto out_bad; if (nfs_lookup_verify_inode(inode, isopen)) goto out_zap_parent; @@ -623,7 +625,7 @@ static int nfs_lookup_revalidate(struct error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, &fhandle, &fattr); if (error) goto out_bad; - if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) + if (nfs_compare_fh(NFS_FH(inode), &fhandle)) goto out_bad; if ((error = nfs_refresh_inode(inode, &fattr)) != 0) goto out_bad; @@ -850,22 +852,22 @@ static int nfs_open_revalidate(struct de unsigned long verifier; int openflags, ret = 0; - /* NFS only supports OPEN for regular files */ - if (inode && !S_ISREG(inode->i_mode)) - goto no_open; parent = dget_parent(dentry); dir = parent->d_inode; if (!is_atomic_open(dir, nd)) goto no_open; + /* We can't create new files in nfs_open_revalidate(), so we + * optimize away revalidation of 
negative dentries. + */ + if (inode == NULL) + goto out; + /* NFS only supports OPEN on regular files */ + if (!S_ISREG(inode->i_mode)) + goto no_open; openflags = nd->intent.open.flags; - if (openflags & O_CREAT) { - /* If this is a negative dentry, just drop it */ - if (!inode) - goto out; - /* If this is exclusive open, just revalidate */ - if (openflags & O_EXCL) - goto no_open; - } + /* We cannot do exclusive creation on a positive dentry */ + if ((openflags & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL)) + goto no_open; /* We can't create new files, or truncate existing ones here */ openflags &= ~(O_CREAT|O_TRUNC); @@ -887,6 +889,8 @@ out: return ret; no_open: dput(parent); + if (inode != NULL && nfs_have_delegation(inode, FMODE_READ)) + return 1; return nfs_lookup_revalidate(dentry, nd); } #endif /* CONFIG_NFSV4 */ @@ -982,12 +986,18 @@ static int nfs_instantiate(struct dentry /* We may have been initialized further down */ if (dentry->d_inode) return 0; - if (fhandle->size == 0 || !(fattr->valid & NFS_ATTR_FATTR)) { + if (fhandle->size == 0) { struct inode *dir = dentry->d_parent->d_inode; error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); if (error) goto out_err; } + if (!(fattr->valid & NFS_ATTR_FATTR)) { + struct nfs_server *server = NFS_SB(dentry->d_sb); + error = server->rpc_ops->getattr(server, fhandle, fattr); + if (error < 0) + goto out_err; + } inode = nfs_fhget(dentry->d_sb, fhandle, fattr); if (inode) { d_instantiate(dentry, inode); @@ -1299,19 +1309,6 @@ nfs_symlink(struct inode *dir, struct de dfprintk(VFS, "NFS: symlink(%s/%ld, %s, %s)\n", dir->i_sb->s_id, dir->i_ino, dentry->d_name.name, symname); - error = -ENAMETOOLONG; - switch (NFS_PROTO(dir)->version) { - case 2: - if (strlen(symname) > NFS2_MAXPATHLEN) - goto out; - break; - case 3: - if (strlen(symname) > NFS3_MAXPATHLEN) - goto out; - default: - break; - } - #ifdef NFS_PARANOIA if (dentry->d_inode) printk("nfs_proc_symlink: %s/%s not negative!\n", @@ -1341,8 +1338,6 @@ 
dentry->d_parent->d_name.name, dentry->d d_drop(dentry); } unlock_kernel(); - -out: return error; } @@ -1498,10 +1493,56 @@ out: return error; } -int -nfs_permission(struct inode *inode, int mask, struct nameidata *nd) +int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +{ + struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + + if (cache->cred != cred + || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) + || (NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) + return -ENOENT; + memcpy(res, cache, sizeof(*res)); + return 0; +} + +void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +{ + struct nfs_access_entry *cache = &NFS_I(inode)->cache_access; + + if (cache->cred != set->cred) { + if (cache->cred) + put_rpccred(cache->cred); + cache->cred = get_rpccred(set->cred); + } + cache->jiffies = set->jiffies; + cache->mask = set->mask; +} + +static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) +{ + struct nfs_access_entry cache; + int status; + + status = nfs_access_get_cached(inode, cred, &cache); + if (status == 0) + goto out; + + /* Be clever: ask server to check for all possible rights */ + cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.cred = cred; + cache.jiffies = jiffies; + status = NFS_PROTO(inode)->access(inode, &cache); + if (status != 0) + return status; + nfs_access_add_cache(inode, &cache); +out: + if ((cache.mask & mask) == mask) + return 0; + return -EACCES; +} + +int nfs_permission(struct inode *inode, int mask, struct nameidata *nd) { - struct nfs_access_cache *cache = &NFS_I(inode)->cache_access; struct rpc_cred *cred; int mode = inode->i_mode; int res; @@ -1542,24 +1583,7 @@ nfs_permission(struct inode *inode, int goto out_notsup; cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); - if (cache->cred == cred - && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) - && !(NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) { - if 
(!(res = cache->err)) { - /* Is the mask a subset of an accepted mask? */ - if ((cache->mask & mask) == mask) - goto out; - } else { - /* ...or is it a superset of a rejected mask? */ - if ((cache->mask & mask) == cache->mask) - goto out; - } - } - - res = NFS_PROTO(inode)->access(inode, cred, mask); - if (!res || res == -EACCES) - goto add_cache; -out: + res = nfs_do_access(inode, cred, mask); put_rpccred(cred); unlock_kernel(); return res; @@ -1568,15 +1592,6 @@ out_notsup: res = vfs_permission(inode, mask); unlock_kernel(); return res; -add_cache: - cache->jiffies = jiffies; - if (cache->cred) - put_rpccred(cache->cred); - cache->cred = cred; - cache->mask = mask; - cache->err = res; - unlock_kernel(); - return res; } /* diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/direct.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/direct.c --- linux-2.6.8.1/fs/nfs/direct.c 2004-08-14 14:27:32.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/direct.c 2004-08-22 21:47:31.000000000 -0400 @@ -110,7 +110,7 @@ nfs_free_user_pages(struct page **pages, * nfs_direct_read_seg - Read in one iov segment. Generate separate * read RPCs for each "rsize" bytes. 
* @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * user_addr: starting address of this segment of user's buffer * count: size of this segment * file_offset: offset in file to begin the operation @@ -118,7 +118,7 @@ nfs_free_user_pages(struct page **pages, * nr_pages: size of pages array */ static int -nfs_direct_read_seg(struct inode *inode, struct file *file, +nfs_direct_read_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { @@ -127,9 +127,10 @@ nfs_direct_read_seg(struct inode *inode, int curpage = 0; struct nfs_read_data rdata = { .inode = inode, + .cred = ctx->cred, .args = { .fh = NFS_FH(inode), - .lockowner = current->files, + .context = ctx, }, .res = { .fattr = &rdata.fattr, @@ -151,7 +152,7 @@ nfs_direct_read_seg(struct inode *inode, user_addr + tot_bytes, rdata.args.pgbase, curpage); lock_kernel(); - result = NFS_PROTO(inode)->read(&rdata, file); + result = NFS_PROTO(inode)->read(&rdata); unlock_kernel(); if (result <= 0) { @@ -183,7 +184,7 @@ nfs_direct_read_seg(struct inode *inode, * nfs_direct_read - For each iov segment, map the user's buffer * then generate read RPCs. * @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * @iov: array of vectors that define I/O buffer * file_offset: offset in file to begin the operation * nr_segs: size of iovec array @@ -193,7 +194,7 @@ nfs_direct_read_seg(struct inode *inode, * server. 
*/ static ssize_t -nfs_direct_read(struct inode *inode, struct file *file, +nfs_direct_read(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { @@ -216,7 +217,7 @@ nfs_direct_read(struct inode *inode, str return page_count; } - result = nfs_direct_read_seg(inode, file, user_addr, size, + result = nfs_direct_read_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); nfs_free_user_pages(pages, page_count, 1); @@ -239,7 +240,7 @@ nfs_direct_read(struct inode *inode, str * nfs_direct_write_seg - Write out one iov segment. Generate separate * write RPCs for each "wsize" bytes, then commit. * @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * user_addr: starting address of this segment of user's buffer * count: size of this segment * file_offset: offset in file to begin the operation @@ -247,7 +248,7 @@ nfs_direct_read(struct inode *inode, str * nr_pages: size of pages array */ static int -nfs_direct_write_seg(struct inode *inode, struct file *file, +nfs_direct_write_seg(struct inode *inode, struct nfs_open_context *ctx, unsigned long user_addr, size_t count, loff_t file_offset, struct page **pages, int nr_pages) { @@ -257,9 +258,10 @@ nfs_direct_write_seg(struct inode *inode struct nfs_writeverf first_verf; struct nfs_write_data wdata = { .inode = inode, + .cred = ctx->cred, .args = { .fh = NFS_FH(inode), - .lockowner = current->files, + .context = ctx, }, .res = { .fattr = &wdata.fattr, @@ -290,7 +292,7 @@ retry: user_addr + tot_bytes, wdata.args.pgbase, curpage); lock_kernel(); - result = NFS_PROTO(inode)->write(&wdata, file); + result = NFS_PROTO(inode)->write(&wdata); unlock_kernel(); if (result <= 0) { @@ -325,7 +327,7 @@ retry: wdata.args.offset = file_offset; lock_kernel(); - result = NFS_PROTO(inode)->commit(&wdata, file); + result = NFS_PROTO(inode)->commit(&wdata); unlock_kernel(); if (result < 0 || memcmp(&first_verf.verifier, @@ -349,7 
+351,7 @@ sync_retry: * nfs_direct_write - For each iov segment, map the user's buffer * then generate write and commit RPCs. * @inode: target inode - * @file: target file (may be NULL) + * @ctx: target file open context * @iov: array of vectors that define I/O buffer * file_offset: offset in file to begin the operation * nr_segs: size of iovec array @@ -358,8 +360,7 @@ sync_retry: * that non-direct readers might access, so they will pick up these * writes immediately. */ -static ssize_t -nfs_direct_write(struct inode *inode, struct file *file, +static int nfs_direct_write(struct inode *inode, struct nfs_open_context *ctx, const struct iovec *iov, loff_t file_offset, unsigned long nr_segs) { @@ -382,7 +383,7 @@ nfs_direct_write(struct inode *inode, st return page_count; } - result = nfs_direct_write_seg(inode, file, user_addr, size, + result = nfs_direct_write_seg(inode, ctx, user_addr, size, file_offset, pages, page_count); nfs_free_user_pages(pages, page_count, 0); @@ -414,6 +415,7 @@ nfs_direct_IO(int rw, struct kiocb *iocb { ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx; struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; @@ -423,19 +425,20 @@ nfs_direct_IO(int rw, struct kiocb *iocb if (!is_sync_kiocb(iocb)) return result; + ctx = (struct nfs_open_context *)file->private_data; switch (rw) { case READ: dprintk("NFS: direct_IO(read) (%s) off/no(%Lu/%lu)\n", dentry->d_name.name, file_offset, nr_segs); - result = nfs_direct_read(inode, file, iov, + result = nfs_direct_read(inode, ctx, iov, file_offset, nr_segs); break; case WRITE: dprintk("NFS: direct_IO(write) (%s) off/no(%Lu/%lu)\n", dentry->d_name.name, file_offset, nr_segs); - result = nfs_direct_write(inode, file, iov, + result = nfs_direct_write(inode, ctx, iov, file_offset, nr_segs); break; default: @@ -471,6 +474,8 @@ nfs_file_direct_read(struct kiocb *iocb, ssize_t retval = -EINVAL; loff_t *ppos = &iocb->ki_pos; struct file *file = 
iocb->ki_filp; + struct nfs_open_context *ctx = + (struct nfs_open_context *) file->private_data; struct dentry *dentry = file->f_dentry; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; @@ -502,7 +507,7 @@ nfs_file_direct_read(struct kiocb *iocb, goto out; } - retval = nfs_direct_read(inode, file, &iov, pos, 1); + retval = nfs_direct_read(inode, ctx, &iov, pos, 1); if (retval > 0) *ppos = pos + retval; @@ -542,6 +547,8 @@ nfs_file_direct_write(struct kiocb *iocb loff_t *ppos = &iocb->ki_pos; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; struct file *file = iocb->ki_filp; + struct nfs_open_context *ctx = + (struct nfs_open_context *) file->private_data; struct dentry *dentry = file->f_dentry; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; @@ -589,7 +596,7 @@ nfs_file_direct_write(struct kiocb *iocb goto out; } - retval = nfs_direct_write(inode, file, &iov, pos, 1); + retval = nfs_direct_write(inode, ctx, &iov, pos, 1); if (mapping->nrpages) invalidate_inode_pages2(mapping); if (retval > 0) diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/file.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/file.c --- linux-2.6.8.1/fs/nfs/file.c 2004-08-14 14:26:58.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/file.c 2004-08-22 21:48:42.000000000 -0400 @@ -31,6 +31,8 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_FILE static int nfs_file_open(struct inode *, struct file *); @@ -113,6 +115,7 @@ nfs_file_release(struct inode *inode, st static int nfs_file_flush(struct file *file) { + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = file->f_dentry->d_inode; int status; @@ -124,9 +127,9 @@ nfs_file_flush(struct file *file) /* Ensure that data+attribute caches are up to date after close() */ status = nfs_wb_all(inode); if (!status) { - status = file->f_error; - file->f_error = 0; - if 
(!status) + status = ctx->error; + ctx->error = 0; + if (!status && !nfs_have_delegation(inode, FMODE_READ)) __nfs_revalidate_inode(NFS_SERVER(inode), inode); } unlock_kernel(); @@ -197,6 +200,7 @@ nfs_file_mmap(struct file * file, struct static int nfs_fsync(struct file *file, struct dentry *dentry, int datasync) { + struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data; struct inode *inode = dentry->d_inode; int status; @@ -205,8 +209,8 @@ nfs_fsync(struct file *file, struct dent lock_kernel(); status = nfs_wb_all(inode); if (!status) { - status = file->f_error; - file->f_error = 0; + status = ctx->error; + ctx->error = 0; } unlock_kernel(); return status; @@ -288,6 +292,90 @@ out_swapfile: goto out; } +static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int status; + + lock_kernel(); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + unlock_kernel(); + return status; +} + +static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + sigset_t oldset; + int status; + + rpc_clnt_sigmask(NFS_CLIENT(inode), &oldset); + /* + * Flush all pending writes before doing anything + * with locks.. + */ + filemap_fdatawrite(filp->f_mapping); + down(&inode->i_sem); + nfs_wb_all(inode); + up(&inode->i_sem); + filemap_fdatawait(filp->f_mapping); + + /* NOTE: special case + * If we're signalled while cleaning up locks on process exit, we + * still need to complete the unlock. + */ + lock_kernel(); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset); + return status; +} + +static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) +{ + struct inode *inode = filp->f_mapping->host; + int status; + + /* + * Flush all pending writes before doing anything + * with locks.. 
+ */ + status = filemap_fdatawrite(filp->f_mapping); + if (status == 0) { + down(&inode->i_sem); + status = nfs_wb_all(inode); + up(&inode->i_sem); + if (status == 0) + status = filemap_fdatawait(filp->f_mapping); + } + if (status < 0) + return status; + + lock_kernel(); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); + /* If we were signalled we still need to ensure that + * we clean up any state on the server. We therefore + * record the lock call as having succeeded in order to + * ensure that locks_remove_posix() cleans it out when + * the process exits. + */ + if (status == -EINTR || status == -ERESTARTSYS) + posix_lock_file(filp, fl); + unlock_kernel(); + if (status < 0) + return status; + /* + * Make sure we clear the cache whenever we try to get the lock. + * This makes locking act as a cache coherency point. + */ + filemap_fdatawrite(filp->f_mapping); + down(&inode->i_sem); + nfs_wb_all(inode); /* we may have slept */ + up(&inode->i_sem); + filemap_fdatawait(filp->f_mapping); + nfs_zap_caches(inode); + return 0; +} + /* * Lock a (portion of) a file */ @@ -295,8 +383,6 @@ int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) { struct inode * inode = filp->f_mapping->host; - int status = 0; - int status2; dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", inode->i_sb->s_id, inode->i_ino, @@ -314,8 +400,8 @@ nfs_lock(struct file *filp, int cmd, str /* Fake OK code if mounted without NLM support */ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { if (IS_GETLK(cmd)) - status = LOCK_USE_CLNT; - goto out_ok; + return LOCK_USE_CLNT; + return 0; } } @@ -329,42 +415,9 @@ nfs_lock(struct file *filp, int cmd, str if (!fl->fl_owner || !(fl->fl_flags & FL_POSIX)) return -ENOLCK; - /* - * Flush all pending writes before doing anything - * with locks.. 
- */ - status = filemap_fdatawrite(filp->f_mapping); - down(&inode->i_sem); - status2 = nfs_wb_all(inode); - if (!status) - status = status2; - up(&inode->i_sem); - status2 = filemap_fdatawait(filp->f_mapping); - if (!status) - status = status2; - if (status < 0) - return status; - - lock_kernel(); - status = NFS_PROTO(inode)->lock(filp, cmd, fl); - unlock_kernel(); - if (status < 0) - return status; - - status = 0; - - /* - * Make sure we clear the cache whenever we try to get the lock. - * This makes locking act as a cache coherency point. - */ - out_ok: - if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) { - filemap_fdatawrite(filp->f_mapping); - down(&inode->i_sem); - nfs_wb_all(inode); /* we may have slept */ - up(&inode->i_sem); - filemap_fdatawait(filp->f_mapping); - nfs_zap_caches(inode); - } - return status; + if (IS_GETLK(cmd)) + return do_getlk(filp, cmd, fl); + if (fl->fl_type == F_UNLCK) + return do_unlk(filp, cmd, fl); + return do_setlk(filp, cmd, fl); } diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/inode.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/inode.c --- linux-2.6.8.1/fs/nfs/inode.c 2004-08-14 14:27:17.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/inode.c 2004-08-22 21:48:42.000000000 -0400 @@ -39,6 +39,8 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_VFS #define NFS_PARANOIA 1 @@ -57,7 +59,6 @@ static struct inode *nfs_alloc_inode(str static void nfs_destroy_inode(struct inode *); static void nfs_write_inode(struct inode *,int); static void nfs_delete_inode(struct inode *); -static void nfs_put_super(struct super_block *); static void nfs_clear_inode(struct inode *); static void nfs_umount_begin(struct super_block *); static int nfs_statfs(struct super_block *, struct kstatfs *); @@ -68,7 +69,6 @@ static struct super_operations nfs_sops .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, .delete_inode = nfs_delete_inode, - .put_super = 
nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs_clear_inode, .umount_begin = nfs_umount_begin, @@ -123,8 +123,9 @@ nfs_delete_inode(struct inode * inode) { dprintk("NFS: delete_inode(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + nfs_wb_all(inode); /* - * The following can never actually happen... + * The following should never happen... */ if (nfs_have_writebacks(inode)) { printk(KERN_ERR "nfs_delete_inode: inode %ld has pending RPC requests\n", inode->i_ino); @@ -141,10 +142,10 @@ static void nfs_clear_inode(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - struct rpc_cred *cred = nfsi->mm_cred; + struct rpc_cred *cred; - if (cred) - put_rpccred(cred); + nfs_wb_all(inode); + BUG_ON (!list_empty(&nfsi->open_files)); cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); @@ -152,27 +153,6 @@ nfs_clear_inode(struct inode *inode) } void -nfs_put_super(struct super_block *sb) -{ - struct nfs_server *server = NFS_SB(sb); - - nfs4_renewd_prepare_shutdown(server); - - if (server->client != NULL) - rpc_shutdown_client(server->client); - if (server->client_sys != NULL) - rpc_shutdown_client(server->client_sys); - - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_down(); /* release rpc.lockd */ - rpciod_down(); /* release rpciod */ - - destroy_nfsv4_state(server); - - kfree(server->hostname); -} - -void nfs_umount_begin(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); @@ -293,14 +273,6 @@ nfs_sb_init(struct super_block *sb, rpc_ server->rsize = nfs_block_size(fsinfo.rtpref, NULL); if (server->wsize == 0) server->wsize = nfs_block_size(fsinfo.wtpref, NULL); - if (sb->s_blocksize == 0) { - if (fsinfo.wtmult == 0) { - sb->s_blocksize = 512; - sb->s_blocksize_bits = 9; - } else - sb->s_blocksize = nfs_block_bits(fsinfo.wtmult, - &sb->s_blocksize_bits); - } if (fsinfo.rtmax >= 512 && server->rsize > fsinfo.rtmax) server->rsize = nfs_block_size(fsinfo.rtmax, NULL); @@ -319,6 +291,11 @@ nfs_sb_init(struct super_block *sb, rpc_ 
server->wsize = server->wpages << PAGE_CACHE_SHIFT; } + if (sb->s_blocksize == 0) + sb->s_blocksize = nfs_block_bits(server->wsize, + &sb->s_blocksize_bits); + server->wtmult = nfs_block_bits(fsinfo.wtmult, NULL); + server->dtsize = nfs_block_size(fsinfo.dtpref, NULL); if (server->dtsize > PAGE_CACHE_SIZE) server->dtsize = PAGE_CACHE_SIZE; @@ -405,7 +382,6 @@ static int nfs_fill_super(struct super_block *sb, struct nfs_mount_data *data, int silent) { struct nfs_server *server; - int err = -EIO; rpc_authflavor_t authflavor; server = NFS_SB(sb); @@ -424,10 +400,14 @@ nfs_fill_super(struct super_block *sb, s server->acdirmin = data->acdirmin*HZ; server->acdirmax = data->acdirmax*HZ; + /* Start lockd here, before we might error out */ + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_up(); + server->namelen = data->namlen; server->hostname = kmalloc(strlen(data->hostname) + 1, GFP_KERNEL); if (!server->hostname) - goto out_fail; + return -ENOMEM; strcpy(server->hostname, data->hostname); /* Check NFS protocol revision and initialize RPC op vector @@ -438,11 +418,11 @@ nfs_fill_super(struct super_block *sb, s server->caps |= NFS_CAP_READDIRPLUS; if (data->version < 4) { printk(KERN_NOTICE "NFS: NFSv3 not supported by mount program.\n"); - goto out_fail; + return -EIO; } #else printk(KERN_NOTICE "NFS: NFSv3 not supported.\n"); - goto out_fail; + return -EIO; #endif } else { server->rpc_ops = &nfs_v2_clientops; @@ -457,30 +437,19 @@ nfs_fill_super(struct super_block *sb, s /* Create RPC client handles */ server->client = nfs_create_client(server, data); if (IS_ERR(server->client)) - goto out_fail; + return PTR_ERR(server->client); /* RFC 2623, sec 2.3.2 */ if (authflavor != RPC_AUTH_UNIX) { server->client_sys = rpc_clone_client(server->client); - if (server->client_sys == NULL) - goto out_shutdown; + if (IS_ERR(server->client_sys)) + return PTR_ERR(server->client_sys); if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) - goto out_shutdown; + return -ENOMEM; } else { 
atomic_inc(&server->client->cl_count); server->client_sys = server->client; } - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - goto out_shutdown; - } - - sb->s_op = &nfs_sops; - err = nfs_sb_init(sb, authflavor); - if (err != 0) - goto out_noinit; - if (server->flags & NFS_MOUNT_VER3) { if (server->namelen == 0 || server->namelen > NFS3_MAXNAMLEN) server->namelen = NFS3_MAXNAMLEN; @@ -489,21 +458,8 @@ nfs_fill_super(struct super_block *sb, s server->namelen = NFS2_MAXNAMLEN; } - /* Check whether to start the lockd process */ - if (!(server->flags & NFS_MOUNT_NONLM)) - lockd_up(); - return 0; -out_noinit: - rpciod_down(); -out_shutdown: - if (server->client) - rpc_shutdown_client(server->client); - if (server->client_sys) - rpc_shutdown_client(server->client_sys); -out_fail: - if (server->hostname) - kfree(server->hostname); - return err; + sb->s_op = &nfs_sops; + return nfs_sb_init(sb, authflavor); } static int @@ -526,6 +482,7 @@ nfs_statfs(struct super_block *sb, struc if (error < 0) goto out_err; + buf->f_frsize = server->wtmult; buf->f_bsize = sb->s_blocksize; blockbits = sb->s_blocksize_bits; blockres = (1 << blockbits) - 1; @@ -642,7 +599,7 @@ nfs_find_actor(struct inode *inode, void if (NFS_FILEID(inode) != fattr->fileid) return 0; - if (memcmp(NFS_FH(inode), fh, sizeof(struct nfs_fh)) != 0) + if (nfs_compare_fh(NFS_FH(inode), fh)) return 0; if (is_bad_inode(inode)) return 0; @@ -653,11 +610,10 @@ static int nfs_init_locked(struct inode *inode, void *opaque) { struct nfs_find_desc *desc = (struct nfs_find_desc *)opaque; - struct nfs_fh *fh = desc->fh; struct nfs_fattr *fattr = desc->fattr; NFS_FILEID(inode) = fattr->fileid; - memcpy(NFS_FH(inode), fh, sizeof(struct nfs_fh)); + nfs_copy_fh(NFS_FH(inode), desc->fh); return 0; } @@ -859,53 +815,114 @@ int nfs_getattr(struct vfsmount *mnt, st return err; } +struct nfs_open_context *alloc_nfs_open_context(struct dentry *dentry, struct 
rpc_cred *cred) +{ + struct nfs_open_context *ctx; + + ctx = (struct nfs_open_context *)kmalloc(sizeof(*ctx), GFP_KERNEL); + if (ctx != NULL) { + atomic_set(&ctx->count, 1); + ctx->dentry = dget(dentry); + ctx->cred = get_rpccred(cred); + ctx->state = NULL; + ctx->lockowner = current->files; + ctx->error = 0; + init_waitqueue_head(&ctx->waitq); + } + return ctx; +} + +struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx) +{ + if (ctx != NULL) + atomic_inc(&ctx->count); + return ctx; +} + +void put_nfs_open_context(struct nfs_open_context *ctx) +{ + if (atomic_dec_and_test(&ctx->count)) { + if (ctx->state != NULL) + nfs4_close_state(ctx->state, ctx->mode); + if (ctx->cred != NULL) + put_rpccred(ctx->cred); + dput(ctx->dentry); + kfree(ctx); + } +} + /* * Ensure that mmap has a recent RPC credential for use when writing out * shared pages */ -void -nfs_set_mmcred(struct inode *inode, struct rpc_cred *cred) +void nfs_file_set_open_context(struct file *filp, struct nfs_open_context *ctx) { - struct rpc_cred **p = &NFS_I(inode)->mm_cred, - *oldcred = *p; + struct inode *inode = filp->f_dentry->d_inode; + struct nfs_inode *nfsi = NFS_I(inode); - *p = get_rpccred(cred); - if (oldcred) - put_rpccred(oldcred); + filp->private_data = get_nfs_open_context(ctx); + spin_lock(&inode->i_lock); + list_add(&ctx->list, &nfsi->open_files); + spin_unlock(&inode->i_lock); +} + +struct nfs_open_context *nfs_find_open_context(struct inode *inode, int mode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_open_context *pos, *ctx = NULL; + + spin_lock(&inode->i_lock); + list_for_each_entry(pos, &nfsi->open_files, list) { + if ((pos->mode & mode) == mode) { + ctx = get_nfs_open_context(pos); + break; + } + } + spin_unlock(&inode->i_lock); + return ctx; +} + +void nfs_file_clear_open_context(struct file *filp) +{ + struct inode *inode = filp->f_dentry->d_inode; + struct nfs_open_context *ctx = (struct nfs_open_context *)filp->private_data; + + if (ctx) { + 
filp->private_data = NULL; + spin_lock(&inode->i_lock); + list_del(&ctx->list); + spin_unlock(&inode->i_lock); + put_nfs_open_context(ctx); + } } /* - * These are probably going to contain hooks for - * allocating and releasing RPC credentials for - * the file. I'll have to think about Tronds patch - * a bit more.. + * These allocate and release file read/write context information. */ int nfs_open(struct inode *inode, struct file *filp) { - struct rpc_auth *auth; + struct nfs_open_context *ctx; struct rpc_cred *cred; - auth = NFS_CLIENT(inode)->cl_auth; - cred = rpcauth_lookupcred(auth, 0); - filp->private_data = cred; - if ((filp->f_mode & FMODE_WRITE) != 0) { - nfs_set_mmcred(inode, cred); + if ((cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0)) == NULL) + return -ENOMEM; + ctx = alloc_nfs_open_context(filp->f_dentry, cred); + put_rpccred(cred); + if (ctx == NULL) + return -ENOMEM; + ctx->mode = filp->f_mode; + nfs_file_set_open_context(filp, ctx); + put_nfs_open_context(ctx); + if ((filp->f_mode & FMODE_WRITE) != 0) nfs_begin_data_update(inode); - } return 0; } int nfs_release(struct inode *inode, struct file *filp) { - struct rpc_cred *cred; - - lock_kernel(); if ((filp->f_mode & FMODE_WRITE) != 0) nfs_end_data_update(inode); - cred = nfs_file_cred(filp); - if (cred) - put_rpccred(cred); - unlock_kernel(); + nfs_file_clear_open_context(filp); return 0; } @@ -946,7 +963,7 @@ __nfs_revalidate_inode(struct nfs_server /* Protect against RPC races by saving the change attribute */ verifier = nfs_save_change_attribute(inode); - status = NFS_PROTO(inode)->getattr(inode, &fattr); + status = NFS_PROTO(inode)->getattr(server, NFS_FH(inode), &fattr); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", inode->i_sb->s_id, @@ -1002,6 +1019,30 @@ out: return status; } +int nfs_attribute_timeout(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (nfs_have_delegation(inode, FMODE_READ)) + return 0; + return 
time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); +} + +/** + * nfs_revalidate_inode - Revalidate the inode attributes + * @server - pointer to nfs_server struct + * @inode - pointer to inode struct + * + * Updates inode attribute information by retrieving the data from the server. + */ +int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) +{ + if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) + && !nfs_attribute_timeout(inode)) + return NFS_STALE(inode) ? -ESTALE : 0; + return __nfs_revalidate_inode(server, inode); +} + /** * nfs_begin_data_update * @inode - pointer to inode @@ -1023,11 +1064,13 @@ void nfs_end_data_update(struct inode *i { struct nfs_inode *nfsi = NFS_I(inode); - /* Mark the attribute cache for revalidation */ - nfsi->flags |= NFS_INO_INVALID_ATTR; - /* Directories and symlinks: invalidate page cache too */ - if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - nfsi->flags |= NFS_INO_INVALID_DATA; + if (!nfs_have_delegation(inode, FMODE_READ)) { + /* Mark the attribute cache for revalidation */ + nfsi->flags |= NFS_INO_INVALID_ATTR; + /* Directories and symlinks: invalidate page cache too */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + nfsi->flags |= NFS_INO_INVALID_DATA; + } nfsi->cache_change_attribute ++; atomic_dec(&nfsi->data_updates); } @@ -1068,6 +1111,10 @@ int nfs_refresh_inode(struct inode *inod loff_t cur_size, new_isize; int data_unstable; + /* Do we hold a delegation? */ + if (nfs_have_delegation(inode, FMODE_READ)) + return 0; + /* Are we in the process of updating data on the server? 
*/ data_unstable = nfs_caches_unstable(inode); @@ -1265,7 +1312,8 @@ static int nfs_update_inode(struct inode if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) invalid &= ~NFS_INO_INVALID_DATA; - nfsi->flags |= invalid; + if (!nfs_have_delegation(inode, FMODE_READ)) + nfsi->flags |= invalid; return 0; out_changed: @@ -1305,7 +1353,7 @@ static int nfs_compare_super(struct supe return 0; if (old->addr.sin_port != server->addr.sin_port) return 0; - return !memcmp(&old->fh, &server->fh, sizeof(struct nfs_fh)); + return !nfs_compare_fh(&old->fh, &server->fh); } static struct super_block *nfs_get_sb(struct file_system_type *fs_type, @@ -1330,9 +1378,7 @@ static struct super_block *nfs_get_sb(st init_nfsv4_state(server); root = &server->fh; - memcpy(root, &data->root, sizeof(*root)); - if (root->size < sizeof(root->data)) - memset(root->data+root->size, 0, sizeof(root->data)-root->size); + nfs_copy_fh(root, (struct nfs_fh *) &data->root); if (data->version != NFS_MOUNT_VERSION) { printk("nfs warning: mount version %s than kernel\n", @@ -1343,7 +1389,6 @@ static struct super_block *nfs_get_sb(st data->bsize = 0; if (data->version < 4) { data->flags &= ~NFS_MOUNT_VER3; - memset(root, 0, sizeof(*root)); root->size = NFS2_FHSIZE; memcpy(root->data, data->old_root.data, NFS2_FHSIZE); } @@ -1373,6 +1418,13 @@ static struct super_block *nfs_get_sb(st s->s_flags = flags; + /* Fire up rpciod if not yet running */ + if (rpciod_up() != 0) { + printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); + kfree(server); + return ERR_PTR(-EIO); + } + error = nfs_fill_super(s, data, flags & MS_VERBOSE ? 
1 : 0); if (error) { up_write(&s->s_umount); @@ -1386,7 +1438,25 @@ static struct super_block *nfs_get_sb(st static void nfs_kill_super(struct super_block *s) { struct nfs_server *server = NFS_SB(s); + kill_anon_super(s); + + nfs4_renewd_prepare_shutdown(server); + + if (server->client != NULL && !IS_ERR(server->client)) + rpc_shutdown_client(server->client); + if (server->client_sys != NULL && !IS_ERR(server->client_sys)) + rpc_shutdown_client(server->client_sys); + + if (!(server->flags & NFS_MOUNT_NONLM)) + lockd_down(); /* release rpc.lockd */ + + rpciod_down(); /* release rpciod */ + + destroy_nfsv4_state(server); + + if (server->hostname != NULL) + kfree(server->hostname); kfree(server); } @@ -1402,12 +1472,12 @@ static struct file_system_type nfs_fs_ty static void nfs4_clear_inode(struct inode *); + static struct super_operations nfs4_sops = { .alloc_inode = nfs_alloc_inode, .destroy_inode = nfs_destroy_inode, .write_inode = nfs_write_inode, .delete_inode = nfs_delete_inode, - .put_super = nfs_put_super, .statfs = nfs_statfs, .clear_inode = nfs4_clear_inode, .umount_begin = nfs_umount_begin, @@ -1423,6 +1493,12 @@ static void nfs4_clear_inode(struct inod { struct nfs_inode *nfsi = NFS_I(inode); + /* If we are holding a delegation, return it! 
*/ + if (nfsi->delegation != NULL) + nfs_inode_return_delegation(inode); + /* First call standard NFS clear_inode() code */ + nfs_clear_inode(inode); + /* Now clear out any remaining state */ while (!list_empty(&nfsi->open_states)) { struct nfs4_state *state; @@ -1437,8 +1513,6 @@ static void nfs4_clear_inode(struct inod BUG_ON(atomic_read(&state->count) != 1); nfs4_close_state(state, state->state); } - /* Now call standard NFS clear_inode() code */ - nfs_clear_inode(inode); } @@ -1498,7 +1572,7 @@ static int nfs4_fill_super(struct super_ clp = nfs4_get_client(&server->addr.sin_addr); if (!clp) { printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); - goto out_fail; + return -EIO; } /* Now create transport and client */ @@ -1536,8 +1610,13 @@ static int nfs4_fill_super(struct super_ memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); nfs_idmap_new(clp); } - if (list_empty(&clp->cl_superblocks)) - clear_bit(NFS4CLNT_OK, &clp->cl_state); + if (list_empty(&clp->cl_superblocks)) { + err = nfs4_init_client(clp); + if (err != 0) { + up_write(&clp->cl_sem); + goto out_fail; + } + } list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); if (!IS_ERR(clnt)) @@ -1547,45 +1626,29 @@ static int nfs4_fill_super(struct super_ if (IS_ERR(clnt)) { printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - err = PTR_ERR(clnt); - goto out_remove_list; + return PTR_ERR(clnt); } clnt->cl_intr = (server->flags & NFS4_MOUNT_INTR) ? 1 : 0; clnt->cl_softrtry = (server->flags & NFS4_MOUNT_SOFT) ? 
1 : 0; server->client = clnt; - err = -ENOMEM; if (server->nfs4_state->cl_idmap == NULL) { printk(KERN_WARNING "NFS: failed to create idmapper.\n"); - goto out_shutdown; + return -ENOMEM; } if (clnt->cl_auth->au_flavor != authflavour) { if (rpcauth_create(authflavour, clnt) == NULL) { printk(KERN_WARNING "NFS: couldn't create credcache!\n"); - goto out_shutdown; + return -ENOMEM; } } - /* Fire up rpciod if not yet running */ - if (rpciod_up() != 0) { - printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); - goto out_shutdown; - } - sb->s_op = &nfs4_sops; err = nfs_sb_init(sb, authflavour); if (err == 0) return 0; - rpciod_down(); -out_shutdown: - rpc_shutdown_client(server->client); -out_remove_list: - down_write(&server->nfs4_state->cl_sem); - list_del_init(&server->nfs4_siblings); - up_write(&server->nfs4_state->cl_sem); - destroy_nfsv4_state(server); out_fail: if (clp) nfs4_put_client(clp); @@ -1691,6 +1754,13 @@ static struct super_block *nfs4_get_sb(s s->s_flags = flags; + /* Fire up rpciod if not yet running */ + if (rpciod_up() != 0) { + printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); + s = ERR_PTR(-EIO); + goto out_free; + } + error = nfs4_fill_super(s, data, flags & MS_VERBOSE ? 
1 : 0); if (error) { up_write(&s->s_umount); @@ -1710,22 +1780,31 @@ out_free: return s; } +static void nfs4_kill_super(struct super_block *sb) +{ + nfs_return_all_delegations(sb); + nfs_kill_super(sb); +} + static struct file_system_type nfs4_fs_type = { .owner = THIS_MODULE, .name = "nfs4", .get_sb = nfs4_get_sb, - .kill_sb = nfs_kill_super, + .kill_sb = nfs4_kill_super, .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, }; -#define nfs4_zero_state(nfsi) \ +#define nfs4_init_once(nfsi) \ do { \ INIT_LIST_HEAD(&(nfsi)->open_states); \ + nfsi->delegation = NULL; \ + nfsi->delegation_state = 0; \ + init_rwsem(&nfsi->rwsem); \ } while(0) #define register_nfs4fs() register_filesystem(&nfs4_fs_type) #define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type) #else -#define nfs4_zero_state(nfsi) \ +#define nfs4_init_once(nfsi) \ do { } while (0) #define register_nfs4fs() (0) #define unregister_nfs4fs() @@ -1747,8 +1826,6 @@ static struct inode *nfs_alloc_inode(str if (!nfsi) return NULL; nfsi->flags = 0; - nfsi->mm_cred = NULL; - nfs4_zero_state(nfsi); return &nfsi->vfs_inode; } @@ -1764,14 +1841,17 @@ static void init_once(void * foo, kmem_c if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { inode_init_once(&nfsi->vfs_inode); + spin_lock_init(&nfsi->req_lock); INIT_LIST_HEAD(&nfsi->dirty); INIT_LIST_HEAD(&nfsi->commit); + INIT_LIST_HEAD(&nfsi->open_files); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); atomic_set(&nfsi->data_updates, 0); nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; init_waitqueue_head(&nfsi->nfs_i_wait); + nfs4_init_once(nfsi); } } diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/Makefile linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/Makefile --- linux-2.6.8.1/fs/nfs/Makefile 2004-08-14 14:25:54.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/Makefile 2004-08-22 21:47:55.000000000 -0400 @@ -9,6 +9,7 @@ nfs-y := dir.o file.o inode.o nfs2xdr nfs-$(CONFIG_ROOT_NFS) += 
nfsroot.o mount_clnt.o nfs-$(CONFIG_NFS_V3) += nfs3proc.o nfs3xdr.o nfs-$(CONFIG_NFS_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \ - idmap.o + delegation.o idmap.o \ + callback.o callback_xdr.o callback_proc.o nfs-$(CONFIG_NFS_DIRECTIO) += direct.o nfs-objs := $(nfs-y) diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/mount_clnt.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/mount_clnt.c --- linux-2.6.8.1/fs/nfs/mount_clnt.c 2004-08-14 14:25:49.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/mount_clnt.c 2004-08-22 21:46:47.000000000 -0400 @@ -108,7 +108,6 @@ xdr_decode_fhstatus(struct rpc_rqst *req { struct nfs_fh *fh = res->fh; - memset((void *)fh, 0, sizeof(*fh)); if ((res->status = ntohl(*p++)) == 0) { fh->size = NFS2_FHSIZE; memcpy(fh->data, p, NFS2_FHSIZE); @@ -121,7 +120,6 @@ xdr_decode_fhstatus3(struct rpc_rqst *re { struct nfs_fh *fh = res->fh; - memset((void *)fh, 0, sizeof(*fh)); if ((res->status = ntohl(*p++)) == 0) { int size = ntohl(*p++); if (size <= NFS3_FHSIZE) { diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/nfs2xdr.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/nfs2xdr.c --- linux-2.6.8.1/fs/nfs/nfs2xdr.c 2004-08-14 14:26:14.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/nfs2xdr.c 2004-08-22 21:46:59.000000000 -0400 @@ -77,8 +77,6 @@ xdr_encode_fhandle(u32 *p, struct nfs_fh static inline u32 * xdr_decode_fhandle(u32 *p, struct nfs_fh *fhandle) { - /* Zero handle first to allow comparisons */ - memset(fhandle, 0, sizeof(*fhandle)); /* NFSv2 handles have a fixed length */ fhandle->size = NFS2_FHSIZE; memcpy(fhandle->data, p, NFS2_FHSIZE); @@ -95,6 +93,23 @@ xdr_encode_time(u32 *p, struct timespec } static inline u32* +xdr_encode_current_server_time(u32 *p, struct timespec *timep) +{ + /* + * Passing the invalid value useconds=1000000 is a + * Sun convention for "set to current server time". 
+ * It's needed to make permissions checks for the + * "touch" program across v2 mounts to Solaris and + * Irix boxes work correctly. See description of + * sattr in section 6.1 of "NFS Illustrated" by + * Brent Callaghan, Addison-Wesley, ISBN 0-201-32750-5 + */ + *p++ = htonl(timep->tv_sec); + *p++ = htonl(1000000); + return p; +} + +static inline u32* xdr_decode_time(u32 *p, struct timespec *timep) { timep->tv_sec = ntohl(*p++); @@ -142,15 +157,19 @@ xdr_encode_sattr(u32 *p, struct iattr *a SATTR(p, attr, ATTR_GID, ia_gid); SATTR(p, attr, ATTR_SIZE, ia_size); - if (attr->ia_valid & (ATTR_ATIME|ATTR_ATIME_SET)) { + if (attr->ia_valid & ATTR_ATIME_SET) { p = xdr_encode_time(p, &attr->ia_atime); + } else if (attr->ia_valid & ATTR_ATIME) { + p = xdr_encode_current_server_time(p, &attr->ia_atime); } else { *p++ = ~(u32) 0; *p++ = ~(u32) 0; } - if (attr->ia_valid & (ATTR_MTIME|ATTR_MTIME_SET)) { + if (attr->ia_valid & ATTR_MTIME_SET) { p = xdr_encode_time(p, &attr->ia_mtime); + } else if (attr->ia_valid & ATTR_MTIME) { + p = xdr_encode_current_server_time(p, &attr->ia_mtime); } else { *p++ = ~(u32) 0; *p++ = ~(u32) 0; diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/nfs3proc.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/nfs3proc.c --- linux-2.6.8.1/fs/nfs/nfs3proc.c 2004-08-14 14:26:35.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/nfs3proc.c 2004-08-22 21:47:42.000000000 -0400 @@ -68,18 +68,6 @@ nfs3_async_handle_jukebox(struct rpc_tas return 1; } -static struct rpc_cred * -nfs_cred(struct inode *inode, struct file *filp) -{ - struct rpc_cred *cred = NULL; - - if (filp) - cred = (struct rpc_cred *)filp->private_data; - if (!cred) - cred = NFS_I(inode)->mm_cred; - return cred; -} - /* * Bare-bones access to getattr: this is for nfs_read_super. */ @@ -104,14 +92,15 @@ nfs3_proc_get_root(struct nfs_server *se * One function for each procedure in the NFS protocol. 
*/ static int -nfs3_proc_getattr(struct inode *inode, struct nfs_fattr *fattr) +nfs3_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) { int status; dprintk("NFS call getattr\n"); fattr->valid = 0; - status = rpc_call(NFS_CLIENT(inode), NFS3PROC_GETATTR, - NFS_FH(inode), fattr, 0); + status = rpc_call(server->client, NFS3PROC_GETATTR, + fhandle, fattr, 0); dprintk("NFS reply getattr\n"); return status; } @@ -164,8 +153,7 @@ nfs3_proc_lookup(struct inode *dir, stru return status; } -static int -nfs3_proc_access(struct inode *inode, struct rpc_cred *cred, int mode) +static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry) { struct nfs_fattr fattr; struct nfs3_accessargs arg = { @@ -178,9 +166,10 @@ nfs3_proc_access(struct inode *inode, st .rpc_proc = &nfs3_procedures[NFS3PROC_ACCESS], .rpc_argp = &arg, .rpc_resp = &res, - .rpc_cred = cred + .rpc_cred = entry->cred }; - int status; + int mode = entry->mask; + int status; dprintk("NFS call access\n"); fattr.valid = 0; @@ -200,10 +189,16 @@ nfs3_proc_access(struct inode *inode, st } status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); nfs_refresh_inode(inode, &fattr); - dprintk("NFS reply access\n"); - - if (status == 0 && (arg.access & res.access) != arg.access) - status = -EACCES; + if (status == 0) { + entry->mask = 0; + if (res.access & NFS3_ACCESS_READ) + entry->mask |= MAY_READ; + if (res.access & (NFS3_ACCESS_MODIFY | NFS3_ACCESS_EXTEND | NFS3_ACCESS_DELETE)) + entry->mask |= MAY_WRITE; + if (res.access & (NFS3_ACCESS_LOOKUP|NFS3_ACCESS_EXECUTE)) + entry->mask |= MAY_EXEC; + } + dprintk("NFS reply access, status = %d\n", status); return status; } @@ -227,8 +222,7 @@ nfs3_proc_readlink(struct inode *inode, return status; } -static int -nfs3_proc_read(struct nfs_read_data *rdata, struct file *filp) +static int nfs3_proc_read(struct nfs_read_data *rdata) { int flags = rdata->flags; struct inode * inode = rdata->inode; @@ -237,13 +231,13 @@ 
nfs3_proc_read(struct nfs_read_data *rda .rpc_proc = &nfs3_procedures[NFS3PROC_READ], .rpc_argp = &rdata->args, .rpc_resp = &rdata->res, + .rpc_cred = rdata->cred, }; int status; dprintk("NFS call read %d @ %Ld\n", rdata->args.count, (long long) rdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -251,8 +245,7 @@ nfs3_proc_read(struct nfs_read_data *rda return status; } -static int -nfs3_proc_write(struct nfs_write_data *wdata, struct file *filp) +static int nfs3_proc_write(struct nfs_write_data *wdata) { int rpcflags = wdata->flags; struct inode * inode = wdata->inode; @@ -261,13 +254,13 @@ nfs3_proc_write(struct nfs_write_data *w .rpc_proc = &nfs3_procedures[NFS3PROC_WRITE], .rpc_argp = &wdata->args, .rpc_resp = &wdata->res, + .rpc_cred = wdata->cred, }; int status; dprintk("NFS call write %d @ %Ld\n", wdata->args.count, (long long) wdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -275,8 +268,7 @@ nfs3_proc_write(struct nfs_write_data *w return status < 0? 
status : wdata->res.count; } -static int -nfs3_proc_commit(struct nfs_write_data *cdata, struct file *filp) +static int nfs3_proc_commit(struct nfs_write_data *cdata) { struct inode * inode = cdata->inode; struct nfs_fattr * fattr = cdata->res.fattr; @@ -284,13 +276,13 @@ nfs3_proc_commit(struct nfs_write_data * .rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT], .rpc_argp = &cdata->args, .rpc_resp = &cdata->res, + .rpc_cred = cdata->cred, }; int status; dprintk("NFS call commit %d @ %Ld\n", cdata->args.count, (long long) cdata->args.offset); fattr->valid = 0; - msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status >= 0) nfs_refresh_inode(inode, fattr); @@ -534,6 +526,8 @@ nfs3_proc_symlink(struct inode *dir, str }; int status; + if (path->len > NFS3_MAXPATHLEN) + return -ENAMETOOLONG; dprintk("NFS call symlink %s -> %s\n", name->name, path->name); dir_attr.valid = 0; fattr->valid = 0; @@ -832,27 +826,6 @@ nfs3_proc_commit_setup(struct nfs_write_ rpc_call_setup(task, &msg, 0); } -/* - * Set up the nfspage struct with the right credentials - */ -void -nfs3_request_init(struct nfs_page *req, struct file *filp) -{ - req->wb_cred = get_rpccred(nfs_cred(req->wb_inode, filp)); -} - -static int -nfs3_request_compatible(struct nfs_page *req, struct file *filp, struct page *page) -{ - if (req->wb_file != filp) - return 0; - if (req->wb_page != page) - return 0; - if (req->wb_cred != nfs_file_cred(filp)) - return 0; - return 1; -} - static int nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) { @@ -892,7 +865,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .commit_setup = nfs3_proc_commit_setup, .file_open = nfs_open, .file_release = nfs_release, - .request_init = nfs3_request_init, - .request_compatible = nfs3_request_compatible, .lock = nfs3_proc_lock, }; diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/nfs3xdr.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/nfs3xdr.c --- linux-2.6.8.1/fs/nfs/nfs3xdr.c 
2004-08-14 14:27:54.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/nfs3xdr.c 2004-08-22 21:46:48.000000000 -0400 @@ -109,10 +109,6 @@ xdr_encode_fhandle(u32 *p, struct nfs_fh static inline u32 * xdr_decode_fhandle(u32 *p, struct nfs_fh *fh) { - /* - * Zero all nonused bytes - */ - memset((u8 *)fh, 0, sizeof(*fh)); if ((fh->size = ntohl(*p++)) <= NFS3_FHSIZE) { memcpy(fh->data, p, fh->size); return p + XDR_QUADLEN(fh->size); diff -u --recursive --new-file --show-c-function linux-2.6.8.1/fs/nfs/nfs4proc.c linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/nfs4proc.c --- linux-2.6.8.1/fs/nfs/nfs4proc.c 2004-08-14 14:27:16.000000000 -0400 +++ linux-2.6.8.1-50-rpc_queue_lock/fs/nfs/nfs4proc.c 2004-08-22 21:49:03.000000000 -0400 @@ -47,12 +47,16 @@ #include #include +#include "delegation.h" + #define NFSDBG_FACILITY NFSDBG_PROC -#define NFS4_POLL_RETRY_TIME (15*HZ) +#define NFS4_POLL_RETRY_MIN (1*HZ) +#define NFS4_POLL_RETRY_MAX (15*HZ) static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); +static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; @@ -189,53 +193,296 @@ static void update_changeattr(struct ino * reclaim state on the server after a reboot. 
* Assumes caller is holding the sp->so_sem */ -int -nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +static int _nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) { struct inode *inode = state->inode; struct nfs_server *server = NFS_SERVER(inode); - struct nfs_fattr fattr = { - .valid = 0, - }; - struct nfs_open_reclaimargs o_arg = { + struct nfs_delegation *delegation = NFS_I(inode)->delegation; + struct nfs_openargs o_arg = { .fh = NFS_FH(inode), .seqid = sp->so_seqid, .id = sp->so_id, - .share_access = state->state, + .open_flags = state->state, .clientid = server->nfs4_state->cl_clientid, .claim = NFS4_OPEN_CLAIM_PREVIOUS, .bitmask = server->attr_bitmask, }; struct nfs_openres o_res = { - .f_attr = &fattr, .server = server, /* Grrr */ }; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_RECLAIM], + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], .rpc_argp = &o_arg, .rpc_resp = &o_res, .rpc_cred = sp->so_cred, }; int status; - status = rpc_call_sync(server->client, &msg, 0); + if (delegation != NULL) { + if (!(delegation->flags & NFS_DELEGATION_NEED_RECLAIM)) { + memcpy(&state->stateid, &delegation->stateid, + sizeof(state->stateid)); + set_bit(NFS_DELEGATED_STATE, &state->flags); + return 0; + } + o_arg.u.delegation_type = delegation->type; + } + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); nfs4_increment_seqid(status, sp); - if (status == 0) + if (status == 0) { memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); - /* Update the inode attributes */ - nfs_refresh_inode(inode, &fattr); + if (o_res.delegation_type != 0) { + nfs_inode_reclaim_delegation(inode, sp->so_cred, &o_res); + /* Did the server issue an immediate delegation recall? 
*/ + if (o_res.do_recall) + nfs_async_inode_return_delegation(inode, &o_res.stateid); + } + } + clear_bit(NFS_DELEGATED_STATE, &state->flags); + /* Ensure we update the inode attributes */ + NFS_CACHEINV(inode); + return status; +} + +int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct nfs_server *server = NFS_SERVER(state->inode); + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs4_open_reclaim(sp, state); + switch (err) { + case 0: + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + return err; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +{ + struct nfs4_state_owner *sp = state->owner; + struct inode *inode = dentry->d_inode; + struct nfs_server *server = NFS_SERVER(inode); + struct dentry *parent = dget_parent(dentry); + struct nfs_openargs arg = { + .fh = NFS_FH(parent->d_inode), + .clientid = server->nfs4_state->cl_clientid, + .name = &dentry->d_name, + .id = sp->so_id, + .server = server, + .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_DELEGATE_CUR, + }; + struct nfs_openres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = sp->so_cred, + }; + int status = 0; + + down(&sp->so_sema); + if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) + goto out; + if (state->state == 0) + goto out; + arg.seqid = sp->so_seqid; + arg.open_flags = state->state; + memcpy(arg.u.delegation.data, state->stateid.data, sizeof(arg.u.delegation.data)); + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status >= 0) { + memcpy(state->stateid.data, res.stateid.data, + sizeof(state->stateid.data)); + clear_bit(NFS_DELEGATED_STATE, &state->flags); + } 
+out: + up(&sp->so_sema); + dput(parent); + return status; +} + +int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + struct nfs_server *server = NFS_SERVER(dentry->d_inode); + int err; + do { + err = _nfs4_open_delegation_recall(dentry, state); + switch (err) { + case 0: + return err; + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + /* Don't recall a delegation if it was lost */ + nfs4_schedule_state_recovery(server->nfs4_state); + return err; + } + err = nfs4_handle_exception(server, err, &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid) +{ + struct nfs_open_confirmargs arg = { + .fh = fh, + .seqid = sp->so_seqid, + .stateid = *stateid, + }; + struct nfs_open_confirmres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = sp->so_cred, + }; + int status; + + status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR); + nfs4_increment_seqid(status, sp); + if (status >= 0) + memcpy(stateid, &res.stateid, sizeof(*stateid)); return status; } +static int _nfs4_do_access(struct inode *inode, struct rpc_cred *cred, int mask) +{ + struct nfs_access_entry cache; + int status; + + status = nfs_access_get_cached(inode, cred, &cache); + if (status == 0) + goto out; + + /* Be clever: ask server to check for all possible rights */ + cache.mask = MAY_EXEC | MAY_WRITE | MAY_READ; + cache.cred = cred; + cache.jiffies = jiffies; + status = _nfs4_proc_access(inode, &cache); + if (status != 0) + return status; + nfs_access_add_cache(inode, &cache); +out: + if ((cache.mask & mask) == mask) + return 0; + return -EACCES; +} + +/* + * Returns an nfs4_state + an extra reference to the inode + */ +int _nfs4_open_delegated(struct inode *inode, int 
flags, struct rpc_cred *cred, struct nfs4_state **res) +{ + struct nfs_delegation *delegation; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs4_state_owner *sp = NULL; + struct nfs4_state *state = NULL; + int open_flags = flags & (FMODE_READ|FMODE_WRITE); + int mask = 0; + int err; + + /* Protect against reboot recovery - NOTE ORDER! */ + down_read(&clp->cl_sem); + /* Protect against delegation recall */ + down_read(&nfsi->rwsem); + delegation = NFS_I(inode)->delegation; + err = -ENOENT; + if (delegation == NULL || (delegation->type & open_flags) != open_flags) + goto out_err; + err = -ENOMEM; + if (!(sp = nfs4_get_state_owner(server, cred))) { + dprintk("%s: nfs4_get_state_owner failed!\n", __FUNCTION__); + goto out_err; + } + down(&sp->so_sema); + state = nfs4_get_open_state(inode, sp); + if (state == NULL) + goto out_err; + + err = -ENOENT; + if ((state->state & open_flags) == open_flags) { + spin_lock(&inode->i_lock); + if (open_flags & FMODE_READ) + state->nreaders++; + if (open_flags & FMODE_WRITE) + state->nwriters++; + spin_unlock(&inode->i_lock); + goto out_ok; + } else if (state->state != 0) + goto out_err; + + lock_kernel(); + err = _nfs4_do_access(inode, cred, mask); + unlock_kernel(); + if (err != 0) + goto out_err; + spin_lock(&inode->i_lock); + memcpy(state->stateid.data, delegation->stateid.data, + sizeof(state->stateid.data)); + state->state |= open_flags; + if (open_flags & FMODE_READ) + state->nreaders++; + if (open_flags & FMODE_WRITE) + state->nwriters++; + set_bit(NFS_DELEGATED_STATE, &state->flags); + spin_unlock(&inode->i_lock); +out_ok: + up(&sp->so_sema); + nfs4_put_state_owner(sp); + up_read(&nfsi->rwsem); + up_read(&clp->cl_sem); + igrab(inode); + *res = state; + return 0; +out_err: + if (sp != NULL) { + if (state != NULL) + nfs4_put_open_state(state); + up(&sp->so_sema); + nfs4_put_state_owner(sp); + } + up_read(&nfsi->rwsem); + 
up_read(&clp->cl_sem); + return err; +} + +static struct nfs4_state *nfs4_open_delegated(struct inode *inode, int flags, struct rpc_cred *cred) +{ + struct nfs4_exception exception = { }; + struct nfs4_state *res; + int err; + + do { + err = _nfs4_open_delegated(inode, flags, cred, &res); + if (err == 0) + break; + res = ERR_PTR(nfs4_handle_exception(NFS_SERVER(inode), + err, &exception)); + } while (exception.retry); + return res; +} + /* * Returns an nfs4_state + an referenced inode */ -struct nfs4_state * -nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) +static int _nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred, struct nfs4_state **res) { struct nfs4_state_owner *sp; struct nfs4_state *state = NULL; struct nfs_server *server = NFS_SERVER(dir); + struct nfs4_client *clp = server->nfs4_state; struct inode *inode = NULL; int status; struct nfs_fattr f_attr = { @@ -243,12 +490,11 @@ nfs4_do_open(struct inode *dir, struct q }; struct nfs_openargs o_arg = { .fh = NFS_FH(dir), - .share_access = flags & (FMODE_READ|FMODE_WRITE), - .opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE, - .createmode = (flags & O_EXCL) ? 
NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED, + .open_flags = flags, .name = name, .server = server, .bitmask = server->attr_bitmask, + .claim = NFS4_OPEN_CLAIM_NULL, }; struct nfs_openres o_res = { .f_attr = &f_attr, @@ -261,60 +507,50 @@ nfs4_do_open(struct inode *dir, struct q .rpc_cred = cred, }; -retry: + /* Protect against reboot recovery conflicts */ + down_read(&clp->cl_sem); status = -ENOMEM; - if (!(sp = nfs4_get_state_owner(NFS_SERVER(dir), cred))) { + if (!(sp = nfs4_get_state_owner(server, cred))) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); - goto out; + goto out_err; } - if (o_arg.createmode & NFS4_CREATE_EXCLUSIVE){ + if (flags & O_EXCL) { u32 *p = (u32 *) o_arg.u.verifier.data; p[0] = jiffies; p[1] = current->pid; - } else if (o_arg.createmode == NFS4_CREATE_UNCHECKED) { + } else o_arg.u.attrs = sattr; - } /* Serialization for the sequence id */ down(&sp->so_sema); o_arg.seqid = sp->so_seqid; o_arg.id = sp->so_id; - o_arg.clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, + o_arg.clientid = clp->cl_clientid, - status = rpc_call_sync(server->client, &msg, 0); + status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR); nfs4_increment_seqid(status, sp); if (status) - goto out_up; + goto out_err; update_changeatt