fs/Kconfig | 39 +- fs/inode.c | 2 fs/lockd/clntproc.c | 2 fs/lockd/host.c | 4 fs/lockd/mon.c | 16 - fs/lockd/svc4proc.c | 21 + fs/lockd/svclock.c | 28 + fs/lockd/svcproc.c | 19 + fs/nfs/dir.c | 177 +++++++++-- fs/nfs/direct.c | 5 fs/nfs/file.c | 9 fs/nfs/inode.c | 523 ++++++++++++++++++---------------- fs/nfs/mount_clnt.c | 14 fs/nfs/nfs2xdr.c | 54 +-- fs/nfs/nfs3proc.c | 97 +----- fs/nfs/nfs3xdr.c | 86 ++--- fs/nfs/nfs4proc.c | 167 ++--------- fs/nfs/nfs4state.c | 2 fs/nfs/nfs4xdr.c | 241 ++++++++-------- fs/nfs/pagelist.c | 8 fs/nfs/proc.c | 108 +++---- fs/nfs/read.c | 336 +++++++++++++++++----- fs/nfs/unlink.c | 3 fs/nfs/write.c | 645 +++++++++++++++++++++++++++++-------------- include/linux/fs.h | 2 include/linux/lockd/debug.h | 2 include/linux/lockd/lockd.h | 1 include/linux/nfs_fs.h | 127 +++++--- include/linux/nfs_page.h | 44 ++ include/linux/nfs_xdr.h | 24 - include/linux/sunrpc/debug.h | 4 include/linux/sunrpc/timer.h | 11 include/linux/sunrpc/xdr.h | 2 include/linux/sunrpc/xprt.h | 37 +- net/sunrpc/auth_unix.c | 7 net/sunrpc/clnt.c | 27 - net/sunrpc/pmap_clnt.c | 28 + net/sunrpc/sched.c | 32 +- net/sunrpc/sunrpc_syms.c | 2 net/sunrpc/sysctl.c | 28 + net/sunrpc/xdr.c | 2 net/sunrpc/xprt.c | 290 +++++++++---------- 42 files changed, 1908 insertions(+), 1368 deletions(-) diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/inode.c linux-2.6.4-23-unrace/fs/inode.c --- linux-2.6.4-rc2/fs/inode.c 2004-03-05 20:00:08.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/inode.c 2004-03-05 20:13:58.000000000 -0500 @@ -1178,6 +1178,8 @@ void inode_update_time(struct inode *ino struct timespec now; int sync_it = 0; + if (IS_NOCMTIME(inode)) + return; if (IS_RDONLY(inode)) return; diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/Kconfig linux-2.6.4-23-unrace/fs/Kconfig --- linux-2.6.4-rc2/fs/Kconfig 2004-03-05 19:44:55.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/Kconfig 2004-03-05 20:14:11.000000000 -0500 @@ -1302,15 +1302,18 @@ config NFS_V3 Say Y here if you want your NFS client to be able to speak the newer version 3 of the NFS protocol. - If unsure, say N. + If unsure, say Y. config NFS_V4 bool "Provide NFSv4 client support (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL + select RPCSEC_GSS_KRB5 help Say Y here if you want your NFS client to be able to speak the newer - version 4 of the NFS protocol. This feature is experimental, and - should only be used if you are interested in helping to test NFSv4. + version 4 of the NFS protocol. + + Note: Requires auxiliary userspace daemons which may be found on + http://www.citi.umich.edu/projects/nfsv4/ If unsure, say N. @@ -1419,28 +1422,24 @@ config SUNRPC tristate config SUNRPC_GSS - tristate "Provide RPCSEC_GSS authentication (EXPERIMENTAL)" + tristate + +config RPCSEC_GSS_KRB5 + tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL - default SUNRPC if NFS_V4=y + select SUNRPC_GSS + select CRYPTO + select CRYPTO_MD5 + select CRYPTO_DES help - Provides cryptographic authentication for NFS rpc requests. To - make this useful, you must also select at least one rpcsec_gss - mechanism. - Note: You should always select this option if you wish to use + Provides for secure RPC calls by means of a gss-api + mechanism based on Kerberos V5. This is required for NFSv4. -config RPCSEC_GSS_KRB5 - tristate "Kerberos V mechanism for RPCSEC_GSS (EXPERIMENTAL)" - depends on SUNRPC_GSS && CRYPTO_DES && CRYPTO_MD5 - default SUNRPC_GSS if NFS_V4=y - help - Provides a gss-api mechanism based on Kerberos V5 (this is - mandatory for RFC3010-compliant NFSv4 implementations). - Requires a userspace daemon; - see http://www.citi.umich.edu/projects/nfsv4/. + Note: Requires an auxiliary userspace daemon which may be found on + http://www.citi.umich.edu/projects/nfsv4/ - Note: If you select this option, please ensure that you also - enable the MD5 and DES crypto ciphers. + If unsure, say N. config SMB_FS tristate "SMB file system support (to mount Windows shares etc.)" diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/lockd/clntproc.c linux-2.6.4-23-unrace/fs/lockd/clntproc.c --- linux-2.6.4-rc2/fs/lockd/clntproc.c 2004-03-05 19:26:28.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/lockd/clntproc.c 2004-03-05 20:15:24.000000000 -0500 @@ -456,7 +456,7 @@ nlmclnt_lock(struct nlm_rqst *req, struc } if (status < 0) return status; - } while (resp->status == NLM_LCK_BLOCKED); + } while (resp->status == NLM_LCK_BLOCKED && req->a_args.block); if (resp->status == NLM_LCK_GRANTED) { fl->fl_u.nfs_fl.state = host->h_state; diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/lockd/host.c linux-2.6.4-23-unrace/fs/lockd/host.c --- linux-2.6.4-rc2/fs/lockd/host.c 2004-03-05 19:43:57.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/lockd/host.c 2004-03-05 20:15:03.000000000 -0500 @@ -188,14 +188,14 @@ nlm_bind_host(struct nlm_host *host) } } else { xprt = xprt_create_proto(host->h_proto, &host->h_addr, NULL); - if (xprt == NULL) + if (IS_ERR(xprt)) goto forgetit; xprt_set_timeout(&xprt->timeout, 5, nlmsvc_timeout); clnt = rpc_create_client(xprt, host->h_name, &nlm_program, host->h_version, host->h_authflavor); - if (clnt == NULL) { + if (IS_ERR(clnt)) { xprt_destroy(xprt); goto forgetit; } diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/lockd/mon.c linux-2.6.4-23-unrace/fs/lockd/mon.c --- linux-2.6.4-rc2/fs/lockd/mon.c 2004-03-05 19:50:38.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/lockd/mon.c 2004-03-05 20:15:03.000000000 -0500 @@ -36,10 +36,11 @@ nsm_mon_unmon(struct nlm_host *host, u32 int status; struct nsm_args args; - status = -EACCES; clnt = nsm_create(); - if (!clnt) + if (IS_ERR(clnt)) { + status = PTR_ERR(clnt); goto out; + } args.addr = host->h_addr.sin_addr.s_addr; args.proto= (host->h_proto<<1) | host->h_server; @@ -104,7 +105,7 @@ static struct rpc_clnt * nsm_create(void) { struct rpc_xprt *xprt; - struct rpc_clnt *clnt = NULL; + struct rpc_clnt *clnt; struct sockaddr_in sin; sin.sin_family = AF_INET; @@ -112,24 +113,23 @@ nsm_create(void) sin.sin_port = 0; xprt = xprt_create_proto(IPPROTO_UDP, &sin, NULL); - if (!xprt) - goto out; + if (IS_ERR(xprt)) + return (struct rpc_clnt *)xprt; clnt = rpc_create_client(xprt, "localhost", &nsm_program, SM_VERSION, RPC_AUTH_NULL); - if (!clnt) + if (IS_ERR(clnt)) goto out_destroy; clnt->cl_softrtry = 1; clnt->cl_chatty = 1; clnt->cl_oneshot = 1; xprt->resvport = 1; /* NSM requires a reserved port */ -out: return clnt; out_destroy: xprt_destroy(xprt); - goto out; + return clnt; } /* diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/lockd/svc4proc.c linux-2.6.4-23-unrace/fs/lockd/svc4proc.c --- linux-2.6.4-rc2/fs/lockd/svc4proc.c 2004-03-05 19:38:01.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/lockd/svc4proc.c 2004-03-05 20:15:32.000000000 -0500 @@ -453,6 +453,24 @@ nlm4svc_proc_sm_notify(struct svc_rqst * } /* + * client sent a GRANTED_RES, let's remove the associated block + */ +static int +nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, + void *resp) +{ + if (!nlmsvc_ops) + return rpc_success; + + dprintk("lockd: GRANTED_RES called\n"); + + nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); + return rpc_success; +} + + + +/* * This is the generic lockd callback for async RPC calls */ static u32 @@ -515,7 +533,6 @@ nlm4svc_callback_exit(struct rpc_task *t #define nlm4svc_proc_lock_res nlm4svc_proc_null #define nlm4svc_proc_cancel_res nlm4svc_proc_null #define nlm4svc_proc_unlock_res nlm4svc_proc_null -#define nlm4svc_proc_granted_res nlm4svc_proc_null struct nlm_void { int dummy; }; @@ -548,7 +565,7 @@ struct svc_procedure nlmsvc_procedures4 PROC(lock_res, lockres, norep, res, void, 1), PROC(cancel_res, cancelres, norep, res, void, 1), PROC(unlock_res, unlockres, norep, res, void, 1), - PROC(granted_res, grantedres, norep, res, void, 1), + PROC(granted_res, res, norep, res, void, 1), /* statd callback */ PROC(sm_notify, reboot, void, reboot, void, 1), PROC(none, void, void, void, void, 0), diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/lockd/svclock.c linux-2.6.4-23-unrace/fs/lockd/svclock.c --- linux-2.6.4-rc2/fs/lockd/svclock.c 2004-03-05 19:35:41.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/lockd/svclock.c 2004-03-05 20:15:43.000000000 -0500 @@ -64,7 +64,7 @@ nlmsvc_insert_block(struct nlm_block *bl if (when != NLM_NEVER) { if ((when += jiffies) == NLM_NEVER) when ++; - while ((b = *bp) && time_before_eq(b->b_when,when)) + while ((b = *bp) && time_before_eq(b->b_when,when) && b->b_when != NLM_NEVER) bp = &b->b_next; } else while ((b = *bp)) @@ -143,14 +143,15 @@ static inline int nlm_cookie_match(struc * Find a block with a given NLM cookie. */ static inline struct nlm_block * -nlmsvc_find_block(struct nlm_cookie *cookie) +nlmsvc_find_block(struct nlm_cookie *cookie, struct sockaddr_in *sin) { struct nlm_block *block; for (block = nlm_blocked; block; block = block->b_next) { dprintk("cookie: head of blocked queue %p, block %p\n", nlm_blocked, block); - if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie)) + if (nlm_cookie_match(&block->b_call.a_args.cookie,cookie) + && nlm_cmp_addr(sin, &block->b_host->h_addr)) break; } @@ -566,12 +567,16 @@ nlmsvc_grant_callback(struct rpc_task *t struct nlm_rqst *call = (struct nlm_rqst *) task->tk_calldata; struct nlm_block *block; unsigned long timeout; + struct sockaddr_in *peer_addr = RPC_PEERADDR(task->tk_client); dprintk("lockd: GRANT_MSG RPC callback\n"); - dprintk("callback: looking for cookie %x \n", - *(unsigned int *)(call->a_args.cookie.data)); - if (!(block = nlmsvc_find_block(&call->a_args.cookie))) { - dprintk("lockd: no block for cookie %x\n", *(u32 *)(call->a_args.cookie.data)); + dprintk("callback: looking for cookie %x, host (%08x)\n", + *(unsigned int *)(call->a_args.cookie.data), + ntohl(peer_addr->sin_addr.s_addr)); + if (!(block = nlmsvc_find_block(&call->a_args.cookie, peer_addr))) { + dprintk("lockd: no block for cookie %x, host (%08x)\n", + *(u32 *)(call->a_args.cookie.data), + ntohl(peer_addr->sin_addr.s_addr)); return; } @@ -600,18 +605,21 @@ nlmsvc_grant_callback(struct rpc_task *t * block. */ void -nlmsvc_grant_reply(struct nlm_cookie *cookie, u32 status) +nlmsvc_grant_reply(struct svc_rqst *rqstp, struct nlm_cookie *cookie, u32 status) { struct nlm_block *block; struct nlm_file *file; - if (!(block = nlmsvc_find_block(cookie))) + dprintk("grant_reply: looking for cookie %x, host (%08x), s=%d \n", + *(unsigned int *)(cookie->data), + ntohl(rqstp->rq_addr.sin_addr.s_addr), status); + if (!(block = nlmsvc_find_block(cookie, &rqstp->rq_addr))) return; file = block->b_file; file->f_count++; down(&file->f_sema); - if ((block = nlmsvc_find_block(cookie)) != NULL) { + if ((block = nlmsvc_find_block(cookie,&rqstp->rq_addr)) != NULL) { if (status == NLM_LCK_DENIED_GRACE_PERIOD) { /* Try again in a couple of seconds */ nlmsvc_insert_block(block, 10 * HZ); diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/lockd/svcproc.c linux-2.6.4-23-unrace/fs/lockd/svcproc.c --- linux-2.6.4-rc2/fs/lockd/svcproc.c 2004-03-05 19:48:52.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/lockd/svcproc.c 2004-03-05 20:15:32.000000000 -0500 @@ -479,6 +479,22 @@ nlmsvc_proc_sm_notify(struct svc_rqst *r } /* + * client sent a GRANTED_RES, let's remove the associated block + */ +static int +nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, + void *resp) +{ + if (!nlmsvc_ops) + return rpc_success; + + dprintk("lockd: GRANTED_RES called\n"); + + nlmsvc_grant_reply(rqstp, &argp->cookie, argp->status); + return rpc_success; +} + +/* * This is the generic lockd callback for async RPC calls */ static u32 @@ -541,7 +557,6 @@ nlmsvc_callback_exit(struct rpc_task *ta #define nlmsvc_proc_lock_res nlmsvc_proc_null #define nlmsvc_proc_cancel_res nlmsvc_proc_null #define nlmsvc_proc_unlock_res nlmsvc_proc_null -#define nlmsvc_proc_granted_res nlmsvc_proc_null struct nlm_void { int dummy; }; @@ -576,7 +591,7 @@ struct svc_procedure nlmsvc_procedures[ PROC(lock_res, lockres, norep, res, void, 1), PROC(cancel_res, cancelres, norep, res, void, 1), PROC(unlock_res, unlockres, norep, res, void, 1), - PROC(granted_res, grantedres, norep, res, void, 1), + PROC(granted_res, res, norep, res, void, 1), /* statd callback */ PROC(sm_notify, reboot, void, reboot, void, 1), PROC(none, void, void, void, void, 1), diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/dir.c linux-2.6.4-23-unrace/fs/nfs/dir.c --- linux-2.6.4-rc2/fs/nfs/dir.c 2004-03-05 19:41:27.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/dir.c 2004-03-05 20:14:31.000000000 -0500 @@ -139,11 +139,13 @@ int nfs_readdir_filler(nfs_readdir_descr struct file *file = desc->file; struct inode *inode = file->f_dentry->d_inode; struct rpc_cred *cred = nfs_file_cred(file); + unsigned long timestamp; int error; dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); again: + timestamp = jiffies; error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { @@ -157,18 +159,21 @@ int nfs_readdir_filler(nfs_readdir_descr goto error; } SetPageUptodate(page); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * throught inode->i_sem or some other mechanism. */ - if (page->index == 0) + if (page->index == 0) { invalidate_inode_pages(inode->i_mapping); + NFS_I(inode)->readdir_timestamp = timestamp; + } unlock_page(page); return 0; error: SetPageError(page); unlock_page(page); - invalidate_inode_pages(inode->i_mapping); + nfs_zap_caches(inode); desc->error = error; return -EIO; } @@ -381,6 +386,7 @@ int uncached_readdir(nfs_readdir_descrip page, NFS_SERVER(inode)->dtsize, desc->plus); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -459,7 +465,15 @@ static int nfs_readdir(struct file *filp } res = 0; break; - } else if (res < 0) + } + if (res == -ETOOSMALL && desc->plus) { + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + nfs_zap_caches(inode); + desc->plus = 0; + desc->entry->eof = 0; + continue; + } + if (res < 0) break; res = nfs_do_filldir(desc, dirent, filldir); @@ -481,14 +495,19 @@ static int nfs_readdir(struct file *filp * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. */ -static inline -int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; - if (nfs_revalidate_inode(NFS_SERVER(dir), dir)) + if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0 + || nfs_attribute_timeout(dir)) return 0; - return time_after(dentry->d_time, NFS_MTIME_UPDATE(dir)); + return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); +} + +static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) +{ + dentry->d_fsdata = (void *)verf; } /* @@ -528,9 +547,7 @@ int nfs_neg_need_reval(struct inode *dir /* Don't revalidate a negative dentry if we're creating a new file */ if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE)) return 0; - if (!nfs_check_verifier(dir, dentry)) - return 1; - return time_after(jiffies, dentry->d_time + NFS_ATTRTIMEO(dir)); + return !nfs_check_verifier(dir, dentry); } /* @@ -552,6 +569,7 @@ static int nfs_lookup_revalidate(struct int error; struct nfs_fh fhandle; struct nfs_fattr fattr; + unsigned long verifier; int isopen = 0; parent = dget_parent(dentry); @@ -574,6 +592,9 @@ static int nfs_lookup_revalidate(struct goto out_bad; } + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { if (nfs_lookup_verify_inode(inode, isopen)) @@ -581,6 +602,12 @@ static int nfs_lookup_revalidate(struct goto out_valid; } + /* + * Note: we're not holding inode->i_sem and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. + */ + verifier = nfs_save_change_attribute(dir); error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); if (!error) { if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) @@ -603,6 +630,7 @@ static int nfs_lookup_revalidate(struct out_valid_renew: nfs_renew_times(dentry); + nfs_set_verifier(dentry, verifier); out_valid: unlock_kernel(); dput(parent); @@ -638,6 +666,11 @@ static int nfs_dentry_delete(struct dent /* Unhash it, so that ->d_iput() would be called */ return 1; } + if (!(dentry->d_sb->s_flags & MS_ACTIVE)) { + /* Unhash it, so that ancestors of killed async unlink + * files will be cleaned up during umount */ + return 1; + } return 0; } @@ -693,6 +726,8 @@ static struct dentry *nfs_lookup(struct dentry->d_op = NFS_PROTO(dir)->dentry_ops; lock_kernel(); + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); /* If we're doing an exclusive create, optimize away the lookup */ if (nfs_is_exclusive_create(dir, nd)) @@ -715,6 +750,7 @@ no_entry: error = 0; d_add(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unlock: unlock_kernel(); out: @@ -768,7 +804,15 @@ static struct dentry *nfs_atomic_lookup( /* Open the file on the server */ lock_kernel(); - inode = nfs4_atomic_open(dir, dentry, nd); + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + + if (nd->intent.open.flags & O_CREAT) { + nfs_begin_data_update(dir); + inode = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); + } else + inode = nfs4_atomic_open(dir, dentry, nd); unlock_kernel(); if (IS_ERR(inode)) { error = PTR_ERR(inode); @@ -790,6 +834,7 @@ static struct dentry *nfs_atomic_lookup( no_entry: d_add(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out: BUG_ON(error > 0); return ERR_PTR(error); @@ -801,13 +846,16 @@ static int nfs_open_revalidate(struct de { struct dentry *parent = NULL; struct inode *inode = dentry->d_inode; + struct inode *dir; + unsigned long verifier; int openflags, ret = 0; /* NFS only supports OPEN for regular files */ if (inode && !S_ISREG(inode->i_mode)) goto no_open; parent = dget_parent(dentry); - if (!is_atomic_open(parent->d_inode, nd)) + dir = parent->d_inode; + if (!is_atomic_open(dir, nd)) goto no_open; openflags = nd->intent.open.flags; if (openflags & O_CREAT) { @@ -821,8 +869,16 @@ static int nfs_open_revalidate(struct de /* We can't create new files, or truncate existing ones here */ openflags &= ~(O_CREAT|O_TRUNC); + /* + * Note: we're not holding inode->i_sem and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. + */ lock_kernel(); - ret = nfs4_open_revalidate(parent->d_inode, dentry, openflags); + verifier = nfs_save_change_attribute(dir); + ret = nfs4_open_revalidate(dir, dentry, openflags); + if (!ret) + nfs_set_verifier(dentry, verifier); unlock_kernel(); out: dput(parent); @@ -869,15 +925,20 @@ int nfs_cached_lookup(struct inode *dir, struct nfs_server *server; struct nfs_entry entry; struct page *page; - unsigned long timestamp = NFS_MTIME_UPDATE(dir); + unsigned long timestamp; int res; if (!NFS_USE_READDIRPLUS(dir)) return -ENOENT; server = NFS_SERVER(dir); - if (server->flags & NFS_MOUNT_NOAC) + /* Don't use readdirplus unless the cache is stable */ + if ((server->flags & NFS_MOUNT_NOAC) != 0 + || nfs_caches_unstable(dir) + || nfs_attribute_timeout(dir)) return -ENOENT; - nfs_revalidate_inode(server, dir); + if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0) + return -ENOENT; + timestamp = NFS_I(dir)->readdir_timestamp; entry.fh = fh; entry.fattr = fattr; @@ -931,6 +992,7 @@ static int nfs_instantiate(struct dentry if (inode) { d_instantiate(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dentry->d_parent->d_inode)); error = 0; } return error; @@ -969,11 +1031,13 @@ static int nfs_create(struct inode *dir, * does not pass the create flags. */ lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags); + nfs_end_data_update(dir); if (!IS_ERR(inode)) { d_instantiate(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); error = 0; } else { error = PTR_ERR(inode); @@ -1004,9 +1068,10 @@ nfs_mknod(struct inode *dir, struct dent attr.ia_valid = ATTR_MODE; lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mknod(dir, &dentry->d_name, &attr, rdev, &fhandle, &fattr); + nfs_end_data_update(dir); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); else @@ -1041,9 +1106,10 @@ static int nfs_mkdir(struct inode *dir, */ d_drop(dentry); #endif - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mkdir(dir, &dentry->d_name, &attr, &fhandle, &fattr); + nfs_end_data_update(dir); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); else @@ -1060,10 +1126,12 @@ static int nfs_rmdir(struct inode *dir, dir->i_ino, dentry->d_name.name); lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); - if (!error) + /* Ensure the VFS deletes this inode */ + if (error == 0 && dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; + nfs_end_data_update(dir); unlock_kernel(); return error; @@ -1119,12 +1187,21 @@ dentry->d_parent->d_name.name, dentry->d goto out; } while(sdentry->d_inode != NULL); /* need negative lookup */ - nfs_zap_caches(dir); qsilly.name = silly; qsilly.len = strlen(silly); - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); + nfs_begin_data_update(dir); + if (dentry->d_inode) { + nfs_begin_data_update(dentry->d_inode); + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + nfs_end_data_update(dentry->d_inode); + } else + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + nfs_end_data_update(dir); if (!error) { nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); d_move(dentry, sdentry); error = nfs_async_unlink(dentry); /* If we return 0 we don't unlink */ @@ -1156,14 +1233,17 @@ static int nfs_safe_remove(struct dentry goto out; } - nfs_zap_caches(dir); - if (inode) - NFS_CACHEINV(inode); - error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); - if (error < 0) - goto out; - if (inode) - inode->i_nlink--; + nfs_begin_data_update(dir); + if (inode != NULL) { + nfs_begin_data_update(inode); + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + /* The VFS may want to delete this inode */ + if (error == 0) + inode->i_nlink--; + nfs_end_data_update(inode); + } else + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + nfs_end_data_update(dir); out: return error; } @@ -1198,9 +1278,10 @@ static int nfs_unlink(struct inode *dir, spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); - if (!error) + if (!error) { nfs_renew_times(dentry); - else if (need_rehash) + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } else if (need_rehash) d_rehash(dentry); unlock_kernel(); return error; @@ -1247,9 +1328,10 @@ dentry->d_parent->d_name.name, dentry->d qsymname.len = strlen(symname); lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, &attr, &sym_fh, &sym_attr); + nfs_end_data_update(dir); if (!error) { error = nfs_instantiate(dentry, &sym_fh, &sym_attr); } else { @@ -1281,9 +1363,12 @@ nfs_link(struct dentry *old_dentry, stru */ lock_kernel(); d_drop(dentry); - nfs_zap_caches(dir); - NFS_CACHEINV(inode); + + nfs_begin_data_update(dir); + nfs_begin_data_update(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); + nfs_end_data_update(inode); + nfs_end_data_update(dir); unlock_kernel(); return error; } @@ -1388,16 +1473,23 @@ go_ahead: if (new_inode) d_delete(new_dentry); - nfs_zap_caches(new_dir); - nfs_zap_caches(old_dir); + nfs_begin_data_update(old_dir); + nfs_begin_data_update(new_dir); + nfs_begin_data_update(old_inode); error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); + nfs_end_data_update(old_inode); + nfs_end_data_update(new_dir); + nfs_end_data_update(old_dir); out: if (rehash) d_rehash(rehash); - if (!error && !S_ISDIR(old_inode->i_mode)) - d_move(old_dentry, new_dentry); - nfs_renew_times(new_dentry); + if (!error) { + if (!S_ISDIR(old_inode->i_mode)) + d_move(old_dentry, new_dentry); + nfs_renew_times(new_dentry); + nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); + } /* new dentry created? */ if (dentry) @@ -1451,7 +1543,8 @@ nfs_permission(struct inode *inode, int cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (cache->cred == cred - && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) { + && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) + && !(NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) { if (!(res = cache->err)) { /* Is the mask a subset of an accepted mask? */ if ((cache->mask & mask) == mask) diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/direct.c linux-2.6.4-23-unrace/fs/nfs/direct.c --- linux-2.6.4-rc2/fs/nfs/direct.c 2004-03-05 19:55:58.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/direct.c 2004-03-05 20:16:30.000000000 -0500 @@ -128,6 +128,7 @@ nfs_direct_read_seg(struct inode *inode, .inode = inode, .args = { .fh = NFS_FH(inode), + .lockowner = current->files, }, .res = { .fattr = &rdata.fattr, @@ -258,6 +259,7 @@ nfs_direct_write_seg(struct inode *inode .inode = inode, .args = { .fh = NFS_FH(inode), + .lockowner = current->files, }, .res = { .fattr = &wdata.fattr, @@ -269,6 +271,7 @@ nfs_direct_write_seg(struct inode *inode if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) wdata.args.stable = NFS_FILE_SYNC; + nfs_begin_data_update(inode); retry: need_commit = 0; tot_bytes = 0; @@ -334,6 +337,8 @@ retry: VERF_SIZE) != 0) goto sync_retry; } + nfs_end_data_update(inode); + NFS_FLAGS(inode) |= NFS_INO_INVALID_DATA; return tot_bytes; diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/file.c linux-2.6.4-23-unrace/fs/nfs/file.c --- linux-2.6.4-rc2/fs/nfs/file.c 2004-03-05 19:48:17.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/file.c 2004-03-05 20:14:25.000000000 -0500 @@ -104,11 +104,16 @@ nfs_file_flush(struct file *file) dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; lock_kernel(); - status = nfs_wb_file(inode, file); + /* Ensure that data+attribute caches are up to date after close() */ + status = nfs_wb_all(inode); if (!status) { status = file->f_error; file->f_error = 0; + if (!status) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); } unlock_kernel(); return status; @@ -179,7 +184,7 @@ nfs_fsync(struct file *file, struct dent dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); lock_kernel(); - status = nfs_wb_file(inode, file); + status = nfs_wb_all(inode); if (!status) { status = file->f_error; file->f_error = 0; diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/inode.c linux-2.6.4-23-unrace/fs/nfs/inode.c --- linux-2.6.4-rc2/fs/nfs/inode.c 2004-03-05 19:52:50.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/inode.c 2004-03-05 20:15:58.000000000 -0500 @@ -47,14 +47,11 @@ * their needs. People that do NFS over a slow network, might for * instance want to reduce it to something closer to 1 for improved * interactive response. - * - * For the moment, though, we instead set it to RPC_MAXREQS, which - * is the maximum number of simultaneous RPC requests on the wire. */ -#define NFS_MAX_READAHEAD RPC_MAXREQS +#define NFS_MAX_READAHEAD (RPC_DEF_SLOT_TABLE - 1) -void nfs_zap_caches(struct inode *); static void nfs_invalidate_inode(struct inode *); +static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long); static struct inode *nfs_alloc_inode(struct super_block *sb); static void nfs_destroy_inode(struct inode *); @@ -118,7 +115,7 @@ nfs_write_inode(struct inode *inode, int { int flags = sync ? FLUSH_WAIT : 0; - nfs_commit_file(inode, NULL, 0, 0, flags); + nfs_commit_inode(inode, 0, 0, flags); } static void @@ -151,6 +148,7 @@ nfs_clear_inode(struct inode *inode) cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); + BUG_ON(atomic_read(&nfsi->data_updates) != 0); } void @@ -230,50 +228,23 @@ nfs_block_size(unsigned long bsize, unsi /* * Obtain the root inode of the file system. */ -static int -nfs_get_root(struct inode **rooti, rpc_authflavor_t authflavor, struct super_block *sb, struct nfs_fh *rootfh) +static struct inode * +nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) { struct nfs_server *server = NFS_SB(sb); - struct nfs_fattr fattr = { }; + struct inode *rooti; int error; - error = server->rpc_ops->getroot(server, rootfh, &fattr); - if (error == -EACCES && authflavor > RPC_AUTH_MAXFLAVOR) { - /* - * Some authentication types (gss/krb5, most notably) - * are such that root won't be able to present a - * credential for GETATTR (ie, getroot()). - * - * We still want the mount to succeed. - * - * So we fake the attr values and mark the inode as such. - * On the first succesful traversal, we fix everything. - * The auth type test isn't quite correct, but whatever. - */ - dfprintk(VFS, "NFS: faking root inode\n"); - - fattr.fileid = 1; - fattr.nlink = 2; /* minimum for a dir */ - fattr.type = NFDIR; - fattr.mode = S_IFDIR|S_IRUGO|S_IXUGO; - fattr.size = 4096; - fattr.du.nfs3.used = 1; - fattr.valid = NFS_ATTR_FATTR|NFS_ATTR_FATTR_V3; - } else if (error < 0) { + error = server->rpc_ops->getroot(server, rootfh, fsinfo); + if (error < 0) { printk(KERN_NOTICE "nfs_get_root: getattr error = %d\n", -error); - *rooti = NULL; /* superfluous ... but safe */ - return error; + return ERR_PTR(error); } - *rooti = nfs_fhget(sb, rootfh, &fattr); - if (error == -EACCES && authflavor > RPC_AUTH_MAXFLAVOR) { - if (*rooti) { - NFS_FLAGS(*rooti) |= NFS_INO_FAKE_ROOT; - NFS_CACHEINV((*rooti)); - error = 0; - } - } - return error; + rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); + if (!rooti) + return ERR_PTR(-ENOMEM); + return rooti; } /* @@ -283,7 +254,7 @@ static int nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) { struct nfs_server *server; - struct inode *root_inode = NULL; + struct inode *root_inode; struct nfs_fattr fattr; struct nfs_fsinfo fsinfo = { .fattr = &fattr, @@ -299,8 +270,9 @@ nfs_sb_init(struct super_block *sb, rpc_ sb->s_magic = NFS_SUPER_MAGIC; + root_inode = nfs_get_root(sb, &server->fh, &fsinfo); /* Did getting the root inode fail? */ - if (nfs_get_root(&root_inode, authflavor, sb, &server->fh) < 0) + if (IS_ERR(root_inode)) goto out_no_root; sb->s_root = d_alloc_root(root_inode); if (!sb->s_root) @@ -309,10 +281,6 @@ nfs_sb_init(struct super_block *sb, rpc_ sb->s_root->d_op = server->rpc_ops->dentry_ops; /* Get some general file system info */ - if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) { - printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n"); - goto out_no_root; - } if (server->namelen == 0 && server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) server->namelen = pathinfo.max_namelen; @@ -368,13 +336,11 @@ nfs_sb_init(struct super_block *sb, rpc_ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); return 0; /* Yargs. It didn't work out. */ -out_free_all: - if (root_inode) - iput(root_inode); - return -EINVAL; out_no_root: printk("nfs_read_super: get root inode failed\n"); - goto out_free_all; + if (!IS_ERR(root_inode)) + iput(root_inode); + return -EINVAL; } /* @@ -402,13 +368,13 @@ nfs_create_client(struct nfs_server *ser /* create transport and client */ xprt = xprt_create_proto(tcp ? IPPROTO_TCP : IPPROTO_UDP, &server->addr, &timeparms); - if (xprt == NULL) { + if (IS_ERR(xprt)) { printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); - goto out_fail; + return (struct rpc_clnt *)xprt; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, data->pseudoflavor); - if (clnt == NULL) { + if (IS_ERR(clnt)) { printk(KERN_WARNING "NFS: cannot create RPC client.\n"); goto out_fail; } @@ -421,9 +387,8 @@ nfs_create_client(struct nfs_server *ser return clnt; out_fail: - if (xprt) - xprt_destroy(xprt); - return NULL; + xprt_destroy(xprt); + return clnt; } /* @@ -627,13 +592,17 @@ static int nfs_show_options(struct seq_f void nfs_zap_caches(struct inode *inode) { + struct nfs_inode *nfsi = NFS_I(inode); + int mode = inode->i_mode; + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; - invalidate_remote_inode(inode); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - NFS_CACHEINV(inode); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + else + nfsi->flags |= NFS_INO_INVALID_ATTR; } /* @@ -673,9 +642,6 @@ nfs_find_actor(struct inode *inode, void return 0; if (is_bad_inode(inode)) return 0; - /* Force an attribute cache update if inode->i_count == 0 */ - if (!atomic_read(&inode->i_count)) - NFS_CACHEINV(inode); return 1; } @@ -729,7 +695,7 @@ nfs_fhget(struct super_block *sb, struct inode->i_ino = hash; /* We can't support update_atime(), since the server will reset it */ - inode->i_flags |= S_NOATIME; + inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. @@ -754,10 +720,6 @@ nfs_fhget(struct super_block *sb, struct inode->i_atime = fattr->atime; inode->i_mtime = fattr->mtime; inode->i_ctime = fattr->ctime; - nfsi->read_cache_ctime = fattr->ctime; - nfsi->read_cache_mtime = fattr->mtime; - nfsi->cache_mtime_jiffies = fattr->timestamp; - nfsi->read_cache_isize = fattr->size; if (fattr->valid & NFS_ATTR_FATTR_V4) nfsi->change_attr = fattr->change_attr; inode->i_size = nfs_size_to_loff_t(fattr->size); @@ -804,70 +766,50 @@ nfs_setattr(struct dentry *dentry, struc struct nfs_fattr fattr; int error; + if (attr->ia_valid & ATTR_SIZE) { + if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) + attr->ia_valid &= ~ATTR_SIZE; + } + /* Optimization: if the end result is no change, don't RPC */ attr->ia_valid &= NFS_VALID_ATTRS; if (attr->ia_valid == 0) return 0; lock_kernel(); - - /* - * Make sure the inode is up-to-date. - */ - error = nfs_revalidate_inode(NFS_SERVER(inode),inode); - if (error) { -#ifdef NFS_PARANOIA -printk("nfs_setattr: revalidate failed, error=%d\n", error); -#endif - goto out; + nfs_begin_data_update(inode); + /* Write all dirty data if we're changing file permissions or size */ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { + if (filemap_fdatawrite(inode->i_mapping) == 0) + filemap_fdatawait(inode->i_mapping); + nfs_wb_all(inode); } - - if (!S_ISREG(inode->i_mode)) { - attr->ia_valid &= ~ATTR_SIZE; - if (attr->ia_valid == 0) - goto out; - } else { - filemap_fdatawrite(inode->i_mapping); - error = nfs_wb_all(inode); - filemap_fdatawait(inode->i_mapping); - if (error) - goto out; - /* Optimize away unnecessary truncates */ - if ((attr->ia_valid & ATTR_SIZE) && i_size_read(inode) == attr->ia_size) - attr->ia_valid &= ~ATTR_SIZE; - } - if (!attr->ia_valid) - goto out; - error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); - if (error) - goto out; - /* - * If we changed the size or mtime, update the inode - * now to avoid invalidating the page cache. - */ - if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size != fattr.size) - printk("nfs_setattr: attr=%Ld, fattr=%Ld??\n", - (long long) attr->ia_size, (long long)fattr.size); - vmtruncate(inode, attr->ia_size); + if (error == 0) { + nfs_refresh_inode(inode, &fattr); + if ((attr->ia_valid & ATTR_MODE) != 0) { + int mode; + mode = inode->i_mode & ~S_IALLUGO; + mode |= attr->ia_mode & S_IALLUGO; + inode->i_mode = mode; + } + if ((attr->ia_valid & ATTR_UID) != 0) + inode->i_uid = attr->ia_uid; + if ((attr->ia_valid & ATTR_GID) != 0) + inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_SIZE) != 0) { + i_size_write(inode, attr->ia_size); + vmtruncate(inode, attr->ia_size); + } } - - /* - * If we changed the size or mtime, update the inode - * now to avoid invalidating the page cache. - */ - if (!(fattr.valid & NFS_ATTR_WCC)) { - struct nfs_inode *nfsi = NFS_I(inode); - fattr.pre_size = nfsi->read_cache_isize; - fattr.pre_mtime = nfsi->read_cache_mtime; - fattr.pre_ctime = nfsi->read_cache_ctime; - fattr.valid |= NFS_ATTR_WCC; - } - /* Force an attribute cache update */ - NFS_CACHEINV(inode); - error = nfs_refresh_inode(inode, &fattr); -out: + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { + struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; + if (*cred) { + put_rpccred(*cred); + *cred = NULL; + } + } + nfs_end_data_update(inode); unlock_kernel(); return error; } @@ -895,7 +837,19 @@ nfs_wait_on_inode(struct inode *inode, i int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; - int err = nfs_revalidate_inode(NFS_SERVER(inode), inode); + struct nfs_inode *nfsi = NFS_I(inode); + int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME; + int err; + + if (__IS_FLG(inode, MS_NOATIME)) + need_atime = 0; + else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + need_atime = 0; + /* We may force a getattr if the user cares about atime */ + if (need_atime) + err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + else + err = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (!err) generic_fillattr(inode, stat); return err; @@ -930,8 +884,10 @@ int nfs_open(struct inode *inode, struct auth = NFS_CLIENT(inode)->cl_auth; cred = rpcauth_lookupcred(auth, 0); filp->private_data = cred; - if (filp->f_mode & FMODE_WRITE) + if ((filp->f_mode & FMODE_WRITE) != 0) { nfs_set_mmcred(inode, cred); + nfs_begin_data_update(inode); + } return 0; } @@ -940,6 +896,8 @@ int nfs_release(struct inode *inode, str struct rpc_cred *cred; lock_kernel(); + if ((filp->f_mode & FMODE_WRITE) != 0) + nfs_end_data_update(inode); cred = nfs_file_cred(filp); if (cred) put_rpccred(cred); @@ -956,6 +914,9 @@ __nfs_revalidate_inode(struct nfs_server { int status = -ESTALE; struct nfs_fattr fattr; + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long verifier; + unsigned int flags; dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -965,23 +926,22 @@ __nfs_revalidate_inode(struct nfs_server goto out_nowait; if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode) goto out_nowait; - if (NFS_FAKE_ROOT(inode)) { - dfprintk(VFS, "NFS: not revalidating fake root\n"); - status = 0; - goto out_nowait; - } while (NFS_REVALIDATING(inode)) { status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); if (status < 0) goto out_nowait; - if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) { - status = NFS_STALE(inode) ? -ESTALE : 0; - goto out_nowait; - } + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC) + continue; + if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) + continue; + status = NFS_STALE(inode) ? -ESTALE : 0; + goto out_nowait; } NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; + /* Protect against RPC races by saving the change attribute */ + verifier = nfs_save_change_attribute(inode); status = NFS_PROTO(inode)->getattr(inode, &fattr); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", @@ -995,13 +955,36 @@ __nfs_revalidate_inode(struct nfs_server goto out; } - status = nfs_refresh_inode(inode, &fattr); + status = nfs_update_inode(inode, &fattr, verifier); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), status); goto out; } + flags = nfsi->flags; + /* + * We may need to keep the attributes marked as invalid if + * we raced with nfs_end_attr_update(). + */ + if (verifier == nfsi->cache_change_attribute) + nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + /* Do the page cache invalidation */ + if (flags & NFS_INO_INVALID_DATA) { + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(inode->i_mapping) == 0) + filemap_fdatawait(inode->i_mapping); + nfs_wb_all(inode); + } + nfsi->flags &= ~NFS_INO_INVALID_DATA; + invalidate_inode_pages2(inode->i_mapping); + memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + /* This ensures we revalidate dentries */ + nfsi->cache_change_attribute++; + } dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1009,41 +992,104 @@ __nfs_revalidate_inode(struct nfs_server NFS_FLAGS(inode) &= ~NFS_INO_STALE; out: NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; - wake_up(&NFS_I(inode)->nfs_i_wait); + wake_up(&nfsi->nfs_i_wait); out_nowait: unlock_kernel(); return status; } -/* - * nfs_fattr_obsolete - Test if attribute data is newer than cached data - * @inode: inode - * @fattr: attributes to test +/** + * nfs_begin_data_update + * @inode - pointer to inode + * Declare that a set of operations will update file data on the server + */ +void nfs_begin_data_update(struct inode *inode) +{ + atomic_inc(&NFS_I(inode)->data_updates); +} + +/** + * nfs_end_data_update + * @inode - pointer to inode + * Declare end of the operations that will update file data + */ +void nfs_end_data_update(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (atomic_dec_and_test(&nfsi->data_updates)) { + nfsi->cache_change_attribute ++; + /* Mark the attribute cache for revalidation */ + nfsi->flags |= NFS_INO_INVALID_ATTR; + /* Directories and symlinks: invalidate page cache too */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + nfsi->flags |= NFS_INO_INVALID_DATA; + } +} + +/** + * nfs_refresh_inode - verify consistency of the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes * - * Avoid stuffing the attribute cache with obsolete information. - * We always accept updates if the attribute cache timed out, or if - * fattr->ctime is newer than our cached value. - * If fattr->ctime matches the cached value, we still accept the update - * if it increases the file size. + * Verifies the attribute cache. If we have just changed the attributes, + * so that fattr carries weak cache consistency data, then it may + * also update the ctime/mtime/change_attribute. */ -static inline -int nfs_fattr_obsolete(struct inode *inode, struct nfs_fattr *fattr) +int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); - long cdif; + loff_t cur_size, new_isize; + int data_unstable; + + /* Are we in the process of updating data on the server? */ + data_unstable = nfs_caches_unstable(inode); - if (time_after(jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo)) - goto out_valid; - cdif = fattr->ctime.tv_sec - nfsi->read_cache_ctime.tv_sec; - if (cdif == 0) - cdif = fattr->ctime.tv_nsec - nfsi->read_cache_ctime.tv_nsec; - if (cdif > 0) - goto out_valid; - /* Ugh... */ - if (cdif == 0 && fattr->size > nfsi->read_cache_isize) - goto out_valid; - return -1; - out_valid: + if (fattr->valid & NFS_ATTR_FATTR_V4) { + if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 + && nfsi->change_attr == fattr->pre_change_attr) + nfsi->change_attr = fattr->change_attr; + if (!data_unstable && nfsi->change_attr != fattr->change_attr) + nfsi->flags |= NFS_INO_INVALID_ATTR; + } + + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + + /* Has the inode gone and changed behind our back? */ + if (nfsi->fileid != fattr->fileid + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + return -EIO; + + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); + + /* If we have atomic WCC data, we may update some attributes */ + if ((fattr->valid & NFS_ATTR_WCC) != 0) { + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + } + + /* Verify a few of the more important attributes */ + if (!data_unstable) { + if (!timespec_equal(&inode->i_mtime, &fattr->mtime) + || cur_size != new_isize) + nfsi->flags |= NFS_INO_INVALID_ATTR; + } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) + nfsi->flags |= NFS_INO_INVALID_ATTR; + + /* Have any file permissions changed? */ + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) + || inode->i_uid != fattr->uid + || inode->i_gid != fattr->gid) + nfsi->flags |= NFS_INO_INVALID_ATTR; + + if (!timespec_equal(&inode->i_atime, &fattr->atime)) + nfsi->flags |= NFS_INO_INVALID_ATIME; + + nfsi->read_cache_jiffies = fattr->timestamp; return 0; } @@ -1059,65 +1105,66 @@ int nfs_fattr_obsolete(struct inode *ino * * A very similar scenario holds for the dir cache. */ -int -__nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { struct nfs_inode *nfsi = NFS_I(inode); __u64 new_size; loff_t new_isize; - int invalid = 0; - int mtime_update = 0; + unsigned int invalid = 0; loff_t cur_isize; + int data_unstable; - dfprintk(VFS, "NFS: refresh_inode(%s/%ld ct=%d info=0x%x)\n", - inode->i_sb->s_id, inode->i_ino, + dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", + __FUNCTION__, inode->i_sb->s_id, inode->i_ino, atomic_read(&inode->i_count), fattr->valid); - /* First successful call after mount, fill real data. */ - if (NFS_FAKE_ROOT(inode)) { - dfprintk(VFS, "NFS: updating fake root\n"); - nfsi->fileid = fattr->fileid; - NFS_FLAGS(inode) &= ~NFS_INO_FAKE_ROOT; - } + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; if (nfsi->fileid != fattr->fileid) { - printk(KERN_ERR "nfs_refresh_inode: inode number mismatch\n" + printk(KERN_ERR "%s: inode number mismatch\n" "expected (%s/0x%Lx), got (%s/0x%Lx)\n", + __FUNCTION__, inode->i_sb->s_id, (long long)nfsi->fileid, inode->i_sb->s_id, (long long)fattr->fileid); goto out_err; } - /* Throw out obsolete READDIRPLUS attributes */ - if (time_before(fattr->timestamp, NFS_READTIME(inode))) - return 0; /* * Make sure the inode's type hasn't changed. */ if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; - new_size = fattr->size; - new_isize = nfs_size_to_loff_t(fattr->size); - - /* Avoid races */ - if (nfs_fattr_obsolete(inode, fattr)) - goto out_nochange; - /* * Update the read time so we don't revalidate too often. */ nfsi->read_cache_jiffies = fattr->timestamp; - /* - * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache. - * NOT inode->i_size!!! - */ - if (nfsi->read_cache_isize != new_size) { + /* Are we racing with known updates of the metadata on the server? */ + data_unstable = ! nfs_verify_change_attribute(inode, verifier); + + /* Check if the file size agrees */ + new_size = fattr->size; + new_isize = nfs_size_to_loff_t(fattr->size); + cur_isize = i_size_read(inode); + if (cur_isize != new_size) { #ifdef NFS_DEBUG_VERBOSE printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; + /* + * If we have pending writebacks, things can get + * messy. + */ + if (S_ISREG(inode->i_mode) && data_unstable) { + if (new_isize > cur_isize) { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + } else { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } } /* @@ -1125,12 +1172,13 @@ __nfs_refresh_inode(struct inode *inode, * can change this value in VFS without requiring a * cache revalidation. */ - if (!timespec_equal(&nfsi->read_cache_mtime, &fattr->mtime)) { + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); #ifdef NFS_DEBUG_VERBOSE printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; - mtime_update = 1; + if (!data_unstable) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } if ((fattr->valid & NFS_ATTR_FATTR_V4) @@ -1139,47 +1187,15 @@ __nfs_refresh_inode(struct inode *inode, printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; - } - - /* Check Weak Cache Consistency data. - * If size and mtime match the pre-operation values, we can - * assume that any attribute changes were caused by our NFS - * operation, so there's no need to invalidate the caches. - */ - if ((fattr->valid & NFS_ATTR_PRE_CHANGE) - && nfsi->change_attr == fattr->pre_change_attr) { - invalid = 0; - } - else if ((fattr->valid & NFS_ATTR_WCC) - && nfsi->read_cache_isize == fattr->pre_size - && timespec_equal(&nfsi->read_cache_mtime, &fattr->pre_mtime)) { - invalid = 0; - } - - /* - * If we have pending writebacks, things can get - * messy. - */ - cur_isize = i_size_read(inode); - if (nfs_have_writebacks(inode) && new_isize < cur_isize) - new_isize = cur_isize; - - nfsi->read_cache_ctime = fattr->ctime; - inode->i_ctime = fattr->ctime; - inode->i_atime = fattr->atime; - - if (mtime_update) { - if (invalid) - nfsi->cache_mtime_jiffies = fattr->timestamp; - nfsi->read_cache_mtime = fattr->mtime; - inode->i_mtime = fattr->mtime; + nfsi->change_attr = fattr->change_attr; + if (!data_unstable) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } - nfsi->read_cache_isize = new_size; - i_size_write(inode, new_isize); + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - if (inode->i_mode != fattr->mode || + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) { struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; @@ -1187,11 +1203,9 @@ __nfs_refresh_inode(struct inode *inode, put_rpccred(*cred); *cred = NULL; } + invalid |= NFS_INO_INVALID_ATTR; } - if (fattr->valid & NFS_ATTR_FATTR_V4) - nfsi->change_attr = fattr->change_attr; - inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; inode->i_uid = fattr->uid; @@ -1207,31 +1221,30 @@ __nfs_refresh_inode(struct inode *inode, inode->i_blocks = fattr->du.nfs2.blocks; inode->i_blksize = fattr->du.nfs2.blocksize; } - - /* Update attrtimeo value */ - if (invalid) { + + /* Update attrtimeo value if we're out of the unstable period */ + if (invalid & NFS_INO_INVALID_ATTR) { nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - invalidate_remote_inode(inode); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } + /* Don't invalidate the data if we were to blame */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) + || S_ISLNK(inode->i_mode))) + invalid &= ~NFS_INO_INVALID_DATA; + nfsi->flags |= invalid; return 0; - out_nochange: - if (!timespec_equal(&fattr->atime, &inode->i_atime)) - inode->i_atime = fattr->atime; - return 0; out_changed: /* * Big trouble! The inode has become a different object. */ #ifdef NFS_PARANOIA - printk(KERN_DEBUG "nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n", - inode->i_ino, inode->i_mode, fattr->mode); + printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", + __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); #endif /* * No need to worry about unhashing the dentry, as the @@ -1472,17 +1485,19 @@ static int nfs4_fill_super(struct super_ down_write(&clp->cl_sem); if (clp->cl_rpcclient == NULL) { xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (xprt == NULL) { + if (IS_ERR(xprt)) { up_write(&clp->cl_sem); printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + err = PTR_ERR(xprt); goto out_fail; } clnt = rpc_create_client(xprt, server->hostname, &nfs_program, server->rpc_ops->version, authflavour); - if (clnt == NULL) { + if (IS_ERR(clnt)) { up_write(&clp->cl_sem); printk(KERN_WARNING "NFS: cannot create RPC client.\n"); xprt_destroy(xprt); + err = PTR_ERR(clnt); goto out_fail; } clnt->cl_chatty = 1; @@ -1495,14 +1510,17 @@ static int nfs4_fill_super(struct super_ clear_bit(NFS4CLNT_OK, &clp->cl_state); list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); clnt = rpc_clone_client(clp->cl_rpcclient); - server->nfs4_state = clp; + if (!IS_ERR(clnt)) + server->nfs4_state = clp; up_write(&clp->cl_sem); clp = NULL; - if (clnt == NULL) { + if (IS_ERR(clnt)) { printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + err = PTR_ERR(clnt); goto out_remove_list; } + err = -ENOMEM; if (server->nfs4_state->cl_idmap == NULL) { printk(KERN_WARNING "NFS: failed to create idmapper.\n"); goto out_shutdown; @@ -1601,7 +1619,7 @@ static struct super_block *nfs4_get_sb(s if (data->version != NFS4_MOUNT_VERSION) { printk("nfs warning: mount version %s than kernel\n", - data->version < NFS_MOUNT_VERSION ? "older" : "newer"); + data->version < NFS4_MOUNT_VERSION ? "older" : "newer"); } p = nfs_copy_user_string(NULL, &data->hostname, 256); @@ -1718,6 +1736,7 @@ static void init_once(void * foo, kmem_c INIT_LIST_HEAD(&nfsi->dirty); INIT_LIST_HEAD(&nfsi->commit); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + atomic_set(&nfsi->data_updates, 0); nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/mount_clnt.c linux-2.6.4-23-unrace/fs/nfs/mount_clnt.c --- linux-2.6.4-rc2/fs/nfs/mount_clnt.c 2004-03-05 19:27:57.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/mount_clnt.c 2004-03-05 20:15:03.000000000 -0500 @@ -57,8 +57,9 @@ nfsroot_mount(struct sockaddr_in *addr, (unsigned)ntohl(addr->sin_addr.s_addr), path); sprintf(hostname, "%u.%u.%u.%u", NIPQUAD(addr->sin_addr.s_addr)); - if (!(mnt_clnt = mnt_create(hostname, addr, version, protocol))) - return -EACCES; + mnt_clnt = mnt_create(hostname, addr, version, protocol); + if (IS_ERR(mnt_clnt)) + return PTR_ERR(mnt_clnt); call = (version == NFS_MNT3_VERSION) ? MOUNTPROC3_MNT : MNTPROC_MNT; status = rpc_call(mnt_clnt, call, path, &result, 0); @@ -72,13 +73,14 @@ mnt_create(char *hostname, struct sockad struct rpc_xprt *xprt; struct rpc_clnt *clnt; - if (!(xprt = xprt_create_proto(protocol, srvaddr, NULL))) - return NULL; + xprt = xprt_create_proto(protocol, srvaddr, NULL); + if (IS_ERR(xprt)) + return (struct rpc_clnt *)xprt; clnt = rpc_create_client(xprt, hostname, &mnt_program, version, - RPC_AUTH_NULL); - if (!clnt) { + RPC_AUTH_UNIX); + if (IS_ERR(clnt)) { xprt_destroy(xprt); } else { clnt->cl_softrtry = 1; diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/nfs2xdr.c linux-2.6.4-23-unrace/fs/nfs/nfs2xdr.c --- linux-2.6.4-rc2/fs/nfs/nfs2xdr.c 2004-03-05 19:38:07.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/nfs2xdr.c 2004-03-05 20:15:10.000000000 -0500 @@ -36,33 +36,33 @@ extern int nfs_stat_to_errno(int stat) * Declare the space requirements for NFS arguments and replies as * number of 32bit-words */ -#define NFS_fhandle_sz 8 -#define NFS_sattr_sz 8 -#define NFS_filename_sz 1+(NFS2_MAXNAMLEN>>2) -#define NFS_path_sz 1+(NFS2_MAXPATHLEN>>2) -#define NFS_fattr_sz 17 -#define NFS_info_sz 5 -#define NFS_entry_sz NFS_filename_sz+3 - -#define NFS_diropargs_sz NFS_fhandle_sz+NFS_filename_sz -#define NFS_sattrargs_sz NFS_fhandle_sz+NFS_sattr_sz -#define NFS_readlinkargs_sz NFS_fhandle_sz -#define NFS_readargs_sz NFS_fhandle_sz+3 -#define NFS_writeargs_sz NFS_fhandle_sz+4 -#define NFS_createargs_sz NFS_diropargs_sz+NFS_sattr_sz -#define NFS_renameargs_sz NFS_diropargs_sz+NFS_diropargs_sz -#define NFS_linkargs_sz NFS_fhandle_sz+NFS_diropargs_sz -#define NFS_symlinkargs_sz NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz -#define NFS_readdirargs_sz NFS_fhandle_sz+2 - -#define NFS_attrstat_sz 1+NFS_fattr_sz -#define NFS_diropres_sz 1+NFS_fhandle_sz+NFS_fattr_sz -#define NFS_readlinkres_sz 1 -#define NFS_readres_sz 1+NFS_fattr_sz+1 -#define NFS_writeres_sz NFS_attrstat_sz -#define NFS_stat_sz 1 -#define NFS_readdirres_sz 1 -#define NFS_statfsres_sz 1+NFS_info_sz +#define NFS_fhandle_sz (8) +#define NFS_sattr_sz (8) +#define NFS_filename_sz (1+(NFS2_MAXNAMLEN>>2)) +#define NFS_path_sz (1+(NFS2_MAXPATHLEN>>2)) +#define NFS_fattr_sz (17) +#define NFS_info_sz (5) +#define NFS_entry_sz (NFS_filename_sz+3) + +#define NFS_diropargs_sz (NFS_fhandle_sz+NFS_filename_sz) +#define NFS_sattrargs_sz (NFS_fhandle_sz+NFS_sattr_sz) +#define NFS_readlinkargs_sz (NFS_fhandle_sz) +#define NFS_readargs_sz (NFS_fhandle_sz+3) +#define NFS_writeargs_sz (NFS_fhandle_sz+4) +#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz) +#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz) +#define NFS_linkargs_sz (NFS_fhandle_sz+NFS_diropargs_sz) +#define NFS_symlinkargs_sz (NFS_diropargs_sz+NFS_path_sz+NFS_sattr_sz) +#define NFS_readdirargs_sz (NFS_fhandle_sz+2) + +#define NFS_attrstat_sz (1+NFS_fattr_sz) +#define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz) +#define NFS_readlinkres_sz (1) +#define NFS_readres_sz (1+NFS_fattr_sz+1) +#define NFS_writeres_sz (NFS_attrstat_sz) +#define NFS_stat_sz (1) +#define NFS_readdirres_sz (1) +#define NFS_statfsres_sz (1+NFS_info_sz) /* * Common NFS XDR functions as inlines diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/nfs3proc.c linux-2.6.4-23-unrace/fs/nfs/nfs3proc.c --- linux-2.6.4-rc2/fs/nfs/nfs3proc.c 2004-03-05 19:41:34.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/nfs3proc.c 2004-03-05 20:16:30.000000000 -0500 @@ -68,20 +68,6 @@ nfs3_async_handle_jukebox(struct rpc_tas return 1; } -static void -nfs3_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) -{ - if (fattr->valid & NFS_ATTR_FATTR) { - if (!(fattr->valid & NFS_ATTR_WCC)) { - fattr->pre_size = NFS_CACHE_ISIZE(inode); - fattr->pre_mtime = NFS_CACHE_MTIME(inode); - fattr->pre_ctime = NFS_CACHE_CTIME(inode); - fattr->valid |= NFS_ATTR_WCC; - } - nfs_refresh_inode(inode, fattr); - } -} - static struct rpc_cred * nfs_cred(struct inode *inode, struct file *filp) { @@ -99,14 +85,18 @@ nfs_cred(struct inode *inode, struct fil */ static int nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fsinfo *info) { int status; - dprintk("NFS call getroot\n"); - fattr->valid = 0; - status = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0); - dprintk("NFS reply getroot\n"); + dprintk("%s: call fsinfo\n", __FUNCTION__); + info->fattr->valid = 0; + status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); + dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status); + if (!(info->fattr->valid & NFS_ATTR_FATTR)) { + status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0); + dprintk("%s: reply getattr %d\n", __FUNCTION__, status); + } return status; } @@ -280,7 +270,7 @@ nfs3_proc_write(struct nfs_write_data *w msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); if (status >= 0) - nfs3_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); dprintk("NFS reply write: %d\n", status); return status < 0? status : wdata->res.count; } @@ -303,7 +293,7 @@ nfs3_proc_commit(struct nfs_write_data * msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status >= 0) - nfs3_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); dprintk("NFS reply commit: %d\n", status); return status; } @@ -739,11 +729,10 @@ nfs3_read_done(struct rpc_task *task) } static void -nfs3_proc_read_setup(struct nfs_read_data *data, unsigned int count) +nfs3_proc_read_setup(struct nfs_read_data *data) { struct rpc_task *task = &data->task; struct inode *inode = data->inode; - struct nfs_page *req; int flags; struct rpc_message msg = { .rpc_proc = &nfs3_procedures[NFS3PROC_READ], @@ -751,47 +740,33 @@ nfs3_proc_read_setup(struct nfs_read_dat .rpc_resp = &data->res, .rpc_cred = data->cred, }; - - req = nfs_list_entry(data->pages.next); - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.pages = data->pagevec; - data->args.count = count; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; - + /* N.B. Do we need to test? Never called for swapfile inode */ flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); /* Finalize the task. */ rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags); - task->tk_calldata = data; - /* Release requests */ - task->tk_release = nfs_readdata_release; - - rpc_call_setup(&data->task, &msg, 0); + rpc_call_setup(task, &msg, 0); } static void nfs3_write_done(struct rpc_task *task) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data; if (nfs3_async_handle_jukebox(task)) return; + data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs3_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_writeback_done(task); } static void -nfs3_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how) +nfs3_proc_write_setup(struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; struct inode *inode = data->inode; - struct nfs_page *req; int stable; int flags; struct rpc_message msg = { @@ -808,44 +783,31 @@ nfs3_proc_write_setup(struct nfs_write_d stable = NFS_DATA_SYNC; } else stable = NFS_UNSTABLE; - - req = nfs_list_entry(data->pages.next); - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.count = count; data->args.stable = stable; - data->args.pages = data->pagevec; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.verf = &data->verf; /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags); - task->tk_calldata = data; - /* Release requests */ - task->tk_release = nfs_writedata_release; - - rpc_call_setup(&data->task, &msg, 0); + rpc_call_setup(task, &msg, 0); } static void nfs3_commit_done(struct rpc_task *task) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data; if (nfs3_async_handle_jukebox(task)) return; + data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs3_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_commit_done(task); } static void -nfs3_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how) +nfs3_proc_commit_setup(struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; struct inode *inode = data->inode; @@ -857,23 +819,12 @@ nfs3_proc_commit_setup(struct nfs_write_ .rpc_cred = data->cred, }; - data->args.fh = NFS_FH(data->inode); - data->args.offset = start; - data->args.count = len; - data->res.count = len; - data->res.fattr = &data->fattr; - data->res.verf = &data->verf; - /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags); - task->tk_calldata = data; - /* Release requests */ - task->tk_release = nfs_commit_release; - - rpc_call_setup(&data->task, &msg, 0); + rpc_call_setup(task, &msg, 0); } /* diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/nfs3xdr.c linux-2.6.4-23-unrace/fs/nfs/nfs3xdr.c --- linux-2.6.4-rc2/fs/nfs/nfs3xdr.c 2004-03-05 20:01:23.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/nfs3xdr.c 2004-03-05 20:15:50.000000000 -0500 @@ -33,51 +33,51 @@ extern int nfs_stat_to_errno(int); * Declare the space requirements for NFS arguments and replies as * number of 32bit-words */ -#define NFS3_fhandle_sz 1+16 -#define NFS3_fh_sz NFS3_fhandle_sz /* shorthand */ -#define NFS3_sattr_sz 15 -#define NFS3_filename_sz 1+(NFS3_MAXNAMLEN>>2) -#define NFS3_path_sz 1+(NFS3_MAXPATHLEN>>2) -#define NFS3_fattr_sz 21 -#define NFS3_wcc_attr_sz 6 -#define NFS3_pre_op_attr_sz 1+NFS3_wcc_attr_sz -#define NFS3_post_op_attr_sz 1+NFS3_fattr_sz -#define NFS3_wcc_data_sz NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz +#define NFS3_fhandle_sz (1+16) +#define NFS3_fh_sz (NFS3_fhandle_sz) /* shorthand */ +#define NFS3_sattr_sz (15) +#define NFS3_filename_sz (1+(NFS3_MAXNAMLEN>>2)) +#define NFS3_path_sz (1+(NFS3_MAXPATHLEN>>2)) +#define NFS3_fattr_sz (21) +#define NFS3_wcc_attr_sz (6) +#define NFS3_pre_op_attr_sz (1+NFS3_wcc_attr_sz) +#define NFS3_post_op_attr_sz (1+NFS3_fattr_sz) +#define NFS3_wcc_data_sz (NFS3_pre_op_attr_sz+NFS3_post_op_attr_sz) #define NFS3_fsstat_sz #define NFS3_fsinfo_sz #define NFS3_pathconf_sz -#define NFS3_entry_sz NFS3_filename_sz+3 +#define NFS3_entry_sz (NFS3_filename_sz+3) -#define NFS3_sattrargs_sz NFS3_fh_sz+NFS3_sattr_sz+3 -#define NFS3_diropargs_sz NFS3_fh_sz+NFS3_filename_sz -#define NFS3_accessargs_sz NFS3_fh_sz+1 -#define NFS3_readlinkargs_sz NFS3_fh_sz -#define NFS3_readargs_sz NFS3_fh_sz+3 -#define NFS3_writeargs_sz NFS3_fh_sz+5 -#define NFS3_createargs_sz NFS3_diropargs_sz+NFS3_sattr_sz -#define NFS3_mkdirargs_sz NFS3_diropargs_sz+NFS3_sattr_sz -#define NFS3_symlinkargs_sz NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz -#define NFS3_mknodargs_sz NFS3_diropargs_sz+2+NFS3_sattr_sz -#define NFS3_renameargs_sz NFS3_diropargs_sz+NFS3_diropargs_sz -#define NFS3_linkargs_sz NFS3_fh_sz+NFS3_diropargs_sz -#define NFS3_readdirargs_sz NFS3_fh_sz+2 -#define NFS3_commitargs_sz NFS3_fh_sz+3 - -#define NFS3_attrstat_sz 1+NFS3_fattr_sz -#define NFS3_wccstat_sz 1+NFS3_wcc_data_sz -#define NFS3_lookupres_sz 1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz) -#define NFS3_accessres_sz 1+NFS3_post_op_attr_sz+1 -#define NFS3_readlinkres_sz 1+NFS3_post_op_attr_sz -#define NFS3_readres_sz 1+NFS3_post_op_attr_sz+3 -#define NFS3_writeres_sz 1+NFS3_wcc_data_sz+4 -#define NFS3_createres_sz 1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz -#define NFS3_renameres_sz 1+(2 * NFS3_wcc_data_sz) -#define NFS3_linkres_sz 1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz -#define NFS3_readdirres_sz 1+NFS3_post_op_attr_sz+2 -#define NFS3_fsstatres_sz 1+NFS3_post_op_attr_sz+13 -#define NFS3_fsinfores_sz 1+NFS3_post_op_attr_sz+12 -#define NFS3_pathconfres_sz 1+NFS3_post_op_attr_sz+6 -#define NFS3_commitres_sz 1+NFS3_wcc_data_sz+2 +#define NFS3_sattrargs_sz (NFS3_fh_sz+NFS3_sattr_sz+3) +#define NFS3_diropargs_sz (NFS3_fh_sz+NFS3_filename_sz) +#define NFS3_accessargs_sz (NFS3_fh_sz+1) +#define NFS3_readlinkargs_sz (NFS3_fh_sz) +#define NFS3_readargs_sz (NFS3_fh_sz+3) +#define NFS3_writeargs_sz (NFS3_fh_sz+5) +#define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) +#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) +#define NFS3_symlinkargs_sz (NFS3_diropargs_sz+NFS3_path_sz+NFS3_sattr_sz) +#define NFS3_mknodargs_sz (NFS3_diropargs_sz+2+NFS3_sattr_sz) +#define NFS3_renameargs_sz (NFS3_diropargs_sz+NFS3_diropargs_sz) +#define NFS3_linkargs_sz (NFS3_fh_sz+NFS3_diropargs_sz) +#define NFS3_readdirargs_sz (NFS3_fh_sz+2) +#define NFS3_commitargs_sz (NFS3_fh_sz+3) + +#define NFS3_attrstat_sz (1+NFS3_fattr_sz) +#define NFS3_wccstat_sz (1+NFS3_wcc_data_sz) +#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz)) +#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1) +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz) +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3) +#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4) +#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) +#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz)) +#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz) +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2) +#define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13) +#define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12) +#define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6) +#define NFS3_commitres_sz (1+NFS3_wcc_data_sz+2) /* * Map file type to S_IFMT bits @@ -103,9 +103,7 @@ static struct { static inline u32 * xdr_encode_fhandle(u32 *p, struct nfs_fh *fh) { - *p++ = htonl(fh->size); - memcpy(p, fh->data, fh->size); - return p + XDR_QUADLEN(fh->size); + return xdr_encode_array(p, fh->data, fh->size); } static inline u32 * diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/nfs4proc.c linux-2.6.4-23-unrace/fs/nfs/nfs4proc.c --- linux-2.6.4-rc2/fs/nfs/nfs4proc.c 2004-03-05 19:52:04.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/nfs4proc.c 2004-03-05 20:16:30.000000000 -0500 @@ -54,6 +54,7 @@ #define GET_OP(cp,name) &cp->ops[cp->req_nops].u.name #define OPNUM(cp) cp->ops[cp->req_nops].opnum +static int nfs4_proc_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; @@ -505,6 +506,8 @@ nfs4_open_reclaim(struct nfs4_state_owne status = rpc_call_sync(server->client, &msg, 0); nfs4_increment_seqid(status, sp); + if (status == 0) + memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); /* Update the inode attributes */ nfs_refresh_inode(inode, &fattr); return status; @@ -689,12 +692,12 @@ nfs4_do_setattr(struct nfs_server *serve retry: fattr->valid = 0; - if (state) + if (sattr->ia_valid & ATTR_SIZE) nfs4_copy_stateid(&arg.stateid, state, 0); - else + else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); - status = rpc_call_sync(server->client, &msg, 0); + status = rpc_call_sync(server->client, &msg, 0); if (status) { status = nfs4_handle_error(server, status); if (!status) @@ -822,10 +825,11 @@ nfs4_open_revalidate(struct inode *dir, static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fsinfo *info) { struct nfs4_compound compound; struct nfs4_op ops[4]; + struct nfs_fattr * fattr = info->fattr; unsigned char * p; struct qstr q; int status; @@ -869,7 +873,9 @@ nfs4_proc_get_root(struct nfs_server *se break; } out: - return status; + if (status) + return status; + return nfs4_proc_fsinfo(server, fhandle, info); } static int @@ -1079,21 +1085,17 @@ nfs4_proc_read(struct nfs_read_data *rda if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - nfs4_copy_stateid(&rdata->args.stateid, state, rdata->lockowner); + rdata->args.state = state; msg.rpc_cred = state->owner->so_cred; } else { - memcpy(&rdata->args.stateid, &zero_stateid, sizeof(rdata->args.stateid)); + rdata->args.state = NULL; msg.rpc_cred = NFS_I(inode)->mm_cred; } fattr->valid = 0; status = rpc_call_sync(server->client, &msg, flags); - if (!status) { + if (!status) renew_lease(server, timestamp); - /* Check cache consistency */ - if (fattr->change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - } dprintk("NFS reply read: %d\n", status); return status; } @@ -1121,16 +1123,15 @@ nfs4_proc_write(struct nfs_write_data *w if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - nfs4_copy_stateid(&wdata->args.stateid, state, wdata->lockowner); + wdata->args.state = state; msg.rpc_cred = state->owner->so_cred; } else { - memcpy(&wdata->args.stateid, &zero_stateid, sizeof(wdata->args.stateid)); + wdata->args.state = NULL; msg.rpc_cred = NFS_I(inode)->mm_cred; } fattr->valid = 0; status = rpc_call_sync(server->client, &msg, rpcflags); - NFS_CACHEINV(inode); dprintk("NFS reply write: %d\n", status); return status; } @@ -1154,15 +1155,10 @@ nfs4_proc_commit(struct nfs_write_data * /* * Try first to use O_WRONLY, then O_RDWR stateid. */ - if (filp) { - struct nfs4_state *state; - state = (struct nfs4_state *)filp->private_data; - nfs4_copy_stateid(&cdata->args.stateid, state, cdata->lockowner); - msg.rpc_cred = state->owner->so_cred; - } else { - memcpy(&cdata->args.stateid, &zero_stateid, sizeof(cdata->args.stateid)); + if (filp) + msg.rpc_cred = ((struct nfs4_state *)filp->private_data)->owner->so_cred; + else msg.rpc_cred = NFS_I(inode)->mm_cred; - } fattr->valid = 0; status = rpc_call_sync(server->client, &msg, 0); @@ -1421,6 +1417,8 @@ nfs4_proc_readdir(struct dentry *dentry, nfs4_setup_putfh(&compound, NFS_FH(dir)); nfs4_setup_readdir(&compound, cookie, NFS_COOKIEVERF(dir), &page, count, dentry); status = nfs4_call_compound(&compound, cred, 0); + if (status == 0) + memcpy(NFS_COOKIEVERF(dir), ops[1].u.readdir.rd_resp_verifier.data, NFS4_VERIFIER_SIZE); unlock_kernel(); return status; @@ -1463,7 +1461,6 @@ nfs4_proc_statfs(struct nfs_server *serv struct nfs4_compound compound; struct nfs4_op ops[2]; - memset(fsstat, 0, sizeof(*fsstat)); nfs4_setup_compound(&compound, ops, server, "statfs"); nfs4_setup_putfh(&compound, fhandle); nfs4_setup_statfs(&compound, fsstat); @@ -1480,7 +1477,6 @@ nfs4_proc_fsinfo(struct nfs_server *serv .rpc_resp = fsinfo, }; - memset(fsinfo, 0, sizeof(*fsinfo)); return rpc_call_sync(server->client, &msg, 0); } @@ -1491,7 +1487,6 @@ nfs4_proc_pathconf(struct nfs_server *se struct nfs4_compound compound; struct nfs4_op ops[2]; - memset(pathconf, 0, sizeof(*pathconf)); nfs4_setup_compound(&compound, ops, server, "statfs"); nfs4_setup_putfh(&compound, fhandle); nfs4_setup_pathconf(&compound, pathconf); @@ -1499,43 +1494,23 @@ nfs4_proc_pathconf(struct nfs_server *se } static void -nfs4_restart_read(struct rpc_task *task) -{ - struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; - struct nfs_page *req; - - rpc_restart_call(task); - req = nfs_list_entry(data->pages.next); - if (req->wb_state) - nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); - else - memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); -} - -static void nfs4_read_done(struct rpc_task *task) { struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; struct inode *inode = data->inode; - struct nfs_fattr *fattr = data->res.fattr; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { - task->tk_action = nfs4_restart_read; + rpc_restart_call(task); return; } if (task->tk_status > 0) renew_lease(NFS_SERVER(inode), data->timestamp); - /* Check cache consistency */ - if (fattr->change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_ACCESS) - inode->i_atime = fattr->atime; /* Call back common NFS readpage processing */ nfs_readpage_result(task); } static void -nfs4_proc_read_setup(struct nfs_read_data *data, unsigned int count) +nfs4_proc_read_setup(struct nfs_read_data *data) { struct rpc_task *task = &data->task; struct rpc_message msg = { @@ -1545,85 +1520,36 @@ nfs4_proc_read_setup(struct nfs_read_dat .rpc_cred = data->cred, }; struct inode *inode = data->inode; - struct nfs_page *req = nfs_list_entry(data->pages.next); int flags; - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.pages = data->pagevec; - data->args.count = count; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; data->timestamp = jiffies; - data->lockowner = req->wb_lockowner; - if (req->wb_state) - nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); - else - memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); - /* N.B. Do we need to test? Never called for swapfile inode */ flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); /* Finalize the task. */ rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags); - task->tk_calldata = data; - /* Release requests */ - task->tk_release = nfs_readdata_release; - rpc_call_setup(task, &msg, 0); } static void -nfs4_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) -{ - /* Check cache consistency */ - if (fattr->pre_change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - NFS_CHANGE_ATTR(inode) = fattr->change_attr; - if (fattr->bitmap[1] & FATTR4_WORD1_SPACE_USED) - inode->i_blocks = (fattr->du.nfs3.used + 511) >> 9; - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_METADATA) - inode->i_ctime = fattr->ctime; - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_MODIFY) - inode->i_mtime = fattr->mtime; -} - -static void -nfs4_restart_write(struct rpc_task *task) -{ - struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; - struct nfs_page *req; - - rpc_restart_call(task); - req = nfs_list_entry(data->pages.next); - if (req->wb_state) - nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); - else - memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); -} - -static void nfs4_write_done(struct rpc_task *task) { struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { - task->tk_action = nfs4_restart_write; + rpc_restart_call(task); return; } if (task->tk_status >= 0) renew_lease(NFS_SERVER(inode), data->timestamp); - nfs4_write_refresh_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ nfs_writeback_done(task); } static void -nfs4_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how) +nfs4_proc_write_setup(struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; struct rpc_message msg = { @@ -1633,7 +1559,6 @@ nfs4_proc_write_setup(struct nfs_write_d .rpc_cred = data->cred, }; struct inode *inode = data->inode; - struct nfs_page *req = nfs_list_entry(data->pages.next); int stable; int flags; @@ -1644,33 +1569,15 @@ nfs4_proc_write_setup(struct nfs_write_d stable = NFS_DATA_SYNC; } else stable = NFS_UNSTABLE; - - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.count = count; data->args.stable = stable; - data->args.pages = data->pagevec; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.verf = &data->verf; - data->timestamp = jiffies; - data->lockowner = req->wb_lockowner; - if (req->wb_state) - nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); - else - memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); + data->timestamp = jiffies; /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags); - task->tk_calldata = data; - /* Release requests */ - task->tk_release = nfs_writedata_release; - rpc_call_setup(task, &msg, 0); } @@ -1681,16 +1588,15 @@ nfs4_commit_done(struct rpc_task *task) struct inode *inode = data->inode; if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { - task->tk_action = nfs4_restart_write; + rpc_restart_call(task); return; } - nfs4_write_refresh_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ nfs_commit_done(task); } static void -nfs4_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how) +nfs4_proc_commit_setup(struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; struct rpc_message msg = { @@ -1702,22 +1608,11 @@ nfs4_proc_commit_setup(struct nfs_write_ struct inode *inode = data->inode; int flags; - data->args.fh = NFS_FH(data->inode); - data->args.offset = start; - data->args.count = len; - data->res.count = len; - data->res.fattr = &data->fattr; - data->res.verf = &data->verf; - /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags); - task->tk_calldata = data; - /* Release requests */ - task->tk_release = nfs_commit_release; - rpc_call_setup(task, &msg, 0); } @@ -1807,6 +1702,7 @@ nfs4_proc_file_open(struct inode *inode, if (filp->f_mode & FMODE_WRITE) { lock_kernel(); nfs_set_mmcred(inode, state->owner->so_cred); + nfs_begin_data_update(inode); unlock_kernel(); } filp->private_data = state; @@ -1823,6 +1719,11 @@ nfs4_proc_file_release(struct inode *ino if (state) nfs4_close_state(state, filp->f_mode); + if (filp->f_mode & FMODE_WRITE) { + lock_kernel(); + nfs_end_data_update(inode); + unlock_kernel(); + } return 0; } diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/nfs4state.c linux-2.6.4-23-unrace/fs/nfs/nfs4state.c --- linux-2.6.4-rc2/fs/nfs/nfs4state.c 2004-03-05 19:25:27.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/nfs4state.c 2004-03-05 20:15:10.000000000 -0500 @@ -790,7 +790,7 @@ reclaimer(void *ptr) restart_loop: spin_lock(&clp->cl_lock); list_for_each_entry(sp, &clp->cl_state_owners, so_list) { - if (sp->so_generation - generation <= 0) + if (sp->so_generation - generation >= 0) continue; atomic_inc(&sp->so_count); spin_unlock(&clp->cl_lock); diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/nfs4xdr.c linux-2.6.4-23-unrace/fs/nfs/nfs4xdr.c --- linux-2.6.4-rc2/fs/nfs/nfs4xdr.c 2004-03-05 20:00:33.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/nfs4xdr.c 2004-03-05 20:16:30.000000000 -0500 @@ -69,84 +69,84 @@ static int nfs_stat_to_errno(int); /* lock,open owner id: * we currently use size 1 (u32) out of (NFS4_OPAQUE_LIMIT >> 2) */ -#define owner_id_maxsz 1 + 1 -#define compound_encode_hdr_maxsz 3 + (NFS4_MAXTAGLEN >> 2) -#define compound_decode_hdr_maxsz 2 + (NFS4_MAXTAGLEN >> 2) -#define op_encode_hdr_maxsz 1 -#define op_decode_hdr_maxsz 2 -#define encode_putfh_maxsz op_encode_hdr_maxsz + 1 + \ - (NFS4_FHSIZE >> 2) -#define decode_putfh_maxsz op_decode_hdr_maxsz -#define encode_putrootfh_maxsz op_encode_hdr_maxsz -#define decode_putrootfh_maxsz op_decode_hdr_maxsz -#define encode_getfh_maxsz op_encode_hdr_maxsz -#define decode_getfh_maxsz op_decode_hdr_maxsz + 1 + \ - (NFS4_FHSIZE >> 2) -#define encode_getattr_maxsz op_encode_hdr_maxsz + 3 -#define nfs4_fattr_bitmap_maxsz 26 + 2 * ((NFS4_MAXNAMLEN +1) >> 2) -#define decode_getattr_maxsz op_decode_hdr_maxsz + 3 + \ - nfs4_fattr_bitmap_maxsz -#define encode_savefh_maxsz op_encode_hdr_maxsz -#define decode_savefh_maxsz op_decode_hdr_maxsz -#define encode_restorefh_maxsz op_encode_hdr_maxsz -#define decode_restorefh_maxsz op_decode_hdr_maxsz -#define encode_read_getattr_maxsz op_encode_hdr_maxsz + 2 -#define decode_read_getattr_maxsz op_decode_hdr_maxsz + 8 -#define encode_pre_write_getattr_maxsz op_encode_hdr_maxsz + 2 -#define decode_pre_write_getattr_maxsz op_decode_hdr_maxsz + 5 -#define encode_post_write_getattr_maxsz op_encode_hdr_maxsz + 2 -#define decode_post_write_getattr_maxsz op_decode_hdr_maxsz + 13 -#define encode_fsinfo_maxsz op_encode_hdr_maxsz + 2 -#define decode_fsinfo_maxsz op_decode_hdr_maxsz + 11 -#define encode_renew_maxsz op_encode_hdr_maxsz + 3 -#define decode_renew_maxsz op_decode_hdr_maxsz +#define owner_id_maxsz (1 + 1) +#define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) +#define compound_decode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) +#define op_encode_hdr_maxsz (1) +#define op_decode_hdr_maxsz (2) +#define encode_putfh_maxsz (op_encode_hdr_maxsz + 1 + \ + (NFS4_FHSIZE >> 2)) +#define decode_putfh_maxsz (op_decode_hdr_maxsz) +#define encode_putrootfh_maxsz (op_encode_hdr_maxsz) +#define decode_putrootfh_maxsz (op_decode_hdr_maxsz) +#define encode_getfh_maxsz (op_encode_hdr_maxsz) +#define decode_getfh_maxsz (op_decode_hdr_maxsz + 1 + \ + (NFS4_FHSIZE >> 2)) +#define encode_getattr_maxsz (op_encode_hdr_maxsz + 3) +#define nfs4_fattr_bitmap_maxsz (26 + 2 * ((NFS4_MAXNAMLEN +1) >> 2)) +#define decode_getattr_maxsz (op_decode_hdr_maxsz + 3 + \ + nfs4_fattr_bitmap_maxsz) +#define encode_savefh_maxsz (op_encode_hdr_maxsz) +#define decode_savefh_maxsz (op_decode_hdr_maxsz) +#define encode_restorefh_maxsz (op_encode_hdr_maxsz) +#define decode_restorefh_maxsz (op_decode_hdr_maxsz) +#define encode_read_getattr_maxsz (op_encode_hdr_maxsz + 2) +#define decode_read_getattr_maxsz (op_decode_hdr_maxsz + 8) +#define encode_pre_write_getattr_maxsz (op_encode_hdr_maxsz + 2) +#define decode_pre_write_getattr_maxsz (op_decode_hdr_maxsz + 5) +#define encode_post_write_getattr_maxsz (op_encode_hdr_maxsz + 2) +#define decode_post_write_getattr_maxsz (op_decode_hdr_maxsz + 13) +#define encode_fsinfo_maxsz (op_encode_hdr_maxsz + 2) +#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) +#define encode_renew_maxsz (op_encode_hdr_maxsz + 3) +#define decode_renew_maxsz (op_decode_hdr_maxsz) #define encode_setclientid_maxsz \ - op_encode_hdr_maxsz + \ + (op_encode_hdr_maxsz + \ 4 /*server->ip_addr*/ + \ 1 /*Netid*/ + \ 6 /*uaddr*/ + \ - 6 + (NFS4_VERIFIER_SIZE >> 2) + 6 + (NFS4_VERIFIER_SIZE >> 2)) #define decode_setclientid_maxsz \ - op_decode_hdr_maxsz + \ + (op_decode_hdr_maxsz + \ 2 + \ - 1024 /* large value for CLID_INUSE */ + 1024) /* large value for CLID_INUSE */ #define encode_setclientid_confirm_maxsz \ - op_encode_hdr_maxsz + \ - 3 + (NFS4_VERIFIER_SIZE >> 2) + (op_encode_hdr_maxsz + \ + 3 + (NFS4_VERIFIER_SIZE >> 2)) #define decode_setclientid_confirm_maxsz \ - op_decode_hdr_maxsz + (op_decode_hdr_maxsz) -#define NFS4_enc_compound_sz 1024 /* XXX: large enough? */ -#define NFS4_dec_compound_sz 1024 /* XXX: large enough? */ -#define NFS4_enc_read_sz compound_encode_hdr_maxsz + \ +#define NFS4_enc_compound_sz (1024) /* XXX: large enough? */ +#define NFS4_dec_compound_sz (1024) /* XXX: large enough? */ +#define NFS4_enc_read_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_read_getattr_maxsz + \ - op_encode_hdr_maxsz + 7 -#define NFS4_dec_read_sz compound_decode_hdr_maxsz + \ + op_encode_hdr_maxsz + 7) +#define NFS4_dec_read_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_read_getattr_maxsz + \ - op_decode_hdr_maxsz + 2 -#define NFS4_enc_write_sz compound_encode_hdr_maxsz + \ + op_decode_hdr_maxsz + 2) +#define NFS4_enc_write_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_pre_write_getattr_maxsz + \ op_encode_hdr_maxsz + 8 + \ - encode_post_write_getattr_maxsz -#define NFS4_dec_write_sz compound_decode_hdr_maxsz + \ + encode_post_write_getattr_maxsz) +#define NFS4_dec_write_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_pre_write_getattr_maxsz + \ op_decode_hdr_maxsz + 4 + \ - decode_post_write_getattr_maxsz -#define NFS4_enc_commit_sz compound_encode_hdr_maxsz + \ + decode_post_write_getattr_maxsz) +#define NFS4_enc_commit_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_pre_write_getattr_maxsz + \ op_encode_hdr_maxsz + 3 + \ - encode_post_write_getattr_maxsz -#define NFS4_dec_commit_sz compound_decode_hdr_maxsz + \ + encode_post_write_getattr_maxsz) +#define NFS4_dec_commit_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_pre_write_getattr_maxsz + \ op_decode_hdr_maxsz + 2 + \ - decode_post_write_getattr_maxsz -#define NFS4_enc_open_sz compound_encode_hdr_maxsz + \ + decode_post_write_getattr_maxsz) +#define NFS4_enc_open_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_savefh_maxsz + \ op_encode_hdr_maxsz + \ @@ -154,107 +154,107 @@ static int nfs_stat_to_errno(int); encode_getattr_maxsz + \ encode_getfh_maxsz + \ encode_restorefh_maxsz + \ - encode_getattr_maxsz -#define NFS4_dec_open_sz compound_decode_hdr_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_savefh_maxsz + \ op_decode_hdr_maxsz + 4 + 5 + 2 + 3 + \ decode_getattr_maxsz + \ decode_getfh_maxsz + \ decode_restorefh_maxsz + \ - decode_getattr_maxsz + decode_getattr_maxsz) #define NFS4_enc_open_confirm_sz \ - compound_encode_hdr_maxsz + \ + (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - op_encode_hdr_maxsz + 5 -#define NFS4_dec_open_confirm_sz compound_decode_hdr_maxsz + \ + op_encode_hdr_maxsz + 5) +#define NFS4_dec_open_confirm_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 4 -#define NFS4_enc_open_reclaim_sz compound_encode_hdr_maxsz + \ + op_decode_hdr_maxsz + 4) +#define NFS4_enc_open_reclaim_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + \ 11 + \ - encode_getattr_maxsz -#define NFS4_dec_open_reclaim_sz compound_decode_hdr_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_open_reclaim_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ op_decode_hdr_maxsz + \ 4 + 5 + 2 + 3 + \ - decode_getattr_maxsz + decode_getattr_maxsz) #define NFS4_enc_open_downgrade_sz \ - compound_encode_hdr_maxsz + \ + (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - op_encode_hdr_maxsz + 7 + op_encode_hdr_maxsz + 7) #define NFS4_dec_open_downgrade_sz \ - compound_decode_hdr_maxsz + \ + (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 4 -#define NFS4_enc_close_sz compound_encode_hdr_maxsz + \ + op_decode_hdr_maxsz + 4) +#define NFS4_enc_close_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - op_encode_hdr_maxsz + 5 -#define NFS4_dec_close_sz compound_decode_hdr_maxsz + \ + op_encode_hdr_maxsz + 5) +#define NFS4_dec_close_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 4 -#define NFS4_enc_setattr_sz compound_encode_hdr_maxsz + \ + op_decode_hdr_maxsz + 4) +#define NFS4_enc_setattr_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ op_encode_hdr_maxsz + 4 + \ nfs4_fattr_bitmap_maxsz + \ - encode_getattr_maxsz -#define NFS4_dec_setattr_sz compound_decode_hdr_maxsz + \ + encode_getattr_maxsz) +#define NFS4_dec_setattr_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - op_decode_hdr_maxsz + 3 -#define NFS4_enc_fsinfo_sz compound_encode_hdr_maxsz + \ + op_decode_hdr_maxsz + 3) +#define NFS4_enc_fsinfo_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ - encode_fsinfo_maxsz -#define NFS4_dec_fsinfo_sz compound_decode_hdr_maxsz + \ + encode_fsinfo_maxsz) +#define NFS4_dec_fsinfo_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ - decode_fsinfo_maxsz -#define NFS4_enc_renew_sz compound_encode_hdr_maxsz + \ - encode_renew_maxsz -#define NFS4_dec_renew_sz compound_decode_hdr_maxsz + \ - decode_renew_maxsz -#define NFS4_enc_setclientid_sz compound_encode_hdr_maxsz + \ - encode_setclientid_maxsz -#define NFS4_dec_setclientid_sz compound_decode_hdr_maxsz + \ - decode_setclientid_maxsz + decode_fsinfo_maxsz) +#define NFS4_enc_renew_sz (compound_encode_hdr_maxsz + \ + encode_renew_maxsz) +#define NFS4_dec_renew_sz (compound_decode_hdr_maxsz + \ + decode_renew_maxsz) +#define NFS4_enc_setclientid_sz (compound_encode_hdr_maxsz + \ + encode_setclientid_maxsz) +#define NFS4_dec_setclientid_sz (compound_decode_hdr_maxsz + \ + decode_setclientid_maxsz) #define NFS4_enc_setclientid_confirm_sz \ - compound_encode_hdr_maxsz + \ + (compound_encode_hdr_maxsz + \ encode_setclientid_confirm_maxsz + \ encode_putrootfh_maxsz + \ - encode_fsinfo_maxsz + encode_fsinfo_maxsz) #define NFS4_dec_setclientid_confirm_sz \ - compound_decode_hdr_maxsz + \ + (compound_decode_hdr_maxsz + \ decode_setclientid_confirm_maxsz + \ decode_putrootfh_maxsz + \ - decode_fsinfo_maxsz -#define NFS4_enc_lock_sz compound_encode_hdr_maxsz + \ + decode_fsinfo_maxsz) +#define NFS4_enc_lock_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz + \ op_encode_hdr_maxsz + \ 1 + 1 + 2 + 2 + \ 1 + 4 + 1 + 2 + \ - owner_id_maxsz -#define NFS4_dec_lock_sz compound_decode_hdr_maxsz + \ + owner_id_maxsz) +#define NFS4_dec_lock_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz + \ op_decode_hdr_maxsz + \ 2 + 2 + 1 + 2 + \ - owner_id_maxsz -#define NFS4_enc_lockt_sz compound_encode_hdr_maxsz + \ + owner_id_maxsz) +#define NFS4_enc_lockt_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz + \ op_encode_hdr_maxsz + \ 1 + 2 + 2 + 2 + \ - owner_id_maxsz -#define NFS4_dec_lockt_sz NFS4_dec_lock_sz -#define NFS4_enc_locku_sz compound_encode_hdr_maxsz + \ + owner_id_maxsz) +#define NFS4_dec_lockt_sz (NFS4_dec_lock_sz) +#define NFS4_enc_locku_sz (compound_encode_hdr_maxsz + \ encode_putfh_maxsz + \ encode_getattr_maxsz + \ op_encode_hdr_maxsz + \ - 1 + 1 + 4 + 2 + 2 -#define NFS4_dec_locku_sz compound_decode_hdr_maxsz + \ + 1 + 1 + 4 + 2 + 2) +#define NFS4_dec_locku_sz (compound_decode_hdr_maxsz + \ decode_putfh_maxsz + \ decode_getattr_maxsz + \ - op_decode_hdr_maxsz + 4 + op_decode_hdr_maxsz + 4) @@ -324,7 +324,7 @@ encode_compound_hdr(struct xdr_stream *x dprintk("encode_compound: tag=%.*s\n", (int)hdr->taglen, hdr->tag); BUG_ON(hdr->taglen > NFS4_MAXTAGLEN); - RESERVE_SPACE(12+XDR_QUADLEN(hdr->taglen)); + RESERVE_SPACE(12+(XDR_QUADLEN(hdr->taglen)<<2)); WRITE32(hdr->taglen); WRITEMEM(hdr->tag, hdr->taglen); WRITE32(NFS4_MINOR_VERSION); @@ -868,14 +868,32 @@ encode_putrootfh(struct xdr_stream *xdr) return 0; } +static void +encode_stateid(struct xdr_stream *xdr, struct nfs4_state *state, fl_owner_t lockowner) +{ + extern nfs4_stateid zero_stateid; + nfs4_stateid stateid; + uint32_t *p; + + RESERVE_SPACE(16); + if (state != NULL) { + nfs4_copy_stateid(&stateid, state, lockowner); + WRITEMEM(stateid.data, sizeof(stateid.data)); + } else + WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); +} + static int encode_read(struct xdr_stream *xdr, struct nfs_readargs *args) { uint32_t *p; - RESERVE_SPACE(32); + RESERVE_SPACE(4); WRITE32(OP_READ); - WRITEMEM(args->stateid.data, sizeof(args->stateid.data)); + + encode_stateid(xdr, args->state, args->lockowner); + + RESERVE_SPACE(12); WRITE64(args->offset); WRITE32(args->count); @@ -1057,9 +1075,12 @@ encode_write(struct xdr_stream *xdr, str { uint32_t *p; - RESERVE_SPACE(36); + RESERVE_SPACE(4); WRITE32(OP_WRITE); - WRITEMEM(args->stateid.data, sizeof(args->stateid.data)); + + encode_stateid(xdr, args->state, args->lockowner); + + RESERVE_SPACE(16); WRITE64(args->offset); WRITE32(args->stable); WRITE32(args->count); @@ -3165,6 +3186,10 @@ static struct { { NFS4ERR_SYMLINK, ELOOP }, { NFS4ERR_OP_ILLEGAL, EOPNOTSUPP }, { NFS4ERR_DEADLOCK, EDEADLK }, + { NFS4ERR_WRONGSEC, EPERM }, /* FIXME: this needs + * to be handled by a + * middle-layer. + */ { -1, EIO } }; diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/pagelist.c linux-2.6.4-23-unrace/fs/nfs/pagelist.c --- linux-2.6.4-rc2/fs/nfs/pagelist.c 2004-03-05 19:56:58.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/pagelist.c 2004-03-05 20:16:37.000000000 -0500 @@ -32,7 +32,7 @@ static inline struct nfs_page * nfs_page_alloc(void) { struct nfs_page *p; - p = kmem_cache_alloc(nfs_page_cachep, SLAB_NOFS); + p = kmem_cache_alloc(nfs_page_cachep, SLAB_KERNEL); if (p) { memset(p, 0, sizeof(*p)); INIT_LIST_HEAD(&p->wb_list); @@ -88,6 +88,7 @@ nfs_create_request(struct file *file, st * long write-back delay. This will be adjusted in * update_nfs_request below if the region is not locked. */ req->wb_page = page; + atomic_set(&req->wb_complete, 0); req->wb_index = page->index; page_cache_get(page); req->wb_offset = offset; @@ -246,7 +247,6 @@ nfs_coalesce_requests(struct list_head * * nfs_scan_list - Scan a list for matching requests * @head: One of the NFS inode request lists * @dst: Destination list - * @file: if set, ensure we match requests from this file * @idx_start: lower bound of page->index to scan * @npages: idx_start + npages sets the upper bound to scan. * @@ -258,7 +258,6 @@ nfs_coalesce_requests(struct list_head * */ int nfs_scan_list(struct list_head *head, struct list_head *dst, - struct file *file, unsigned long idx_start, unsigned int npages) { struct list_head *pos, *tmp; @@ -276,9 +275,6 @@ nfs_scan_list(struct list_head *head, st req = nfs_list_entry(pos); - if (file && req->wb_file != file) - continue; - if (req->wb_index < idx_start) continue; if (req->wb_index > idx_end) diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/proc.c linux-2.6.4-23-unrace/fs/nfs/proc.c --- linux-2.6.4-rc2/fs/nfs/proc.c 2004-03-05 19:57:31.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/proc.c 2004-03-05 20:16:30.000000000 -0500 @@ -49,18 +49,6 @@ extern struct rpc_procinfo nfs_procedures[]; -static void -nfs_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) -{ - if (!(fattr->valid & NFS_ATTR_WCC)) { - fattr->pre_size = NFS_CACHE_ISIZE(inode); - fattr->pre_mtime = NFS_CACHE_MTIME(inode); - fattr->pre_ctime = NFS_CACHE_CTIME(inode); - fattr->valid |= NFS_ATTR_WCC; - } - nfs_refresh_inode(inode, fattr); -} - static struct rpc_cred * nfs_cred(struct inode *inode, struct file *filp) { @@ -78,15 +66,33 @@ nfs_cred(struct inode *inode, struct fil */ static int nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, - struct nfs_fattr *fattr) + struct nfs_fsinfo *info) { - int status; + struct nfs_fattr *fattr = info->fattr; + struct nfs2_fsstat fsinfo; + int status; - dprintk("NFS call getroot\n"); + dprintk("%s: call getattr\n", __FUNCTION__); fattr->valid = 0; - status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0); - dprintk("NFS reply getroot\n"); - return status; + status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); + dprintk("%s: reply getattr %d\n", __FUNCTION__, status); + if (status) + return status; + dprintk("%s: call statfs\n", __FUNCTION__); + status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0); + dprintk("%s: reply statfs %d\n", __FUNCTION__, status); + if (status) + return status; + info->rtmax = NFS_MAXDATA; + info->rtpref = fsinfo.tsize; + info->rtmult = fsinfo.bsize; + info->wtmax = NFS_MAXDATA; + info->wtpref = fsinfo.tsize; + info->wtmult = fsinfo.bsize; + info->dtpref = fsinfo.tsize; + info->maxfilesize = 0x7FFFFFFF; + info->lease_time = 0; + return 0; } /* @@ -180,8 +186,14 @@ nfs_proc_read(struct nfs_read_data *rdat msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); - if (status >= 0) + if (status >= 0) { nfs_refresh_inode(inode, fattr); + /* Emulate the eof flag, which isn't normally needed in NFSv2 + * as it is guaranteed to always return the file attributes + */ + if (rdata->args.offset + rdata->args.count >= fattr->size) + rdata->res.eof = 1; + } dprintk("NFS reply read: %d\n", status); return status; } @@ -205,7 +217,7 @@ nfs_proc_write(struct nfs_write_data *wd msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); if (status >= 0) { - nfs_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); wdata->res.count = wdata->args.count; wdata->verf.committed = NFS_FILE_SYNC; } @@ -331,10 +343,8 @@ nfs_proc_unlink_done(struct dentry *dir, { struct rpc_message *msg = &task->tk_msg; - if (msg->rpc_argp) { - NFS_CACHEINV(dir->d_inode); + if (msg->rpc_argp) kfree(msg->rpc_argp); - } return 0; } @@ -537,17 +547,22 @@ nfs_read_done(struct rpc_task *task) { struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; - if (task->tk_status >= 0) + if (task->tk_status >= 0) { nfs_refresh_inode(data->inode, data->res.fattr); + /* Emulate the eof flag, which isn't normally needed in NFSv2 + * as it is guaranteed to always return the file attributes + */ + if (data->args.offset + data->args.count >= data->res.fattr->size) + data->res.eof = 1; + } nfs_readpage_result(task); } static void -nfs_proc_read_setup(struct nfs_read_data *data, unsigned int count) +nfs_proc_read_setup(struct nfs_read_data *data) { struct rpc_task *task = &data->task; struct inode *inode = data->inode; - struct nfs_page *req; int flags; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_READ], @@ -555,27 +570,13 @@ nfs_proc_read_setup(struct nfs_read_data .rpc_resp = &data->res, .rpc_cred = data->cred, }; - - req = nfs_list_entry(data->pages.next); - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.pages = data->pagevec; - data->args.count = count; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.eof = 0; - + /* N.B. Do we need to test? Never called for swapfile inode */ flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); /* Finalize the task. */ rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags); - task->tk_calldata = data; - /* Release requests */ - task->tk_release = nfs_readdata_release; - - rpc_call_setup(&data->task, &msg, 0); + rpc_call_setup(task, &msg, 0); } static void @@ -584,16 +585,15 @@ nfs_write_done(struct rpc_task *task) struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; if (task->tk_status >= 0) - nfs_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_writeback_done(task); } static void -nfs_proc_write_setup(struct nfs_write_data *data, unsigned int count, int how) +nfs_proc_write_setup(struct nfs_write_data *data, int how) { struct rpc_task *task = &data->task; struct inode *inode = data->inode; - struct nfs_page *req; int flags; struct rpc_message msg = { .rpc_proc = &nfs_procedures[NFSPROC_WRITE], @@ -603,32 +603,18 @@ nfs_proc_write_setup(struct nfs_write_da }; /* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */ - - req = nfs_list_entry(data->pages.next); - data->args.fh = NFS_FH(inode); - data->args.offset = req_offset(req); - data->args.pgbase = req->wb_pgbase; - data->args.count = count; data->args.stable = NFS_FILE_SYNC; - data->args.pages = data->pagevec; - data->res.fattr = &data->fattr; - data->res.count = count; - data->res.verf = &data->verf; /* Set the initial flags for the task. */ flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC; /* Finalize the task. */ rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags); - task->tk_calldata = data; - /* Release requests */ - task->tk_release = nfs_writedata_release; - - rpc_call_setup(&data->task, &msg, 0); + rpc_call_setup(task, &msg, 0); } static void -nfs_proc_commit_setup(struct nfs_write_data *data, u64 start, u32 len, int how) +nfs_proc_commit_setup(struct nfs_write_data *data, int how) { BUG(); } diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/read.c linux-2.6.4-23-unrace/fs/nfs/read.c --- linux-2.6.4-rc2/fs/nfs/read.c 2004-03-05 19:23:55.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/read.c 2004-03-05 20:16:21.000000000 -0500 @@ -35,6 +35,8 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE static int nfs_pagein_one(struct list_head *, struct inode *); +static void nfs_readpage_result_partial(struct nfs_read_data *, int); +static void nfs_readpage_result_full(struct nfs_read_data *, int); static kmem_cache_t *nfs_rdata_cachep; static mempool_t *nfs_rdata_mempool; @@ -57,12 +59,37 @@ static __inline__ void nfs_readdata_free mempool_free(p, nfs_rdata_mempool); } -void nfs_readdata_release(struct rpc_task *task) +static void nfs_readdata_release(struct rpc_task *task) { struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; nfs_readdata_free(data); } +static +unsigned int nfs_page_length(struct inode *inode, struct page *page) +{ + loff_t i_size = i_size_read(inode); + unsigned long idx; + + if (i_size <= 0) + return 0; + idx = (i_size - 1) >> PAGE_CACHE_SHIFT; + if (page->index > idx) + return 0; + if (page->index != idx) + return PAGE_CACHE_SIZE; + return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1)); +} + +static +int nfs_return_empty_page(struct page *page) +{ + memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE); + SetPageUptodate(page); + unlock_page(page); + return 0; +} + /* * Read a page synchronously. */ @@ -78,6 +105,7 @@ nfs_readpage_sync(struct file *file, str .inode = inode, .args = { .fh = NFS_FH(inode), + .lockowner = current->files, .pages = &page, .pgbase = 0UL, .count = rsize, @@ -121,9 +149,13 @@ nfs_readpage_sync(struct file *file, str } count -= result; rdata.args.pgbase += result; - if (result < rdata.args.count) /* NFSv2ism */ + /* Note: result == 0 should only happen if we're caching + * a write that extends the file and punches a hole. + */ + if (rdata.res.eof != 0 || result == 0) break; } while (count); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; if (count) memclear_highpage_flush(page, rdata.args.pgbase, count); @@ -142,89 +174,208 @@ nfs_readpage_async(struct file *file, st { LIST_HEAD(one_request); struct nfs_page *new; + unsigned int len; - new = nfs_create_request(file, inode, page, 0, PAGE_CACHE_SIZE); + len = nfs_page_length(inode, page); + if (len == 0) + return nfs_return_empty_page(page); + new = nfs_create_request(file, inode, page, 0, len); if (IS_ERR(new)) { unlock_page(page); return PTR_ERR(new); } + if (len < PAGE_CACHE_SIZE) + memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); + nfs_lock_request(new); nfs_list_add_request(new, &one_request); nfs_pagein_one(&one_request, inode); return 0; } +static void nfs_readpage_release(struct nfs_page *req) +{ + unlock_page(req->wb_page); + + nfs_clear_request(req); + nfs_release_request(req); + nfs_unlock_request(req); + + dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", + req->wb_inode->i_sb->s_id, + (long long)NFS_FILEID(req->wb_inode), + req->wb_bytes, + (long long)req_offset(req)); +} + /* * Set up the NFS read request struct */ -static void -nfs_read_rpcsetup(struct list_head *head, struct nfs_read_data *data) +static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + unsigned int count, unsigned int offset) { struct inode *inode; - struct nfs_page *req; - struct page **pages; - unsigned int count; - pages = data->pagevec; - count = 0; - while (!list_empty(head)) { - req = nfs_list_entry(head->next); - nfs_list_remove_request(req); - nfs_list_add_request(req, &data->pages); - *pages++ = req->wb_page; - count += req->wb_bytes; - } - req = nfs_list_entry(data->pages.next); + data->req = req; data->inode = inode = req->wb_inode; data->cred = req->wb_cred; - NFS_PROTO(inode)->read_setup(data, count); + data->args.fh = NFS_FH(inode); + data->args.offset = req_offset(req) + offset; + data->args.pgbase = req->wb_pgbase + offset; + data->args.pages = data->pagevec; + data->args.count = count; + data->args.lockowner = req->wb_lockowner; + data->args.state = req->wb_state; + + data->res.fattr = &data->fattr; + data->res.count = count; + data->res.eof = 0; + + NFS_PROTO(inode)->read_setup(data); + + data->task.tk_calldata = data; + /* Release requests */ + data->task.tk_release = nfs_readdata_release; - dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu.\n", + dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", data->task.tk_pid, inode->i_sb->s_id, (long long)NFS_FILEID(inode), count, - (unsigned long long)req_offset(req)); + data->args.offset); } static void nfs_async_read_error(struct list_head *head) { struct nfs_page *req; - struct page *page; while (!list_empty(head)) { req = nfs_list_entry(head->next); - page = req->wb_page; nfs_list_remove_request(req); - SetPageError(page); - unlock_page(page); - nfs_clear_request(req); - nfs_release_request(req); - nfs_unlock_request(req); + SetPageError(req->wb_page); + nfs_readpage_release(req); } } -static int -nfs_pagein_one(struct list_head *head, struct inode *inode) +/* + * Start an async read operation + */ +static void nfs_execute_read(struct nfs_read_data *data) { - struct rpc_clnt *clnt = NFS_CLIENT(inode); + struct rpc_clnt *clnt = NFS_CLIENT(data->inode); + sigset_t oldset; + + rpc_clnt_sigmask(clnt, &oldset); + lock_kernel(); + rpc_execute(&data->task); + unlock_kernel(); + rpc_clnt_sigunmask(clnt, &oldset); +} + +/* + * Generate multiple requests to fill a single page. + * + * We optimize to reduce the number of read operations on the wire. If we + * detect that we're reading a page, or an area of a page, that is past the + * end of file, we do not generate NFS read operations but just clear the + * parts of the page that would have come back zero from the server anyway. + * + * We rely on the cached value of i_size to make this determination; another + * client can fill pages on the server past our cached end-of-file, but we + * won't see the new data until our attribute cache is updated. This is more + * or less conventional NFS client behavior. + */ +static int nfs_pagein_multi(struct list_head *head, struct inode *inode) +{ + struct nfs_page *req = nfs_list_entry(head->next); + struct page *page = req->wb_page; + struct nfs_read_data *data; + unsigned int rsize = NFS_SERVER(inode)->rsize; + unsigned int nbytes, offset; + int requests = 0; + LIST_HEAD(list); + + nfs_list_remove_request(req); + + nbytes = req->wb_bytes; + for(;;) { + data = nfs_readdata_alloc(); + if (!data) + goto out_bad; + list_add(&data->pages, &list); + requests++; + if (nbytes <= rsize) + break; + nbytes -= rsize; + } + atomic_set(&req->wb_complete, requests); + + ClearPageError(page); + offset = 0; + nbytes = req->wb_bytes; + do { + data = list_entry(list.next, struct nfs_read_data, pages); + list_del_init(&data->pages); + + data->pagevec[0] = page; + data->complete = nfs_readpage_result_partial; + + if (nbytes > rsize) { + nfs_read_rpcsetup(req, data, rsize, offset); + offset += rsize; + nbytes -= rsize; + } else { + nfs_read_rpcsetup(req, data, nbytes, offset); + nbytes = 0; + } + nfs_execute_read(data); + } while (nbytes != 0); + + return 0; + +out_bad: + while (!list_empty(&list)) { + data = list_entry(list.next, struct nfs_read_data, pages); + list_del(&data->pages); + nfs_readdata_free(data); + } + SetPageError(page); + nfs_readpage_release(req); + return -ENOMEM; +} + +static int nfs_pagein_one(struct list_head *head, struct inode *inode) +{ + struct nfs_page *req; + struct page **pages; struct nfs_read_data *data; - sigset_t oldset; + unsigned int count; + + if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) + return nfs_pagein_multi(head, inode); data = nfs_readdata_alloc(); if (!data) goto out_bad; - nfs_read_rpcsetup(head, data); + pages = data->pagevec; + count = 0; + while (!list_empty(head)) { + req = nfs_list_entry(head->next); + nfs_list_remove_request(req); + nfs_list_add_request(req, &data->pages); + ClearPageError(req->wb_page); + *pages++ = req->wb_page; + count += req->wb_bytes; + } + req = nfs_list_entry(data->pages.next); - /* Start the async call */ - rpc_clnt_sigmask(clnt, &oldset); - lock_kernel(); - rpc_execute(&data->task); - unlock_kernel(); - rpc_clnt_sigunmask(clnt, &oldset); + data->complete = nfs_readpage_result_full; + nfs_read_rpcsetup(req, data, count, 0); + + nfs_execute_read(data); return 0; out_bad: nfs_async_read_error(head); @@ -254,54 +405,84 @@ nfs_pagein_list(struct list_head *head, } /* + * Handle a read reply that fills part of a page. + */ +static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) +{ + struct nfs_page *req = data->req; + struct page *page = req->wb_page; + + if (status >= 0) { + unsigned int request = data->args.count; + unsigned int result = data->res.count; + + if (result < request) { + memclear_highpage_flush(page, + data->args.pgbase + result, + request - result); + if (!data->res.eof) + SetPageError(page); + } + } else + SetPageError(page); + + if (atomic_dec_and_test(&req->wb_complete)) { + if (!PageError(page)) + SetPageUptodate(page); + nfs_readpage_release(req); + } +} + +/* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). */ -void -nfs_readpage_result(struct rpc_task *task) +static void nfs_readpage_result_full(struct nfs_read_data *data, int status) { - struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; unsigned int count = data->res.count; - dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", - task->tk_pid, task->tk_status); - while (!list_empty(&data->pages)) { struct nfs_page *req = nfs_list_entry(data->pages.next); struct page *page = req->wb_page; nfs_list_remove_request(req); - if (task->tk_status >= 0) { + if (status >= 0) { if (count < PAGE_CACHE_SIZE) { - memclear_highpage_flush(page, + if (count < req->wb_bytes) + memclear_highpage_flush(page, req->wb_pgbase + count, req->wb_bytes - count); - + if (!data->res.eof) + SetPageError(page); count = 0; } else count -= PAGE_CACHE_SIZE; SetPageUptodate(page); } else SetPageError(page); - unlock_page(page); - - dprintk("NFS: read (%s/%Ld %d@%Ld)\n", - req->wb_inode->i_sb->s_id, - (long long)NFS_FILEID(req->wb_inode), - req->wb_bytes, - (long long)req_offset(req)); - nfs_clear_request(req); - nfs_release_request(req); - nfs_unlock_request(req); + nfs_readpage_release(req); } } /* + * This is the callback from RPC telling us whether a reply was + * received or some error occurred (timeout or socket shutdown). + */ +void nfs_readpage_result(struct rpc_task *task) +{ + struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; + int status = task->tk_status; + + dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", + task->tk_pid, status); + + NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME; + data->complete(data, status); +} + +/* * Read a page over NFS. - * We read the page synchronously in the following cases: - * - The NFS rsize is smaller than PAGE_CACHE_SIZE. We could kludge our way - * around this by creating several consecutive read requests, but - * that's hardly worth it. + * We read the page synchronously in the following case: * - The error flag is set for this page. This happens only when a * previous async read operation failed. */ @@ -324,7 +505,7 @@ nfs_readpage(struct file *file, struct p if (error) goto out_error; - if (!PageError(page) && NFS_SERVER(inode)->rsize >= PAGE_CACHE_SIZE) { + if (!IS_SYNC(inode)) { error = nfs_readpage_async(file, inode, page); goto out; } @@ -346,26 +527,25 @@ struct nfs_readdesc { }; static int -readpage_sync_filler(void *data, struct page *page) -{ - struct nfs_readdesc *desc = (struct nfs_readdesc *)data; - return nfs_readpage_sync(desc->filp, page->mapping->host, page); -} - -static int readpage_async_filler(void *data, struct page *page) { struct nfs_readdesc *desc = (struct nfs_readdesc *)data; struct inode *inode = page->mapping->host; struct nfs_page *new; + unsigned int len; nfs_wb_page(inode, page); - new = nfs_create_request(desc->filp, inode, page, 0, PAGE_CACHE_SIZE); + len = nfs_page_length(inode, page); + if (len == 0) + return nfs_return_empty_page(page); + new = nfs_create_request(desc->filp, inode, page, 0, len); if (IS_ERR(new)) { SetPageError(page); unlock_page(page); return PTR_ERR(new); } + if (len < PAGE_CACHE_SIZE) + memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); nfs_lock_request(new); nfs_list_add_request(new, desc->head); return 0; @@ -380,14 +560,16 @@ nfs_readpages(struct file *filp, struct .filp = filp, .head = &head, }; - struct nfs_server *server = NFS_SERVER(mapping->host); - int is_sync = server->rsize < PAGE_CACHE_SIZE; + struct inode *inode = mapping->host; + struct nfs_server *server = NFS_SERVER(inode); int ret; - ret = read_cache_pages(mapping, pages, - is_sync ? readpage_sync_filler : - readpage_async_filler, - &desc); + dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + nr_pages); + + ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); if (!list_empty(&head)) { int err = nfs_pagein_list(&head, server->rpages); if (!ret) diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/unlink.c linux-2.6.4-23-unrace/fs/nfs/unlink.c --- linux-2.6.4-rc2/fs/nfs/unlink.c 2004-03-05 19:57:59.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/unlink.c 2004-03-05 20:13:58.000000000 -0500 @@ -104,6 +104,7 @@ nfs_async_unlink_init(struct rpc_task *t status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name); if (status < 0) goto out_err; + nfs_begin_data_update(dir->d_inode); rpc_call_setup(task, &msg, 0); return; out_err: @@ -126,7 +127,7 @@ nfs_async_unlink_done(struct rpc_task *t if (!dir) return; dir_i = dir->d_inode; - nfs_zap_caches(dir_i); + nfs_end_data_update(dir_i); if (NFS_PROTO(dir_i)->unlink_done(dir, task)) return; put_rpccred(data->cred); diff -u --recursive --new-file --show-c-function linux-2.6.4-rc2/fs/nfs/write.c linux-2.6.4-23-unrace/fs/nfs/write.c --- linux-2.6.4-rc2/fs/nfs/write.c 2004-03-05 19:51:47.000000000 -0500 +++ linux-2.6.4-23-unrace/fs/nfs/write.c 2004-03-05 23:06:02.000000000 -0500 @@ -74,12 +74,17 @@ static struct nfs_page * nfs_update_request(struct file*, struct inode *, struct page *, unsigned int, unsigned int); -static void nfs_strategy(struct inode *inode); +static void nfs_writeback_done_partial(struct nfs_write_data *, int); +static void nfs_writeback_done_full(struct nfs_write_data *, int); +static int nfs_wait_on_write_congestion(struct address_space *, int); +static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int); static kmem_cache_t *nfs_wdata_cachep; static mempool_t *nfs_wdata_mempool; static mempool_t *nfs_commit_mempool; +static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion); + static __inline__ struct nfs_write_data *nfs_writedata_alloc(void) { struct nfs_write_data *p; @@ -96,7 +101,7 @@ static __inline__ void nfs_writedata_fre mempool_free(p, nfs_wdata_mempool); } -void nfs_writedata_release(struct rpc_task *task) +static void nfs_writedata_release(struct rpc_task *task) { struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; nfs_writedata_free(wdata); @@ -118,10 +123,50 @@ static __inline__ void nfs_commit_free(s mempool_free(p, nfs_commit_mempool); } -void nfs_commit_release(struct rpc_task *task) +/* Adjust the file length if we're writing beyond the end */ +static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { - struct nfs_write_data *wdata = (struct nfs_write_data *)task->tk_calldata; - nfs_commit_free(wdata); + struct inode *inode = page->mapping->host; + loff_t end, i_size = i_size_read(inode); + unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; + + if (i_size > 0 && page->index < end_index) + return; + end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); + if (i_size >= end) + return; + i_size_write(inode, end); +} + +/* We can set the PG_uptodate flag if we see that a write request + * covers the full page. + */ +static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) +{ + loff_t end_offs; + + if (PageUptodate(page)) + return; + if (base != 0) + return; + if (count == PAGE_CACHE_SIZE) { + SetPageUptodate(page); + return; + } + + end_offs = i_size_read(page->mapping->host) - 1; + if (end_offs < 0) + return; + /* Is this the last page? */ + if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT)) + return; + /* This is the last page: set PG_uptodate if we cover the entire + * extent of the data, then zero the rest of the page. + */ + if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) { + memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count); + SetPageUptodate(page); + } } /* @@ -141,6 +186,7 @@ nfs_writepage_sync(struct file *file, st .inode = inode, .args = { .fh = NFS_FH(inode), + .lockowner = current->files, .pages = &page, .stable = NFS_FILE_SYNC, .pgbase = offset, @@ -157,6 +203,7 @@ nfs_writepage_sync(struct file *file, st (long long)NFS_FILEID(inode), count, (long long)(page_offset(page) + offset)); + nfs_begin_data_update(inode); do { if (count < wsize && !swapfile) wdata.args.count = count; @@ -177,52 +224,48 @@ nfs_writepage_sync(struct file *file, st wdata.args.pgbase += result; written += result; count -= result; - - /* - * If we've extended the file, update the inode - * now so we don't invalidate the cache. - */ - if (wdata.args.offset > i_size_read(inode)) - i_size_write(inode, wdata.args.offset); } while (count); + /* Update file length */ + nfs_grow_file(page, offset, written); + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, offset, written); if (PageError(page)) ClearPageError(page); io_error: + nfs_end_data_update(inode); if (wdata.cred) put_rpccred(wdata.cred); return written ? written : result; } -static int -nfs_writepage_async(struct file *file, struct inode *inode, struct page *page, - unsigned int offset, unsigned int count) +static int nfs_writepage_async(struct file *file, struct inode *inode, + struct page *page, unsigned int offset, unsigned int count) { struct nfs_page *req; - loff_t end; int status; + nfs_begin_data_update(inode); req = nfs_update_request(file, inode, page, offset, count); status = (IS_ERR(req)) ? PTR_ERR(req) : 0; if (status < 0) goto out; + /* Update file length */ + nfs_grow_file(page, offset, count); + /* Set the PG_uptodate flag? */ + nfs_mark_uptodate(page, offset, count); nfs_unlock_request(req); - nfs_strategy(inode); - end = ((loff_t)page->index<mapping->host; unsigned long end_index; @@ -258,11 +301,13 @@ nfs_writepage(struct page *page, struct goto out; do_it: lock_kernel(); - if (NFS_SERVER(inode)->wsize >= PAGE_CACHE_SIZE && !IS_SYNC(inode) && - inode_referenced) { + if (!IS_SYNC(inode) && inode_referenced) { err = nfs_writepage_async(NULL, inode, page, 0, offset); - if (err >= 0) + if (err >= 0) { err = 0; + if (wbc->for_reclaim) + err = WRITEPAGE_ACTIVATE; + } } else { err = nfs_writepage_sync(NULL, inode, page, 0, offset); if (err == offset) @@ -270,32 +315,46 @@ do_it: } unlock_kernel(); out: - unlock_page(page); + if (err != WRITEPAGE_ACTIVATE) + unlock_page(page); if (inode_referenced) iput(inode); return err; } -int -nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) +/* + * Note: causes nfs_update_request() to block on the assumption + * that the writeback is generated due to memory pressure. + */ +int nfs_writepages(struct address_space *mapping, struct writeback_co