fs/Kconfig | 39 - fs/inode.c | 2 fs/nfs/dir.c | 304 ++++++++- fs/nfs/direct.c | 3 fs/nfs/file.c | 30 fs/nfs/idmap.c | 407 ++++++------ fs/nfs/inode.c | 562 +++++++++++------ fs/nfs/nfs3proc.c | 38 - fs/nfs/nfs4proc.c | 1034 +++++++++++++++++++++++--------- fs/nfs/nfs4renewd.c | 110 ++- fs/nfs/nfs4state.c | 516 +++++++++++++++ fs/nfs/nfs4xdr.c | 921 ++++++++++++++++++++++++++-- fs/nfs/proc.c | 30 fs/nfs/read.c | 2 fs/nfs/unlink.c | 3 fs/nfs/write.c | 167 ++--- include/linux/fs.h | 2 include/linux/nfs4.h | 79 ++ include/linux/nfs_fs.h | 214 ++++-- include/linux/nfs_fs_sb.h | 6 include/linux/nfs_idmap.h | 21 include/linux/nfs_page.h | 1 include/linux/nfs_xdr.h | 84 ++ include/linux/sunrpc/auth.h | 7 include/linux/sunrpc/clnt.h | 23 include/linux/sunrpc/gss_api.h | 9 include/linux/sunrpc/gss_krb5.h | 22 include/linux/sunrpc/rpc_pipe_fs.h | 5 include/linux/sunrpc/sched.h | 4 include/linux/sunrpc/xdr.h | 4 include/linux/sunrpc/xprt.h | 8 net/sunrpc/auth.c | 42 + net/sunrpc/auth_gss/auth_gss.c | 389 ++++++++---- net/sunrpc/auth_gss/gss_krb5_crypto.c | 89 +- net/sunrpc/auth_gss/gss_krb5_mech.c | 30 net/sunrpc/auth_gss/gss_krb5_seal.c | 58 - net/sunrpc/auth_gss/gss_krb5_unseal.c | 156 ---- net/sunrpc/auth_gss/gss_mech_switch.c | 5 net/sunrpc/auth_gss/gss_pseudoflavors.c | 1 net/sunrpc/clnt.c | 72 +- net/sunrpc/pmap_clnt.c | 17 net/sunrpc/rpc_pipe.c | 86 ++ net/sunrpc/sched.c | 5 net/sunrpc/sunrpc_syms.c | 6 net/sunrpc/xdr.c | 157 ++++ net/sunrpc/xprt.c | 120 ++- 46 files changed, 4376 insertions(+), 1514 deletions(-) diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/inode.c linux-2.6.2-33-fix_kconfig/fs/inode.c --- linux-2.6.2/fs/inode.c 2004-02-07 12:58:26.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/inode.c 2004-02-07 13:10:00.000000000 +0100 @@ -1178,6 +1178,8 @@ void inode_update_time(struct inode *ino struct timespec now; int sync_it = 0; + if (IS_NOCMTIME(inode)) + return; if (IS_RDONLY(inode)) return; diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/Kconfig linux-2.6.2-33-fix_kconfig/fs/Kconfig --- linux-2.6.2/fs/Kconfig 2004-02-07 12:54:12.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/Kconfig 2004-02-08 14:59:07.000000000 +0100 @@ -1302,15 +1302,18 @@ config NFS_V3 Say Y here if you want your NFS client to be able to speak the newer version 3 of the NFS protocol. - If unsure, say N. + If unsure, say Y. config NFS_V4 bool "Provide NFSv4 client support (EXPERIMENTAL)" depends on NFS_FS && EXPERIMENTAL + select RPCSEC_GSS_KRB5 help Say Y here if you want your NFS client to be able to speak the newer - version 4 of the NFS protocol. This feature is experimental, and - should only be used if you are interested in helping to test NFSv4. + version 4 of the NFS protocol. + + Note: Requires auxiliary userspace daemons which may be found on + http://www.citi.umich.edu/projects/nfsv4/ If unsure, say N. @@ -1419,28 +1422,24 @@ config SUNRPC tristate config SUNRPC_GSS - tristate "Provide RPCSEC_GSS authentication (EXPERIMENTAL)" + tristate + +config RPCSEC_GSS_KRB5 + tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" depends on SUNRPC && EXPERIMENTAL - default SUNRPC if NFS_V4=y + select SUNRPC_GSS + select CRYPTO + select CRYPTO_MD5 + select CRYPTO_DES help - Provides cryptographic authentication for NFS rpc requests. To - make this useful, you must also select at least one rpcsec_gss - mechanism. - Note: You should always select this option if you wish to use + Provides for secure RPC calls by means of a gss-api + mechanism based on Kerberos V5. This is required for NFSv4. -config RPCSEC_GSS_KRB5 - tristate "Kerberos V mechanism for RPCSEC_GSS (EXPERIMENTAL)" - depends on SUNRPC_GSS && CRYPTO_DES && CRYPTO_MD5 - default SUNRPC_GSS if NFS_V4=y - help - Provides a gss-api mechanism based on Kerberos V5 (this is - mandatory for RFC3010-compliant NFSv4 implementations). - Requires a userspace daemon; - see http://www.citi.umich.edu/projects/nfsv4/. + Note: Requires an auxiliary userspace daemon which may be found on + http://www.citi.umich.edu/projects/nfsv4/ - Note: If you select this option, please ensure that you also - enable the MD5 and DES crypto ciphers. + If unsure, say N. config SMB_FS tristate "SMB file system support (to mount Windows shares etc.)" diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/nfs/dir.c linux-2.6.2-33-fix_kconfig/fs/nfs/dir.c --- linux-2.6.2/fs/nfs/dir.c 2004-02-07 12:51:45.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/nfs/dir.c 2004-02-07 13:10:00.000000000 +0100 @@ -72,6 +72,26 @@ struct inode_operations nfs_dir_inode_op .setattr = nfs_setattr, }; +#ifdef CONFIG_NFS_V4 + +static struct dentry *nfs_atomic_lookup(struct inode *, struct dentry *, struct nameidata *); +struct inode_operations nfs4_dir_inode_operations = { + .create = nfs_create, + .lookup = nfs_atomic_lookup, + .link = nfs_link, + .unlink = nfs_unlink, + .symlink = nfs_symlink, + .mkdir = nfs_mkdir, + .rmdir = nfs_rmdir, + .mknod = nfs_mknod, + .rename = nfs_rename, + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, +}; + +#endif /* CONFIG_NFS_V4 */ + /* * Open file */ @@ -119,11 +139,13 @@ int nfs_readdir_filler(nfs_readdir_descr struct file *file = desc->file; struct inode *inode = file->f_dentry->d_inode; struct rpc_cred *cred = nfs_file_cred(file); + unsigned long timestamp; int error; dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); again: + timestamp = jiffies; error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page, NFS_SERVER(inode)->dtsize, desc->plus); if (error < 0) { @@ -137,18 +159,21 @@ int nfs_readdir_filler(nfs_readdir_descr goto error; } SetPageUptodate(page); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; /* Ensure consistent page alignment of the data. * Note: assumes we have exclusive access to this mapping either * throught inode->i_sem or some other mechanism. */ - if (page->index == 0) + if (page->index == 0) { invalidate_inode_pages(inode->i_mapping); + NFS_I(inode)->readdir_timestamp = timestamp; + } unlock_page(page); return 0; error: SetPageError(page); unlock_page(page); - invalidate_inode_pages(inode->i_mapping); + nfs_zap_caches(inode); desc->error = error; return -EIO; } @@ -361,6 +386,7 @@ int uncached_readdir(nfs_readdir_descrip page, NFS_SERVER(inode)->dtsize, desc->plus); + NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; desc->page = page; desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ if (desc->error >= 0) { @@ -439,7 +465,15 @@ static int nfs_readdir(struct file *filp } res = 0; break; - } else if (res < 0) + } + if (res == -ETOOSMALL && desc->plus) { + NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; + nfs_zap_caches(inode); + desc->plus = 0; + desc->entry->eof = 0; + continue; + } + if (res < 0) break; res = nfs_do_filldir(desc, dirent, filldir); @@ -461,14 +495,19 @@ static int nfs_readdir(struct file *filp * In the case it has, we assume that the dentries are untrustworthy * and may need to be looked up again. */ -static inline -int nfs_check_verifier(struct inode *dir, struct dentry *dentry) +static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry) { if (IS_ROOT(dentry)) return 1; - if (nfs_revalidate_inode(NFS_SERVER(dir), dir)) + if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0 + || nfs_attribute_timeout(dir)) return 0; - return time_after(dentry->d_time, NFS_MTIME_UPDATE(dir)); + return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); +} + +static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) +{ + dentry->d_fsdata = (void *)verf; } /* @@ -508,9 +547,7 @@ int nfs_neg_need_reval(struct inode *dir /* Don't revalidate a negative dentry if we're creating a new file */ if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE)) return 0; - if (!nfs_check_verifier(dir, dentry)) - return 1; - return time_after(jiffies, dentry->d_time + NFS_ATTRTIMEO(dir)); + return !nfs_check_verifier(dir, dentry); } /* @@ -532,6 +569,7 @@ static int nfs_lookup_revalidate(struct int error; struct nfs_fh fhandle; struct nfs_fattr fattr; + unsigned long verifier; int isopen = 0; parent = dget_parent(dentry); @@ -554,6 +592,9 @@ static int nfs_lookup_revalidate(struct goto out_bad; } + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + /* Force a full look up iff the parent directory has changed */ if (nfs_check_verifier(dir, dentry)) { if (nfs_lookup_verify_inode(inode, isopen)) @@ -561,6 +602,12 @@ static int nfs_lookup_revalidate(struct goto out_valid; } + /* + * Note: we're not holding inode->i_sem and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. + */ + verifier = nfs_save_change_attribute(dir); error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); if (!error) { if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) @@ -583,6 +630,7 @@ static int nfs_lookup_revalidate(struct out_valid_renew: nfs_renew_times(dentry); + nfs_set_verifier(dentry, verifier); out_valid: unlock_kernel(); dput(parent); @@ -670,9 +718,11 @@ static struct dentry *nfs_lookup(struct goto out; error = -ENOMEM; - dentry->d_op = &nfs_dentry_operations; + dentry->d_op = NFS_PROTO(dir)->dentry_ops; lock_kernel(); + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); /* If we're doing an exclusive create, optimize away the lookup */ if (nfs_is_exclusive_create(dir, nd)) @@ -695,6 +745,7 @@ no_entry: error = 0; d_add(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); out_unlock: unlock_kernel(); out: @@ -702,6 +753,139 @@ out: return ERR_PTR(error); } +#ifdef CONFIG_NFS_V4 +static int nfs_open_revalidate(struct dentry *, struct nameidata *); + +struct dentry_operations nfs4_dentry_operations = { + .d_revalidate = nfs_open_revalidate, + .d_delete = nfs_dentry_delete, + .d_iput = nfs_dentry_iput, +}; + +static int is_atomic_open(struct inode *dir, struct nameidata *nd) +{ + if (!nd) + return 0; + /* Check that we are indeed trying to open this file */ + if ((nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_OPEN)) + return 0; + /* NFS does not (yet) have a stateful open for directories */ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? */ + if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; +} + +static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = NULL; + int error = 0; + + /* Check that we are indeed trying to open this file */ + if (!is_atomic_open(dir, nd)) + goto no_open; + + if (dentry->d_name.len > NFS_SERVER(dir)->namelen) { + error = -ENAMETOOLONG; + goto out; + } + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ + if (nd->intent.open.flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ + lock_kernel(); + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + + if (nd->intent.open.flags & O_CREAT) { + nfs_begin_data_update(dir); + inode = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); + } else + inode = nfs4_atomic_open(dir, dentry, nd); + unlock_kernel(); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); + switch (error) { + /* Make a negative dentry */ + case -ENOENT: + inode = NULL; + break; + /* This turned out not to be a regular file */ + case -ELOOP: + if (!(nd->intent.open.flags & O_NOFOLLOW)) + goto no_open; + /* case -EISDIR: */ + /* case -EINVAL: */ + default: + goto out; + } + } +no_entry: + d_add(dentry, inode); + nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); +out: + BUG_ON(error > 0); + return ERR_PTR(error); +no_open: + return nfs_lookup(dir, dentry, nd); +} + +static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) +{ + struct dentry *parent = NULL; + struct inode *inode = dentry->d_inode; + struct inode *dir; + unsigned long verifier; + int openflags, ret = 0; + + /* NFS only supports OPEN for regular files */ + if (inode && !S_ISREG(inode->i_mode)) + goto no_open; + parent = dget_parent(dentry); + dir = parent->d_inode; + if (!is_atomic_open(dir, nd)) + goto no_open; + openflags = nd->intent.open.flags; + if (openflags & O_CREAT) { + /* If this is a negative dentry, just drop it */ + if (!inode) + goto out; + /* If this is exclusive open, just revalidate */ + if (openflags & O_EXCL) + goto no_open; + } + /* We can't create new files, or truncate existing ones here */ + openflags &= ~(O_CREAT|O_TRUNC); + + /* + * Note: we're not holding inode->i_sem and so may be racing with + * operations that change the directory. We therefore save the + * change attribute *before* we do the RPC call. + */ + lock_kernel(); + verifier = nfs_save_change_attribute(dir); + ret = nfs4_open_revalidate(dir, dentry, openflags); + if (!ret) + nfs_set_verifier(dentry, verifier); + unlock_kernel(); +out: + dput(parent); + if (!ret) + d_drop(dentry); + return ret; +no_open: + dput(parent); + return nfs_lookup_revalidate(dentry, nd); +} +#endif /* CONFIG_NFSV4 */ + static inline int find_dirent_name(nfs_readdir_descriptor_t *desc, struct page *page, struct dentry *dentry) { @@ -736,15 +920,20 @@ int nfs_cached_lookup(struct inode *dir, struct nfs_server *server; struct nfs_entry entry; struct page *page; - unsigned long timestamp = NFS_MTIME_UPDATE(dir); + unsigned long timestamp; int res; if (!NFS_USE_READDIRPLUS(dir)) return -ENOENT; server = NFS_SERVER(dir); - if (server->flags & NFS_MOUNT_NOAC) + /* Don't use readdirplus unless the cache is stable */ + if ((server->flags & NFS_MOUNT_NOAC) != 0 + || nfs_caches_unstable(dir) + || nfs_attribute_timeout(dir)) + return -ENOENT; + if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0) return -ENOENT; - nfs_revalidate_inode(server, dir); + timestamp = NFS_I(dir)->readdir_timestamp; entry.fh = fh; entry.fattr = fattr; @@ -798,6 +987,7 @@ static int nfs_instantiate(struct dentry if (inode) { d_instantiate(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dentry->d_parent->d_inode)); error = 0; } return error; @@ -836,11 +1026,13 @@ static int nfs_create(struct inode *dir, * does not pass the create flags. */ lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags); + nfs_end_data_update(dir); if (!IS_ERR(inode)) { d_instantiate(dentry, inode); nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); error = 0; } else { error = PTR_ERR(inode); @@ -871,9 +1063,10 @@ nfs_mknod(struct inode *dir, struct dent attr.ia_valid = ATTR_MODE; lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mknod(dir, &dentry->d_name, &attr, rdev, &fhandle, &fattr); + nfs_end_data_update(dir); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); else @@ -908,9 +1101,10 @@ static int nfs_mkdir(struct inode *dir, */ d_drop(dentry); #endif - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->mkdir(dir, &dentry->d_name, &attr, &fhandle, &fattr); + nfs_end_data_update(dir); if (!error) error = nfs_instantiate(dentry, &fhandle, &fattr); else @@ -927,10 +1121,12 @@ static int nfs_rmdir(struct inode *dir, dir->i_ino, dentry->d_name.name); lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); - if (!error) + /* Ensure the VFS deletes this inode */ + if (error == 0 && dentry->d_inode != NULL) dentry->d_inode->i_nlink = 0; + nfs_end_data_update(dir); unlock_kernel(); return error; @@ -986,12 +1182,21 @@ dentry->d_parent->d_name.name, dentry->d goto out; } while(sdentry->d_inode != NULL); /* need negative lookup */ - nfs_zap_caches(dir); qsilly.name = silly; qsilly.len = strlen(silly); - error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); + nfs_begin_data_update(dir); + if (dentry->d_inode) { + nfs_begin_data_update(dentry->d_inode); + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + nfs_end_data_update(dentry->d_inode); + } else + error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, + dir, &qsilly); + nfs_end_data_update(dir); if (!error) { nfs_renew_times(dentry); + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); d_move(dentry, sdentry); error = nfs_async_unlink(dentry); /* If we return 0 we don't unlink */ @@ -1023,14 +1228,17 @@ static int nfs_safe_remove(struct dentry goto out; } - nfs_zap_caches(dir); - if (inode) - NFS_CACHEINV(inode); - error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); - if (error < 0) - goto out; - if (inode) - inode->i_nlink--; + nfs_begin_data_update(dir); + if (inode != NULL) { + nfs_begin_data_update(inode); + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + /* The VFS may want to delete this inode */ + if (error == 0) + inode->i_nlink--; + nfs_end_data_update(inode); + } else + error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); + nfs_end_data_update(dir); out: return error; } @@ -1065,9 +1273,10 @@ static int nfs_unlink(struct inode *dir, spin_unlock(&dentry->d_lock); spin_unlock(&dcache_lock); error = nfs_safe_remove(dentry); - if (!error) + if (!error) { nfs_renew_times(dentry); - else if (need_rehash) + nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + } else if (need_rehash) d_rehash(dentry); unlock_kernel(); return error; @@ -1114,9 +1323,10 @@ dentry->d_parent->d_name.name, dentry->d qsymname.len = strlen(symname); lock_kernel(); - nfs_zap_caches(dir); + nfs_begin_data_update(dir); error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, &attr, &sym_fh, &sym_attr); + nfs_end_data_update(dir); if (!error) { error = nfs_instantiate(dentry, &sym_fh, &sym_attr); } else { @@ -1148,9 +1358,12 @@ nfs_link(struct dentry *old_dentry, stru */ lock_kernel(); d_drop(dentry); - nfs_zap_caches(dir); - NFS_CACHEINV(inode); + + nfs_begin_data_update(dir); + nfs_begin_data_update(inode); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); + nfs_end_data_update(inode); + nfs_end_data_update(dir); unlock_kernel(); return error; } @@ -1255,16 +1468,23 @@ go_ahead: if (new_inode) d_delete(new_dentry); - nfs_zap_caches(new_dir); - nfs_zap_caches(old_dir); + nfs_begin_data_update(old_dir); + nfs_begin_data_update(new_dir); + nfs_begin_data_update(old_inode); error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, new_dir, &new_dentry->d_name); + nfs_end_data_update(old_inode); + nfs_end_data_update(new_dir); + nfs_end_data_update(old_dir); out: if (rehash) d_rehash(rehash); - if (!error && !S_ISDIR(old_inode->i_mode)) - d_move(old_dentry, new_dentry); - nfs_renew_times(new_dentry); + if (!error) { + if (!S_ISDIR(old_inode->i_mode)) + d_move(old_dentry, new_dentry); + nfs_renew_times(new_dentry); + nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); + } /* new dentry created? */ if (dentry) @@ -1306,6 +1526,9 @@ nfs_permission(struct inode *inode, int /* We only need to check permissions on file open() and access() */ if (!nd || !(nd->flags & (LOOKUP_OPEN|LOOKUP_ACCESS))) return 0; + /* NFSv4 has atomic_open... */ + if (NFS_PROTO(inode)->version > 3 && (nd->flags & LOOKUP_OPEN)) + return 0; } lock_kernel(); @@ -1315,7 +1538,8 @@ nfs_permission(struct inode *inode, int cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); if (cache->cred == cred - && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) { + && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) + && !(NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) { if (!(res = cache->err)) { /* Is the mask a subset of an accepted mask? */ if ((cache->mask & mask) == mask) diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/nfs/direct.c linux-2.6.2-33-fix_kconfig/fs/nfs/direct.c --- linux-2.6.2/fs/nfs/direct.c 2004-02-07 12:57:49.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/nfs/direct.c 2004-02-07 13:10:00.000000000 +0100 @@ -269,6 +269,7 @@ nfs_direct_write_seg(struct inode *inode if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) wdata.args.stable = NFS_FILE_SYNC; + nfs_begin_data_update(inode); retry: need_commit = 0; tot_bytes = 0; @@ -334,6 +335,8 @@ retry: VERF_SIZE) != 0) goto sync_retry; } + nfs_end_data_update(inode); + NFS_FLAGS(inode) |= NFS_INO_INVALID_DATA; return tot_bytes; diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/nfs/file.c linux-2.6.2-33-fix_kconfig/fs/nfs/file.c --- linux-2.6.2/fs/nfs/file.c 2004-02-07 12:56:25.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/nfs/file.c 2004-02-07 13:10:00.000000000 +0100 @@ -26,7 +26,6 @@ #include #include #include -#include #include #include @@ -105,11 +104,16 @@ nfs_file_flush(struct file *file) dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + if ((file->f_mode & FMODE_WRITE) == 0) + return 0; lock_kernel(); - status = nfs_wb_file(inode, file); + /* Ensure that data+attribute caches are up to date after close() */ + status = nfs_wb_all(inode); if (!status) { status = file->f_error; file->f_error = 0; + if (!status) + __nfs_revalidate_inode(NFS_SERVER(inode), inode); } unlock_kernel(); return status; @@ -278,21 +282,17 @@ nfs_lock(struct file *filp, int cmd, str if (!inode) return -EINVAL; - /* This will be in a forthcoming patch. */ - if (NFS_PROTO(inode)->version == 4) { - printk(KERN_INFO "NFS: file locking over NFSv4 is not yet supported\n"); - return -EIO; - } - /* No mandatory locks over NFS */ if ((inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) return -ENOLCK; - /* Fake OK code if mounted without NLM support */ - if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { - if (IS_GETLK(cmd)) - status = LOCK_USE_CLNT; - goto out_ok; + if (NFS_PROTO(inode)->version != 4) { + /* Fake OK code if mounted without NLM support */ + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) { + if (IS_GETLK(cmd)) + status = LOCK_USE_CLNT; + goto out_ok; + } } /* @@ -302,7 +302,7 @@ nfs_lock(struct file *filp, int cmd, str * Not sure whether that would be unique, though, or whether * that would break in other places. */ - if (!fl->fl_owner || (fl->fl_flags & FL_POSIX) != FL_POSIX) + if (!fl->fl_owner || !(fl->fl_flags & FL_POSIX)) return -ENOLCK; /* @@ -322,7 +322,7 @@ nfs_lock(struct file *filp, int cmd, str return status; lock_kernel(); - status = nlmclnt_proc(inode, cmd, fl); + status = NFS_PROTO(inode)->lock(filp, cmd, fl); unlock_kernel(); if (status < 0) return status; diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/nfs/idmap.c linux-2.6.2-33-fix_kconfig/fs/nfs/idmap.c --- linux-2.6.2/fs/nfs/idmap.c 2004-02-07 12:45:39.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/nfs/idmap.c 2004-02-07 13:09:46.000000000 +0100 @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -51,14 +52,16 @@ #include #define IDMAP_HASH_SZ 128 -#define IDMAP_HASH_TYPE_NAME 0x01 -#define IDMAP_HASH_TYPE_ID 0x02 -#define IDMAP_HASH_TYPE_INSERT 0x04 struct idmap_hashent { - uid_t ih_id; - char ih_name[IDMAP_NAMESZ]; - u_int32_t ih_namelen; + __u32 ih_id; + int ih_namelen; + char ih_name[IDMAP_NAMESZ]; +}; + +struct idmap_hashtable { + __u8 h_type; + struct idmap_hashent h_entries[IDMAP_HASH_SZ]; }; struct idmap { @@ -66,12 +69,10 @@ struct idmap { struct dentry *idmap_dentry; wait_queue_head_t idmap_wq; struct idmap_msg idmap_im; - struct nfs_server *idmap_server; - struct semaphore idmap_lock; - struct semaphore idmap_im_lock; - struct semaphore idmap_hash_lock; - struct idmap_hashent idmap_id_hash[IDMAP_HASH_SZ]; - struct idmap_hashent idmap_name_hash[IDMAP_HASH_SZ]; + struct semaphore idmap_lock; /* Serializes upcalls */ + struct semaphore idmap_im_lock; /* Protects the hashtable */ + struct idmap_hashtable idmap_user_hash; + struct idmap_hashtable idmap_group_hash; }; static ssize_t idmap_pipe_upcall(struct file *, struct rpc_pipe_msg *, char *, @@ -79,10 +80,7 @@ static ssize_t idmap_pipe_upcall(struc static ssize_t idmap_pipe_downcall(struct file *, const char *, size_t); void idmap_pipe_destroy_msg(struct rpc_pipe_msg *); -static int validate_ascii(char *, u_int32_t); - -static u_int32_t fnvhash32(void *, u_int32_t); -static int idmap_cache_lookup(struct idmap *, int, char *, u_int32_t *, uid_t *); +static unsigned int fnvhash32(const void *, size_t); static struct rpc_pipe_ops idmap_upcall_ops = { .upcall = idmap_pipe_upcall, @@ -90,95 +88,153 @@ static struct rpc_pipe_ops idmap_upcall_ .destroy_msg = idmap_pipe_destroy_msg, }; -void * -nfs_idmap_new(struct nfs_server *server) +void +nfs_idmap_new(struct nfs4_client *clp) { struct idmap *idmap; + if (clp->cl_idmap != NULL) + return; if ((idmap = kmalloc(sizeof(*idmap), GFP_KERNEL)) == NULL) - return (NULL); + return; memset(idmap, 0, sizeof(*idmap)); - idmap->idmap_server = server; - snprintf(idmap->idmap_path, sizeof(idmap->idmap_path), - "%s/idmap", idmap->idmap_server->client->cl_pathname); + "%s/idmap", clp->cl_rpcclient->cl_pathname); idmap->idmap_dentry = rpc_mkpipe(idmap->idmap_path, - idmap->idmap_server, &idmap_upcall_ops, 0); - if (IS_ERR(idmap->idmap_dentry)) - goto err_free; + idmap, &idmap_upcall_ops, 0); + if (IS_ERR(idmap->idmap_dentry)) { + kfree(idmap); + return; + } init_MUTEX(&idmap->idmap_lock); init_MUTEX(&idmap->idmap_im_lock); - init_MUTEX(&idmap->idmap_hash_lock); init_waitqueue_head(&idmap->idmap_wq); + idmap->idmap_user_hash.h_type = IDMAP_TYPE_USER; + idmap->idmap_group_hash.h_type = IDMAP_TYPE_GROUP; - return (idmap); - - err_free: - kfree(idmap); - return (NULL); + clp->cl_idmap = idmap; } void -nfs_idmap_delete(struct nfs_server *server) +nfs_idmap_delete(struct nfs4_client *clp) { - struct idmap *idmap = server->idmap; + struct idmap *idmap = clp->cl_idmap; if (!idmap) return; rpc_unlink(idmap->idmap_path); - server->idmap = NULL; + clp->cl_idmap = NULL; kfree(idmap); } /* + * Helper routines for manipulating the hashtable + */ +static inline struct idmap_hashent * +idmap_name_hash(struct idmap_hashtable* h, const char *name, size_t len) +{ + return &h->h_entries[fnvhash32(name, len) % IDMAP_HASH_SZ]; +} + +static struct idmap_hashent * +idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len) +{ + struct idmap_hashent *he = idmap_name_hash(h, name, len); + + if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0) + return NULL; + return he; +} + +static inline struct idmap_hashent * +idmap_id_hash(struct idmap_hashtable* h, __u32 id) +{ + return &h->h_entries[fnvhash32(&id, sizeof(id)) % IDMAP_HASH_SZ]; +} + +static struct idmap_hashent * +idmap_lookup_id(struct idmap_hashtable *h, __u32 id) +{ + struct idmap_hashent *he = idmap_id_hash(h, id); + if (he->ih_id != id || he->ih_namelen == 0) + return NULL; + return he; +} + +/* + * Routines for allocating new entries in the hashtable. + * For now, we just have 1 entry per bucket, so it's all + * pretty trivial. + */ +static inline struct idmap_hashent * +idmap_alloc_name(struct idmap_hashtable *h, char *name, unsigned len) +{ + return idmap_name_hash(h, name, len); +} + +static inline struct idmap_hashent * +idmap_alloc_id(struct idmap_hashtable *h, __u32 id) +{ + return idmap_id_hash(h, id); +} + +static void +idmap_update_entry(struct idmap_hashent *he, const char *name, + size_t namelen, __u32 id) +{ + he->ih_id = id; + memcpy(he->ih_name, name, namelen); + he->ih_name[namelen] = '\0'; + he->ih_namelen = namelen; +} + +/* * Name -> ID */ -int -nfs_idmap_id(struct nfs_server *server, u_int8_t type, char *name, - u_int namelen, uid_t *id) +static int +nfs_idmap_id(struct idmap *idmap, struct idmap_hashtable *h, + const char *name, size_t namelen, __u32 *id) { struct rpc_pipe_msg msg; - struct idmap *idmap = server->idmap; struct idmap_msg *im; + struct idmap_hashent *he; DECLARE_WAITQUEUE(wq, current); - int ret = -1, hashtype = IDMAP_HASH_TYPE_NAME, xnamelen = namelen; - - if (idmap == NULL) - return (-1); + int ret = -EIO; im = &idmap->idmap_im; - if (namelen > IDMAP_NAMESZ || namelen == 0) - return (-1); + /* + * String sanity checks + * Note that the userland daemon expects NUL terminated strings + */ + for (;;) { + if (namelen == 0) + return -EINVAL; + if (name[namelen-1] != '\0') + break; + namelen--; + } + if (namelen >= IDMAP_NAMESZ) + return -EINVAL; down(&idmap->idmap_lock); down(&idmap->idmap_im_lock); - if (name[xnamelen - 1] == '\0') - xnamelen--; - - if (idmap_cache_lookup(idmap, hashtype, name, &xnamelen, id) == 0) { + he = idmap_lookup_name(h, name, namelen); + if (he != NULL) { + *id = he->ih_id; ret = 0; goto out; } memset(im, 0, sizeof(*im)); memcpy(im->im_name, name, namelen); - /* Make sure the string is NULL terminated */ - if (namelen != xnamelen) { - /* We cannot fit a NULL character */ - if (namelen == IDMAP_NAMESZ) { - ret = -1; - goto out; - } - im->im_name[namelen] = '\0'; - } - im->im_type = type; + im->im_type = h->h_type; im->im_conv = IDMAP_CONV_NAMETOID; memset(&msg, 0, sizeof(msg)); @@ -198,16 +254,9 @@ nfs_idmap_id(struct nfs_server *server, remove_wait_queue(&idmap->idmap_wq, &wq); down(&idmap->idmap_im_lock); - /* - * XXX Race condition here, with testing for status. Go ahead - * and and do the cace lookup anyway. - */ if (im->im_status & IDMAP_STATUS_SUCCESS) { - ret = 0; *id = im->im_id; - - hashtype |= IDMAP_HASH_TYPE_INSERT; - ret = idmap_cache_lookup(idmap, hashtype, name, &xnamelen, id); + ret = 0; } out: @@ -220,35 +269,31 @@ nfs_idmap_id(struct nfs_server *server, /* * ID -> Name */ -int -nfs_idmap_name(struct nfs_server *server, u_int8_t type, uid_t id, - char *name, u_int *namelen) +static int +nfs_idmap_name(struct idmap *idmap, struct idmap_hashtable *h, + __u32 id, char *name) { struct rpc_pipe_msg msg; - struct idmap *idmap = server->idmap; struct idmap_msg *im; + struct idmap_hashent *he; DECLARE_WAITQUEUE(wq, current); - int ret = -1, hashtype = IDMAP_HASH_TYPE_ID; - u_int len; - - if (idmap == NULL) - return (-1); + int ret = -EIO; + unsigned int len; im = &idmap->idmap_im; - if (*namelen < IDMAP_NAMESZ || *namelen == 0) - return (-1); - down(&idmap->idmap_lock); down(&idmap->idmap_im_lock); - if (idmap_cache_lookup(idmap, hashtype, name, namelen, &id) == 0) { - ret = 0; + he = idmap_lookup_id(h, id); + if (he != 0) { + memcpy(name, he->ih_name, he->ih_namelen); + ret = he->ih_namelen; goto out; } memset(im, 0, sizeof(*im)); - im->im_type = type; + im->im_type = h->h_type; im->im_conv = IDMAP_CONV_IDTONAME; im->im_id = id; @@ -263,9 +308,6 @@ nfs_idmap_name(struct nfs_server *server goto out; } - /* - * XXX add timeouts here - */ set_current_state(TASK_UNINTERRUPTIBLE); up(&idmap->idmap_im_lock); schedule(); @@ -274,23 +316,20 @@ nfs_idmap_name(struct nfs_server *server down(&idmap->idmap_im_lock); if (im->im_status & IDMAP_STATUS_SUCCESS) { - if ((len = validate_ascii(im->im_name, IDMAP_NAMESZ)) == -1) + if ((len = strnlen(im->im_name, IDMAP_NAMESZ)) == 0) goto out; - ret = 0; memcpy(name, im->im_name, len); - *namelen = len; - - hashtype |= IDMAP_HASH_TYPE_INSERT; - ret = idmap_cache_lookup(idmap, hashtype, name, namelen, &id); + ret = len; } out: memset(im, 0, sizeof(*im)); up(&idmap->idmap_im_lock); up(&idmap->idmap_lock); - return (ret); + return ret; } +/* RPC pipefs upcall/downcall routines */ static ssize_t idmap_pipe_upcall(struct file *filp, struct rpc_pipe_msg *msg, char *dst, size_t buflen) @@ -317,10 +356,12 @@ static ssize_t idmap_pipe_downcall(struct file *filp, const char *src, size_t mlen) { struct rpc_inode *rpci = RPC_I(filp->f_dentry->d_inode); - struct nfs_server *server = rpci->private; - struct idmap *idmap = server->idmap; + struct idmap *idmap = (struct idmap *)rpci->private; struct idmap_msg im_in, *im = &idmap->idmap_im; - int match = 0, hashtype, badmsg = 0, namelen_in, namelen; + struct idmap_hashtable *h; + struct idmap_hashent *he = NULL; + int namelen_in; + int ret; if (mlen != sizeof(im_in)) return (-ENOSPC); @@ -330,39 +371,66 @@ idmap_pipe_downcall(struct file *filp, c down(&idmap->idmap_im_lock); - namelen_in = validate_ascii(im_in.im_name, IDMAP_NAMESZ); - namelen = validate_ascii(im->im_name, IDMAP_NAMESZ); + ret = mlen; + im->im_status = im_in.im_status; + /* If we got an error, terminate now, and wake up pending upcalls */ + if (!(im_in.im_status & IDMAP_STATUS_SUCCESS)) { + wake_up(&idmap->idmap_wq); + goto out; + } + + /* Sanity checking of strings */ + ret = -EINVAL; + namelen_in = strnlen(im_in.im_name, IDMAP_NAMESZ); + if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) + goto out; - badmsg = !(im_in.im_status & IDMAP_STATUS_SUCCESS) || namelen_in <= 0; + switch (im_in.im_type) { + case IDMAP_TYPE_USER: + h = &idmap->idmap_user_hash; + break; + case IDMAP_TYPE_GROUP: + h = &idmap->idmap_group_hash; + break; + default: + goto out; + } switch (im_in.im_conv) { case IDMAP_CONV_IDTONAME: - match = im->im_id == im_in.im_id; + /* Did we match the current upcall? */ + if (im->im_conv == IDMAP_CONV_IDTONAME + && im->im_type == im_in.im_type + && im->im_id == im_in.im_id) { + /* Yes: copy string, including the terminating '\0' */ + memcpy(im->im_name, im_in.im_name, namelen_in); + im->im_name[namelen_in] = '\0'; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_id(h, im_in.im_id); break; case IDMAP_CONV_NAMETOID: - match = namelen == namelen_in && - memcmp(im->im_name, im_in.im_name, namelen) == 0; + /* Did we match the current upcall? */ + if (im->im_conv == IDMAP_CONV_NAMETOID + && im->im_type == im_in.im_type + && strnlen(im->im_name, IDMAP_NAMESZ) == namelen_in + && memcmp(im->im_name, im_in.im_name, namelen_in) == 0) { + im->im_id = im_in.im_id; + wake_up(&idmap->idmap_wq); + } + he = idmap_alloc_name(h, im_in.im_name, namelen_in); break; default: - badmsg = 1; - break; - } - - match = match && im->im_type == im_in.im_type; - - if (match) { - memcpy(im, &im_in, sizeof(*im)); - wake_up(&idmap->idmap_wq); - } else if (!badmsg) { - hashtype = im_in.im_conv == IDMAP_CONV_IDTONAME ? - IDMAP_HASH_TYPE_ID : IDMAP_HASH_TYPE_NAME; - hashtype |= IDMAP_HASH_TYPE_INSERT; - idmap_cache_lookup(idmap, hashtype, im_in.im_name, &namelen_in, - &im_in.im_id); + goto out; } + /* If the entry is valid, also copy it to the cache */ + if (he != NULL) + idmap_update_entry(he, im_in.im_name, namelen_in, im_in.im_id); + ret = mlen; +out: up(&idmap->idmap_im_lock); - return (mlen); + return ret; } void @@ -379,108 +447,51 @@ idmap_pipe_destroy_msg(struct rpc_pipe_m up(&idmap->idmap_im_lock); } -static int -validate_ascii(char *string, u_int32_t len) -{ - int i; - - for (i = 0; i < len; i++) { - if (string[i] == '\0') - break; - - if (string[i] & 0x80) - return (-1); - } - - if (string[i] != '\0') - return (-1); - - return (i); -} - /* * Fowler/Noll/Vo hash * http://www.isthe.com/chongo/tech/comp/fnv/ */ -#define FNV_P_32 ((u_int32_t)0x01000193) /* 16777619 */ -#define FNV_1_32 ((u_int32_t)0x811c9dc5) /* 2166136261 */ +#define FNV_P_32 ((unsigned int)0x01000193) /* 16777619 */ +#define FNV_1_32 ((unsigned int)0x811c9dc5) /* 2166136261 */ -static u_int32_t -fnvhash32(void *buf, u_int32_t buflen) +static unsigned int fnvhash32(const void *buf, size_t buflen) { - u_char *p, *end = (u_char *)buf + buflen; - u_int32_t hash = FNV_1_32; + const unsigned char *p, *end = (const unsigned char *)buf + buflen; + unsigned int hash = FNV_1_32; for (p = buf; p < end; p++) { hash *= FNV_P_32; - hash ^= (u_int32_t)*p; + hash ^= (unsigned int)*p; } return (hash); } -/* - * ->ih_namelen == 0 indicates negative entry - */ -static int -idmap_cache_lookup(struct idmap *idmap, int type, char *name, u_int32_t *namelen, - uid_t *id) +int nfs_map_name_to_uid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) { - u_int32_t hash; - struct idmap_hashent *he = NULL; - int insert = type & IDMAP_HASH_TYPE_INSERT; - int ret = -1; + struct idmap *idmap = clp->cl_idmap; - /* - * XXX technically, this is not needed, since we will always - * hold idmap_im_lock when altering the hash tables. but - * semantically that just hurts. - * - * XXX cache negative responses - */ - down(&idmap->idmap_hash_lock); + return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); +} - if (*namelen > IDMAP_NAMESZ || *namelen == 0) - goto out; +int nfs_map_group_to_gid(struct nfs4_client *clp, const char *name, size_t namelen, __u32 *uid) +{ + struct idmap *idmap = clp->cl_idmap; - if (type & IDMAP_HASH_TYPE_NAME) { - hash = fnvhash32(name, *namelen) % IDMAP_HASH_SZ; - he = &idmap->idmap_name_hash[hash]; - - /* - * Testing he->ih_namelen == *namelen implicitly tests - * namelen != 0, and thus a non-negative entry. - */ - if (!insert && he->ih_namelen == *namelen && - memcmp(he->ih_name, name, *namelen) == 0) { - *id = he->ih_id; - ret = 0; - goto out; - } - } + return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); +} - if (type & IDMAP_HASH_TYPE_ID) { - hash = fnvhash32(id, sizeof(*id)) % IDMAP_HASH_SZ; - he = &idmap->idmap_id_hash[hash]; - - if (!insert && *id == he->ih_id && he->ih_namelen != 0 && - *namelen >= he->ih_namelen) { - memcpy(name, he->ih_name, he->ih_namelen); - *namelen = he->ih_namelen; - ret = 0; - goto out; - } - } +int nfs_map_uid_to_name(struct nfs4_client *clp, __u32 uid, char *buf) +{ + struct idmap *idmap = clp->cl_idmap; - if (insert && he != NULL) { - he->ih_id = *id; - memcpy(he->ih_name, name, *namelen); - he->ih_namelen = *namelen; - ret = 0; - } + return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); +} +int nfs_map_gid_to_group(struct nfs4_client *clp, __u32 uid, char *buf) +{ + struct idmap *idmap = clp->cl_idmap; - out: - up(&idmap->idmap_hash_lock); - return (ret); + return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); } + diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/nfs/inode.c linux-2.6.2-33-fix_kconfig/fs/nfs/inode.c --- linux-2.6.2/fs/nfs/inode.c 2004-02-07 12:57:23.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/nfs/inode.c 2004-02-08 18:15:43.000000000 +0100 @@ -53,8 +53,8 @@ */ #define NFS_MAX_READAHEAD RPC_MAXREQS -void nfs_zap_caches(struct inode *); static void nfs_invalidate_inode(struct inode *); +static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long); static struct inode *nfs_alloc_inode(struct super_block *sb); static void nfs_destroy_inode(struct inode *); @@ -151,6 +151,7 @@ nfs_clear_inode(struct inode *inode) cred = nfsi->cache_access.cred; if (cred) put_rpccred(cred); + BUG_ON(atomic_read(&nfsi->data_updates) != 0); } void @@ -158,10 +159,7 @@ nfs_put_super(struct super_block *sb) { struct nfs_server *server = NFS_SB(sb); -#ifdef CONFIG_NFS_V4 - if (server->idmap != NULL) - nfs_idmap_delete(server); -#endif /* CONFIG_NFS_V4 */ + nfs4_renewd_prepare_shutdown(server); if (server->client != NULL) rpc_shutdown_client(server->client); @@ -301,7 +299,6 @@ nfs_sb_init(struct super_block *sb, rpc_ server = NFS_SB(sb); sb->s_magic = NFS_SUPER_MAGIC; - sb->s_op = &nfs_sops; /* Did getting the root inode fail? */ if (nfs_get_root(&root_inode, authflavor, sb, &server->fh) < 0) @@ -310,7 +307,7 @@ nfs_sb_init(struct super_block *sb, rpc_ if (!sb->s_root) goto out_no_root; - sb->s_root->d_op = &nfs_dentry_operations; + sb->s_root->d_op = server->rpc_ops->dentry_ops; /* Get some general file system info */ if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) { @@ -493,10 +490,17 @@ nfs_fill_super(struct super_block *sb, s server->client = nfs_create_client(server, data); if (server->client == NULL) goto out_fail; - data->pseudoflavor = RPC_AUTH_UNIX; /* RFC 2623, sec 2.3.2 */ - server->client_sys = nfs_create_client(server, data); - if (server->client_sys == NULL) - goto out_shutdown; + /* RFC 2623, sec 2.3.2 */ + if (authflavor != RPC_AUTH_UNIX) { + server->client_sys = rpc_clone_client(server->client); + if (server->client_sys == NULL) + goto out_shutdown; + if (!rpcauth_create(RPC_AUTH_UNIX, server->client_sys)) + goto out_shutdown; + } else { + atomic_inc(&server->client->cl_count); + server->client_sys = server->client; + } /* Fire up rpciod if not yet running */ if (rpciod_up() != 0) { @@ -504,6 +508,7 @@ nfs_fill_super(struct super_block *sb, s goto out_shutdown; } + sb->s_op = &nfs_sops; err = nfs_sb_init(sb, authflavor); if (err != 0) goto out_noinit; @@ -623,13 +628,17 @@ static int nfs_show_options(struct seq_f void nfs_zap_caches(struct inode *inode) { + struct nfs_inode *nfsi = NFS_I(inode); + int mode = inode->i_mode; + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); NFS_ATTRTIMEO_UPDATE(inode) = jiffies; - invalidate_remote_inode(inode); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); - NFS_CACHEINV(inode); + if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) + nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + else + nfsi->flags |= NFS_INO_INVALID_ATTR; } /* @@ -669,9 +678,6 @@ nfs_find_actor(struct inode *inode, void return 0; if (is_bad_inode(inode)) return 0; - /* Force an attribute cache update if inode->i_count == 0 */ - if (!atomic_read(&inode->i_count)) - NFS_CACHEINV(inode); return 1; } @@ -725,7 +731,7 @@ nfs_fhget(struct super_block *sb, struct inode->i_ino = hash; /* We can't support update_atime(), since the server will reset it */ - inode->i_flags |= S_NOATIME; + inode->i_flags |= S_NOATIME|S_NOCMTIME; inode->i_mode = fattr->mode; /* Why so? Because we want revalidate for devices/FIFOs, and * that's precisely what we have in nfs_file_inode_operations. @@ -736,7 +742,7 @@ nfs_fhget(struct super_block *sb, struct inode->i_data.a_ops = &nfs_file_aops; inode->i_data.backing_dev_info = &NFS_SB(sb)->backing_dev_info; } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &nfs_dir_inode_operations; + inode->i_op = NFS_SB(sb)->rpc_ops->dir_inode_ops; inode->i_fop = &nfs_dir_operations; if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS) && fattr->size <= NFS_LIMIT_READDIRPLUS) @@ -750,10 +756,6 @@ nfs_fhget(struct super_block *sb, struct inode->i_atime = fattr->atime; inode->i_mtime = fattr->mtime; inode->i_ctime = fattr->ctime; - nfsi->read_cache_ctime = fattr->ctime; - nfsi->read_cache_mtime = fattr->mtime; - nfsi->cache_mtime_jiffies = fattr->timestamp; - nfsi->read_cache_isize = fattr->size; if (fattr->valid & NFS_ATTR_FATTR_V4) nfsi->change_attr = fattr->change_attr; inode->i_size = nfs_size_to_loff_t(fattr->size); @@ -800,65 +802,50 @@ nfs_setattr(struct dentry *dentry, struc struct nfs_fattr fattr; int error; + if (attr->ia_valid & ATTR_SIZE) { + if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) + attr->ia_valid &= ~ATTR_SIZE; + } + /* Optimization: if the end result is no change, don't RPC */ attr->ia_valid &= NFS_VALID_ATTRS; if (attr->ia_valid == 0) return 0; lock_kernel(); - - /* - * Make sure the inode is up-to-date. - */ - error = nfs_revalidate_inode(NFS_SERVER(inode),inode); - if (error) { -#ifdef NFS_PARANOIA -printk("nfs_setattr: revalidate failed, error=%d\n", error); -#endif - goto out; - } - - if (!S_ISREG(inode->i_mode)) { - attr->ia_valid &= ~ATTR_SIZE; - if (attr->ia_valid == 0) - goto out; - } else { - filemap_fdatawrite(inode->i_mapping); - error = nfs_wb_all(inode); - filemap_fdatawait(inode->i_mapping); - if (error) - goto out; + nfs_begin_data_update(inode); + /* Write all dirty data if we're changing file permissions or size */ + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { + if (filemap_fdatawrite(inode->i_mapping) == 0) + filemap_fdatawait(inode->i_mapping); + nfs_wb_all(inode); } - error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); - if (error) - goto out; - /* - * If we changed the size or mtime, update the inode - * now to avoid invalidating the page cache. - */ - if (attr->ia_valid & ATTR_SIZE) { - if (attr->ia_size != fattr.size) - printk("nfs_setattr: attr=%Ld, fattr=%Ld??\n", - (long long) attr->ia_size, (long long)fattr.size); - vmtruncate(inode, attr->ia_size); + if (error == 0) { + nfs_refresh_inode(inode, &fattr); + if ((attr->ia_valid & ATTR_MODE) != 0) { + int mode; + mode = inode->i_mode & ~S_IALLUGO; + mode |= attr->ia_mode & S_IALLUGO; + inode->i_mode = mode; + } + if ((attr->ia_valid & ATTR_UID) != 0) + inode->i_uid = attr->ia_uid; + if ((attr->ia_valid & ATTR_GID) != 0) + inode->i_gid = attr->ia_gid; + if ((attr->ia_valid & ATTR_SIZE) != 0) { + i_size_write(inode, attr->ia_size); + vmtruncate(inode, attr->ia_size); + } } - - /* - * If we changed the size or mtime, update the inode - * now to avoid invalidating the page cache. - */ - if (!(fattr.valid & NFS_ATTR_WCC)) { - struct nfs_inode *nfsi = NFS_I(inode); - fattr.pre_size = nfsi->read_cache_isize; - fattr.pre_mtime = nfsi->read_cache_mtime; - fattr.pre_ctime = nfsi->read_cache_ctime; - fattr.valid |= NFS_ATTR_WCC; - } - /* Force an attribute cache update */ - NFS_CACHEINV(inode); - error = nfs_refresh_inode(inode, &fattr); -out: + if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { + struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; + if (*cred) { + put_rpccred(*cred); + *cred = NULL; + } + } + nfs_end_data_update(inode); unlock_kernel(); return error; } @@ -886,7 +873,19 @@ nfs_wait_on_inode(struct inode *inode, i int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; - int err = nfs_revalidate_inode(NFS_SERVER(inode), inode); + struct nfs_inode *nfsi = NFS_I(inode); + int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME; + int err; + + if (__IS_FLG(inode, MS_NOATIME)) + need_atime = 0; + else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) + need_atime = 0; + /* We may force a getattr if the user cares about atime */ + if (need_atime) + err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + else + err = nfs_revalidate_inode(NFS_SERVER(inode), inode); if (!err) generic_fillattr(inode, stat); return err; @@ -921,8 +920,10 @@ int nfs_open(struct inode *inode, struct auth = NFS_CLIENT(inode)->cl_auth; cred = rpcauth_lookupcred(auth, 0); filp->private_data = cred; - if (filp->f_mode & FMODE_WRITE) + if ((filp->f_mode & FMODE_WRITE) != 0) { nfs_set_mmcred(inode, cred); + nfs_begin_data_update(inode); + } return 0; } @@ -931,6 +932,8 @@ int nfs_release(struct inode *inode, str struct rpc_cred *cred; lock_kernel(); + if ((filp->f_mode & FMODE_WRITE) != 0) + nfs_end_data_update(inode); cred = nfs_file_cred(filp); if (cred) put_rpccred(cred); @@ -947,6 +950,9 @@ __nfs_revalidate_inode(struct nfs_server { int status = -ESTALE; struct nfs_fattr fattr; + struct nfs_inode *nfsi = NFS_I(inode); + unsigned long verifier; + unsigned int flags; dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -956,23 +962,22 @@ __nfs_revalidate_inode(struct nfs_server goto out_nowait; if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode) goto out_nowait; - if (NFS_FAKE_ROOT(inode)) { - dfprintk(VFS, "NFS: not revalidating fake root\n"); - status = 0; - goto out_nowait; - } while (NFS_REVALIDATING(inode)) { status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); if (status < 0) goto out_nowait; - if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) { - status = NFS_STALE(inode) ? -ESTALE : 0; - goto out_nowait; - } + if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC) + continue; + if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) + continue; + status = NFS_STALE(inode) ? -ESTALE : 0; + goto out_nowait; } NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; + /* Protect against RPC races by saving the change attribute */ + verifier = nfs_save_change_attribute(inode); status = NFS_PROTO(inode)->getattr(inode, &fattr); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", @@ -986,13 +991,36 @@ __nfs_revalidate_inode(struct nfs_server goto out; } - status = nfs_refresh_inode(inode, &fattr); + status = nfs_update_inode(inode, &fattr, verifier); if (status) { dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode), status); goto out; } + flags = nfsi->flags; + /* + * We may need to keep the attributes marked as invalid if + * we raced with nfs_end_attr_update(). + */ + if (verifier == nfsi->cache_change_attribute) + nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); + /* Do the page cache invalidation */ + if (flags & NFS_INO_INVALID_DATA) { + if (S_ISREG(inode->i_mode)) { + if (filemap_fdatawrite(inode->i_mapping) == 0) + filemap_fdatawait(inode->i_mapping); + nfs_wb_all(inode); + } + nfsi->flags &= ~NFS_INO_INVALID_DATA; + invalidate_inode_pages2(inode->i_mapping); + memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); + /* This ensures we revalidate dentries */ + nfsi->cache_change_attribute++; + } dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); @@ -1000,41 +1028,104 @@ __nfs_revalidate_inode(struct nfs_server NFS_FLAGS(inode) &= ~NFS_INO_STALE; out: NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; - wake_up(&NFS_I(inode)->nfs_i_wait); + wake_up(&nfsi->nfs_i_wait); out_nowait: unlock_kernel(); return status; } -/* - * nfs_fattr_obsolete - Test if attribute data is newer than cached data - * @inode: inode - * @fattr: attributes to test +/** + * nfs_begin_data_update + * @inode - pointer to inode + * Declare that a set of operations will update file data on the server + */ +void nfs_begin_data_update(struct inode *inode) +{ + atomic_inc(&NFS_I(inode)->data_updates); +} + +/** + * nfs_end_data_update + * @inode - pointer to inode + * Declare end of the operations that will update file data + */ +void nfs_end_data_update(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + if (atomic_dec_and_test(&nfsi->data_updates)) { + nfsi->cache_change_attribute ++; + /* Mark the attribute cache for revalidation */ + nfsi->flags |= NFS_INO_INVALID_ATTR; + /* Directories and symlinks: invalidate page cache too */ + if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) + nfsi->flags |= NFS_INO_INVALID_DATA; + } +} + +/** + * nfs_refresh_inode - verify consistency of the inode attribute cache + * @inode - pointer to inode + * @fattr - updated attributes * - * Avoid stuffing the attribute cache with obsolete information. - * We always accept updates if the attribute cache timed out, or if - * fattr->ctime is newer than our cached value. - * If fattr->ctime matches the cached value, we still accept the update - * if it increases the file size. + * Verifies the attribute cache. If we have just changed the attributes, + * so that fattr carries weak cache consistency data, then it may + * also update the ctime/mtime/change_attribute. */ -static inline -int nfs_fattr_obsolete(struct inode *inode, struct nfs_fattr *fattr) +int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) { struct nfs_inode *nfsi = NFS_I(inode); - long cdif; + loff_t cur_size, new_isize; + int data_unstable; + + /* Are we in the process of updating data on the server? */ + data_unstable = nfs_caches_unstable(inode); + + if (fattr->valid & NFS_ATTR_FATTR_V4) { + if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 + && nfsi->change_attr == fattr->pre_change_attr) + nfsi->change_attr = fattr->change_attr; + if (!data_unstable && nfsi->change_attr != fattr->change_attr) + nfsi->flags |= NFS_INO_INVALID_ATTR; + } + + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + + /* Has the inode gone and changed behind our back? */ + if (nfsi->fileid != fattr->fileid + || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + return -EIO; - if (time_after(jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo)) - goto out_valid; - cdif = fattr->ctime.tv_sec - nfsi->read_cache_ctime.tv_sec; - if (cdif == 0) - cdif = fattr->ctime.tv_nsec - nfsi->read_cache_ctime.tv_nsec; - if (cdif > 0) - goto out_valid; - /* Ugh... */ - if (cdif == 0 && fattr->size > nfsi->read_cache_isize) - goto out_valid; - return -1; - out_valid: + cur_size = i_size_read(inode); + new_isize = nfs_size_to_loff_t(fattr->size); + + /* If we have atomic WCC data, we may update some attributes */ + if ((fattr->valid & NFS_ATTR_WCC) != 0) { + if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + } + + /* Verify a few of the more important attributes */ + if (!data_unstable) { + if (!timespec_equal(&inode->i_mtime, &fattr->mtime) + || cur_size != new_isize) + nfsi->flags |= NFS_INO_INVALID_ATTR; + } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) + nfsi->flags |= NFS_INO_INVALID_ATTR; + + /* Have any file permissions changed? */ + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) + || inode->i_uid != fattr->uid + || inode->i_gid != fattr->gid) + nfsi->flags |= NFS_INO_INVALID_ATTR; + + if (!timespec_equal(&inode->i_atime, &fattr->atime)) + nfsi->flags |= NFS_INO_INVALID_ATIME; + + nfsi->read_cache_jiffies = fattr->timestamp; return 0; } @@ -1050,20 +1141,22 @@ int nfs_fattr_obsolete(struct inode *ino * * A very similar scenario holds for the dir cache. */ -int -__nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) { struct nfs_inode *nfsi = NFS_I(inode); __u64 new_size; loff_t new_isize; - int invalid = 0; - int mtime_update = 0; + unsigned int invalid = 0; loff_t cur_isize; + int data_unstable; - dfprintk(VFS, "NFS: refresh_inode(%s/%ld ct=%d info=0x%x)\n", - inode->i_sb->s_id, inode->i_ino, + dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", + __FUNCTION__, inode->i_sb->s_id, inode->i_ino, atomic_read(&inode->i_count), fattr->valid); + if ((fattr->valid & NFS_ATTR_FATTR) == 0) + return 0; + /* First successful call after mount, fill real data. */ if (NFS_FAKE_ROOT(inode)) { dfprintk(VFS, "NFS: updating fake root\n"); @@ -1072,43 +1165,49 @@ __nfs_refresh_inode(struct inode *inode, } if (nfsi->fileid != fattr->fileid) { - printk(KERN_ERR "nfs_refresh_inode: inode number mismatch\n" + printk(KERN_ERR "%s: inode number mismatch\n" "expected (%s/0x%Lx), got (%s/0x%Lx)\n", + __FUNCTION__, inode->i_sb->s_id, (long long)nfsi->fileid, inode->i_sb->s_id, (long long)fattr->fileid); goto out_err; } - /* Throw out obsolete READDIRPLUS attributes */ - if (time_before(fattr->timestamp, NFS_READTIME(inode))) - return 0; /* * Make sure the inode's type hasn't changed. */ if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) goto out_changed; - new_size = fattr->size; - new_isize = nfs_size_to_loff_t(fattr->size); - - /* Avoid races */ - if (nfs_fattr_obsolete(inode, fattr)) - goto out_nochange; - /* * Update the read time so we don't revalidate too often. */ nfsi->read_cache_jiffies = fattr->timestamp; - /* - * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache. - * NOT inode->i_size!!! - */ - if (nfsi->read_cache_isize != new_size) { + /* Are we racing with known updates of the metadata on the server? */ + data_unstable = ! nfs_verify_change_attribute(inode, verifier); + + /* Check if the file size agrees */ + new_size = fattr->size; + new_isize = nfs_size_to_loff_t(fattr->size); + cur_isize = i_size_read(inode); + if (cur_isize != new_size) { #ifdef NFS_DEBUG_VERBOSE printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; + /* + * If we have pending writebacks, things can get + * messy. + */ + if (S_ISREG(inode->i_mode) && data_unstable) { + if (new_isize > cur_isize) { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + } else { + i_size_write(inode, new_isize); + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } } /* @@ -1116,12 +1215,13 @@ __nfs_refresh_inode(struct inode *inode, * can change this value in VFS without requiring a * cache revalidation. */ - if (!timespec_equal(&nfsi->read_cache_mtime, &fattr->mtime)) { + if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { + memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); #ifdef NFS_DEBUG_VERBOSE printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; - mtime_update = 1; + if (!data_unstable) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } if ((fattr->valid & NFS_ATTR_FATTR_V4) @@ -1130,47 +1230,15 @@ __nfs_refresh_inode(struct inode *inode, printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); #endif - invalid = 1; - } - - /* Check Weak Cache Consistency data. - * If size and mtime match the pre-operation values, we can - * assume that any attribute changes were caused by our NFS - * operation, so there's no need to invalidate the caches. - */ - if ((fattr->valid & NFS_ATTR_PRE_CHANGE) - && nfsi->change_attr == fattr->pre_change_attr) { - invalid = 0; - } - else if ((fattr->valid & NFS_ATTR_WCC) - && nfsi->read_cache_isize == fattr->pre_size - && timespec_equal(&nfsi->read_cache_mtime, &fattr->pre_mtime)) { - invalid = 0; - } - - /* - * If we have pending writebacks, things can get - * messy. - */ - cur_isize = i_size_read(inode); - if (nfs_have_writebacks(inode) && new_isize < cur_isize) - new_isize = cur_isize; - - nfsi->read_cache_ctime = fattr->ctime; - inode->i_ctime = fattr->ctime; - inode->i_atime = fattr->atime; - - if (mtime_update) { - if (invalid) - nfsi->cache_mtime_jiffies = fattr->timestamp; - nfsi->read_cache_mtime = fattr->mtime; - inode->i_mtime = fattr->mtime; + nfsi->change_attr = fattr->change_attr; + if (!data_unstable) + invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; } - nfsi->read_cache_isize = new_size; - i_size_write(inode, new_isize); + memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); + memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); - if (inode->i_mode != fattr->mode || + if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || inode->i_uid != fattr->uid || inode->i_gid != fattr->gid) { struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; @@ -1178,11 +1246,9 @@ __nfs_refresh_inode(struct inode *inode, put_rpccred(*cred); *cred = NULL; } + invalid |= NFS_INO_INVALID_ATTR; } - if (fattr->valid & NFS_ATTR_FATTR_V4) - nfsi->change_attr = fattr->change_attr; - inode->i_mode = fattr->mode; inode->i_nlink = fattr->nlink; inode->i_uid = fattr->uid; @@ -1198,31 +1264,30 @@ __nfs_refresh_inode(struct inode *inode, inode->i_blocks = fattr->du.nfs2.blocks; inode->i_blksize = fattr->du.nfs2.blocksize; } - - /* Update attrtimeo value */ - if (invalid) { + + /* Update attrtimeo value if we're out of the unstable period */ + if (invalid & NFS_INO_INVALID_ATTR) { nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; - invalidate_remote_inode(inode); - memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; } + /* Don't invalidate the data if we were to blame */ + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) + || S_ISLNK(inode->i_mode))) + invalid &= ~NFS_INO_INVALID_DATA; + nfsi->flags |= invalid; return 0; - out_nochange: - if (!timespec_equal(&fattr->atime, &inode->i_atime)) - inode->i_atime = fattr->atime; - return 0; out_changed: /* * Big trouble! The inode has become a different object. */ #ifdef NFS_PARANOIA - printk(KERN_DEBUG "nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n", - inode->i_ino, inode->i_mode, fattr->mode); + printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", + __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); #endif /* * No need to worry about unhashing the dentry, as the @@ -1274,6 +1339,8 @@ static struct super_block *nfs_get_sb(st if (!server) return ERR_PTR(-ENOMEM); memset(server, 0, sizeof(struct nfs_server)); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); root = &server->fh; memcpy(root, &data->root, sizeof(*root)); @@ -1346,9 +1413,52 @@ static struct file_system_type nfs_fs_ty #ifdef CONFIG_NFS_V4 +static void nfs4_clear_inode(struct inode *); + +static struct super_operations nfs4_sops = { + .alloc_inode = nfs_alloc_inode, + .destroy_inode = nfs_destroy_inode, + .write_inode = nfs_write_inode, + .delete_inode = nfs_delete_inode, + .put_super = nfs_put_super, + .statfs = nfs_statfs, + .clear_inode = nfs4_clear_inode, + .umount_begin = nfs_umount_begin, + .show_options = nfs_show_options, +}; + +/* + * Clean out any remaining NFSv4 state that might be left over due + * to open() calls that passed nfs_atomic_lookup, but failed to call + * nfs_open(). + */ +static void nfs4_clear_inode(struct inode *inode) +{ + struct nfs_inode *nfsi = NFS_I(inode); + + while (!list_empty(&nfsi->open_states)) { + struct nfs4_state *state; + + state = list_entry(nfsi->open_states.next, + struct nfs4_state, + inode_states); + dprintk("%s(%s/%Ld): found unclaimed NFSv4 state %p\n", + __FUNCTION__, + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), + state); + list_del(&state->inode_states); + nfs4_put_open_state(state); + } + /* Now call standard NFS clear_inode() code */ + nfs_clear_inode(inode); +} + + static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data, int silent) { struct nfs_server *server; + struct nfs4_client *clp = NULL; struct rpc_xprt *xprt = NULL; struct rpc_clnt *clnt = NULL; struct rpc_timeout timeparms; @@ -1398,13 +1508,13 @@ static int nfs4_fill_super(struct super_ return -EINVAL; } - /* Now create transport and client */ - xprt = xprt_create_proto(proto, &server->addr, &timeparms); - if (xprt == NULL) { - printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + clp = nfs4_get_client(&server->addr.sin_addr); + if (!clp) { + printk(KERN_WARNING "NFS: failed to create NFS4 client.\n"); goto out_fail; } + /* Now create transport and client */ authflavour = RPC_AUTH_UNIX; if (data->auth_flavourlen != 0) { if (data->auth_flavourlen > 1) @@ -1414,41 +1524,78 @@ static int nfs4_fill_super(struct super_ goto out_fail; } } - clnt = rpc_create_client(xprt, server->hostname, &nfs_program, - server->rpc_ops->version, authflavour); + + down_write(&clp->cl_sem); + if (clp->cl_rpcclient == NULL) { + xprt = xprt_create_proto(proto, &server->addr, &timeparms); + if (xprt == NULL) { + up_write(&clp->cl_sem); + printk(KERN_WARNING "NFS: cannot create RPC transport.\n"); + goto out_fail; + } + clnt = rpc_create_client(xprt, server->hostname, &nfs_program, + server->rpc_ops->version, authflavour); + if (clnt == NULL) { + up_write(&clp->cl_sem); + printk(KERN_WARNING "NFS: cannot create RPC client.\n"); + xprt_destroy(xprt); + goto out_fail; + } + clnt->cl_chatty = 1; + clp->cl_rpcclient = clnt; + clp->cl_cred = rpcauth_lookupcred(clnt->cl_auth, 0); + memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr)); + nfs_idmap_new(clp); + } + if (list_empty(&clp->cl_superblocks)) + clear_bit(NFS4CLNT_OK, &clp->cl_state); + list_add_tail(&server->nfs4_siblings, &clp->cl_superblocks); + clnt = rpc_clone_client(clp->cl_rpcclient); + server->nfs4_state = clp; + up_write(&clp->cl_sem); + clp = NULL; + if (clnt == NULL) { printk(KERN_WARNING "NFS: cannot create RPC client.\n"); - xprt_destroy(xprt); - goto out_fail; + goto out_remove_list; + } + if (server->nfs4_state->cl_idmap == NULL) { + printk(KERN_WARNING "NFS: failed to create idmapper.\n"); + goto out_shutdown; } clnt->cl_intr = (server->flags & NFS4_MOUNT_INTR) ? 1 : 0; clnt->cl_softrtry = (server->flags & NFS4_MOUNT_SOFT) ? 1 : 0; - clnt->cl_chatty = 1; server->client = clnt; + if (clnt->cl_auth->au_flavor != authflavour) { + if (rpcauth_create(authflavour, clnt) == NULL) { + printk(KERN_WARNING "NFS: couldn't create credcache!\n"); + goto out_shutdown; + } + } + /* Fire up rpciod if not yet running */ if (rpciod_up() != 0) { printk(KERN_WARNING "NFS: couldn't start rpciod!\n"); goto out_shutdown; } - if (create_nfsv4_state(server, data)) - goto out_shutdown; - - if ((server->idmap = nfs_idmap_new(server)) == NULL) - printk(KERN_WARNING "NFS: couldn't start IDmap\n"); - + sb->s_op = &nfs4_sops; err = nfs_sb_init(sb, authflavour); if (err == 0) return 0; rpciod_down(); - destroy_nfsv4_state(server); - if (server->idmap != NULL) - nfs_idmap_delete(server); out_shutdown: rpc_shutdown_client(server->client); +out_remove_list: + down_write(&server->nfs4_state->cl_sem); + list_del_init(&server->nfs4_siblings); + up_write(&server->nfs4_state->cl_sem); + destroy_nfsv4_state(server); out_fail: + if (clp) + nfs4_put_client(clp); return err; } @@ -1505,6 +1652,8 @@ static struct super_block *nfs4_get_sb(s if (!server) return ERR_PTR(-ENOMEM); memset(server, 0, sizeof(struct nfs_server)); + /* Zero out the NFS state stuff */ + init_nfsv4_state(server); if (data->version != NFS4_MOUNT_VERSION) { printk("nfs warning: mount version %s than kernel\n", @@ -1625,6 +1774,7 @@ static void init_once(void * foo, kmem_c INIT_LIST_HEAD(&nfsi->dirty); INIT_LIST_HEAD(&nfsi->commit); INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); + atomic_set(&nfsi->data_updates, 0); nfsi->ndirty = 0; nfsi->ncommit = 0; nfsi->npages = 0; diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/nfs/nfs3proc.c linux-2.6.2-33-fix_kconfig/fs/nfs/nfs3proc.c --- linux-2.6.2/fs/nfs/nfs3proc.c 2004-02-07 12:51:51.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/nfs/nfs3proc.c 2004-02-07 13:10:00.000000000 +0100 @@ -15,6 +15,7 @@ #include #include #include +#include #include #define NFSDBG_FACILITY NFSDBG_PROC @@ -67,20 +68,6 @@ nfs3_async_handle_jukebox(struct rpc_tas return 1; } -static void -nfs3_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) -{ - if (fattr->valid & NFS_ATTR_FATTR) { - if (!(fattr->valid & NFS_ATTR_WCC)) { - fattr->pre_size = NFS_CACHE_ISIZE(inode); - fattr->pre_mtime = NFS_CACHE_MTIME(inode); - fattr->pre_ctime = NFS_CACHE_CTIME(inode); - fattr->valid |= NFS_ATTR_WCC; - } - nfs_refresh_inode(inode, fattr); - } -} - static struct rpc_cred * nfs_cred(struct inode *inode, struct file *filp) { @@ -279,7 +266,7 @@ nfs3_proc_write(struct nfs_write_data *w msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); if (status >= 0) - nfs3_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); dprintk("NFS reply write: %d\n", status); return status < 0? status : wdata->res.count; } @@ -302,7 +289,7 @@ nfs3_proc_commit(struct nfs_write_data * msg.rpc_cred = nfs_cred(inode, filp); status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); if (status >= 0) - nfs3_write_refresh_inode(inode, fattr); + nfs_refresh_inode(inode, fattr); dprintk("NFS reply commit: %d\n", status); return status; } @@ -776,12 +763,13 @@ nfs3_proc_read_setup(struct nfs_read_dat static void nfs3_write_done(struct rpc_task *task) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data; if (nfs3_async_handle_jukebox(task)) return; + data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs3_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_writeback_done(task); } @@ -834,12 +822,13 @@ nfs3_proc_write_setup(struct nfs_write_d static void nfs3_commit_done(struct rpc_task *task) { - struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct nfs_write_data *data; if (nfs3_async_handle_jukebox(task)) return; + data = (struct nfs_write_data *)task->tk_calldata; if (task->tk_status >= 0) - nfs3_write_refresh_inode(data->inode, data->res.fattr); + nfs_refresh_inode(data->inode, data->res.fattr); nfs_commit_done(task); } @@ -896,8 +885,16 @@ nfs3_request_compatible(struct nfs_page return 1; } +static int +nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) +{ + return nlmclnt_proc(filp->f_dentry->d_inode, cmd, fl); +} + struct nfs_rpc_ops nfs_v3_clientops = { .version = 3, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, .getroot = nfs3_proc_get_root, .getattr = nfs3_proc_getattr, .setattr = nfs3_proc_setattr, @@ -929,4 +926,5 @@ struct nfs_rpc_ops nfs_v3_clientops = { .file_release = nfs_release, .request_init = nfs3_request_init, .request_compatible = nfs3_request_compatible, + .lock = nfs3_proc_lock, }; diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/nfs/nfs4proc.c linux-2.6.2-33-fix_kconfig/fs/nfs/nfs4proc.c --- linux-2.6.2/fs/nfs/nfs4proc.c 2004-02-07 12:57:17.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/nfs/nfs4proc.c 2004-02-07 13:10:00.000000000 +0100 @@ -45,19 +45,21 @@ #include #include #include +#include #define NFSDBG_FACILITY NFSDBG_PROC +#define NFS4_POLL_RETRY_TIME (15*HZ) + #define GET_OP(cp,name) &cp->ops[cp->req_nops].u.name #define OPNUM(cp) cp->ops[cp->req_nops].opnum +static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; extern nfs4_stateid zero_stateid; -static spinlock_t renew_lock = SPIN_LOCK_UNLOCKED; - static void nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops, struct nfs_server *server, char *tag) @@ -179,44 +181,16 @@ u32 nfs4_statfs_bitmap[2] = { | FATTR4_WORD1_SPACE_TOTAL }; -u32 nfs4_fsinfo_bitmap[2] = { - FATTR4_WORD0_MAXFILESIZE - | FATTR4_WORD0_MAXREAD - | FATTR4_WORD0_MAXWRITE - | FATTR4_WORD0_LEASE_TIME, - 0 -}; - u32 nfs4_pathconf_bitmap[2] = { FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME, 0 }; -/* mount bitmap: fattr bitmap + lease time */ -u32 nfs4_mount_bitmap[2] = { - FATTR4_WORD0_TYPE - | FATTR4_WORD0_CHANGE - | FATTR4_WORD0_SIZE - | FATTR4_WORD0_FSID - | FATTR4_WORD0_FILEID - | FATTR4_WORD0_LEASE_TIME, - FATTR4_WORD1_MODE - | FATTR4_WORD1_NUMLINKS - | FATTR4_WORD1_OWNER - | FATTR4_WORD1_OWNER_GROUP - | FATTR4_WORD1_RAWDEV - | FATTR4_WORD1_SPACE_USED - | FATTR4_WORD1_TIME_ACCESS - | FATTR4_WORD1_TIME_METADATA - | FATTR4_WORD1_TIME_MODIFY -}; - static inline void __nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap, struct nfs_fattr *fattr, struct nfs_fsstat *fsstat, - struct nfs_fsinfo *fsinfo, struct nfs_pathconf *pathconf) { struct nfs4_getattr *getattr = GET_OP(cp, getattr); @@ -224,7 +198,6 @@ __nfs4_setup_getattr(struct nfs4_compoun getattr->gt_bmval = bitmap; getattr->gt_attrs = fattr; getattr->gt_fsstat = fsstat; - getattr->gt_fsinfo = fsinfo; getattr->gt_pathconf = pathconf; OPNUM(cp) = OP_GETATTR; @@ -236,16 +209,7 @@ nfs4_setup_getattr(struct nfs4_compound struct nfs_fattr *fattr) { __nfs4_setup_getattr(cp, nfs4_fattr_bitmap, fattr, - NULL, NULL, NULL); -} - -static void -nfs4_setup_getrootattr(struct nfs4_compound *cp, - struct nfs_fattr *fattr, - struct nfs_fsinfo *fsinfo) -{ - __nfs4_setup_getattr(cp, nfs4_mount_bitmap, - fattr, NULL, fsinfo, NULL); + NULL, NULL); } static void @@ -253,15 +217,7 @@ nfs4_setup_statfs(struct nfs4_compound * struct nfs_fsstat *fsstat) { __nfs4_setup_getattr(cp, nfs4_statfs_bitmap, - NULL, fsstat, NULL, NULL); -} - -static void -nfs4_setup_fsinfo(struct nfs4_compound *cp, - struct nfs_fsinfo *fsinfo) -{ - __nfs4_setup_getattr(cp, nfs4_fsinfo_bitmap, - NULL, NULL, fsinfo, NULL); + NULL, fsstat, NULL); } static void @@ -269,7 +225,7 @@ nfs4_setup_pathconf(struct nfs4_compound struct nfs_pathconf *pathconf) { __nfs4_setup_getattr(cp, nfs4_pathconf_bitmap, - NULL, NULL, NULL, pathconf); + NULL, NULL, pathconf); } static void @@ -429,18 +385,6 @@ nfs4_setup_rename(struct nfs4_compound * } static void -nfs4_setup_renew(struct nfs4_compound *cp) -{ - struct nfs4_client **client_state = GET_OP(cp, renew); - - *client_state = cp->server->nfs4_state; - - OPNUM(cp) = OP_RENEW; - cp->req_nops++; - cp->renew_index = cp->req_nops; -} - -static void nfs4_setup_restorefh(struct nfs4_compound *cp) { OPNUM(cp) = OP_RESTOREFH; @@ -455,47 +399,13 @@ nfs4_setup_savefh(struct nfs4_compound * } static void -nfs4_setup_setclientid(struct nfs4_compound *cp, u32 program, unsigned short port) -{ - struct nfs4_setclientid *setclientid = GET_OP(cp, setclientid); - struct nfs_server *server = cp->server; - struct timespec tv; - u32 *p; - - tv = CURRENT_TIME; - p = (u32 *)setclientid->sc_verifier.data; - *p++ = tv.tv_sec; - *p++ = tv.tv_nsec; - setclientid->sc_name = server->ip_addr; - sprintf(setclientid->sc_netid, "udp"); - sprintf(setclientid->sc_uaddr, "%s.%d.%d", server->ip_addr, port >> 8, port & 255); - setclientid->sc_prog = program; - setclientid->sc_cb_ident = 0; - setclientid->sc_state = server->nfs4_state; - - OPNUM(cp) = OP_SETCLIENTID; - cp->req_nops++; -} - -static void -nfs4_setup_setclientid_confirm(struct nfs4_compound *cp) -{ - struct nfs4_client **client_state = GET_OP(cp, setclientid_confirm); - - *client_state = cp->server->nfs4_state; - - OPNUM(cp) = OP_SETCLIENTID_CONFIRM; - cp->req_nops++; - cp->renew_index = cp->req_nops; -} - -static void renew_lease(struct nfs_server *server, unsigned long timestamp) { - spin_lock(&renew_lock); - if (time_before(server->last_renewal,timestamp)) - server->last_renewal = timestamp; - spin_unlock(&renew_lock); + struct nfs4_client *clp = server->nfs4_state; + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); } static inline void @@ -552,6 +462,57 @@ process_cinfo(struct nfs4_change_info *i } } +/* + * OPEN_RECLAIM: + * reclaim state on the server after a reboot. + * Assumes caller is holding the sp->so_sem + */ +int +nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_fattr fattr = { + .valid = 0, + }; + struct nfs4_change_info d_cinfo; + struct nfs4_getattr f_getattr = { + .gt_bmval = nfs4_fattr_bitmap, + .gt_attrs = &fattr, + }; + + struct nfs_open_reclaimargs o_arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, + .id = sp->so_id, + .share_access = state->state, + .clientid = server->nfs4_state->cl_clientid, + .claim = NFS4_OPEN_CLAIM_PREVIOUS, + .f_getattr = &f_getattr, + }; + struct nfs_openres o_res = { + .cinfo = &d_cinfo, + .f_getattr = &f_getattr, + .server = server, /* Grrr */ + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_RECLAIM], + .rpc_argp = &o_arg, + .rpc_resp = &o_res, + .rpc_cred = sp->so_cred, + }; + int status; + + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_seqid(status, sp); + /* Update the inode attributes */ + nfs_refresh_inode(inode, &fattr); + return status; +} + +/* + * Returns an nfs4_state + an referenced inode + */ struct nfs4_state * nfs4_do_open(struct inode *dir, struct qstr *name, int flags, struct iattr *sattr, struct rpc_cred *cred) { @@ -578,7 +539,6 @@ nfs4_do_open(struct inode *dir, struct q struct nfs_openargs o_arg = { .fh = NFS_FH(dir), .share_access = flags & (FMODE_READ|FMODE_WRITE), - .clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, .opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE, .createmode = (flags & O_EXCL) ? NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED, .name = name, @@ -599,6 +559,7 @@ nfs4_do_open(struct inode *dir, struct q .rpc_cred = cred, }; +retry: status = -ENOMEM; if (!(sp = nfs4_get_state_owner(NFS_SERVER(dir), cred))) { dprintk("nfs4_do_open: nfs4_get_state_owner failed!\n"); @@ -615,12 +576,12 @@ nfs4_do_open(struct inode *dir, struct q down(&sp->so_sema); o_arg.seqid = sp->so_seqid; o_arg.id = sp->so_id; + o_arg.clientid = NFS_SERVER(dir)->nfs4_state->cl_clientid, status = rpc_call_sync(server->client, &msg, 0); - if (status) { - goto out_up; - } nfs4_increment_seqid(status, sp); + if (status) + goto out_up; process_cinfo(&d_cinfo, &d_attr); nfs_refresh_inode(dir, &d_attr); @@ -637,9 +598,7 @@ nfs4_do_open(struct inode *dir, struct q .fh = &o_res.fh, .seqid = sp->so_seqid, }; - struct nfs_open_confirmres oc_res = { - .status = 0, - }; + struct nfs_open_confirmres oc_res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM], .rpc_argp = &oc_arg, @@ -649,27 +608,54 @@ nfs4_do_open(struct inode *dir, struct q memcpy(&oc_arg.stateid, &o_res.stateid, sizeof(oc_arg.stateid)); status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_seqid(status, sp); if (status) goto out_up; - nfs4_increment_seqid(status, sp); memcpy(&state->stateid, &oc_res.stateid, sizeof(state->stateid)); } else memcpy(&state->stateid, &o_res.stateid, sizeof(state->stateid)); + spin_lock(&inode->i_lock); + if (flags & FMODE_READ) + state->nreaders++; + if (flags & FMODE_WRITE) + state->nwriters++; state->state |= flags & (FMODE_READ|FMODE_WRITE); - state->pid = current->pid; + spin_unlock(&inode->i_lock); up(&sp->so_sema); nfs4_put_state_owner(sp); - iput(inode); return state; out_up: up(&sp->so_sema); nfs4_put_state_owner(sp); - if (state) + if (state) { nfs4_put_open_state(state); - if (inode) + state = NULL; + } + if (inode) { iput(inode); + inode = NULL; + } + /* NOTE: BAD_SEQID means the server and client disagree about the + * book-keeping w.r.t. state-changing operations + * (OPEN/CLOSE/LOCK/LOCKU...) + * It is actually a sign of a bug on the client or on the server. + * + * If we receive a BAD_SEQID error in the particular case of + * doing an OPEN, we assume that nfs4_increment_seqid() will + * have unhashed the old state_owner for us, and that we can + * therefore safely retry using a new one. We should still warn + * the user though... + */ + if (status == -NFS4ERR_BAD_SEQID) { + printk(KERN_WARNING "NFS: v4 server returned a bad sequence-id error!\n"); + goto retry; + } + status = nfs4_handle_error(server, status); + if (!status) + goto retry; + BUG_ON(status < -1000 || status > 0); out: return ERR_PTR(status); } @@ -698,15 +684,23 @@ nfs4_do_setattr(struct nfs_server *serve .rpc_argp = &arg, .rpc_resp = &res, }; + int status; +retry: fattr->valid = 0; if (state) - memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); + nfs4_copy_stateid(&arg.stateid, state, 0); else memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); - return(rpc_call_sync(server->client, &msg, 0)); + status = rpc_call_sync(server->client, &msg, 0); + if (status) { + status = nfs4_handle_error(server, status); + if (!status) + goto retry; + } + return status; } /* @@ -728,9 +722,7 @@ nfs4_do_close(struct inode *inode, struc struct nfs_closeargs arg = { .fh = NFS_FH(inode), }; - struct nfs_closeres res = { - .status = 0, - }; + struct nfs_closeres res; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE], .rpc_argp = &arg, @@ -746,82 +738,111 @@ nfs4_do_close(struct inode *inode, struc * the state_owner. we keep this around to process errors */ nfs4_increment_seqid(status, sp); + if (!status) + memcpy(&state->stateid, &res.stateid, sizeof(state->stateid)); + + return status; +} + +int +nfs4_do_downgrade(struct inode *inode, struct nfs4_state *state, mode_t mode) +{ + struct nfs4_state_owner *sp = state->owner; + int status = 0; + struct nfs_closeargs arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, + .share_access = mode, + }; + struct nfs_closeres res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_DOWNGRADE], + .rpc_argp = &arg, + .rpc_resp = &res, + }; + + memcpy(&arg.stateid, &state->stateid, sizeof(arg.stateid)); + status = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); + nfs4_increment_seqid(status, sp); + if (!status) + memcpy(&state->stateid, &res.stateid, sizeof(state->stateid)); return status; } +struct inode * +nfs4_atomic_open(struct inode *dir, struct dentry *dentry, struct nameidata *nd) +{ + struct iattr attr; + struct rpc_cred *cred; + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { + attr.ia_mode = nd->intent.open.create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; + BUG_ON(nd->intent.open.flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; + return state->inode; +} + +int +nfs4_open_revalidate(struct inode *dir, struct dentry *dentry, int openflags) +{ + struct rpc_cred *cred; + struct nfs4_state *state; + struct inode *inode; + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); + state = nfs4_do_open(dir, &dentry->d_name, openflags, NULL, cred); + put_rpccred(cred); + if (state == ERR_PTR(-ENOENT) && dentry->d_inode == 0) + return 1; + if (IS_ERR(state)) + return 0; + inode = state->inode; + if (inode == dentry->d_inode) { + iput(inode); + return 1; + } + d_drop(dentry); + nfs4_close_state(state, openflags); + iput(inode); + return 0; +} + static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) { - struct nfs4_client *clp; struct nfs4_compound compound; struct nfs4_op ops[4]; - struct nfs_fsinfo fsinfo; unsigned char * p; struct qstr q; int status; - clp = server->nfs4_state = nfs4_get_client(&server->addr.sin_addr); - if (!clp) - return -ENOMEM; - - down_write(&clp->cl_sem); - /* Has the clientid already been initialized? */ - if (clp->cl_state != NFS4CLNT_NEW) { - /* Yep, so just read the root attributes and the lease time. */ - fattr->valid = 0; - nfs4_setup_compound(&compound, ops, server, "getrootfh"); - nfs4_setup_putrootfh(&compound); - nfs4_setup_getrootattr(&compound, fattr, &fsinfo); - nfs4_setup_getfh(&compound, fhandle); - if ((status = nfs4_call_compound(&compound, NULL, 0))) - goto out_unlock; - goto no_setclientid; - } - - /* - * SETCLIENTID. - * Until delegations are imported, we don't bother setting the program - * number and port to anything meaningful. - */ - nfs4_setup_compound(&compound, ops, server, "setclientid"); - nfs4_setup_setclientid(&compound, 0, 0); - if ((status = nfs4_call_compound(&compound, NULL, 0))) - goto out_unlock; - - /* - * SETCLIENTID_CONFIRM, plus root filehandle. - * We also get the lease time here. - */ - fattr->valid = 0; - nfs4_setup_compound(&compound, ops, server, "setclientid_confirm"); - nfs4_setup_setclientid_confirm(&compound); - nfs4_setup_putrootfh(&compound); - nfs4_setup_getrootattr(&compound, fattr, &fsinfo); - nfs4_setup_getfh(&compound, fhandle); - if ((status = nfs4_call_compound(&compound, NULL, 0))) - goto out_unlock; - clp->cl_state = NFS4CLNT_OK; - -no_setclientid: - /* - * Now that we have instantiated the clientid and determined - * the lease time, we can initialize the renew daemon for this - * server. - * FIXME: we only need one renewd daemon per server. - */ - server->lease_time = fsinfo.lease_time * HZ; - if ((status = nfs4_init_renewd(server))) - goto out_unlock; - up_write(&clp->cl_sem); - /* * Now we do a separate LOOKUP for each component of the mount path. * The LOOKUPs are done separately so that we can conveniently * catch an ERR_WRONGSEC if it occurs along the way... */ p = server->mnt_path; + fattr->valid = 0; + nfs4_setup_compound(&compound, ops, server, "getrootfh"); + nfs4_setup_putrootfh(&compound); + nfs4_setup_getattr(&compound, fattr); + nfs4_setup_getfh(&compound, fhandle); + if ((status = nfs4_call_compound(&compound, NULL, 0))) + goto out; for (;;) { while (*p == '/') p++; @@ -847,10 +868,7 @@ no_setclientid: } break; } - return status; -out_unlock: - up_write(&clp->cl_sem); - nfs4_put_client(clp); +out: return status; } @@ -892,28 +910,38 @@ nfs4_proc_setattr(struct dentry *dentry, struct inode * inode = dentry->d_inode; int size_change = sattr->ia_valid & ATTR_SIZE; struct nfs4_state *state = NULL; - int status; + int need_iput = 0; + int status; fattr->valid = 0; if (size_change) { struct rpc_cred *cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_do_open(dentry->d_parent->d_inode, + state = nfs4_find_state(inode, cred, FMODE_WRITE); + if (!state) { + state = nfs4_do_open(dentry->d_parent->d_inode, &dentry->d_name, FMODE_WRITE, NULL, cred); + need_iput = 1; + } put_rpccred(cred); if (IS_ERR(state)) return PTR_ERR(state); if (state->inode != inode) { - printk(KERN_WARNING "nfs: raced in setattr, returning -EIO\n"); - nfs4_put_open_state(state); - return -EIO; + printk(KERN_WARNING "nfs: raced in setattr (%p != %p), returning -EIO\n", inode, state->inode); + status = -EIO; + goto out; } } status = nfs4_do_setattr(NFS_SERVER(inode), fattr, NFS_FH(inode), sattr, state); - if (state) - nfs4_put_open_state(state); +out: + if (state) { + inode = state->inode; + nfs4_close_state(state, FMODE_WRITE); + if (need_iput) + iput(inode); + } return status; } @@ -1051,7 +1079,7 @@ nfs4_proc_read(struct nfs_read_data *rda if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&rdata->args.stateid, &state->stateid, sizeof(rdata->args.stateid)); + nfs4_copy_stateid(&rdata->args.stateid, state, rdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&rdata->args.stateid, &zero_stateid, sizeof(rdata->args.stateid)); @@ -1060,12 +1088,8 @@ nfs4_proc_read(struct nfs_read_data *rda fattr->valid = 0; status = rpc_call_sync(server->client, &msg, flags); - if (!status) { + if (!status) renew_lease(server, timestamp); - /* Check cache consistency */ - if (fattr->change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - } dprintk("NFS reply read: %d\n", status); return status; } @@ -1093,7 +1117,7 @@ nfs4_proc_write(struct nfs_write_data *w if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&wdata->args.stateid, &state->stateid, sizeof(wdata->args.stateid)); + nfs4_copy_stateid(&wdata->args.stateid, state, wdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&wdata->args.stateid, &zero_stateid, sizeof(wdata->args.stateid)); @@ -1102,7 +1126,6 @@ nfs4_proc_write(struct nfs_write_data *w fattr->valid = 0; status = rpc_call_sync(server->client, &msg, rpcflags); - NFS_CACHEINV(inode); dprintk("NFS reply write: %d\n", status); return status; } @@ -1129,7 +1152,7 @@ nfs4_proc_commit(struct nfs_write_data * if (filp) { struct nfs4_state *state; state = (struct nfs4_state *)filp->private_data; - memcpy(&cdata->args.stateid, &state->stateid, sizeof(cdata->args.stateid)); + nfs4_copy_stateid(&cdata->args.stateid, state, cdata->lockowner); msg.rpc_cred = state->owner->so_cred; } else { memcpy(&cdata->args.stateid, &zero_stateid, sizeof(cdata->args.stateid)); @@ -1169,18 +1192,18 @@ nfs4_proc_create(struct inode *dir, stru state = nfs4_do_open(dir, name, flags, sattr, cred); put_rpccred(cred); if (!IS_ERR(state)) { - inode = igrab(state->inode); + inode = state->inode; if (flags & O_EXCL) { struct nfs_fattr fattr; int status; status = nfs4_do_setattr(NFS_SERVER(dir), &fattr, NFS_FH(inode), sattr, state); if (status != 0) { + nfs4_close_state(state, flags); iput(inode); inode = ERR_PTR(status); } } - nfs4_put_open_state(state); } else inode = (struct inode *)state; return inode; @@ -1446,14 +1469,14 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) { - struct nfs4_compound compound; - struct nfs4_op ops[2]; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FSINFO], + .rpc_argp = fhandle, + .rpc_resp = fsinfo, + }; memset(fsinfo, 0, sizeof(*fsinfo)); - nfs4_setup_compound(&compound, ops, server, "statfs"); - nfs4_setup_putfh(&compound, fhandle); - nfs4_setup_fsinfo(&compound, fsinfo); - return nfs4_call_compound(&compound, NULL, 0); + return rpc_call_sync(server->client, &msg, 0); } static int @@ -1471,19 +1494,31 @@ nfs4_proc_pathconf(struct nfs_server *se } static void +nfs4_restart_read(struct rpc_task *task) +{ + struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; + struct nfs_page *req; + + rpc_restart_call(task); + req = nfs_list_entry(data->pages.next); + if (req->wb_state) + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); + else + memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); +} + +static void nfs4_read_done(struct rpc_task *task) { struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; struct inode *inode = data->inode; - struct nfs_fattr *fattr = data->res.fattr; + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_read; + return; + } if (task->tk_status > 0) renew_lease(NFS_SERVER(inode), data->timestamp); - /* Check cache consistency */ - if (fattr->change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_ACCESS) - inode->i_atime = fattr->atime; /* Call back common NFS readpage processing */ nfs_readpage_result(task); } @@ -1512,8 +1547,9 @@ nfs4_proc_read_setup(struct nfs_read_dat data->res.eof = 0; data->timestamp = jiffies; + data->lockowner = req->wb_lockowner; if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); @@ -1530,18 +1566,17 @@ nfs4_proc_read_setup(struct nfs_read_dat } static void -nfs4_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +nfs4_restart_write(struct rpc_task *task) { - /* Check cache consistency */ - if (fattr->pre_change_attr != NFS_CHANGE_ATTR(inode)) - nfs_zap_caches(inode); - NFS_CHANGE_ATTR(inode) = fattr->change_attr; - if (fattr->bitmap[1] & FATTR4_WORD1_SPACE_USED) - inode->i_blocks = (fattr->du.nfs3.used + 511) >> 9; - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_METADATA) - inode->i_ctime = fattr->ctime; - if (fattr->bitmap[1] & FATTR4_WORD1_TIME_MODIFY) - inode->i_mtime = fattr->mtime; + struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; + struct nfs_page *req; + + rpc_restart_call(task); + req = nfs_list_entry(data->pages.next); + if (req->wb_state) + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); + else + memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); } static void @@ -1550,9 +1585,12 @@ nfs4_write_done(struct rpc_task *task) struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; struct inode *inode = data->inode; + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_write; + return; + } if (task->tk_status >= 0) renew_lease(NFS_SERVER(inode), data->timestamp); - nfs4_write_refresh_inode(inode, data->res.fattr); /* Call back common NFS writeback processing */ nfs_writeback_done(task); } @@ -1591,8 +1629,9 @@ nfs4_proc_write_setup(struct nfs_write_d data->res.verf = &data->verf; data->timestamp = jiffies; + data->lockowner = req->wb_lockowner; if (req->wb_state) - memcpy(&data->args.stateid, &req->wb_state->stateid, sizeof(data->args.stateid)); + nfs4_copy_stateid(&data->args.stateid, req->wb_state, req->wb_lockowner); else memcpy(&data->args.stateid, &zero_stateid, sizeof(data->args.stateid)); @@ -1612,8 +1651,12 @@ static void nfs4_commit_done(struct rpc_task *task) { struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + struct inode *inode = data->inode; - nfs4_write_refresh_inode(data->inode, data->res.fattr); + if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_write; + return; + } /* Call back common NFS writeback processing */ nfs_commit_done(task); } @@ -1651,55 +1694,58 @@ nfs4_proc_commit_setup(struct nfs_write_ } /* - * nfs4_proc_renew(): This is not one of the nfs_rpc_ops; it is a special + * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special * standalone procedure for queueing an asynchronous RENEW. */ -struct renew_desc { - struct rpc_task task; - struct nfs4_compound compound; - struct nfs4_op ops[1]; -}; - static void renew_done(struct rpc_task *task) { - struct nfs4_compound *cp = (struct nfs4_compound *) task->tk_msg.rpc_argp; - process_lease(cp); + struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; + unsigned long timestamp = (unsigned long)task->tk_calldata; + + if (task->tk_status < 0) { + switch (task->tk_status) { + case -NFS4ERR_STALE_CLIENTID: + nfs4_schedule_state_recovery(clp); + return; + } + } + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,timestamp)) + clp->cl_last_renewal = timestamp; + spin_unlock(&clp->cl_lock); } -static void -renew_release(struct rpc_task *task) +int +nfs4_proc_async_renew(struct nfs4_client *clp) { - kfree(task->tk_calldata); + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = clp->cl_cred, + }; + + return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT, + renew_done, (void *)jiffies); } int -nfs4_proc_renew(struct nfs_server *server) +nfs4_proc_renew(struct nfs4_client *clp) { - struct renew_desc *rp; - struct rpc_task *task; - struct nfs4_compound *cp; struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMPOUND], + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENEW], + .rpc_argp = clp, + .rpc_cred = clp->cl_cred, }; + unsigned long now = jiffies; + int status; - rp = (struct renew_desc *) kmalloc(sizeof(*rp), GFP_KERNEL); - if (!rp) - return -ENOMEM; - cp = &rp->compound; - task = &rp->task; - - nfs4_setup_compound(cp, rp->ops, server, "renew"); - nfs4_setup_renew(cp); - - msg.rpc_argp = cp; - msg.rpc_resp = cp; - rpc_init_task(task, server->client, renew_done, RPC_TASK_ASYNC); - rpc_call_setup(task, &msg, 0); - task->tk_calldata = rp; - task->tk_release = renew_release; - - return rpc_execute(task); + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + spin_lock(&clp->cl_lock); + if (time_before(clp->cl_last_renewal,now)) + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + return status; } /* @@ -1712,43 +1758,32 @@ static int nfs4_proc_file_open(struct inode *inode, struct file *filp) { struct dentry *dentry = filp->f_dentry; - struct inode *dir = dentry->d_parent->d_inode; - struct rpc_cred *cred; struct nfs4_state *state; - int flags = filp->f_flags; - int status = 0; + struct rpc_cred *cred; dprintk("nfs4_proc_file_open: starting on (%.*s/%.*s)\n", (int)dentry->d_parent->d_name.len, dentry->d_parent->d_name.name, (int)dentry->d_name.len, dentry->d_name.name); - if ((flags + 1) & O_ACCMODE) - flags++; - - lock_kernel(); -/* -* We have already opened the file "O_EXCL" in nfs4_proc_create!! -* This ugliness will go away with lookup-intent... -*/ + /* Find our open stateid */ cred = rpcauth_lookupcred(NFS_SERVER(inode)->client->cl_auth, 0); - state = nfs4_do_open(dir, &dentry->d_name, flags, NULL, cred); - if (IS_ERR(state)) { - status = PTR_ERR(state); - state = NULL; - } else if (filp->f_mode & FMODE_WRITE) - nfs_set_mmcred(inode, cred); - if (inode != filp->f_dentry->d_inode) { + state = nfs4_find_state(inode, cred, filp->f_mode); + put_rpccred(cred); + if (state == NULL) { printk(KERN_WARNING "NFS: v4 raced in function %s\n", __FUNCTION__); - status = -EIO; /* ERACE actually */ - nfs4_put_open_state(state); - state = NULL; + return -EIO; /* ERACE actually */ + } + nfs4_close_state(state, filp->f_mode); + if (filp->f_mode & FMODE_WRITE) { + lock_kernel(); + nfs_set_mmcred(inode, state->owner->so_cred); + nfs_begin_data_update(inode); + unlock_kernel(); } filp->private_data = state; - put_rpccred(cred); - unlock_kernel(); - return status; + return 0; } /* @@ -1760,7 +1795,12 @@ nfs4_proc_file_release(struct inode *ino struct nfs4_state *state = (struct nfs4_state *)filp->private_data; if (state) - nfs4_put_open_state(state); + nfs4_close_state(state, filp->f_mode); + if (filp->f_mode & FMODE_WRITE) { + lock_kernel(); + nfs_end_data_update(inode); + unlock_kernel(); + } return 0; } @@ -1780,6 +1820,120 @@ nfs4_request_init(struct nfs_page *req, state = (struct nfs4_state *)filp->private_data; req->wb_state = state; req->wb_cred = get_rpccred(state->owner->so_cred); + req->wb_lockowner = current->files; +} + +static int +nfs4_async_handle_error(struct rpc_task *task, struct nfs_server *server) +{ + struct nfs4_client *clp = server->nfs4_state; + + if (!clp) + return 0; + switch(task->tk_status) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL, NULL); + nfs4_schedule_state_recovery(clp); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + rpc_delay(task, NFS4_POLL_RETRY_TIME); + task->tk_status = 0; + return -EAGAIN; + case -NFS4ERR_OLD_STATEID: + task->tk_status = 0; + return -EAGAIN; + } + return 0; +} + +int +nfs4_wait_clnt_recover(struct rpc_clnt *clnt, struct nfs4_client *clp) +{ + DEFINE_WAIT(wait); + sigset_t oldset; + int interruptible, res; + + might_sleep(); + + rpc_clnt_sigmask(clnt, &oldset); + interruptible = TASK_UNINTERRUPTIBLE; + if (clnt->cl_intr) + interruptible = TASK_INTERRUPTIBLE; + do { + res = 0; + prepare_to_wait(&clp->cl_waitq, &wait, interruptible); + nfs4_schedule_state_recovery(clp); + if (test_bit(NFS4CLNT_OK, &clp->cl_state) && + !test_bit(NFS4CLNT_SETUP_STATE, &clp->cl_state)) + break; + if (clnt->cl_intr && signalled()) { + res = -ERESTARTSYS; + break; + } + schedule(); + } while(!test_bit(NFS4CLNT_OK, &clp->cl_state)); + finish_wait(&clp->cl_waitq, &wait); + rpc_clnt_sigunmask(clnt, &oldset); + return res; +} + +static int +nfs4_delay(struct rpc_clnt *clnt) +{ + sigset_t oldset; + int res = 0; + + might_sleep(); + + rpc_clnt_sigmask(clnt, &oldset); + if (clnt->cl_intr) { + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(NFS4_POLL_RETRY_TIME); + if (signalled()) + res = -ERESTARTSYS; + } else { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(NFS4_POLL_RETRY_TIME); + } + rpc_clnt_sigunmask(clnt, &oldset); + return res; +} + +/* This is the error handling routine for processes that are allowed + * to sleep. + */ +int +nfs4_handle_error(struct nfs_server *server, int errorcode) +{ + struct nfs4_client *clp = server->nfs4_state; + int ret = errorcode; + + switch(errorcode) { + case -NFS4ERR_STALE_CLIENTID: + case -NFS4ERR_STALE_STATEID: + case -NFS4ERR_EXPIRED: + ret = nfs4_wait_clnt_recover(server->client, clp); + break; + case -NFS4ERR_GRACE: + case -NFS4ERR_DELAY: + ret = nfs4_delay(server->client); + break; + case -NFS4ERR_OLD_STATEID: + ret = 0; + break; + default: + if (errorcode <= -1000) { + printk(KERN_WARNING "%s could not handle NFSv4 error %d\n", + __FUNCTION__, -errorcode); + ret = -EIO; + } + } + /* We failed to handle the error */ + return ret; } @@ -1796,14 +1950,325 @@ nfs4_request_compatible(struct nfs_page state = (struct nfs4_state *)filp->private_data; if (req->wb_state != state) return 0; + if (req->wb_lockowner != current->files) + return 0; cred = state->owner->so_cred; if (req->wb_cred != cred) return 0; return 1; } +int +nfs4_proc_setclientid(struct nfs4_client *clp, + u32 program, unsigned short port) +{ + u32 *p; + struct nfs4_setclientid setclientid; + struct timespec tv; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID], + .rpc_argp = &setclientid, + .rpc_resp = clp, + .rpc_cred = clp->cl_cred, + }; + + tv = CURRENT_TIME; + p = (u32*)setclientid.sc_verifier.data; + *p++ = (u32)tv.tv_sec; + *p = (u32)tv.tv_nsec; + setclientid.sc_name = clp->cl_ipaddr; + sprintf(setclientid.sc_netid, "tcp"); + sprintf(setclientid.sc_uaddr, "%s.%d.%d", clp->cl_ipaddr, port >> 8, port & 255); + setclientid.sc_prog = htonl(program); + setclientid.sc_cb_ident = 0; + + return rpc_call_sync(clp->cl_rpcclient, &msg, 0); +} + +int +nfs4_proc_setclientid_confirm(struct nfs4_client *clp) +{ + struct nfs_fsinfo fsinfo; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM], + .rpc_argp = clp, + .rpc_resp = &fsinfo, + .rpc_cred = clp->cl_cred, + }; + unsigned long now; + int status; + + now = jiffies; + status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); + if (status == 0) { + spin_lock(&clp->cl_lock); + clp->cl_lease_time = fsinfo.lease_time * HZ; + clp->cl_last_renewal = now; + spin_unlock(&clp->cl_lock); + } + return status; +} + +#define NFS4_LOCK_MINTIMEOUT (1 * HZ) +#define NFS4_LOCK_MAXTIMEOUT (30 * HZ) + +/* + * sleep, with exponential backoff, and retry the LOCK operation. + */ +static unsigned long +nfs4_set_lock_task_retry(unsigned long timeout) +{ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(timeout); + timeout <<= 1; + if (timeout > NFS4_LOCK_MAXTIMEOUT) + return NFS4_LOCK_MAXTIMEOUT; + return timeout; +} + +static inline int +nfs4_lck_type(int cmd, struct file_lock *request) +{ + /* set lock type */ + switch (request->fl_type) { + case F_RDLCK: + return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT; + case F_WRLCK: + return IS_SETLKW(cmd) ? NFS4_WRITEW_LT : NFS4_WRITE_LT; + case F_UNLCK: + return NFS4_WRITE_LT; + } + BUG(); +} + +static inline uint64_t +nfs4_lck_length(struct file_lock *request) +{ + if (request->fl_end == OFFSET_MAX) + return ~(uint64_t)0; + return request->fl_end - request->fl_start + 1; +} + +int +nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_client *clp = server->nfs4_state; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKT], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lowner nlo; + struct nfs4_lock_state *lsp; + int status; + + nlo.clientid = clp->cl_clientid; + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp) + nlo.id = lsp->ls_id; + else { + spin_lock(&clp->cl_lock); + nlo.id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + } + arg.u.lockt = &nlo; + status = rpc_call_sync(server->client, &msg, 0); + if (!status) { + request->fl_type = F_UNLCK; + } else if (status == -NFS4ERR_DENIED) { + int64_t len, start, end; + start = res.u.denied.offset; + len = res.u.denied.length; + end = start + len - 1; + if (end < 0 || len == 0) + request->fl_end = OFFSET_MAX; + else + request->fl_end = (loff_t)end; + request->fl_start = (loff_t)start; + request->fl_type = F_WRLCK; + if (res.u.denied.type & 1) + request->fl_type = F_RDLCK; + request->fl_pid = 0; + status = 0; + } + if (lsp) + nfs4_put_lock_state(lsp); + up(&state->lock_sema); + return status; +} + +int +nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCKU], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs4_lock_state *lsp; + struct nfs_locku_opargs luargs; + int status = 0; + + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + luargs.seqid = lsp->ls_seqid; + memcpy(&luargs.stateid, &lsp->ls_stateid, sizeof(luargs.stateid)); + arg.u.locku = &luargs; + status = rpc_call_sync(server->client, &msg, 0); + nfs4_increment_lock_seqid(status, lsp); + + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, + sizeof(lsp->ls_stateid)); + nfs4_notify_unlck(inode, request, lsp); + } + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + return status; +} + +static int +nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) +{ + struct inode *inode = state->inode; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs4_lock_state *lsp; + struct nfs_lockargs arg = { + .fh = NFS_FH(inode), + .type = nfs4_lck_type(cmd, request), + .offset = request->fl_start, + .length = nfs4_lck_length(request), + }; + struct nfs_lockres res = { + .server = server, + }; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOCK], + .rpc_argp = &arg, + .rpc_resp = &res, + .rpc_cred = state->owner->so_cred, + }; + struct nfs_lock_opargs largs = { + .new_lock_owner = 0, + }; + int status; + + down(&state->lock_sema); + lsp = nfs4_find_lock_state(state, request->fl_owner); + if (lsp == NULL) { + struct nfs4_state_owner *owner = state->owner; + struct nfs_open_to_lock otl = { + .lock_owner.clientid = server->nfs4_state->cl_clientid, + }; + status = -ENOMEM; + lsp = nfs4_alloc_lock_state(state, request->fl_owner); + if (!lsp) + goto out; + otl.lock_seqid = lsp->ls_seqid; + otl.lock_owner.id = lsp->ls_id; + memcpy(&otl.open_stateid, &state->stateid, sizeof(otl.open_stateid)); + largs.u.open_lock = &otl; + largs.new_lock_owner = 1; + arg.u.lock = &largs; + down(&owner->so_sema); + otl.open_seqid = owner->so_seqid; + status = rpc_call_sync(server->client, &msg, 0); + /* increment open_owner seqid on success, and + * seqid mutating errors */ + nfs4_increment_seqid(status, owner); + up(&owner->so_sema); + } else { + struct nfs_exist_lock el = { + .seqid = lsp->ls_seqid, + }; + memcpy(&el.stateid, &lsp->ls_stateid, sizeof(el.stateid)); + largs.u.exist_lock = ⪙ + largs.new_lock_owner = 0; + arg.u.lock = &largs; + status = rpc_call_sync(server->client, &msg, 0); + } + /* increment seqid on success, and * seqid mutating errors*/ + nfs4_increment_lock_seqid(status, lsp); + /* save the returned stateid. */ + if (status == 0) { + memcpy(&lsp->ls_stateid, &res.u.stateid, sizeof(nfs4_stateid)); + nfs4_notify_setlk(inode, request, lsp); + } else if (status == -NFS4ERR_DENIED) + status = -EAGAIN; + nfs4_put_lock_state(lsp); +out: + up(&state->lock_sema); + return status; +} + +static int +nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) +{ + struct nfs4_state *state; + unsigned long timeout = NFS4_LOCK_MINTIMEOUT; + int status; + + /* verify open state */ + state = (struct nfs4_state *)filp->private_data; + BUG_ON(!state); + + if (request->fl_start < 0 || request->fl_end < 0) + return -EINVAL; + + if (IS_GETLK(cmd)) + return nfs4_proc_getlk(state, F_GETLK, request); + + if (!(IS_SETLK(cmd) || IS_SETLKW(cmd))) + return -EINVAL; + + if (request->fl_type == F_UNLCK) + return nfs4_proc_unlck(state, cmd, request); + + do { + status = nfs4_proc_setlk(state, cmd, request); + if ((status != -EAGAIN) || IS_SETLK(cmd)) + break; + timeout = nfs4_set_lock_task_retry(timeout); + status = -ERESTARTSYS; + if (signalled()) + break; + } while(status < 0); + + return status; +} + struct nfs_rpc_ops nfs_v4_clientops = { .version = 4, /* protocol version */ + .dentry_ops = &nfs4_dentry_operations, + .dir_inode_ops = &nfs4_dir_inode_operations, .getroot = nfs4_proc_get_root, .getattr = nfs4_proc_getattr, .setattr = nfs4_proc_setattr, @@ -1835,6 +2300,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { .file_release = nfs4_proc_file_release, .request_init = nfs4_request_init, .request_compatible = nfs4_request_compatible, + .lock = nfs4_proc_lock, }; /* diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/nfs/nfs4renewd.c linux-2.6.2-33-fix_kconfig/fs/nfs/nfs4renewd.c --- linux-2.6.2/fs/nfs/nfs4renewd.c 2004-02-07 12:57:21.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/nfs/nfs4renewd.c 2004-02-07 13:08:30.000000000 +0100 @@ -54,53 +54,91 @@ #include #include -static RPC_WAITQ(nfs4_renewd_queue, "nfs4_renewd_queue"); +#define NFSDBG_FACILITY NFSDBG_PROC -static void -renewd(struct rpc_task *task) +void +nfs4_renew_state(void *data) { - struct nfs_server *server = (struct nfs_server *)task->tk_calldata; - unsigned long lease = server->lease_time; - unsigned long last = server->last_renewal; - unsigned long timeout; - - if (!server->nfs4_state) - timeout = (2 * lease) / 3; - else if (jiffies < last + lease/3) - timeout = (2 * lease) / 3 + last - jiffies; - else { + struct nfs4_client *clp = (struct nfs4_client *)data; + long lease, timeout; + unsigned long last, now; + + down_read(&clp->cl_sem); + dprintk("%s: start\n", __FUNCTION__); + /* Are there any active superblocks? */ + if (list_empty(&clp->cl_superblocks)) + goto out; + spin_lock(&clp->cl_lock); + lease = clp->cl_lease_time; + last = clp->cl_last_renewal; + now = jiffies; + timeout = (2 * lease) / 3 + (long)last - (long)now; + /* Are we close to a lease timeout? */ + if (time_after(now, last + lease/3)) { + spin_unlock(&clp->cl_lock); /* Queue an asynchronous RENEW. */ - nfs4_proc_renew(server); + nfs4_proc_async_renew(clp); timeout = (2 * lease) / 3; - } - + spin_lock(&clp->cl_lock); + } else + dprintk("%s: failed to call renewd. Reason: lease not expired \n", + __FUNCTION__); if (timeout < 5 * HZ) /* safeguard */ timeout = 5 * HZ; - task->tk_timeout = timeout; - task->tk_action = renewd; - task->tk_exit = NULL; - rpc_sleep_on(&nfs4_renewd_queue, task, NULL, NULL); - return; + dprintk("%s: requeueing work. Lease period = %ld\n", + __FUNCTION__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); +out: + up_read(&clp->cl_sem); + dprintk("%s: done\n", __FUNCTION__); +} + +/* Must be called with clp->cl_sem locked for writes */ +void +nfs4_schedule_state_renewal(struct nfs4_client *clp) +{ + long timeout; + + spin_lock(&clp->cl_lock); + timeout = (2 * clp->cl_lease_time) / 3 + (long)clp->cl_last_renewal + - (long)jiffies; + if (timeout < 5 * HZ) + timeout = 5 * HZ; + dprintk("%s: requeueing work. Lease period = %ld\n", + __FUNCTION__, (timeout + HZ - 1) / HZ); + cancel_delayed_work(&clp->cl_renewd); + schedule_delayed_work(&clp->cl_renewd, timeout); + spin_unlock(&clp->cl_lock); } -int -nfs4_init_renewd(struct nfs_server *server) +void +nfs4_renewd_prepare_shutdown(struct nfs_server *server) { - struct rpc_task *task; - int status; + struct nfs4_client *clp = server->nfs4_state; - lock_kernel(); - status = -ENOMEM; - task = rpc_new_task(server->client, NULL, RPC_TASK_ASYNC); - if (!task) - goto out; - task->tk_calldata = server; - task->tk_action = renewd; - status = rpc_execute(task); + if (!clp) + return; + flush_scheduled_work(); + down_write(&clp->cl_sem); + if (!list_empty(&server->nfs4_siblings)) + list_del_init(&server->nfs4_siblings); + up_write(&clp->cl_sem); +} -out: - unlock_kernel(); - return status; +/* Must be called with clp->cl_sem locked for writes */ +void +nfs4_kill_renewd(struct nfs4_client *clp) +{ + down_read(&clp->cl_sem); + if (!list_empty(&clp->cl_superblocks)) { + up_read(&clp->cl_sem); + return; + } + cancel_delayed_work(&clp->cl_renewd); + up_read(&clp->cl_sem); + flush_scheduled_work(); } /* diff -u --recursive --new-file --show-c-function linux-2.6.2/fs/nfs/nfs4state.c linux-2.6.2-33-fix_kconfig/fs/nfs/nfs4state.c --- linux-2.6.2/fs/nfs/nfs4state.c 2004-02-07 12:43:32.000000000 +0100 +++ linux-2.6.2-33-fix_kconfig/fs/nfs/nfs4state.c 2004-02-07 13:09:53.000000000 +0100 @@ -41,6 +41,9 @@ #include #include #include +#include +#include +#include #define OPENOWNER_POOL_SIZE 8 @@ -55,6 +58,29 @@ nfs4_stateid one_stateid = static LIST_HEAD(nfs4_clientid_list); +static void nfs4_recover_state(void *); +extern void nfs4_renew_state(void *); + +void +init_nfsv4_state(struct nfs_server *server) +{ + server->nfs4_state = NULL; + INIT_LIST_HEAD(&server->nfs4_siblings); +} + +void +destroy_nfsv4_state(struct nfs_server *server) +{ + if (server->mnt_path) { + kfree(server->mnt_path); + server->mnt_path = NULL; + } + if (server->nfs4_state) { + nfs4_put_client(server->nfs4_state); + server->nfs4_state = NULL; + } +} + /* * nfs4_get_client(): returns an empty client structure * nfs4_put_client(): drops reference to client structure @@ -75,7 +101,12 @@ nfs4_alloc_client(struct in_addr *addr) INIT_LIST_HEAD(&clp->cl_unused); spin_lock_init(&clp->cl_lock); atomic_set(&clp->cl_count, 1); - clp->cl_state = NFS4CLNT_NEW; + INIT_WORK(&clp->cl_recoverd, nfs4_recover_state, clp); + INIT_WORK(&clp->cl_renewd, nfs4_renew_state, clp); + INIT_LIST_HEAD(&clp->cl_superblocks); + init_waitqueue_head(&clp->cl_waitq); + INIT_RPC_WAITQ(&clp->cl_rpcwaitq, "NFS4 client"); + clp->cl_state = 1 << NFS4CLNT_NEW; } return clp; } @@ -93,6 +124,11 @@ nfs4_free_client(struct nfs4_client *clp kfree(sp); } BUG_ON(!list_empty(&clp->cl_state_owners)); + if (clp->cl_cred) + put_rpccred(clp->cl_cred); + nfs_idmap_delete(clp); + if (clp->cl_rpcclient) + rpc_shutdown_client(clp->cl_rpcclient); kfree(clp); } @@ -126,10 +162,14 @@ nfs4_put_client(struct nfs4_client *clp) return; list_del(&clp->cl_servers); spin_unlock(&state_spinlock); + BUG_ON(!list_empty(&clp->cl_superblocks)); + wake_up_all(&clp->cl_waitq); + rpc_wake_up(&clp->cl_rpcwaitq); + nfs4_kill_renewd(clp); nfs4_free_client(clp); } -static inline u32 +u32 nfs4_alloc_lockowner_id(struct nfs4_client *clp) { return clp->cl_lockowner_id ++; @@ -145,11 +185,29 @@ nfs4_client_grab_unused(struct nfs4_clie atomic_inc(&sp->so_count); sp->so_cred = cred; list_move(&sp->so_list, &clp->cl_state_owners); + sp->so_generation = clp->cl_generation; clp->cl_nunused--; } return sp; } +static struct nfs4_state_owner * +nfs4_find_state_owner(struct nfs4_client *clp, struct rpc_cred *cred) +{ + struct nfs4_state_owner *sp, *res = NULL; + + list_for_each_entry(sp, &clp->cl_state_owners, so_list) { + if (sp->so_cred != cred) + continue; + atomic_inc(&sp->so_count); + /* Move to the head of the list */ + list_move(&sp->so_list, &clp->cl_state_owners); + res = sp; + break; + } + return res; +} + /* * nfs4_alloc_state_owner(): this is called on the OPEN or CREATE path to * create a new state_owner. @@ -170,6 +228,15 @@ nfs4_alloc_state_owner(void) return sp; } +static void +nfs4_unhash_state_owner(struct nfs4_state_owner *sp) +{ + struct nfs4_client *clp = sp->so_client; + spin_lock(&clp->cl_lock); + list_del_init(&sp->so_list); + spin_unlock(&clp->cl_lock); +} + struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *server, struct rpc_cred *cred) { @@ -179,19 +246,25 @@ nfs4_get_state_owner(struct nfs_server * get_rpccred(cred); new = nfs4_alloc_state_owner(); spin_lock(&clp->cl_lock); - sp = nfs4_client_grab_unused(clp, cred); + sp = nfs4_find_state_owner(clp, cred); + if (sp == NULL) + sp = nfs4_client_grab_unused(clp, cred); if (sp == NULL && new != NULL) { list_add(&new->so_list, &clp->cl_state_owners); new->so_client = clp; new->so_id = nfs4_alloc_lockowner_id(clp); new->so_cred = cred; + new->so_generation = clp->cl_generation; sp = new; new = NULL; } spin_unlock(&clp->cl_lock); if (new) kfree(new); - if (!sp) + if (sp) { + if (!test_bit(NFS4CLNT_OK, &clp->cl_state)) + nfs4_wait_clnt_recover(server->client, clp); + } else put_rpccred(cred); return sp; } @@ -206,6 +279,8 @@ nfs4_put_state_owner(struct nfs4_state_o return; if (clp->cl_nunused >= OPENOWNER_POOL_SIZE) goto out_free; + if (list_empty(&sp->so_list)) + goto out_free; list_move(&sp->so_list, &clp->cl_unused); clp->cl_nunused++; spin_unlock(&clp->cl_lock); @@ -227,24 +302,42 @@ nfs4_alloc_open_state(void) state = kmalloc(sizeof(*state), GFP_KERNEL); if (!state) return NULL; - state->pid = current->pid; state->state = 0; + state->nreaders = 0; + state->nwriters = 0; + state->flags = 0; memset(state->stateid.data, 0, sizeof(state->stateid.data)); atomic_set(&state->count, 1); + INIT_LIST_HEAD(&state->lock_states); + init_MUTEX(&state->lock_sema); + rwlock_init(&state->state_lock); return state; } static struct nfs4_state * -__nfs4_find_state_bypid(struct inode *inode, pid_t pid) +__nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) { struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state *state; + mode &= (FMODE_READ|FMODE_WRITE); list_for_each_entry(state, &nfsi->open_states, inode_states) { - if (state->pid == pid) { - atomic_inc(&state->count); - return state; - } + if (state->owner->so_cred != cred) + continue; + if ((mode & FMODE_READ) != 0 && state->nreaders == 0) + continue; + if ((mode & FMODE_WRITE) != 0 && state->nwriters == 0) + continue; + if ((state->state & mode) != mode) + continue; + /* Add the state to the head of the inode's list */ + list_move(&state->inode_states, &nfsi->open_states); + atomic_inc(&state->count); + if (mode & FMODE_READ) + state->nreaders++; + if (mode & FMODE_WRITE) + state->nwriters++; + return state; } return NULL; } @@ -256,7 +349,12 @@ __nfs4_find_state_byowner(struct inode * struct nfs4_state *state; list_for_each_entry(state, &nfsi->open_states, inode_states) { + /* Is this in the process of being freed? */ + if (state->nreaders == 0 && state->nwriters == 0) + continue; if (state->owner == owner) { + /* Add the state to the head of the inode's list */ + list_move(&state->inode_states, &nfsi->open_states); atomic_inc(&state->count); return state; } @@ -265,16 +363,12 @@ __nfs4_find_state_byowner(struct inode * } struct nfs4_state * -nfs4_find_state_bypid(struct inode *inode, pid_t pid) +nfs4_find_state(struct inode *inode, struct rpc_cred *cred, mode_t mode) { - struct nfs_inode *nfsi = NFS_I(inode); struct nfs4_state *state; spin_lock(&inode->i_lock); - state = __nfs4_find_state_bypid(inode, pid); - /* Add the state to the tail of the inode's list */ - if (state) - list_move_tail(&state->inode_states, &nfsi->open_states); + state = __nfs4_find_state(inode, cred, mode); spin_unlock(&inode->i_lock); return state; } @@ -307,7 +401,6 @@ nfs4_get_open_state(struct inode *inode, atomic_inc(&owner->so_count); list_add(&state->inode_states, &nfsi->open_states); state->inode = inode; - atomic_inc(&inode->i_count); spin_unlock(&inode->i_lock); } else { spin_unlock(&inode->i_lock); @@ -323,6 +416,7 @@ nfs4_put_open_state(struct nfs4_state *s { struct inode *inode = state->inode; struct nfs4_state_owner *owner = state->owner; + int status = 0; if (!atomic_dec_and_lock(&state->count, &inode->i_lock)) return; @@ -330,14 +424,230 @@ nfs4_put_open_state(struct nfs4_state *s spin_unlock(&inode->i_lock); down(&owner->so_sema); list_del(&state->open_states); - if (state->state != 0) - nfs4_do_close(inode, state); + if (state->state != 0) { + do { + status = nfs4_do_close(inode, state); + if (!status) + break; + up(&owner->so_sema); + status = nfs4_handle_error(NFS_SERVER(inode), status); + down(&owner->so_sema); + } while (!status); + } up(&owner->so_sema); - iput(inode); nfs4_free_open_state(state); nfs4_put_state_owner(owner); } +void +nfs4_close_state(struct nfs4_state *state, mode_t mode) +{ + struct inode *inode = state->inode; + struct nfs4_state_owner *owner = state->owner; + int newstate; + int status = 0; + + down(&owner->so_sema); + /* Protect against nfs4_find_state() */ + spin_lock(&inode->i_lock); + if (mode & FMODE_READ) + state->nreaders--; + if (mode & FMODE_WRITE) + state->nwriters--; + if (state->nwriters == 0 && state->nreaders == 0) + list_del_init(&state->inode_states); + spin_unlock(&inode->i_lock); + do { + newstate = 0; + if (state->state == 0) + break; + if (state->nreaders) + newstate |= FMODE_READ; + if (state->nwriters) + newstate |= FMODE_WRITE; + if (state->state == newstate) + break; + if (newstate != 0) + status = nfs4_do_downgrade(inode, state, newstate); + else + status = nfs4_do_close(inode, state); + if (!status) { + state->state = newstate; + break; + } + up(&owner->so_sema); + status = nfs4_handle_error(NFS_SERVER(inode), status); + down(&owner->so_sema); + } while (!status); + up(&owner->so_sema); + nfs4_put_open_state(state); +} + +/* + * Search the state->lock_states for an existing lock_owner + * that is compatible with current->files + */ +static struct nfs4_lock_state * +__nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *pos; + list_for_each_entry(pos, &state->lock_states, ls_locks) { + if (pos->ls_owner != fl_owner) + continue; + atomic_inc(&pos->ls_count); + return pos; + } + return NULL; +} + +struct nfs4_lock_state * +nfs4_find_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + read_lock(&state->state_lock); + lsp = __nfs4_find_lock_state(state, fl_owner); + read_unlock(&state->state_lock); + return lsp; +} + +/* + * Return a compatible lock_state. If no initialized lock_state structure + * exists, return an uninitialized one. + * + * The caller must be holding state->lock_sema + */ +struct nfs4_lock_state * +nfs4_alloc_lock_state(struct nfs4_state *state, fl_owner_t fl_owner) +{ + struct nfs4_lock_state *lsp; + struct nfs4_client *clp = state->owner->so_client; + + lsp = kmalloc(sizeof(*lsp), GFP_KERNEL); + if (lsp == NULL) + return NULL; + lsp->ls_seqid = 0; /* arbitrary */ + lsp->ls_id = -1; + memset(lsp->ls_stateid.data, 0, sizeof(lsp->ls_stateid.data)); + atomic_set(&lsp->ls_count, 1); + lsp->ls_owner = fl_owner; + lsp->ls_parent = state; + INIT_LIST_HEAD(&lsp->ls_locks); + spin_lock(&clp->cl_lock); + lsp->ls_id = nfs4_alloc_lockowner_id(clp); + spin_unlock(&clp->cl_lock); + return lsp; +} + +/* + * Byte-range lock aware utility to initialize the stateid of read/write + * requests. + */ +void +nfs4_copy_stateid(nfs4_stateid *dst, struct nfs4_state *state, fl_owner_t fl_owner) +{ + if (test_bit(LK_STATE_IN_USE, &state->flags)) { + struct nfs4_lock_state *lsp; + + lsp = nfs4_find_lock_state(state, fl_owner); + if (lsp) { + memcpy(dst, &lsp->ls_stateid, sizeof(*dst)); + nfs4_put_lock_state(lsp); + return; + } + } + memcpy(dst, &state->stateid, sizeof(*dst)); +} + +/* +* Called with state->lock_sema held. +*/ +void +nfs4_increment_lock_seqid(int status, struct nfs4_lock_state *lsp) +{ + if (status == NFS_OK || seqid_mutating_err(-status)) + lsp->ls_seqid++; +} + +/* +* Check to see if the request lock (type FL_UNLK) effects the fl lock. +* +* fl and request must have the same posix owner +* +* return: +* 0 -> fl not effected by request +* 1 -> fl consumed by request +*/ + +static int +nfs4_check_unlock(struct file_lock *fl, struct file_lock *request) +{ + if (fl->fl_start >= request->fl_start && fl->fl_end <= request->fl_end) + return 1; + return 0; +} + +/* + * Post an initialized lock_state on the state->lock_states list. + */ +void +nfs4_notify_setlk(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_parent; + + if (!list_empty(&lsp->ls_locks)) + return; + write_lock(&state->state_lock); + list_add(&lsp->ls_locks, &state->lock_states); + set_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * to decide to 'reap' lock state: + * 1) search i_flock for file_locks with fl.lock_state = to ls. + * 2) determine if unlock will consume found lock. + * if so, reap + * + * else, don't reap. + * + */ +void +nfs4_notify_unlck(struct inode *inode, struct file_lock *request, struct nfs4_lock_state *lsp) +{ + struct nfs4_state *state = lsp->ls_parent; + struct file_lock *fl; + + for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { + if (!(fl->fl_flags & FL_POSIX)) + continue; + if (fl->fl_owner != lsp->ls_owner) + continue; + /* Exit if we find at least one lock which is not consumed */ + if (nfs4_check_unlock(fl,request) == 0) + return; + } + + write_lock(&state->state_lock); + list_del_init(&lsp->ls_locks); + if (list_empty(&state->lock_states)) + clear_bit(LK_STATE_IN_USE, &state->flags); + write_unlock(&state->state_lock); +} + +/* + * Release reference to lock_state, and free it if we see that + * it is no longer in use + */ +void