summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-30 10:17:53 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-30 10:17:53 -0800
commitbaf67f6aa9d29512809f1b1fbab624fce57fd16d (patch)
tree148332061e78c4d030de5ef7e1718a12be9f4d13
parent0235da0faeeec1c1ce2265fc627f5d5d9cae7ce8 (diff)
parent38a125b31504f91bf6fdd3cfc3a3e9a721e6c97a (diff)
Merge tag 'nfs-for-6.13-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Bugfixes: - nfs/localio: fix for a memory corruption in nfs_local_read_done - Revert "nfs: don't reuse partially completed requests in nfs_lock_and_join_requests" - nfsv4: - ignore SB_RDONLY when mounting nfs - Fix a use-after-free problem in open() - sunrpc: - clear XPRT_SOCK_UPD_TIMEOUT when reseting the transport - timeout and cancel TLS handshake with -ETIMEDOUT - fix one UAF issue caused by sunrpc kernel tcp socket - Fix a hang in TLS sock_close if sk_write_pending - pNFS/blocklayout: Fix device registration issues Features and cleanups: - localio cleanups from Mike Snitzer - Clean up refcounting on the nfs version modules - __counted_by() annotations - nfs: make processes that are waiting for an I/O lock killable" * tag 'nfs-for-6.13-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (24 commits) fs/nfs/io: make nfs_start_io_*() killable nfs/blocklayout: Limit repeat device registration on failure nfs/blocklayout: Don't attempt unregister for invalid block device sunrpc: fix one UAF issue caused by sunrpc kernel tcp socket SUNRPC: timeout and cancel TLS handshake with -ETIMEDOUT sunrpc: clear XPRT_SOCK_UPD_TIMEOUT when reset transport nfs: ignore SB_RDONLY when mounting nfs Revert "nfs: don't reuse partially completed requests in nfs_lock_and_join_requests" Revert "fs: nfs: fix missing refcnt by replacing folio_set_private by folio_attach_private" nfs/localio: must clear res.replen in nfs_local_read_done NFSv4.0: Fix a use-after-free problem in the asynchronous open() NFSv4.0: Fix the wake up of the next waiter in nfs_release_seqid() SUNRPC: Fix a hang in TLS sock_close if sk_write_pending sunrpc: remove newlines from tracepoints nfs: Annotate struct pnfs_commit_array with __counted_by() nfs/localio: eliminate need for nfs_local_fsync_work forward declaration nfs/localio: remove extra indirect nfs_to call to check {read,write}_iter nfs/localio: eliminate unnecessary kref in nfs_local_fsync_ctx nfs/localio: remove redundant suid/sgid handling NFS: Implement get_nfs_version() ...
-rw-r--r--fs/nfs/blocklayout/blocklayout.c15
-rw-r--r--fs/nfs/blocklayout/dev.c6
-rw-r--r--fs/nfs/client.c64
-rw-r--r--fs/nfs/direct.c21
-rw-r--r--fs/nfs/file.c14
-rw-r--r--fs/nfs/fs_context.c6
-rw-r--r--fs/nfs/internal.h9
-rw-r--r--fs/nfs/io.c44
-rw-r--r--fs/nfs/localio.c96
-rw-r--r--fs/nfs/namespace.c2
-rw-r--r--fs/nfs/nfs.h4
-rw-r--r--fs/nfs/nfs4proc.c8
-rw-r--r--fs/nfs/nfs4state.c10
-rw-r--r--fs/nfs/write.c55
-rw-r--r--include/linux/nfs_xdr.h2
-rw-r--r--include/trace/events/sunrpc.h4
-rw-r--r--net/sunrpc/svcsock.c4
-rw-r--r--net/sunrpc/xprtsock.c18
18 files changed, 229 insertions, 153 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 0becdec12970..47189476b553 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -571,19 +571,32 @@ retry:
if (!node)
return ERR_PTR(-ENODEV);
+ /*
+ * Devices that are marked unavailable are left in the cache with a
+ * timeout to avoid sending GETDEVINFO after every LAYOUTGET, or
+ * constantly attempting to register the device. Once marked as
+ * unavailable they must be deleted and never reused.
+ */
if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
unsigned long end = jiffies;
unsigned long start = end - PNFS_DEVICE_RETRY_TIMEOUT;
if (!time_in_range(node->timestamp_unavailable, start, end)) {
+ /* Uncork subsequent GETDEVINFO operations for this device */
nfs4_delete_deviceid(node->ld, node->nfs_client, id);
goto retry;
}
goto out_put;
}
- if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node)))
+ if (!bl_register_dev(container_of(node, struct pnfs_block_dev, node))) {
+ /*
+ * If we cannot register, treat this device as transient:
+ * Make a negative cache entry for the device
+ */
+ nfs4_mark_deviceid_unavailable(node);
goto out_put;
+ }
return node;
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index 6252f4447945..cab8809f0e0f 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -20,9 +20,6 @@ static void bl_unregister_scsi(struct pnfs_block_dev *dev)
const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops;
int status;
- if (!test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags))
- return;
-
status = ops->pr_register(bdev, dev->pr_key, 0, false);
if (status)
trace_bl_pr_key_unreg_err(bdev, dev->pr_key, status);
@@ -58,7 +55,8 @@ static void bl_unregister_dev(struct pnfs_block_dev *dev)
return;
}
- if (dev->type == PNFS_BLOCK_VOLUME_SCSI)
+ if (dev->type == PNFS_BLOCK_VOLUME_SCSI &&
+ test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags))
bl_unregister_scsi(dev);
}
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 03ecc7765615..550ca934c9cf 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -55,9 +55,13 @@
#define NFSDBG_FACILITY NFSDBG_CLIENT
static DECLARE_WAIT_QUEUE_HEAD(nfs_client_active_wq);
-static DEFINE_SPINLOCK(nfs_version_lock);
-static DEFINE_MUTEX(nfs_version_mutex);
-static LIST_HEAD(nfs_versions);
+static DEFINE_RWLOCK(nfs_version_lock);
+
+static struct nfs_subversion *nfs_version_mods[5] = {
+ [2] = NULL,
+ [3] = NULL,
+ [4] = NULL,
+};
/*
* RPC cruft for NFS
@@ -76,38 +80,38 @@ const struct rpc_program nfs_program = {
.pipe_dir_name = NFS_PIPE_DIRNAME,
};
-static struct nfs_subversion *find_nfs_version(unsigned int version)
+static struct nfs_subversion *__find_nfs_version(unsigned int version)
{
struct nfs_subversion *nfs;
- spin_lock(&nfs_version_lock);
-
- list_for_each_entry(nfs, &nfs_versions, list) {
- if (nfs->rpc_ops->version == version) {
- spin_unlock(&nfs_version_lock);
- return nfs;
- }
- }
- spin_unlock(&nfs_version_lock);
- return ERR_PTR(-EPROTONOSUPPORT);
+ read_lock(&nfs_version_lock);
+ nfs = nfs_version_mods[version];
+ read_unlock(&nfs_version_lock);
+ return nfs;
}
-struct nfs_subversion *get_nfs_version(unsigned int version)
+struct nfs_subversion *find_nfs_version(unsigned int version)
{
- struct nfs_subversion *nfs = find_nfs_version(version);
+ struct nfs_subversion *nfs = __find_nfs_version(version);
- if (IS_ERR(nfs)) {
- mutex_lock(&nfs_version_mutex);
- request_module("nfsv%d", version);
- nfs = find_nfs_version(version);
- mutex_unlock(&nfs_version_mutex);
- }
+ if (!nfs && request_module("nfsv%d", version) == 0)
+ nfs = __find_nfs_version(version);
- if (!IS_ERR(nfs) && !try_module_get(nfs->owner))
+ if (!nfs)
+ return ERR_PTR(-EPROTONOSUPPORT);
+
+ if (!get_nfs_version(nfs))
return ERR_PTR(-EAGAIN);
+
return nfs;
}
+int get_nfs_version(struct nfs_subversion *nfs)
+{
+ return try_module_get(nfs->owner);
+}
+EXPORT_SYMBOL_GPL(get_nfs_version);
+
void put_nfs_version(struct nfs_subversion *nfs)
{
module_put(nfs->owner);
@@ -115,23 +119,23 @@ void put_nfs_version(struct nfs_subversion *nfs)
void register_nfs_version(struct nfs_subversion *nfs)
{
- spin_lock(&nfs_version_lock);
+ write_lock(&nfs_version_lock);
- list_add(&nfs->list, &nfs_versions);
+ nfs_version_mods[nfs->rpc_ops->version] = nfs;
nfs_version[nfs->rpc_ops->version] = nfs->rpc_vers;
- spin_unlock(&nfs_version_lock);
+ write_unlock(&nfs_version_lock);
}
EXPORT_SYMBOL_GPL(register_nfs_version);
void unregister_nfs_version(struct nfs_subversion *nfs)
{
- spin_lock(&nfs_version_lock);
+ write_lock(&nfs_version_lock);
nfs_version[nfs->rpc_ops->version] = NULL;
- list_del(&nfs->list);
+ nfs_version_mods[nfs->rpc_ops->version] = NULL;
- spin_unlock(&nfs_version_lock);
+ write_unlock(&nfs_version_lock);
}
EXPORT_SYMBOL_GPL(unregister_nfs_version);
@@ -151,7 +155,7 @@ struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_minorversion = cl_init->minorversion;
clp->cl_nfs_mod = cl_init->nfs_mod;
- if (!try_module_get(clp->cl_nfs_mod->owner))
+ if (!get_nfs_version(clp->cl_nfs_mod))
goto error_dealloc;
clp->rpc_ops = clp->cl_nfs_mod->rpc_ops;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 90079ca134dd..b08dbe96bc57 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -454,8 +454,16 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
if (user_backed_iter(iter))
dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
- if (!swap)
- nfs_start_io_direct(inode);
+ if (!swap) {
+ result = nfs_start_io_direct(inode);
+ if (result) {
+ /* release the reference that would usually be
+ * consumed by nfs_direct_read_schedule_iovec()
+ */
+ nfs_direct_req_release(dreq);
+ goto out_release;
+ }
+ }
NFS_I(inode)->read_io += count;
requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
@@ -1007,7 +1015,14 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
FLUSH_STABLE);
} else {
- nfs_start_io_direct(inode);
+ result = nfs_start_io_direct(inode);
+ if (result) {
+ /* release the reference that would usually be
+ * consumed by nfs_direct_write_schedule_iovec()
+ */
+ nfs_direct_req_release(dreq);
+ goto out_release;
+ }
requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
FLUSH_COND_STABLE);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6800ee92d742..1bb646752e46 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -166,7 +166,10 @@ nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
iocb->ki_filp,
iov_iter_count(to), (unsigned long) iocb->ki_pos);
- nfs_start_io_read(inode);
+ result = nfs_start_io_read(inode);
+ if (result)
+ return result;
+
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_read_iter(iocb, to);
@@ -187,7 +190,10 @@ nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe
dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos);
- nfs_start_io_read(inode);
+ result = nfs_start_io_read(inode);
+ if (result)
+ return result;
+
result = nfs_revalidate_mapping(inode, in->f_mapping);
if (!result) {
result = filemap_splice_read(in, ppos, pipe, len, flags);
@@ -668,7 +674,9 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
nfs_clear_invalid_mapping(file->f_mapping);
since = filemap_sample_wb_err(file->f_mapping);
- nfs_start_io_write(inode);
+ error = nfs_start_io_write(inode);
+ if (error)
+ return error;
result = generic_write_checks(iocb, from);
if (result > 0)
result = generic_perform_write(iocb, from);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 7e000d782e28..b069385eea17 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -1467,7 +1467,7 @@ static int nfs_fs_context_validate(struct fs_context *fc)
/* Load the NFS protocol module if we haven't done so yet */
if (!ctx->nfs_mod) {
- nfs_mod = get_nfs_version(ctx->version);
+ nfs_mod = find_nfs_version(ctx->version);
if (IS_ERR(nfs_mod)) {
ret = PTR_ERR(nfs_mod);
goto out_version_unavailable;
@@ -1541,7 +1541,7 @@ static int nfs_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
}
nfs_copy_fh(ctx->mntfh, src->mntfh);
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
ctx->client_address = NULL;
ctx->mount_server.hostname = NULL;
ctx->nfs_server.export_path = NULL;
@@ -1633,7 +1633,7 @@ static int nfs_init_fs_context(struct fs_context *fc)
}
ctx->nfs_mod = nfss->nfs_client->cl_nfs_mod;
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
} else {
/* defaults */
ctx->timeo = NFS_UNSPEC_TIMEO;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 430733e3eff2..e564bd11ba60 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -6,13 +6,14 @@
#include "nfs4_fs.h"
#include <linux/fs_context.h>
#include <linux/security.h>
+#include <linux/compiler_attributes.h>
#include <linux/crc32.h>
#include <linux/sunrpc/addr.h>
#include <linux/nfs_page.h>
#include <linux/nfslocalio.h>
#include <linux/wait_bit.h>
-#define NFS_SB_MASK (SB_RDONLY|SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
+#define NFS_SB_MASK (SB_NOSUID|SB_NODEV|SB_NOEXEC|SB_SYNCHRONOUS)
extern const struct export_operations nfs_export_ops;
@@ -516,11 +517,11 @@ extern const struct netfs_request_ops nfs_netfs_ops;
#endif
/* io.c */
-extern void nfs_start_io_read(struct inode *inode);
+extern __must_check int nfs_start_io_read(struct inode *inode);
extern void nfs_end_io_read(struct inode *inode);
-extern void nfs_start_io_write(struct inode *inode);
+extern __must_check int nfs_start_io_write(struct inode *inode);
extern void nfs_end_io_write(struct inode *inode);
-extern void nfs_start_io_direct(struct inode *inode);
+extern __must_check int nfs_start_io_direct(struct inode *inode);
extern void nfs_end_io_direct(struct inode *inode);
static inline bool nfs_file_io_is_buffered(struct nfs_inode *nfsi)
diff --git a/fs/nfs/io.c b/fs/nfs/io.c
index b5551ed8f648..3388faf2acb9 100644
--- a/fs/nfs/io.c
+++ b/fs/nfs/io.c
@@ -39,19 +39,28 @@ static void nfs_block_o_direct(struct nfs_inode *nfsi, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
*/
-void
+int
nfs_start_io_read(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ int err;
+
/* Be an optimist! */
- down_read(&inode->i_rwsem);
+ err = down_read_killable(&inode->i_rwsem);
+ if (err)
+ return err;
if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) == 0)
- return;
+ return 0;
up_read(&inode->i_rwsem);
+
/* Slow path.... */
- down_write(&inode->i_rwsem);
+ err = down_write_killable(&inode->i_rwsem);
+ if (err)
+ return err;
nfs_block_o_direct(nfsi, inode);
downgrade_write(&inode->i_rwsem);
+
+ return 0;
}
/**
@@ -74,11 +83,15 @@ nfs_end_io_read(struct inode *inode)
* Declare that a buffered read operation is about to start, and ensure
* that we block all direct I/O.
*/
-void
+int
nfs_start_io_write(struct inode *inode)
{
- down_write(&inode->i_rwsem);
- nfs_block_o_direct(NFS_I(inode), inode);
+ int err;
+
+ err = down_write_killable(&inode->i_rwsem);
+ if (!err)
+ nfs_block_o_direct(NFS_I(inode), inode);
+ return err;
}
/**
@@ -119,19 +132,28 @@ static void nfs_block_buffered(struct nfs_inode *nfsi, struct inode *inode)
* Note that buffered writes and truncates both take a write lock on
* inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
*/
-void
+int
nfs_start_io_direct(struct inode *inode)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ int err;
+
/* Be an optimist! */
- down_read(&inode->i_rwsem);
+ err = down_read_killable(&inode->i_rwsem);
+ if (err)
+ return err;
if (test_bit(NFS_INO_ODIRECT, &nfsi->flags) != 0)
- return;
+ return 0;
up_read(&inode->i_rwsem);
+
/* Slow path.... */
- down_write(&inode->i_rwsem);
+ err = down_write_killable(&inode->i_rwsem);
+ if (err)
+ return err;
nfs_block_buffered(nfsi, inode);
downgrade_write(&inode->i_rwsem);
+
+ return 0;
}
/**
diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c
index 8f0ce82a677e..4b8618cf114c 100644
--- a/fs/nfs/localio.c
+++ b/fs/nfs/localio.c
@@ -42,10 +42,8 @@ struct nfs_local_fsync_ctx {
struct nfsd_file *localio;
struct nfs_commit_data *data;
struct work_struct work;
- struct kref kref;
struct completion *done;
};
-static void nfs_local_fsync_work(struct work_struct *work);
static bool localio_enabled __read_mostly = true;
module_param(localio_enabled, bool, 0644);
@@ -274,7 +272,7 @@ nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
static struct nfs_local_kiocb *
nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
- struct nfsd_file *localio, gfp_t flags)
+ struct file *file, gfp_t flags)
{
struct nfs_local_kiocb *iocb;
@@ -287,9 +285,8 @@ nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
kfree(iocb);
return NULL;
}
- init_sync_kiocb(&iocb->kiocb, nfs_to->nfsd_file_file(localio));
+ init_sync_kiocb(&iocb->kiocb, file);
iocb->kiocb.ki_pos = hdr->args.offset;
- iocb->localio = localio;
iocb->hdr = hdr;
iocb->kiocb.ki_flags &= ~IOCB_APPEND;
return iocb;
@@ -354,6 +351,12 @@ nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
nfs_local_pgio_done(hdr, status);
+ /*
+ * Must clear replen otherwise NFSv3 data corruption will occur
+ * if/when switching from LOCALIO back to using normal RPC.
+ */
+ hdr->res.replen = 0;
+
if (hdr->res.count != hdr->args.count ||
hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
hdr->res.eof = true;
@@ -390,13 +393,19 @@ nfs_do_local_read(struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops)
{
struct nfs_local_kiocb *iocb;
+ struct file *file = nfs_to->nfsd_file_file(localio);
+
+ /* Don't support filesystems without read_iter */
+ if (!file->f_op->read_iter)
+ return -EAGAIN;
dprintk("%s: vfs_read count=%u pos=%llu\n",
__func__, hdr->args.count, hdr->args.offset);
- iocb = nfs_local_iocb_alloc(hdr, localio, GFP_KERNEL);
+ iocb = nfs_local_iocb_alloc(hdr, file, GFP_KERNEL);
if (iocb == NULL)
return -ENOMEM;
+ iocb->localio = localio;
nfs_local_pgio_init(hdr, call_ops);
hdr->res.eof = false;
@@ -521,12 +530,7 @@ nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
}
if (status < 0)
nfs_reset_boot_verifier(inode);
- else if (nfs_should_remove_suid(inode)) {
- /* Deal with the suid/sgid bit corner case */
- spin_lock(&inode->i_lock);
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
- spin_unlock(&inode->i_lock);
- }
+
nfs_local_pgio_done(hdr, status);
}
@@ -564,14 +568,20 @@ nfs_do_local_write(struct nfs_pgio_header *hdr,
const struct rpc_call_ops *call_ops)
{
struct nfs_local_kiocb *iocb;
+ struct file *file = nfs_to->nfsd_file_file(localio);
+
+ /* Don't support filesystems without write_iter */
+ if (!file->f_op->write_iter)
+ return -EAGAIN;
dprintk("%s: vfs_write count=%u pos=%llu %s\n",
__func__, hdr->args.count, hdr->args.offset,
(hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable");
- iocb = nfs_local_iocb_alloc(hdr, localio, GFP_NOIO);
+ iocb = nfs_local_iocb_alloc(hdr, file, GFP_NOIO);
if (iocb == NULL)
return -ENOMEM;
+ iocb->localio = localio;
switch (hdr->args.stable) {
default:
@@ -597,16 +607,9 @@ int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
const struct rpc_call_ops *call_ops)
{
int status = 0;
- struct file *filp = nfs_to->nfsd_file_file(localio);
if (!hdr->args.count)
return 0;
- /* Don't support filesystems without read_iter/write_iter */
- if (!filp->f_op->read_iter || !filp->f_op->write_iter) {
- nfs_local_disable(clp);
- status = -EAGAIN;
- goto out;
- }
switch (hdr->rw_mode) {
case FMODE_READ:
@@ -620,8 +623,10 @@ int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
hdr->rw_mode);
status = -EINVAL;
}
-out:
+
if (status != 0) {
+ if (status == -EAGAIN)
+ nfs_local_disable(clp);
nfs_to_nfsd_file_put_local(localio);
hdr->task.tk_status = status;
nfs_local_hdr_release(hdr, call_ops);
@@ -678,40 +683,12 @@ nfs_local_release_commit_data(struct nfsd_file *localio,
call_ops->rpc_release(data);
}
-static struct nfs_local_fsync_ctx *
-nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
- struct nfsd_file *localio, gfp_t flags)
-{
- struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags);
-
- if (ctx != NULL) {
- ctx->localio = localio;
- ctx->data = data;
- INIT_WORK(&ctx->work, nfs_local_fsync_work);
- kref_init(&ctx->kref);
- ctx->done = NULL;
- }
- return ctx;
-}
-
-static void
-nfs_local_fsync_ctx_kref_free(struct kref *kref)
-{
- kfree(container_of(kref, struct nfs_local_fsync_ctx, kref));
-}
-
-static void
-nfs_local_fsync_ctx_put(struct nfs_local_fsync_ctx *ctx)
-{
- kref_put(&ctx->kref, nfs_local_fsync_ctx_kref_free);
-}
-
static void
nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
{
nfs_local_release_commit_data(ctx->localio, ctx->data,
ctx->data->task.tk_ops);
- nfs_local_fsync_ctx_put(ctx);
+ kfree(ctx);
}
static void
@@ -730,6 +707,21 @@ nfs_local_fsync_work(struct work_struct *work)
nfs_local_fsync_ctx_free(ctx);
}
+static struct nfs_local_fsync_ctx *
+nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
+ struct nfsd_file *localio, gfp_t flags)
+{
+ struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags);
+
+ if (ctx != NULL) {
+ ctx->localio = localio;
+ ctx->data = data;
+ INIT_WORK(&ctx->work, nfs_local_fsync_work);
+ ctx->done = NULL;
+ }
+ return ctx;
+}
+
int nfs_local_commit(struct nfsd_file *localio,
struct nfs_commit_data *data,
const struct rpc_call_ops *call_ops, int how)
@@ -744,7 +736,7 @@ int nfs_local_commit(struct nfsd_file *localio,
}
nfs_local_init_commit(data, call_ops);
- kref_get(&ctx->kref);
+
if (how & FLUSH_SYNC) {
DECLARE_COMPLETION_ONSTACK(done);
ctx->done = &done;
@@ -752,6 +744,6 @@ int nfs_local_commit(struct nfsd_file *localio,
wait_for_completion(&done);
} else
queue_work(nfsiod_workqueue, &ctx->work);
- nfs_local_fsync_ctx_put(ctx);
+
return 0;
}
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index e7494cdd957e..2d53574da605 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -182,7 +182,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
ctx->version = client->rpc_ops->version;
ctx->minorversion = client->cl_minorversion;
ctx->nfs_mod = client->cl_nfs_mod;
- __module_get(ctx->nfs_mod->owner);
+ get_nfs_version(ctx->nfs_mod);
ret = client->rpc_ops->submount(fc, server);
if (ret < 0) {
diff --git a/fs/nfs/nfs.h b/fs/nfs/nfs.h
index 0d3ce0460e35..8a5f51be013a 100644
--- a/fs/nfs/nfs.h
+++ b/fs/nfs/nfs.h
@@ -19,10 +19,10 @@ struct nfs_subversion {
const struct nfs_rpc_ops *rpc_ops; /* NFS operations */
const struct super_operations *sops; /* NFS Super operations */
const struct xattr_handler * const *xattr; /* NFS xattr handlers */
- struct list_head list; /* List of NFS versions */
};
-struct nfs_subversion *get_nfs_version(unsigned int);
+struct nfs_subversion *find_nfs_version(unsigned int);
+int get_nfs_version(struct nfs_subversion *);
void put_nfs_version(struct nfs_subversion *);
void register_nfs_version(struct nfs_subversion *);
void unregister_nfs_version(struct nfs_subversion *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9d40319e063d..405f17e6e0b4 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2603,12 +2603,14 @@ static void nfs4_open_release(void *calldata)
struct nfs4_opendata *data = calldata;
struct nfs4_state *state = NULL;
+ /* In case of error, no cleanup! */
+ if (data->rpc_status != 0 || !data->rpc_done) {
+ nfs_release_seqid(data->o_arg.seqid);
+ goto out_free;
+ }
/* If this request hasn't been cancelled, do nothing */
if (!data->cancelled)
goto out_free;
- /* In case of error, no cleanup! */
- if (data->rpc_status != 0 || !data->rpc_done)
- goto out_free;
/* In case we need an open_confirm, no cleanup! */
if (data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)
goto out_free;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index dafd61186557..9a9f60a2291b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1083,14 +1083,12 @@ void nfs_release_seqid(struct nfs_seqid *seqid)
return;
sequence = seqid->sequence;
spin_lock(&sequence->lock);
- list_del_init(&seqid->list);
- if (!list_empty(&sequence->list)) {
- struct nfs_seqid *next;
-
- next = list_first_entry(&sequence->list,
- struct nfs_seqid, list);
+ if (list_is_first(&seqid->list, &sequence->list) &&
+ !list_is_singular(&sequence->list)) {
+ struct nfs_seqid *next = list_next_entry(seqid, list);
rpc_wake_up_queued_task(&sequence->wait, next->task);
}
+ list_del_init(&seqid->list);
spin_unlock(&sequence->lock);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ead2dc55952d..50fa539611f5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -144,6 +144,31 @@ static void nfs_io_completion_put(struct nfs_io_completion *ioc)
kref_put(&ioc->refcount, nfs_io_completion_release);
}
+static void
+nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode)
+{
+ if (!test_and_set_bit(PG_INODE_REF, &req->wb_flags)) {
+ kref_get(&req->wb_kref);
+ atomic_long_inc(&NFS_I(inode)->nrequests);
+ }
+}
+
+static int
+nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode)
+{
+ int ret;
+
+ if (!test_bit(PG_REMOVE, &req->wb_flags))
+ return 0;
+ ret = nfs_page_group_lock(req);
+ if (ret)
+ return ret;
+ if (test_and_clear_bit(PG_REMOVE, &req->wb_flags))
+ nfs_page_set_inode_ref(req, inode);
+ nfs_page_group_unlock(req);
+ return 0;
+}
+
/**
* nfs_folio_find_head_request - find head request associated with a folio
* @folio: pointer to folio
@@ -540,7 +565,6 @@ static struct nfs_page *nfs_lock_and_join_requests(struct folio *folio)
struct inode *inode = folio->mapping->host;
struct nfs_page *head, *subreq;
struct nfs_commit_info cinfo;
- bool removed;
int ret;
/*
@@ -565,18 +589,18 @@ retry:
goto retry;
}
- ret = nfs_page_group_lock(head);
+ ret = nfs_cancel_remove_inode(head, inode);
if (ret < 0)
goto out_unlock;
- removed = test_bit(PG_REMOVE, &head->wb_flags);
+ ret = nfs_page_group_lock(head);
+ if (ret < 0)
+ goto out_unlock;
/* lock each request in the page group */
for (subreq = head->wb_this_page;
subreq != head;
subreq = subreq->wb_this_page) {
- if (test_bit(PG_REMOVE, &subreq->wb_flags))
- removed = true;
ret = nfs_page_group_lock_subreq(head, subreq);
if (ret < 0)
goto out_unlock;
@@ -584,21 +608,6 @@ retry:
nfs_page_group_unlock(head);
- /*
- * If PG_REMOVE is set on any request, I/O on that request has
- * completed, but some requests were still under I/O at the time
- * we locked the head request.
- *
- * In that case the above wait for all requests means that all I/O
- * has now finished, and we can restart from a clean slate. Let the
- * old requests go away and start from scratch instead.
- */
- if (removed) {
- nfs_unroll_locks(head, head);
- nfs_unlock_and_release_request(head);
- goto retry;
- }
-
nfs_init_cinfo_from_inode(&cinfo, inode);
nfs_join_page_group(head, &cinfo, inode);
return head;
@@ -772,7 +781,8 @@ static void nfs_inode_add_request(struct nfs_page *req)
nfs_lock_request(req);
spin_lock(&mapping->i_private_lock);
set_bit(PG_MAPPED, &req->wb_flags);
- folio_attach_private(folio, req);
+ folio_set_private(folio);
+ folio->private = req;
spin_unlock(&mapping->i_private_lock);
atomic_long_inc(&nfsi->nrequests);
/* this a head request for a page group - mark it as having an
@@ -796,7 +806,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
spin_lock(&mapping->i_private_lock);
if (likely(folio)) {
- folio_detach_private(folio);
+ folio->private = NULL;
+ folio_clear_private(folio);
clear_bit(PG_MAPPED, &req->wb_head->wb_flags);
}
spin_unlock(&mapping->i_private_lock);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 12d8e47bc5a3..559273a0f16d 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1336,7 +1336,7 @@ struct pnfs_commit_array {
struct rcu_head rcu;
refcount_t refcount;
unsigned int nbuckets;
- struct pnfs_commit_bucket buckets[];
+ struct pnfs_commit_bucket buckets[] __counted_by(nbuckets);
};
struct pnfs_ds_commit_info {
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 5e8495216689..b13dc275ef4a 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -719,7 +719,7 @@ TRACE_EVENT(rpc_xdr_overflow,
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
- " %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+ " %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u",
__entry->task_id, __entry->client_id,
__get_str(progname), __entry->version, __get_str(procedure),
__entry->requested, __entry->p, __entry->end,
@@ -777,7 +777,7 @@ TRACE_EVENT(rpc_xdr_alignment,
),
TP_printk(SUNRPC_TRACE_TASK_SPECIFIER
- " %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+ " %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u",
__entry->task_id, __entry->client_id,
__get_str(progname), __entry->version, __get_str(procedure),
__entry->offset, __entry->copied,
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b785425c3315..95397677673b 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1551,6 +1551,10 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
newlen = error;
if (protocol == IPPROTO_TCP) {
+ __netns_tracker_free(net, &sock->sk->ns_tracker, false);
+ sock->sk->sk_net_refcnt = 1;
+ get_net_track(net, &sock->sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(net, 1);
if ((error = kernel_listen(sock, 64)) < 0)
goto bummer;
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 1326fbf45a34..c60936d8cef7 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1198,6 +1198,7 @@ static void xs_sock_reset_state_flags(struct rpc_xprt *xprt)
clear_bit(XPRT_SOCK_WAKE_WRITE, &transport->sock_state);
clear_bit(XPRT_SOCK_WAKE_DISCONNECT, &transport->sock_state);
clear_bit(XPRT_SOCK_NOSPACE, &transport->sock_state);
+ clear_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
}
static void xs_run_error_worker(struct sock_xprt *transport, unsigned int nr)
@@ -1278,6 +1279,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
transport->file = NULL;
sk->sk_user_data = NULL;
+ sk->sk_sndtimeo = 0;
xs_restore_old_callbacks(transport, sk);
xprt_clear_connected(xprt);
@@ -1939,6 +1941,13 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
goto out;
}
+ if (protocol == IPPROTO_TCP) {
+ __netns_tracker_free(xprt->xprt_net, &sock->sk->ns_tracker, false);
+ sock->sk->sk_net_refcnt = 1;
+ get_net_track(xprt->xprt_net, &sock->sk->ns_tracker, GFP_KERNEL);
+ sock_inuse_add(xprt->xprt_net, 1);
+ }
+
filp = sock_alloc_file(sock, O_NONBLOCK, NULL);
if (IS_ERR(filp))
return ERR_CAST(filp);
@@ -2614,11 +2623,10 @@ static int xs_tls_handshake_sync(struct rpc_xprt *lower_xprt, struct xprtsec_par
rc = wait_for_completion_interruptible_timeout(&lower_transport->handshake_done,
XS_TLS_HANDSHAKE_TO);
if (rc <= 0) {
- if (!tls_handshake_cancel(sk)) {
- if (rc == 0)
- rc = -ETIMEDOUT;
- goto out_put_xprt;
- }
+ tls_handshake_cancel(sk);
+ if (rc == 0)
+ rc = -ETIMEDOUT;
+ goto out_put_xprt;
}
rc = lower_transport->xprt_err;