Merge tag 'vfs-6.11-rc4.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs

Pull vfs fixes from Christian Brauner: "VFS: - Fix the name of file lease slab cache. When file leases were split out of file locks the name of the file lock slab cache was used for the file leases slab cache as well. - Fix a type in take_fd() helper. - Fix infinite directory iteration for stable offsets in tmpfs. - When the icache is pruned all reclaimable inodes are marked with I_FREEING and other processes that try to lookup such inodes will block. But some filesystems like ext4 can trigger lookups in their inode evict callback causing deadlocks. Ext4 does such lookups if the ea_inode feature is used whereby a separate inode may be used to store xattrs. Introduce I_LRU_ISOLATING which pins the inode while its pages are reclaimed. This avoids inode deletion during inode_lru_isolate() avoiding the deadlock and evict is made to wait until I_LRU_ISOLATING is done. netfs: - Fault in smaller chunks for non-large folio mappings for filesystems that haven't been converted to large folios yet. - Fix the CONFIG_NETFS_DEBUG config option. The config option was renamed a short while ago and that introduced two minor issues. First, it depended on CONFIG_NETFS whereas it wants to depend on CONFIG_NETFS_SUPPORT. The former doesn't exist, while the latter does. Second, the documentation for the config option wasn't fixed up. - Revert the removal of the PG_private_2 writeback flag as ceph is using it and fix how that flag is handled in netfs. - Fix DIO reads on 9p. A program watching a file on a 9p mount wouldn't see any changes in the size of the file being exported by the server if the file was changed directly in the source filesystem. Fix this by attempting to read the full size specified when a DIO read is requested. - Fix a NULL pointer dereference bug due to a data race where a cachefiles cookies was retired even though it was still in use. Check the cookie's n_accesses counter before discarding it. nsfs: - Fix ioctl declaration for NS_GET_MNTNS_ID from _IO() to _IOR() as the kernel is writing to userspace. pidfs: - Prevent the creation of pidfds for kthreads until we have a use-case for it and we know the semantics we want. It also confuses userspace why they can get pidfds for kthreads. squashfs: - Fix an unitialized value bug reported by KMSAN caused by a corrupted symbolic link size read from disk. Check that the symbolic link size is not larger than expected" * tag 'vfs-6.11-rc4.fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: Squashfs: sanity check symbolic link size 9p: Fix DIO read through netfs vfs: Don't evict inode under the inode lru traversing context netfs: Fix handling of USE_PGPRIV2 and WRITE_TO_CACHE flags netfs, ceph: Revert "netfs: Remove deprecated use of PG_private_2 as a second writeback flag" file: fix typo in take_fd() comment pidfd: prevent creation of pidfds for kthreads netfs: clean up after renaming FSCACHE_DEBUG config libfs: fix infinite directory reads for offset dir nsfs: fix ioctl declaration fs/netfs/fscache_cookie: add missing "n_accesses" check filelock: fix name of file_lease slab cache netfs: Fault in smaller chunks for non-large folio mappings
author: Linus Torvalds <torvalds@linux-foundation.org> 2024-08-14 09:06:28 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2024-08-14 09:06:28 -0700
commit: 4ac0f08f44b62e59a389c7ed87c89087d9fefe29 (patch)
tree: b132cf6e957a9fa385905287ad343b3a4e87ea9e /fs/netfs/io.c
parent: 02f8ca3d49055788f112c17052a3da65feb01835 (diff)
parent: 810ee43d9cd245d138a2733d87a24858a23f577d (diff)
1 files changed, 155 insertions, 6 deletions
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index c93851b98368..5367caf3fa28 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -99,6 +99,146 @@ static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async)
 }
 
 /*
+ * [DEPRECATED] Deal with the completion of writing the data to the cache.  We
+ * have to clear the PG_fscache bits on the folios involved and release the
+ * caller's ref.
+ *
+ * May be called in softirq mode and we inherit a ref from the caller.
+ */
+static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
+					  bool was_async)
+{
+	struct netfs_io_subrequest *subreq;
+	struct folio *folio;
+	pgoff_t unlocked = 0;
+	bool have_unlocked = false;
+
+	rcu_read_lock();
+
+	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+		XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
+
+		xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
+			if (xas_retry(&xas, folio))
+				continue;
+
+			/* We might have multiple writes from the same huge
+			 * folio, but we mustn't unlock a folio more than once.
+			 */
+			if (have_unlocked && folio->index <= unlocked)
+				continue;
+			unlocked = folio_next_index(folio) - 1;
+			trace_netfs_folio(folio, netfs_folio_trace_end_copy);
+			folio_end_private_2(folio);
+			have_unlocked = true;
+		}
+	}
+
+	rcu_read_unlock();
+	netfs_rreq_completed(rreq, was_async);
+}
+
+static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
+				       bool was_async) /* [DEPRECATED] */
+{
+	struct netfs_io_subrequest *subreq = priv;
+	struct netfs_io_request *rreq = subreq->rreq;
+
+	if (IS_ERR_VALUE(transferred_or_error)) {
+		netfs_stat(&netfs_n_rh_write_failed);
+		trace_netfs_failure(rreq, subreq, transferred_or_error,
+				    netfs_fail_copy_to_cache);
+	} else {
+		netfs_stat(&netfs_n_rh_write_done);
+	}
+
+	trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);
+
+	/* If we decrement nr_copy_ops to 0, the ref belongs to us. */
+	if (atomic_dec_and_test(&rreq->nr_copy_ops))
+		netfs_rreq_unmark_after_write(rreq, was_async);
+
+	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+}
+
+/*
+ * [DEPRECATED] Perform any outstanding writes to the cache.  We inherit a ref
+ * from the caller.
+ */
+static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
+{
+	struct netfs_cache_resources *cres = &rreq->cache_resources;
+	struct netfs_io_subrequest *subreq, *next, *p;
+	struct iov_iter iter;
+	int ret;
+
+	trace_netfs_rreq(rreq, netfs_rreq_trace_copy);
+
+	/* We don't want terminating writes trying to wake us up whilst we're
+	 * still going through the list.
+	 */
+	atomic_inc(&rreq->nr_copy_ops);
+
+	list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
+		if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
+			list_del_init(&subreq->rreq_link);
+			netfs_put_subrequest(subreq, false,
+					     netfs_sreq_trace_put_no_copy);
+		}
+	}
+
+	list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
+		/* Amalgamate adjacent writes */
+		while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
+			next = list_next_entry(subreq, rreq_link);
+			if (next->start != subreq->start + subreq->len)
+				break;
+			subreq->len += next->len;
+			list_del_init(&next->rreq_link);
+			netfs_put_subrequest(next, false,
+					     netfs_sreq_trace_put_merged);
+		}
+
+		ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
+					       subreq->len, rreq->i_size, true);
+		if (ret < 0) {
+			trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
+			trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
+			continue;
+		}
+
+		iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages,
+				subreq->start, subreq->len);
+
+		atomic_inc(&rreq->nr_copy_ops);
+		netfs_stat(&netfs_n_rh_write);
+		netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache);
+		trace_netfs_sreq(subreq, netfs_sreq_trace_write);
+		cres->ops->write(cres, subreq->start, &iter,
+				 netfs_rreq_copy_terminated, subreq);
+	}
+
+	/* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */
+	if (atomic_dec_and_test(&rreq->nr_copy_ops))
+		netfs_rreq_unmark_after_write(rreq, false);
+}
+
+static void netfs_rreq_write_to_cache_work(struct work_struct *work) /* [DEPRECATED] */
+{
+	struct netfs_io_request *rreq =
+		container_of(work, struct netfs_io_request, work);
+
+	netfs_rreq_do_write_to_cache(rreq);
+}
+
+static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) /* [DEPRECATED] */
+{
+	rreq->work.func = netfs_rreq_write_to_cache_work;
+	if (!queue_work(system_unbound_wq, &rreq->work))
+		BUG();
+}
+
+/*
  * Handle a short read.
  */
 static void netfs_rreq_short_read(struct netfs_io_request *rreq,
@@ -275,6 +415,10 @@ again:
 	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
 	wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
 
+	if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags) &&
+	    test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags))
+		return netfs_rreq_write_to_cache(rreq);
+
 	netfs_rreq_completed(rreq, was_async);
 }
 
@@ -386,7 +530,8 @@ incomplete:
 
 	if (transferred_or_error == 0) {
 		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
-			subreq->error = -ENODATA;
+			if (rreq->origin != NETFS_DIO_READ)
+				subreq->error = -ENODATA;
 			goto failed;
 		}
 	} else {
@@ -457,9 +602,14 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
 			}
 			if (subreq->len > ictx->zero_point - subreq->start)
 				subreq->len = ictx->zero_point - subreq->start;
+
+			/* We limit buffered reads to the EOF, but let the
+			 * server deal with larger-than-EOF DIO/unbuffered
+			 * reads.
+			 */
+			if (subreq->len > rreq->i_size - subreq->start)
+				subreq->len = rreq->i_size - subreq->start;
 		}
-		if (subreq->len > rreq->i_size - subreq->start)
-			subreq->len = rreq->i_size - subreq->start;
 		if (rreq->rsize && subreq->len > rreq->rsize)
 			subreq->len = rreq->rsize;
 
@@ -595,11 +745,10 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
 	do {
 		_debug("submit %llx + %llx >= %llx",
 		       rreq->start, rreq->submitted, rreq->i_size);
-		if (rreq->origin == NETFS_DIO_READ &&
-		    rreq->start + rreq->submitted >= rreq->i_size)
-			break;
 		if (!netfs_rreq_submit_slice(rreq, &io_iter))
 			break;
+		if (test_bit(NETFS_SREQ_NO_PROGRESS, &rreq->flags))
+			break;
 		if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
 		    test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
 			break;
author	Linus Torvalds <torvalds@linux-foundation.org>	2024-08-14 09:06:28 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2024-08-14 09:06:28 -0700
commit	4ac0f08f44b62e59a389c7ed87c89087d9fefe29 (patch)
tree	b132cf6e957a9fa385905287ad343b3a4e87ea9e /fs/netfs/io.c
parent	02f8ca3d49055788f112c17052a3da65feb01835 (diff)
parent	810ee43d9cd245d138a2733d87a24858a23f577d (diff)