summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-22 10:59:32 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-22 10:59:32 +0200
commit7fd350f6ff846f788ba5f6668bacf2ce4257ed8f (patch)
treeb94760f20590fcfda8851bc62a9888f281540033
parentb68b10b6266009bc8770adf952d637250ee93135 (diff)
parentcef0223191452b3c493a1070baad9ffe806babac (diff)
Merge tag 'fscache-fixes-20220121' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull more fscache updates from David Howells: "A set of fixes and minor updates for the fscache rewrite: - Fix mishandling of volume collisions (the wait condition is inverted and so it was only waiting if the volume collision was already resolved). - Fix miscalculation of whether there's space available in cachefiles. - Make sure a default cache name is set on a cache if the user hasn't set one by the time they bind the cache. - Adjust the way the backing inode is presented in tracepoints, add a tracepoint for mkdir and trace directory lookup. - Add a tracepoint for failure to set the active file mark. - Add an explanation of the checks made on the backing filesystem. - Check that the backing filesystem supports tmpfile. - Document how the page-release cancellation of the read-skip optimisation works. And I've included a change for netfslib: - Make ops->init_rreq() optional" * tag 'fscache-fixes-20220121' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs: netfs: Make ops->init_rreq() optional fscache: Add a comment explaining how page-release optimisation works cachefiles: Check that the backing filesystem supports tmpfiles cachefiles: Explain checks in a comment cachefiles: Trace active-mark failure cachefiles: Make some tracepoint adjustments cachefiles: set default tag name if it's unspecified cachefiles: Calculate the blockshift in terms of bytes, not pages fscache: Fix the volume collision wait condition
-rw-r--r--fs/cachefiles/cache.c17
-rw-r--r--fs/cachefiles/daemon.c11
-rw-r--r--fs/cachefiles/internal.h2
-rw-r--r--fs/cachefiles/io.c2
-rw-r--r--fs/cachefiles/namei.c12
-rw-r--r--fs/ceph/addr.c5
-rw-r--r--fs/fscache/volume.c4
-rw-r--r--fs/netfs/read_helper.c3
-rw-r--r--include/linux/fscache.h5
-rw-r--r--include/trace/events/cachefiles.h103
10 files changed, 113 insertions, 51 deletions
diff --git a/fs/cachefiles/cache.c b/fs/cachefiles/cache.c
index ce4d4785003c..7077f72e6f47 100644
--- a/fs/cachefiles/cache.c
+++ b/fs/cachefiles/cache.c
@@ -49,11 +49,19 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
}
- /* check parameters */
+ /* Check features of the backing filesystem:
+ * - Directories must support looking up and directory creation
+ * - We create tmpfiles to handle invalidation
+ * - We use xattrs to store metadata
+ * - We need to be able to query the amount of space available
+ * - We want to be able to sync the filesystem when stopping the cache
+ * - We use DIO to/from pages, so the blocksize mustn't be too big.
+ */
ret = -EOPNOTSUPP;
if (d_is_negative(root) ||
!d_backing_inode(root)->i_op->lookup ||
!d_backing_inode(root)->i_op->mkdir ||
+ !d_backing_inode(root)->i_op->tmpfile ||
!(d_backing_inode(root)->i_opflags & IOP_XATTR) ||
!root->d_sb->s_op->statfs ||
!root->d_sb->s_op->sync_fs ||
@@ -84,9 +92,7 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
goto error_unsupported;
cache->bsize = stats.f_bsize;
- cache->bshift = 0;
- if (stats.f_bsize < PAGE_SIZE)
- cache->bshift = PAGE_SHIFT - ilog2(stats.f_bsize);
+ cache->bshift = ilog2(stats.f_bsize);
_debug("blksize %u (shift %u)",
cache->bsize, cache->bshift);
@@ -106,7 +112,6 @@ int cachefiles_add_cache(struct cachefiles_cache *cache)
(unsigned long long) cache->fcull,
(unsigned long long) cache->fstop);
- stats.f_blocks >>= cache->bshift;
do_div(stats.f_blocks, 100);
cache->bstop = stats.f_blocks * cache->bstop_percent;
cache->bcull = stats.f_blocks * cache->bcull_percent;
@@ -209,7 +214,7 @@ int cachefiles_has_space(struct cachefiles_cache *cache,
return ret;
}
- b_avail = stats.f_bavail >> cache->bshift;
+ b_avail = stats.f_bavail;
b_writing = atomic_long_read(&cache->b_writing);
if (b_avail > b_writing)
b_avail -= b_writing;
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 40a792421fc1..7ac04ee2c0a0 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -703,6 +703,17 @@ static int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
return -EBUSY;
}
+ /* Make sure we have copies of the tag string */
+ if (!cache->tag) {
+ /*
+ * The tag string is released by the fops->release()
+ * function, so we don't release it on error here
+ */
+ cache->tag = kstrdup("CacheFiles", GFP_KERNEL);
+ if (!cache->tag)
+ return -ENOMEM;
+ }
+
return cachefiles_add_cache(cache);
}
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 8dd54d9375b6..c793d33b0224 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -86,7 +86,7 @@ struct cachefiles_cache {
unsigned bcull_percent; /* when to start culling (% blocks) */
unsigned bstop_percent; /* when to stop allocating (% blocks) */
unsigned bsize; /* cache's block size */
- unsigned bshift; /* min(ilog2(PAGE_SIZE / bsize), 0) */
+ unsigned bshift; /* ilog2(bsize) */
uint64_t frun; /* when to stop culling */
uint64_t fcull; /* when to start culling */
uint64_t fstop; /* when to stop allocating */
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 60b1eac2ce78..04eb52736990 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -264,7 +264,7 @@ static int cachefiles_write(struct netfs_cache_resources *cres,
ki->term_func = term_func;
ki->term_func_priv = term_func_priv;
ki->was_async = true;
- ki->b_writing = (len + (1 << cache->bshift)) >> cache->bshift;
+ ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;
if (ki->term_func)
ki->iocb.ki_complete = cachefiles_write_complete;
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 9bd692870617..f256c8aff7bb 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -25,7 +25,9 @@ static bool __cachefiles_mark_inode_in_use(struct cachefiles_object *object,
trace_cachefiles_mark_active(object, inode);
can_use = true;
} else {
- pr_notice("cachefiles: Inode already in use: %pd\n", dentry);
+ trace_cachefiles_mark_failed(object, inode);
+ pr_notice("cachefiles: Inode already in use: %pd (B=%lx)\n",
+ dentry, inode->i_ino);
}
return can_use;
@@ -101,6 +103,7 @@ retry:
subdir = lookup_one_len(dirname, dir, strlen(dirname));
else
subdir = ERR_PTR(ret);
+ trace_cachefiles_lookup(NULL, dir, subdir);
if (IS_ERR(subdir)) {
trace_cachefiles_vfs_error(NULL, d_backing_inode(dir),
PTR_ERR(subdir),
@@ -135,6 +138,7 @@ retry:
cachefiles_trace_mkdir_error);
goto mkdir_error;
}
+ trace_cachefiles_mkdir(dir, subdir);
if (unlikely(d_unhashed(subdir))) {
cachefiles_put_directory(subdir);
@@ -233,7 +237,7 @@ static int cachefiles_unlink(struct cachefiles_cache *cache,
};
int ret;
- trace_cachefiles_unlink(object, dentry, why);
+ trace_cachefiles_unlink(object, d_inode(dentry)->i_ino, why);
ret = security_path_unlink(&path, dentry);
if (ret < 0) {
cachefiles_io_error(cache, "Unlink security error");
@@ -386,7 +390,7 @@ try_again:
.new_dir = d_inode(cache->graveyard),
.new_dentry = grave,
};
- trace_cachefiles_rename(object, rep, grave, why);
+ trace_cachefiles_rename(object, d_inode(rep)->i_ino, why);
ret = cachefiles_inject_read_error();
if (ret == 0)
ret = vfs_rename(&rd);
@@ -617,7 +621,7 @@ bool cachefiles_look_up_object(struct cachefiles_object *object)
object->d_name_len);
else
dentry = ERR_PTR(ret);
- trace_cachefiles_lookup(object, dentry);
+ trace_cachefiles_lookup(object, fan, dentry);
if (IS_ERR(dentry)) {
if (dentry == ERR_PTR(-ENOENT))
goto new_file;
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index b3d9459c9bbd..c98e5238a1b6 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -297,10 +297,6 @@ out:
dout("%s: result %d\n", __func__, err);
}
-static void ceph_init_rreq(struct netfs_read_request *rreq, struct file *file)
-{
-}
-
static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
{
struct inode *inode = mapping->host;
@@ -312,7 +308,6 @@ static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
}
static const struct netfs_read_request_ops ceph_netfs_read_ops = {
- .init_rreq = ceph_init_rreq,
.is_cache_enabled = ceph_is_cache_enabled,
.begin_cache_operation = ceph_begin_cache_operation,
.issue_op = ceph_netfs_issue_op,
diff --git a/fs/fscache/volume.c b/fs/fscache/volume.c
index a57c6cbee858..f2aa7dbad766 100644
--- a/fs/fscache/volume.c
+++ b/fs/fscache/volume.c
@@ -142,12 +142,12 @@ static void fscache_wait_on_volume_collision(struct fscache_volume *candidate,
unsigned int collidee_debug_id)
{
wait_var_event_timeout(&candidate->flags,
- fscache_is_acquire_pending(candidate), 20 * HZ);
+ !fscache_is_acquire_pending(candidate), 20 * HZ);
if (!fscache_is_acquire_pending(candidate)) {
pr_notice("Potential volume collision new=%08x old=%08x",
candidate->debug_id, collidee_debug_id);
fscache_stat(&fscache_n_volumes_collision);
- wait_var_event(&candidate->flags, fscache_is_acquire_pending(candidate));
+ wait_var_event(&candidate->flags, !fscache_is_acquire_pending(candidate));
}
}
diff --git a/fs/netfs/read_helper.c b/fs/netfs/read_helper.c
index 6169659857b3..501da990c259 100644
--- a/fs/netfs/read_helper.c
+++ b/fs/netfs/read_helper.c
@@ -55,7 +55,8 @@ static struct netfs_read_request *netfs_alloc_read_request(
INIT_WORK(&rreq->work, netfs_rreq_work);
refcount_set(&rreq->usage, 1);
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
- ops->init_rreq(rreq, file);
+ if (ops->init_rreq)
+ ops->init_rreq(rreq, file);
netfs_stat(&netfs_n_rh_rreq);
}
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index ede50406bcb0..296c5f1d9f35 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -665,6 +665,11 @@ static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie,
static inline
void fscache_note_page_release(struct fscache_cookie *cookie)
{
+ /* If we've written data to the cache (HAVE_DATA) and there wasn't any
+ * data in the cache when we started (NO_DATA_TO_READ), it may no
+ * longer be true that we can skip reading from the cache - so clear
+ * the flag that causes reads to be skipped.
+ */
if (cookie &&
test_bit(FSCACHE_COOKIE_HAVE_DATA, &cookie->flags) &&
test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags))
diff --git a/include/trace/events/cachefiles.h b/include/trace/events/cachefiles.h
index 1172529b5b49..c6f5aa74db89 100644
--- a/include/trace/events/cachefiles.h
+++ b/include/trace/events/cachefiles.h
@@ -233,25 +233,48 @@ TRACE_EVENT(cachefiles_ref,
TRACE_EVENT(cachefiles_lookup,
TP_PROTO(struct cachefiles_object *obj,
+ struct dentry *dir,
struct dentry *de),
- TP_ARGS(obj, de),
+ TP_ARGS(obj, dir, de),
TP_STRUCT__entry(
__field(unsigned int, obj )
__field(short, error )
+ __field(unsigned long, dino )
__field(unsigned long, ino )
),
TP_fast_assign(
- __entry->obj = obj->debug_id;
+ __entry->obj = obj ? obj->debug_id : 0;
+ __entry->dino = d_backing_inode(dir)->i_ino;
__entry->ino = (!IS_ERR(de) && d_backing_inode(de) ?
d_backing_inode(de)->i_ino : 0);
__entry->error = IS_ERR(de) ? PTR_ERR(de) : 0;
),
- TP_printk("o=%08x i=%lx e=%d",
- __entry->obj, __entry->ino, __entry->error)
+ TP_printk("o=%08x dB=%lx B=%lx e=%d",
+ __entry->obj, __entry->dino, __entry->ino, __entry->error)
+ );
+
+TRACE_EVENT(cachefiles_mkdir,
+ TP_PROTO(struct dentry *dir, struct dentry *subdir),
+
+ TP_ARGS(dir, subdir),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, dir )
+ __field(unsigned int, subdir )
+ ),
+
+ TP_fast_assign(
+ __entry->dir = d_backing_inode(dir)->i_ino;
+ __entry->subdir = d_backing_inode(subdir)->i_ino;
+ ),
+
+ TP_printk("dB=%x sB=%x",
+ __entry->dir,
+ __entry->subdir)
);
TRACE_EVENT(cachefiles_tmpfile,
@@ -269,7 +292,7 @@ TRACE_EVENT(cachefiles_tmpfile,
__entry->backer = backer->i_ino;
),
- TP_printk("o=%08x b=%08x",
+ TP_printk("o=%08x B=%x",
__entry->obj,
__entry->backer)
);
@@ -289,61 +312,58 @@ TRACE_EVENT(cachefiles_link,
__entry->backer = backer->i_ino;
),
- TP_printk("o=%08x b=%08x",
+ TP_printk("o=%08x B=%x",
__entry->obj,
__entry->backer)
);
TRACE_EVENT(cachefiles_unlink,
TP_PROTO(struct cachefiles_object *obj,
- struct dentry *de,
+ ino_t ino,
enum fscache_why_object_killed why),
- TP_ARGS(obj, de, why),
+ TP_ARGS(obj, ino, why),
/* Note that obj may be NULL */
TP_STRUCT__entry(
__field(unsigned int, obj )
- __field(struct dentry *, de )
+ __field(unsigned int, ino )
__field(enum fscache_why_object_killed, why )
),
TP_fast_assign(
__entry->obj = obj ? obj->debug_id : UINT_MAX;
- __entry->de = de;
+ __entry->ino = ino;
__entry->why = why;
),
- TP_printk("o=%08x d=%p w=%s",
- __entry->obj, __entry->de,
+ TP_printk("o=%08x B=%x w=%s",
+ __entry->obj, __entry->ino,
__print_symbolic(__entry->why, cachefiles_obj_kill_traces))
);
TRACE_EVENT(cachefiles_rename,
TP_PROTO(struct cachefiles_object *obj,
- struct dentry *de,
- struct dentry *to,
+ ino_t ino,
enum fscache_why_object_killed why),
- TP_ARGS(obj, de, to, why),
+ TP_ARGS(obj, ino, why),
/* Note that obj may be NULL */
TP_STRUCT__entry(
__field(unsigned int, obj )
- __field(struct dentry *, de )
- __field(struct dentry *, to )
+ __field(unsigned int, ino )
__field(enum fscache_why_object_killed, why )
),
TP_fast_assign(
__entry->obj = obj ? obj->debug_id : UINT_MAX;
- __entry->de = de;
- __entry->to = to;
+ __entry->ino = ino;
__entry->why = why;
),
- TP_printk("o=%08x d=%p t=%p w=%s",
- __entry->obj, __entry->de, __entry->to,
+ TP_printk("o=%08x B=%x w=%s",
+ __entry->obj, __entry->ino,
__print_symbolic(__entry->why, cachefiles_obj_kill_traces))
);
@@ -370,7 +390,7 @@ TRACE_EVENT(cachefiles_coherency,
__entry->ino = ino;
),
- TP_printk("o=%08x %s i=%llx c=%u",
+ TP_printk("o=%08x %s B=%llx c=%u",
__entry->obj,
__print_symbolic(__entry->why, cachefiles_coherency_traces),
__entry->ino,
@@ -397,7 +417,7 @@ TRACE_EVENT(cachefiles_vol_coherency,
__entry->ino = ino;
),
- TP_printk("V=%08x %s i=%llx",
+ TP_printk("V=%08x %s B=%llx",
__entry->vol,
__print_symbolic(__entry->why, cachefiles_coherency_traces),
__entry->ino)
@@ -435,7 +455,7 @@ TRACE_EVENT(cachefiles_prep_read,
__entry->cache_inode = cache_inode;
),
- TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x b=%x",
+ TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx ni=%x B=%x",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__print_symbolic(__entry->why, cachefiles_prepare_read_traces),
@@ -466,7 +486,7 @@ TRACE_EVENT(cachefiles_read,
__entry->len = len;
),
- TP_printk("o=%08x b=%08x s=%llx l=%zx",
+ TP_printk("o=%08x B=%x s=%llx l=%zx",
__entry->obj,
__entry->backer,
__entry->start,
@@ -495,7 +515,7 @@ TRACE_EVENT(cachefiles_write,
__entry->len = len;
),
- TP_printk("o=%08x b=%08x s=%llx l=%zx",
+ TP_printk("o=%08x B=%x s=%llx l=%zx",
__entry->obj,
__entry->backer,
__entry->start,
@@ -524,7 +544,7 @@ TRACE_EVENT(cachefiles_trunc,
__entry->why = why;
),
- TP_printk("o=%08x b=%08x %s l=%llx->%llx",
+ TP_printk("o=%08x B=%x %s l=%llx->%llx",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->why, cachefiles_trunc_traces),
@@ -549,7 +569,28 @@ TRACE_EVENT(cachefiles_mark_active,
__entry->inode = inode->i_ino;
),
- TP_printk("o=%08x i=%lx",
+ TP_printk("o=%08x B=%lx",
+ __entry->obj, __entry->inode)
+ );
+
+TRACE_EVENT(cachefiles_mark_failed,
+ TP_PROTO(struct cachefiles_object *obj,
+ struct inode *inode),
+
+ TP_ARGS(obj, inode),
+
+ /* Note that obj may be NULL */
+ TP_STRUCT__entry(
+ __field(unsigned int, obj )
+ __field(ino_t, inode )
+ ),
+
+ TP_fast_assign(
+ __entry->obj = obj ? obj->debug_id : 0;
+ __entry->inode = inode->i_ino;
+ ),
+
+ TP_printk("o=%08x B=%lx",
__entry->obj, __entry->inode)
);
@@ -570,7 +611,7 @@ TRACE_EVENT(cachefiles_mark_inactive,
__entry->inode = inode->i_ino;
),
- TP_printk("o=%08x i=%lx",
+ TP_printk("o=%08x B=%lx",
__entry->obj, __entry->inode)
);
@@ -594,7 +635,7 @@ TRACE_EVENT(cachefiles_vfs_error,
__entry->where = where;
),
- TP_printk("o=%08x b=%08x %s e=%d",
+ TP_printk("o=%08x B=%x %s e=%d",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->where, cachefiles_error_traces),
@@ -621,7 +662,7 @@ TRACE_EVENT(cachefiles_io_error,
__entry->where = where;
),
- TP_printk("o=%08x b=%08x %s e=%d",
+ TP_printk("o=%08x B=%x %s e=%d",
__entry->obj,
__entry->backer,
__print_symbolic(__entry->where, cachefiles_error_traces),