author		Linus Torvalds <torvalds@linux-foundation.org>	2022-08-03 11:43:12 -0700
committer	Linus Torvalds <torvalds@linux-foundation.org>	2022-08-03 11:43:12 -0700
commit		200e340f2196d7fd427a5810d06e893b932f145a (patch)
tree		4456f23530138623ebed57fa2369affabe68368f /fs
parent		a782e866497217f22c5d9014cbb7be8549151376 (diff)
parent		50417d22d0efbb1be76c3cb66b2329f83741c9c7 (diff)
Merge tag 'pull-work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull vfs dcache updates from Al Viro:
"The main part here is making parallel lookups safe for RT - making
sure preemption is disabled in start_dir_add()/ end_dir_add() sections
(on non-RT it's automatic, on RT it needs to to be done explicitly)
and moving wakeups from __d_lookup_done() inside of such to the end of
those sections.
Wakeups can be safely delayed for as long as ->d_lock on the
in-lookup dentry is held; proving that caught a bug in d_add_ci()
that allows memory corruption when a sufficiently bogus ntfs (or
case-insensitive xfs) image is mounted. Easily fixed, fortunately"
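
The d_add_ci() fix mentioned above is a one-liner. Condensed from the
diff at the bottom of this page (the comment is editorial, not from the
patch), the error path now reads:

	res = d_splice_alias(inode, found);
	if (res) {
		/*
		 * New with this series: make sure `found` has left the
		 * in-lookup hash before the final dput(). Dropping the
		 * last reference to a dentry that is still reachable
		 * through the in-lookup hash is what allowed the memory
		 * corruption described above.
		 */
		d_lookup_done(found);
		dput(found);
		return res;
	}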
* tag 'pull-work.dcache' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
fs/dcache: Move wakeup out of i_seq_dir write held region.
fs/dcache: Move the wakeup from __d_lookup_done() to the caller.
fs/dcache: Disable preemption on i_dir_seq write side on PREEMPT_RT
d_add_ci(): make sure we don't miss d_lookup_done()
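
For orientation before the full diff: the two i_dir_seq write-side
helpers as they read after this merge, condensed from the diff below
(comments here are editorial):

static inline unsigned start_dir_add(struct inode *dir)
{
	/*
	 * On !PREEMPT_RT the caller's spin_lock() has already disabled
	 * preemption; on PREEMPT_RT it has not, so disable it by hand,
	 * or a preempting reader or writer could live-lock against the
	 * odd (write-held) i_dir_seq taken below.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_disable();
	for (;;) {
		unsigned n = dir->i_dir_seq;
		if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
			return n;
	}
}

static inline void end_dir_add(struct inode *dir, unsigned int n,
			       wait_queue_head_t *d_wait)
{
	/*
	 * Release the write side, re-enable preemption on RT, and only
	 * then wake waiters - the wakeup has moved out of the critical
	 * section.
	 */
	smp_store_release(&dir->i_dir_seq, n + 2);
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		preempt_enable();
	wake_up_all(d_wait);
}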
Diffstat (limited to 'fs')
-rw-r--r--	fs/dcache.c	54
1 file changed, 43 insertions, 11 deletions
diff --git a/fs/dcache.c b/fs/dcache.c
index 93f4f5ee07bf..ea5cdec24ea7 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2240,6 +2240,7 @@ struct dentry *d_add_ci(struct dentry *dentry, struct inode *inode,
 	}
 	res = d_splice_alias(inode, found);
 	if (res) {
+		d_lookup_done(found);
 		dput(found);
 		return res;
 	}
@@ -2563,7 +2564,15 @@ EXPORT_SYMBOL(d_rehash);
 
 static inline unsigned start_dir_add(struct inode *dir)
 {
-
+	/*
+	 * The caller holds a spinlock (dentry::d_lock). On !PREEMPT_RT
+	 * kernels spin_lock() implicitly disables preemption, but not on
+	 * PREEMPT_RT. So for RT it has to be done explicitly to protect
+	 * the sequence count write side critical section against a reader
+	 * or another writer preempting, which would result in a live lock.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_disable();
 	for (;;) {
 		unsigned n = dir->i_dir_seq;
 		if (!(n & 1) && cmpxchg(&dir->i_dir_seq, n, n + 1) == n)
@@ -2572,9 +2581,13 @@ static inline unsigned start_dir_add(struct inode *dir)
 	}
 }
 
-static inline void end_dir_add(struct inode *dir, unsigned n)
+static inline void end_dir_add(struct inode *dir, unsigned int n,
+			       wait_queue_head_t *d_wait)
 {
 	smp_store_release(&dir->i_dir_seq, n + 2);
+	if (IS_ENABLED(CONFIG_PREEMPT_RT))
+		preempt_enable();
+	wake_up_all(d_wait);
 }
 
 static void d_wait_lookup(struct dentry *dentry)
@@ -2701,32 +2714,50 @@ mismatch:
 }
 EXPORT_SYMBOL(d_alloc_parallel);
 
-void __d_lookup_done(struct dentry *dentry)
+/*
+ * - Unhash the dentry
+ * - Retrieve and clear the waitqueue head in dentry
+ * - Return the waitqueue head
+ */
+static wait_queue_head_t *__d_lookup_unhash(struct dentry *dentry)
 {
-	struct hlist_bl_head *b = in_lookup_hash(dentry->d_parent,
-						 dentry->d_name.hash);
+	wait_queue_head_t *d_wait;
+	struct hlist_bl_head *b;
+
+	lockdep_assert_held(&dentry->d_lock);
+
+	b = in_lookup_hash(dentry->d_parent, dentry->d_name.hash);
 	hlist_bl_lock(b);
 	dentry->d_flags &= ~DCACHE_PAR_LOOKUP;
 	__hlist_bl_del(&dentry->d_u.d_in_lookup_hash);
-	wake_up_all(dentry->d_wait);
+	d_wait = dentry->d_wait;
 	dentry->d_wait = NULL;
 	hlist_bl_unlock(b);
 	INIT_HLIST_NODE(&dentry->d_u.d_alias);
 	INIT_LIST_HEAD(&dentry->d_lru);
+	return d_wait;
+}
+
+void __d_lookup_unhash_wake(struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);
+	wake_up_all(__d_lookup_unhash(dentry));
+	spin_unlock(&dentry->d_lock);
 }
-EXPORT_SYMBOL(__d_lookup_done);
+EXPORT_SYMBOL(__d_lookup_unhash_wake);
 
 /* inode->i_lock held if inode is non-NULL */
 static inline void __d_add(struct dentry *dentry, struct inode *inode)
 {
+	wait_queue_head_t *d_wait;
 	struct inode *dir = NULL;
 	unsigned n;
 
 	spin_lock(&dentry->d_lock);
 	if (unlikely(d_in_lookup(dentry))) {
 		dir = dentry->d_parent->d_inode;
 		n = start_dir_add(dir);
-		__d_lookup_done(dentry);
+		d_wait = __d_lookup_unhash(dentry);
 	}
 	if (inode) {
 		unsigned add_flags = d_flags_for_inode(inode);
@@ -2738,7 +2769,7 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode)
 	}
 	__d_rehash(dentry);
 	if (dir)
-		end_dir_add(dir, n);
+		end_dir_add(dir, n, d_wait);
 	spin_unlock(&dentry->d_lock);
 	if (inode)
 		spin_unlock(&inode->i_lock);
@@ -2885,6 +2916,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
 		     bool exchange)
 {
 	struct dentry *old_parent, *p;
+	wait_queue_head_t *d_wait;
 	struct inode *dir = NULL;
 	unsigned n;
 
@@ -2915,7 +2947,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
 	if (unlikely(d_in_lookup(target))) {
 		dir = target->d_parent->d_inode;
 		n = start_dir_add(dir);
-		__d_lookup_done(target);
+		d_wait = __d_lookup_unhash(target);
 	}
 
 	write_seqcount_begin(&dentry->d_seq);
@@ -2951,7 +2983,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target,
 	write_seqcount_end(&dentry->d_seq);
 
 	if (dir)
-		end_dir_add(dir, n);
+		end_dir_add(dir, n, d_wait);
 
 	if (dentry->d_parent != old_parent)
 		spin_unlock(&dentry->d_parent->d_lock);