From 3b821409632ab778d46e807516b457dfa72736ed Mon Sep 17 00:00:00 2001
From: Al Viro
Date: Fri, 23 Feb 2018 20:47:17 -0500
Subject: lock_parent() needs to recheck if dentry got __dentry_kill'ed under it

When the dentry passed to lock_parent() is protected from freeing only
by the fact that it's on a shrink list, and the trylock of the parent
fails, we can get hit by __dentry_kill() (and a subsequent
dentry_kill(parent)) between unlocking the dentry and locking the
presumed parent.  We need to recheck that the dentry is still alive
once we hold both its lock and the parent's, *and* postpone
rcu_read_unlock() until past that point.  Otherwise we could return a
pointer to a struct dentry that is already RCU-scheduled for freeing,
with its ->d_lock held; the caller's subsequent attempt to unlock it
can end up corrupting memory.

Cc: stable@vger.kernel.org # 3.12+, counting backports
Signed-off-by: Al Viro
---
 fs/dcache.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 7c38f39958bc..32aaab21e648 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -647,11 +647,16 @@ again:
 		spin_unlock(&parent->d_lock);
 		goto again;
 	}
-	rcu_read_unlock();
-	if (parent != dentry)
+	if (parent != dentry) {
 		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
-	else
+		if (unlikely(dentry->d_lockref.count < 0)) {
+			spin_unlock(&parent->d_lock);
+			parent = NULL;
+		}
+	} else {
 		parent = NULL;
+	}
+	rcu_read_unlock();
 	return parent;
 }
 
--
cgit v1.2.3-70-g09d2
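For readers outside the dcache code, what follows is a minimal userspace
sketch of the revalidate-after-relock pattern the patch above applies,
using pthread mutexes in place of the kernel's spinlocks.  Everything
here is an illustrative stand-in: struct node, its dead flag and this
lock_parent() stand for struct dentry, d_lockref.count < 0 and the real
lock_parent(), and the RCU read-side section that keeps the parent's
memory valid across the unlock/relock window in the kernel is assumed
rather than modelled.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct node {
	pthread_mutex_t lock;
	struct node *parent;
	bool dead;			/* stand-in for d_lockref.count < 0 */
};

/*
 * Called with n->lock held.  Returns with n->lock and the parent's lock
 * held, or NULL when there is no parent to lock or n died while its
 * lock was dropped (n->lock is still held on return in either case).
 */
struct node *lock_parent(struct node *n)
{
	struct node *parent = n->parent;

	if (parent == NULL || parent == n)
		return NULL;
	/* Fast path: take the parent's lock without ever dropping n's. */
	if (pthread_mutex_trylock(&parent->lock) == 0)
		return parent;

	/* Slow path: drop n->lock, then lock in parent-first order. */
	pthread_mutex_unlock(&n->lock);
again:
	/* The kernel reads d_parent with READ_ONCE() under RCU here;
	 * the sketch glosses over that atomicity and lifetime detail. */
	parent = n->parent;
	pthread_mutex_lock(&parent->lock);
	if (n->parent != parent) {
		/* n was reparented while unlocked: start over. */
		pthread_mutex_unlock(&parent->lock);
		goto again;
	}
	if (parent == n)
		return NULL;		/* n became a root; its lock is held */
	pthread_mutex_lock(&n->lock);
	/*
	 * The recheck the patch adds: n may have been killed in the
	 * window where its lock was dropped, so "n is alive" must be
	 * re-established now that both locks are held.
	 */
	if (n->dead) {
		pthread_mutex_unlock(&parent->lock);
		return NULL;
	}
	return parent;
}

The essential point mirrors the fix: any fact established before
n->lock was dropped is stale once both locks are reacquired and must be
rechecked, and only after that recheck may the protection covering the
window (RCU in the kernel) be released.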
From 015555fd4d2930bc0c86952c46ad88b3392f66e4 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Mon, 19 Feb 2018 14:55:54 +0000
Subject: fs: dcache: Avoid livelock between d_alloc_parallel and __d_add

If d_alloc_parallel runs concurrently with __d_add, it is possible for
d_alloc_parallel to retry continuously while i_dir_seq has been
incremented to an odd value by __d_add:

CPU0: __d_add
	n = start_dir_add(dir);
	cmpxchg(&dir->i_dir_seq, n, n + 1) == n

CPU1: d_alloc_parallel
retry:
	seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
	hlist_bl_lock(b);
		bit_spin_lock(0, (unsigned long *)b); // Always succeeds

CPU0: __d_lookup_done(dentry)
	hlist_bl_lock
		bit_spin_lock(0, (unsigned long *)b); // Never succeeds

CPU1:
	if (unlikely(parent->d_inode->i_dir_seq != seq)) {
		hlist_bl_unlock(b);
		goto retry;
	}

Since the simple bit_spin_lock used to implement hlist_bl_lock provides
no fairness guarantees, CPU1 can starve CPU0 of the lock and prevent it
from reaching end_dir_add(dir); CPU1 can then never exit its retry
loop, because the sequence number always has the bottom bit set.

This patch resolves the livelock by not taking hlist_bl_lock in
d_alloc_parallel if the sequence counter is odd, since any subsequent
masked comparison with i_dir_seq will fail anyway.

Cc: Peter Zijlstra
Cc: Al Viro
Reported-by: Naresh Madhusudana
Acked-by: Peter Zijlstra (Intel)
Reviewed-by: Matthew Wilcox
Signed-off-by: Will Deacon
Signed-off-by: Al Viro
---
 fs/dcache.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 32aaab21e648..bde3b6662601 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2479,7 +2479,7 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
 
 retry:
 	rcu_read_lock();
-	seq = smp_load_acquire(&parent->d_inode->i_dir_seq) & ~1;
+	seq = smp_load_acquire(&parent->d_inode->i_dir_seq);
 	r_seq = read_seqbegin(&rename_lock);
 	dentry = __d_lookup_rcu(parent, name, &d_seq);
 	if (unlikely(dentry)) {
@@ -2500,6 +2500,12 @@ retry:
 		rcu_read_unlock();
 		goto retry;
 	}
+
+	if (unlikely(seq & 1)) {
+		rcu_read_unlock();
+		goto retry;
+	}
+
 	hlist_bl_lock(b);
 	if (unlikely(parent->d_inode->i_dir_seq != seq)) {
 		hlist_bl_unlock(b);
--
cgit v1.2.3-70-g09d2

From 8cc07c808c9d595e81cbe5aad419b7769eb2e5c9 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Mon, 19 Feb 2018 14:55:55 +0000
Subject: fs: dcache: Use READ_ONCE when accessing i_dir_seq

i_dir_seq is subject to concurrent modification by a cmpxchg or
store-release operation, so ensure that the relaxed access in
d_alloc_parallel uses READ_ONCE.

Reported-by: Peter Zijlstra
Signed-off-by: Will Deacon
Signed-off-by: Al Viro
---
 fs/dcache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index bde3b6662601..8945e6cabd93 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2507,7 +2507,7 @@ retry:
 	}
 
 	hlist_bl_lock(b);
-	if (unlikely(parent->d_inode->i_dir_seq != seq)) {
+	if (unlikely(READ_ONCE(parent->d_inode->i_dir_seq) != seq)) {
 		hlist_bl_unlock(b);
 		rcu_read_unlock();
 		goto retry;
--
cgit v1.2.3-70-g09d2
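Taken together, the two patches above leave the retry loop in
d_alloc_parallel with the following shape: load the sequence count with
acquire semantics, give up before taking the bucket lock whenever a
writer has made the count odd, and recheck the count under the lock
with a READ_ONCE-style access.  Below is a minimal userspace sketch of
that shape, assuming C11 atomics and a pthread mutex as stand-ins for
smp_load_acquire()/READ_ONCE() and hlist_bl_lock(); dir_seq,
bucket_lock and parallel_lookup are invented names, not the kernel's.

#include <pthread.h>
#include <stdatomic.h>

/* Even: directory stable.  Odd: a writer (__d_add's role) is mid-update. */
static _Atomic unsigned int dir_seq;			/* i_dir_seq stand-in */
static pthread_mutex_t bucket_lock = PTHREAD_MUTEX_INITIALIZER;

void parallel_lookup(void (*insert_under_lock)(void))
{
	unsigned int seq;

retry:
	/* Pairs with the writer's release store once it finishes. */
	seq = atomic_load_explicit(&dir_seq, memory_order_acquire);

	/* ... lockless lookup work, validated against seq, goes here ... */

	/*
	 * The livelock fix: if the count is odd, a writer is mid-update.
	 * Grabbing the unfair bucket lock now could starve that writer
	 * forever, and the recheck below would fail anyway, so retry
	 * without touching the lock.
	 */
	if (seq & 1)
		goto retry;

	pthread_mutex_lock(&bucket_lock);
	/*
	 * Recheck under the lock.  The explicit atomic load is the C11
	 * analogue of the READ_ONCE() added by the last patch: the
	 * location is written concurrently, so a plain read would be a
	 * data race.
	 */
	if (atomic_load_explicit(&dir_seq, memory_order_relaxed) != seq) {
		pthread_mutex_unlock(&bucket_lock);
		goto retry;
	}
	insert_under_lock();		/* sequence is unchanged and even */
	pthread_mutex_unlock(&bucket_lock);
}

The order of the checks is the design point: the parity test sits after
the lockless work but before the lock, so a reader never holds the
bucket lock across a window in which the writer needs that same lock to
make the count even again.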