diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-06-28 09:25:21 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-06-28 09:25:21 -0700 |
commit | cd63a278acedc375603820abff11a5414af53769 (patch) | |
tree | ec81b94aae190924b675541cf8b5b2978384987e /fs/bcachefs/debug.c | |
parent | cd17613f464e15ad0ca83de7ca79ba72e4e72f75 (diff) | |
parent | 64cd7de998f393e73981e2aa4ee13e4e887f01ea (diff) |
Merge tag 'bcachefs-2024-06-28' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs fixes from Kent Overstreet:
"Simple stuff:
- NULL ptr/err ptr deref fixes
- fix for getting wedged on shutdown after journal error
- fix missing recalc_capacity() call, capacity now changes correctly
after a device goes read only
however: our capacity calculation still doesn't take into account
when we have mixed ro/rw devices and the ro devices have data on
them, that's going to be a more involved fix to separate accounting
for "capacity used on ro devices" and "capacity used on rw devices"
- boring syzbot stuff
Slightly more involved:
- discard, invalidate workers are now per device
this has the effect of simplifying how we take device refs in these
paths, and the device ref cleanup fixes a longstanding race between
the device removal path and the discard path
- fixes for how the debugfs code takes refs on btree_trans objects we
have debugfs code that prints in use btree_trans objects.
It uses closure_get() on trans->ref, which is mainly for the cycle
detector, but the debugfs code was using it on a closure that may
have hit 0, which is not allowed; for performance reasons we cannot
avoid having not-in-use transactions on the global list.
Introduce some new primitives to fix this and make the
synchronization here a whole lot saner"
* tag 'bcachefs-2024-06-28' of https://evilpiepirate.org/git/bcachefs:
bcachefs: Fix kmalloc bug in __snapshot_t_mut
bcachefs: Discard, invalidate workers are now per device
bcachefs: Fix shift-out-of-bounds in bch2_blacklist_entries_gc
bcachefs: slab-use-after-free Read in bch2_sb_errors_from_cpu
bcachefs: Add missing bch2_journal_do_writes() call
bcachefs: Fix null ptr deref in journal_pins_to_text()
bcachefs: Add missing recalc_capacity() call
bcachefs: Fix btree_trans list ordering
bcachefs: Fix race between trans_put() and btree_transactions_read()
closures: closure_get_not_zero(), closure_return_sync()
bcachefs: Make btree_deadlock_to_text() clearer
bcachefs: fix seqmutex_relock()
bcachefs: Fix freeing of error pointers
Diffstat (limited to 'fs/bcachefs/debug.c')
-rw-r--r-- | fs/bcachefs/debug.c | 109 |
1 files changed, 69 insertions, 40 deletions
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 51cbf3928361..f0d4727c4dc2 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -568,6 +568,32 @@ static const struct file_operations cached_btree_nodes_ops = { .read = bch2_cached_btree_nodes_read, }; +typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r); + +static void list_sort(struct list_head *head, list_cmp_fn cmp) +{ + struct list_head *pos; + + list_for_each(pos, head) + while (!list_is_last(pos, head) && + cmp(pos, pos->next) > 0) { + struct list_head *pos2, *next = pos->next; + + list_del(next); + list_for_each(pos2, head) + if (cmp(next, pos2) < 0) + goto pos_found; + BUG(); +pos_found: + list_add_tail(next, pos2); + } +} + +static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r) +{ + return cmp_int(l, r); +} + static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, size_t size, loff_t *ppos) { @@ -575,41 +601,39 @@ static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf, struct bch_fs *c = i->c; struct btree_trans *trans; ssize_t ret = 0; - u32 seq; i->ubuf = buf; i->size = size; i->ret = 0; restart: seqmutex_lock(&c->btree_trans_lock); - list_for_each_entry(trans, &c->btree_trans_list, list) { - struct task_struct *task = READ_ONCE(trans->locking_wait.task); + list_sort(&c->btree_trans_list, list_ptr_order_cmp); - if (!task || task->pid <= i->iter) + list_for_each_entry(trans, &c->btree_trans_list, list) { + if ((ulong) trans < i->iter) continue; - closure_get(&trans->ref); - seq = seqmutex_seq(&c->btree_trans_lock); - seqmutex_unlock(&c->btree_trans_lock); + i->iter = (ulong) trans; - ret = flush_buf(i); - if (ret) { - closure_put(&trans->ref); - goto unlocked; - } + if (!closure_get_not_zero(&trans->ref)) + continue; + + u32 seq = seqmutex_unlock(&c->btree_trans_lock); bch2_btree_trans_to_text(&i->buf, trans); prt_printf(&i->buf, "backtrace:\n"); printbuf_indent_add(&i->buf, 2); - bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL); + bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL); printbuf_indent_sub(&i->buf, 2); prt_newline(&i->buf); - i->iter = task->pid; - closure_put(&trans->ref); + ret = flush_buf(i); + if (ret) + goto unlocked; + if (!seqmutex_relock(&c->btree_trans_lock, seq)) goto restart; } @@ -804,50 +828,55 @@ static const struct file_operations btree_transaction_stats_op = { .read = btree_transaction_stats_read, }; -static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, - size_t size, loff_t *ppos) +/* walk btree transactions until we find a deadlock and print it */ +static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c) { - struct dump_iter *i = file->private_data; - struct bch_fs *c = i->c; struct btree_trans *trans; - ssize_t ret = 0; - u32 seq; - - i->ubuf = buf; - i->size = size; - i->ret = 0; - - if (i->iter) - goto out; + pid_t iter = 0; restart: seqmutex_lock(&c->btree_trans_lock); list_for_each_entry(trans, &c->btree_trans_list, list) { struct task_struct *task = READ_ONCE(trans->locking_wait.task); - if (!task || task->pid <= i->iter) + if (!task || task->pid <= iter) continue; - closure_get(&trans->ref); - seq = seqmutex_seq(&c->btree_trans_lock); - seqmutex_unlock(&c->btree_trans_lock); + iter = task->pid; - ret = flush_buf(i); - if (ret) { - closure_put(&trans->ref); - goto out; - } + if (!closure_get_not_zero(&trans->ref)) + continue; - bch2_check_for_deadlock(trans, &i->buf); + u32 seq = seqmutex_unlock(&c->btree_trans_lock); - i->iter = task->pid; + bool found = bch2_check_for_deadlock(trans, out) != 0; closure_put(&trans->ref); + if (found) + return; + if (!seqmutex_relock(&c->btree_trans_lock, seq)) goto restart; } seqmutex_unlock(&c->btree_trans_lock); -out: +} + +static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf, + size_t size, loff_t *ppos) +{ + struct dump_iter *i = file->private_data; + struct bch_fs *c = i->c; + ssize_t ret = 0; + + i->ubuf = buf; + i->size = size; + i->ret = 0; + + if (!i->iter) { + btree_deadlock_to_text(&i->buf, c); + i->iter++; + } + if (i->buf.allocation_failure) ret = -ENOMEM; |