diff options
author | Kent Overstreet <kent.overstreet@gmail.com> | 2022-04-21 13:13:57 -0400 |
---|---|---|
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:09:32 -0400 |
commit | 1cab5a82cc67a09705fbe0607e6ab751f6663524 (patch) | |
tree | 3937f33e238033aa482a14cced3369ed5cdbc44b | |
parent | 104c69745fdf7e5f8aa022f60bc9d568987bd8b8 (diff) |
bcachefs: Go RW before bch2_check_lrus()
Btree updates made before going RW are expensive if they're in random order,
since they are inserted into the list of keys for journal replay, which is
just a gap buffer.
This patch improves the bucket invalidate path so that, if
bch2_check_lrus() hasn't finished, it only logs the error and skips to the
next lru entry instead of doing an emergency shutdown, which means we can
now set BCH_FS_MAY_GO_RW before bch2_check_lrus().
Also, the filesystem state bits are reorganized a bit.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
-rw-r--r-- | fs/bcachefs/alloc_background.c | 36 | ||||
-rw-r--r-- | fs/bcachefs/bcachefs.h | 19 | ||||
-rw-r--r-- | fs/bcachefs/lru.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 86 |
4 files changed, 90 insertions, 55 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index d9cf676da030..eb03b4135c3d 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -382,7 +382,8 @@ int bch2_alloc_v4_invalid(const struct bch_fs *c, struct bkey_s_c k, return -EINVAL; } - if (!a.v->io_time[READ]) { + if (!a.v->io_time[READ] && + test_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags)) { pr_buf(err, "cached bucket with read_time == 0"); return -EINVAL; } @@ -588,7 +589,6 @@ int bch2_trans_mark_alloc(struct btree_trans *trans, !new_a->io_time[READ]) new_a->io_time[READ] = max_t(u64, 1, atomic64_read(&c->io_clock[READ].now)); - old_lru = alloc_lru_idx(old_a); new_lru = alloc_lru_idx(*new_a); @@ -1088,6 +1088,7 @@ static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca) bch2_trans_iter_init(trans, &lru_iter, BTREE_ID_lru, POS(ca->dev_idx, 0), 0); +next_lru: k = bch2_btree_iter_peek(&lru_iter); ret = bkey_err(k); if (ret) @@ -1096,9 +1097,20 @@ static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca) if (!k.k || k.k->p.inode != ca->dev_idx) goto out; - if (bch2_trans_inconsistent_on(k.k->type != KEY_TYPE_lru, trans, - "non lru key in lru btree")) - goto out; + if (k.k->type != KEY_TYPE_lru) { + pr_buf(&buf, "non lru key in lru btree:\n "); + bch2_bkey_val_to_text(&buf, c, k); + + if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) { + bch_err(c, "%s", buf.buf); + bch2_btree_iter_advance(&lru_iter); + goto next_lru; + } else { + bch2_trans_inconsistent(trans, "%s", buf.buf); + ret = -EINVAL; + goto out; + } + } idx = k.k->p.offset; bucket = le64_to_cpu(bkey_s_c_to_lru(k).v->idx); @@ -1111,13 +1123,19 @@ static int invalidate_one_bucket(struct btree_trans *trans, struct bch_dev *ca) if (idx != alloc_lru_idx(a->v)) { pr_buf(&buf, "alloc key does not point back to lru entry when invalidating bucket:\n "); - bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&a->k_i)); pr_buf(&buf, "\n "); 
bch2_bkey_val_to_text(&buf, c, k); - bch2_trans_inconsistent(trans, "%s", buf.buf); - ret = -EINVAL; - goto out; + + if (!test_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags)) { + bch_err(c, "%s", buf.buf); + bch2_btree_iter_advance(&lru_iter); + goto next_lru; + } else { + bch2_trans_inconsistent(trans, "%s", buf.buf); + ret = -EINVAL; + goto out; + } } SET_BCH_ALLOC_V4_NEED_INC_GEN(&a->v, false); diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 5dda57afa802..127323b677df 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -494,11 +494,6 @@ struct bch_dev { enum { /* startup: */ - BCH_FS_CLEAN_SHUTDOWN, - BCH_FS_INITIAL_GC_DONE, - BCH_FS_INITIAL_GC_UNFIXED, - BCH_FS_TOPOLOGY_REPAIR_DONE, - BCH_FS_FSCK_DONE, BCH_FS_STARTED, BCH_FS_MAY_GO_RW, BCH_FS_RW, @@ -508,16 +503,22 @@ enum { BCH_FS_STOPPING, BCH_FS_EMERGENCY_RO, BCH_FS_WRITE_DISABLE_COMPLETE, + BCH_FS_CLEAN_SHUTDOWN, + + /* fsck passes: */ + BCH_FS_TOPOLOGY_REPAIR_DONE, + BCH_FS_INITIAL_GC_DONE, /* kill when we enumerate fsck passes */ + BCH_FS_CHECK_LRUS_DONE, + BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, + BCH_FS_FSCK_DONE, + BCH_FS_INITIAL_GC_UNFIXED, /* kill when we enumerate fsck errors */ + BCH_FS_NEED_ANOTHER_GC, /* errors: */ BCH_FS_ERROR, BCH_FS_TOPOLOGY_ERROR, BCH_FS_ERRORS_FIXED, BCH_FS_ERRORS_NOT_FIXED, - - /* misc: */ - BCH_FS_NEED_ANOTHER_GC, - BCH_FS_DELETED_NODES, }; struct btree_debug { diff --git a/fs/bcachefs/lru.c b/fs/bcachefs/lru.c index fe9d15742947..ce23b38382f5 100644 --- a/fs/bcachefs/lru.c +++ b/fs/bcachefs/lru.c @@ -204,7 +204,9 @@ int bch2_check_lrus(struct bch_fs *c, bool initial) for_each_btree_key(&trans, iter, BTREE_ID_lru, POS_MIN, BTREE_ITER_PREFETCH, k, ret) { - ret = __bch2_trans_do(&trans, NULL, NULL, 0, + ret = __bch2_trans_do(&trans, NULL, NULL, + BTREE_INSERT_NOFAIL| + BTREE_INSERT_LAZY_RW, bch2_check_lru_key(&trans, &iter, initial)); if (ret) break; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index e2474ff99702..5831ab53a982 100644 
--- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -994,7 +994,6 @@ static int bch2_fs_initialize_subvolumes(struct bch_fs *c) if (ret) return ret; - bkey_subvolume_init(&root_volume.k_i); root_volume.k.p.offset = BCACHEFS_ROOT_SUBVOL; root_volume.v.flags = 0; @@ -1096,6 +1095,12 @@ int bch2_fs_recovery(struct bch_fs *c) } } + if (c->opts.fsck && c->opts.norecovery) { + bch_err(c, "cannot select both norecovery and fsck"); + ret = -EINVAL; + goto err; + } + ret = bch2_blacklist_table_initialize(c); if (ret) { bch_err(c, "error initializing blacklist table"); @@ -1189,6 +1194,13 @@ use_clean: if (ret) goto err; + /* + * Skip past versions that might have possibly been used (as nonces), + * but hadn't had their pointers written: + */ + if (c->sb.encryption_type && !c->sb.clean) + atomic64_add(1 << 16, &c->key_version); + ret = read_btree_roots(c); if (ret) goto err; @@ -1211,12 +1223,7 @@ use_clean: goto err; bch_verbose(c, "stripes_read done"); - /* - * If we're not running fsck, this ensures bch2_fsck_err() calls are - * instead interpreted as bch2_inconsistent_err() calls: - */ - if (!c->opts.fsck) - set_bit(BCH_FS_FSCK_DONE, &c->flags); + bch2_stripes_heap_start(c); if (c->opts.fsck) { bool metadata_only = c->opts.norecovery; @@ -1228,6 +1235,8 @@ use_clean: goto err; bch_verbose(c, "done checking allocations"); + set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); + bch_info(c, "checking need_discard and freespace btrees"); err = "error checking need_discard and freespace btrees"; ret = bch2_check_alloc_info(c); @@ -1235,55 +1244,60 @@ use_clean: goto err; bch_verbose(c, "done checking need_discard and freespace btrees"); + set_bit(BCH_FS_MAY_GO_RW, &c->flags); + + bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr); + err = "journal replay failed"; + ret = bch2_journal_replay(c); + if (ret) + goto err; + if (c->opts.verbose || !c->sb.clean) + bch_info(c, "journal replay done"); + bch_info(c, "checking lrus"); err = "error checking lrus"; 
ret = bch2_check_lrus(c, true); if (ret) goto err; bch_verbose(c, "done checking lrus"); - } - bch2_stripes_heap_start(c); + set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); - set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); - set_bit(BCH_FS_MAY_GO_RW, &c->flags); - - /* - * Skip past versions that might have possibly been used (as nonces), - * but hadn't had their pointers written: - */ - if (c->sb.encryption_type && !c->sb.clean) - atomic64_add(1 << 16, &c->key_version); - - if (c->opts.norecovery) - goto out; - - bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr); - err = "journal replay failed"; - ret = bch2_journal_replay(c); - if (ret) - goto err; - if (c->opts.verbose || !c->sb.clean) - bch_info(c, "journal replay done"); - - err = "error initializing freespace"; - ret = bch2_fs_freespace_init(c); - if (ret) - goto err; - - if (c->opts.fsck) { bch_info(c, "checking alloc to lru refs"); err = "error checking alloc to lru refs"; ret = bch2_check_alloc_to_lru_refs(c); if (ret) goto err; + set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); ret = bch2_check_lrus(c, true); if (ret) goto err; bch_verbose(c, "done checking alloc to lru refs"); + } else { + set_bit(BCH_FS_MAY_GO_RW, &c->flags); + set_bit(BCH_FS_INITIAL_GC_DONE, &c->flags); + set_bit(BCH_FS_CHECK_LRUS_DONE, &c->flags); + set_bit(BCH_FS_CHECK_ALLOC_TO_LRU_REFS_DONE, &c->flags); + set_bit(BCH_FS_FSCK_DONE, &c->flags); + + if (c->opts.norecovery) + goto out; + + bch_verbose(c, "starting journal replay, %zu keys", c->journal_keys.nr); + err = "journal replay failed"; + ret = bch2_journal_replay(c); + if (ret) + goto err; + if (c->opts.verbose || !c->sb.clean) + bch_info(c, "journal replay done"); } + err = "error initializing freespace"; + ret = bch2_fs_freespace_init(c); + if (ret) + goto err; + if (c->sb.version < bcachefs_metadata_version_snapshot_2) { bch2_fs_lazy_rw(c); |