author     Kent Overstreet <kent.overstreet@gmail.com>    2021-03-24 18:02:16 -0400
committer  Kent Overstreet <kent.overstreet@linux.dev>    2023-10-22 17:08:57 -0400
commit     e751c01a8ee1ca934cc0953e2e77ad4ea3e64d5e
tree       9930602caa160b05f2e62925c86192ae1ab9bc31
parent     4cf91b0270dc16a6637db4c200c7fb745b941065
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key
in the btree code:
* bpos_successor() and bpos_predecessor() now include the snapshot field
  (see the sketch after this list)
* Keys in btrees that will be using snapshots (extents, inodes, dirents
and xattrs) now always have their snapshot field set to U32_MAX
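
For reference, a condensed standalone sketch of the new position helpers
(SPOS(), POS(), POS_MIN/POS_MAX and bpos_successor(), as added in
bcachefs_format.h and bkey.h in the diff below); stdint types and assert()
stand in for the kernel's __u64/__u32 and BUG():

  #include <assert.h>
  #include <stdint.h>

  struct bpos {
      uint64_t inode;
      uint64_t offset;
      uint32_t snapshot;
  };

  /* SPOS() takes an explicit snapshot; POS() keeps the old two-argument form */
  static inline struct bpos SPOS(uint64_t inode, uint64_t offset, uint32_t snapshot)
  {
      return (struct bpos) {
          .inode    = inode,
          .offset   = offset,
          .snapshot = snapshot,
      };
  }

  #define POS(_inode, _offset) SPOS(_inode, _offset, 0)
  /* KEY_INODE_MAX/KEY_OFFSET_MAX/KEY_SNAPSHOT_MAX are all-ones in the real header */
  #define POS_MIN              SPOS(0, 0, 0)
  #define POS_MAX              SPOS(UINT64_MAX, UINT64_MAX, UINT32_MAX)

  /* the snapshot field now participates in the rollover */
  static inline struct bpos bpos_successor(struct bpos p)
  {
      if (!++p.snapshot &&
          !++p.offset &&
          !++p.inode)
          assert(0);    /* BUG() in the kernel: POS_MAX has no successor */
      return p;
  }

bpos_predecessor() and the bpos_nosnap_*() variants follow the same pattern
in the other direction.
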
The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that
determines whether we're iterating over keys in all snapshots or not -
internally, this controls whether bkey_(successor|predecessor)
increment/decrement the snapshot field, or only the higher bits of the
key.
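
A minimal sketch of how the flag is applied, condensed from the new
iterator-level bkey_successor() in btree_iter.c; struct iter_sketch is an
illustrative stand-in carrying only the two fields the example needs:

  #include <assert.h>
  #include <stdint.h>

  struct bpos { uint64_t inode, offset; uint32_t snapshot; };

  #define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)

  struct iter_sketch {              /* stand-in for struct btree_iter */
      uint16_t flags;
      uint32_t snapshot;
  };

  static struct bpos bpos_successor(struct bpos p)
  {
      if (!++p.snapshot && !++p.offset && !++p.inode)
          assert(0);
      return p;
  }

  static struct bpos bpos_nosnap_successor(struct bpos p)
  {
      p.snapshot = 0;
      if (!++p.offset && !++p.inode)
          assert(0);
      return p;
  }

  /* iterator-level successor: does the snapshot field advance, or stay pinned? */
  static struct bpos bkey_successor(struct iter_sketch *iter, struct bpos p)
  {
      if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
          p = bpos_successor(p);        /* snapshot is part of the key */
      } else {
          p = bpos_nosnap_successor(p); /* step over every snapshot... */
          p.snapshot = iter->snapshot;  /* ...and stay in the iterator's one */
      }
      return p;
  }
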
We add a new member to struct btree_iter, iter->snapshot: when
BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always
equal iter->snapshot, which will be 0 for btrees that don't use
snapshots, and always U32_MAX for btrees that will use snapshots
(until we enable snapshot creation).
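
Roughly how that invariant is maintained, condensed from
__bch2_trans_get_iter() and bch2_btree_iter_set_pos() in the diff; the
*_sketch names are illustrative stand-ins, not the real API:

  #include <stdbool.h>
  #include <stdint.h>

  struct bpos { uint64_t inode, offset; uint32_t snapshot; };

  #define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)

  struct iter_sketch {              /* stand-in for struct btree_iter */
      uint16_t    flags;
      uint32_t    snapshot;         /* new member added by this patch */
      struct bpos pos;
  };

  /*
   * At iterator creation: unless ALL_SNAPSHOTS is set, the starting
   * position's snapshot is forced to U32_MAX for btrees that use snapshots
   * and 0 for those that don't, and iter->snapshot is taken from it.
   */
  static void iter_init_snapshot_sketch(struct iter_sketch *iter,
                                        struct bpos pos, bool has_snapshots)
  {
      if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
          pos.snapshot = has_snapshots ? UINT32_MAX : 0;
      iter->snapshot = pos.snapshot;
      iter->pos      = pos;
  }

  /* every later repositioning pins the snapshot back to iter->snapshot */
  static void iter_set_pos_sketch(struct iter_sketch *iter, struct bpos new_pos)
  {
      if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
          new_pos.snapshot = iter->snapshot;
      iter->pos = new_pos;
  }
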
This patch also introduces a new metadata version number, and compat
code for reading from/writing to older versions - this isn't a forced
upgrade (yet).
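
The version bump and, roughly, the compat rule being added (the helper name
here is hypothetical; the real logic lives in __bch2_bkey_compat() and
compat_btree_node() below and only applies to interior nodes and to btrees
that use snapshots):

  #include <stdint.h>

  /* new on-disk version numbers from bcachefs_format.h in this patch */
  enum {
      bcachefs_metadata_version_inode_btree_change = 11,
      bcachefs_metadata_version_snapshot           = 12,
      bcachefs_metadata_version_max                = 13,
  };

  /*
   * Condensed compat rule: metadata written before the snapshot version has
   * no meaningful snapshot field, so it is filled in with U32_MAX when read
   * and zeroed again when written back in the old format.
   */
  static void compat_snapshot_field(unsigned version, int write, uint32_t *snapshot)
  {
      if (version < bcachefs_metadata_version_snapshot)
          *snapshot = write ? 0 : UINT32_MAX;
  }

Because the rewrite happens in the compat path, filesystems at the older
versions stay readable and writable without a forced upgrade.
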
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--  fs/bcachefs/bcachefs_format.h        | 24
-rw-r--r--  fs/bcachefs/bkey.c                   | 17
-rw-r--r--  fs/bcachefs/bkey.h                   | 42
-rw-r--r--  fs/bcachefs/bkey_methods.c           | 36
-rw-r--r--  fs/bcachefs/bset.c                   |  2
-rw-r--r--  fs/bcachefs/btree_cache.c            |  2
-rw-r--r--  fs/bcachefs/btree_gc.c               |  8
-rw-r--r--  fs/bcachefs/btree_io.c               | 12
-rw-r--r--  fs/bcachefs/btree_io.h               | 26
-rw-r--r--  fs/bcachefs/btree_iter.c             | 79
-rw-r--r--  fs/bcachefs/btree_iter.h             |  3
-rw-r--r--  fs/bcachefs/btree_types.h            | 16
-rw-r--r--  fs/bcachefs/btree_update_interior.c  | 12
-rw-r--r--  fs/bcachefs/btree_update_leaf.c      | 14
-rw-r--r--  fs/bcachefs/debug.c                  |  6
-rw-r--r--  fs/bcachefs/extents.c                |  7
-rw-r--r--  fs/bcachefs/fsck.c                   |  1
-rw-r--r--  fs/bcachefs/inode.c                  |  1
-rw-r--r--  fs/bcachefs/io.c                     |  5
-rw-r--r--  fs/bcachefs/journal_io.c             |  2
-rw-r--r--  fs/bcachefs/recovery.c               |  8
-rw-r--r--  fs/bcachefs/tests.c                  |  1
22 files changed, 251 insertions(+), 73 deletions(-)
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 111f7d3c312e..2172d3cf3680 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -142,19 +142,18 @@ struct bpos { #define KEY_SNAPSHOT_MAX ((__u32)~0U) #define KEY_SIZE_MAX ((__u32)~0U) -static inline struct bpos POS(__u64 inode, __u64 offset) +static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot) { - struct bpos ret; - - ret.inode = inode; - ret.offset = offset; - ret.snapshot = 0; - - return ret; + return (struct bpos) { + .inode = inode, + .offset = offset, + .snapshot = snapshot, + }; } -#define POS_MIN POS(0, 0) -#define POS_MAX POS(KEY_INODE_MAX, KEY_OFFSET_MAX) +#define POS_MIN SPOS(0, 0, 0) +#define POS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX) +#define POS(_inode, _offset) SPOS(_inode, _offset, 0) /* Empty placeholder struct, for container_of() */ struct bch_val { @@ -1208,7 +1207,8 @@ enum bcachefs_metadata_version { bcachefs_metadata_version_new_versioning = 10, bcachefs_metadata_version_bkey_renumber = 10, bcachefs_metadata_version_inode_btree_change = 11, - bcachefs_metadata_version_max = 12, + bcachefs_metadata_version_snapshot = 12, + bcachefs_metadata_version_max = 13, }; #define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) @@ -1749,7 +1749,7 @@ struct btree_node { /* Closed interval: */ struct bpos min_key; struct bpos max_key; - struct bch_extent_ptr ptr; + struct bch_extent_ptr _ptr; /* not used anymore */ struct bkey_format format; union { diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index 8b2befac95d4..a0379f980f7e 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -617,15 +617,19 @@ const char *bch2_bkey_format_validate(struct bkey_format *f) return "incorrect number of fields"; for (i = 0; i < f->nr_fields; i++) { + unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i]; + u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1)); u64 field_offset = le64_to_cpu(f->field_offset[i]); - if (f->bits_per_field[i] > 64) + if (f->bits_per_field[i] > unpacked_bits) return "field too large"; - if (field_offset && - (f->bits_per_field[i] == 64 || - (field_offset + ((1ULL << f->bits_per_field[i]) - 1) < - field_offset))) + if ((f->bits_per_field[i] == unpacked_bits) && field_offset) + return "offset + bits overflow"; + + if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) & + unpacked_mask) < + field_offset) return "offset + bits overflow"; bits += f->bits_per_field[i]; @@ -1126,11 +1130,12 @@ void bch2_bkey_pack_test(void) struct bkey_packed p; struct bkey_format test_format = { - .key_u64s = 2, + .key_u64s = 3, .nr_fields = BKEY_NR_FIELDS, .bits_per_field = { 13, 64, + 32, }, }; diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index df23c5b48969..72b4267031d8 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -258,24 +258,46 @@ static inline unsigned bkey_format_key_bits(const struct bkey_format *format) format->bits_per_field[BKEY_FIELD_SNAPSHOT]; } -static inline struct bpos bkey_successor(struct bpos p) +static inline struct bpos bpos_successor(struct bpos p) { - struct bpos ret = p; + if (!++p.snapshot && + !++p.offset && + !++p.inode) + BUG(); - if (!++ret.offset) - BUG_ON(!++ret.inode); + return p; +} - return ret; +static inline struct bpos bpos_predecessor(struct bpos p) +{ + if (!p.snapshot-- && + !p.offset-- && + !p.inode--) + BUG(); + + return p; } -static inline struct bpos bkey_predecessor(struct bpos p) +static inline struct bpos 
bpos_nosnap_successor(struct bpos p) { - struct bpos ret = p; + p.snapshot = 0; - if (!ret.offset--) - BUG_ON(!ret.inode--); + if (!++p.offset && + !++p.inode) + BUG(); - return ret; + return p; +} + +static inline struct bpos bpos_nosnap_predecessor(struct bpos p) +{ + p.snapshot = 0; + + if (!p.offset-- && + !p.inode--) + BUG(); + + return p; } static inline u64 bkey_start_offset(const struct bkey *k) diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 5e7eadeb3b57..6fe95b802e13 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -119,10 +119,17 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, return "nonzero size field"; } - if (k.k->p.snapshot) + if (type != BKEY_TYPE_btree && + !btree_type_has_snapshots(type) && + k.k->p.snapshot) return "nonzero snapshot"; if (type != BKEY_TYPE_btree && + btree_type_has_snapshots(type) && + k.k->p.snapshot != U32_MAX) + return "invalid snapshot field"; + + if (type != BKEY_TYPE_btree && !bkey_cmp(k.k->p, POS_MAX)) return "POS_MAX key"; @@ -310,14 +317,15 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, const struct bkey_ops *ops; struct bkey uk; struct bkey_s u; + unsigned nr_compat = 5; int i; /* * Do these operations in reverse order in the write path: */ - for (i = 0; i < 4; i++) - switch (!write ? i : 3 - i) { + for (i = 0; i < nr_compat; i++) + switch (!write ? i : nr_compat - 1 - i) { case 0: if (big_endian != CPU_BIG_ENDIAN) bch2_bkey_swab_key(f, k); @@ -351,6 +359,28 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id, } break; case 3: + if (version < bcachefs_metadata_version_snapshot && + (level || btree_type_has_snapshots(btree_id))) { + struct bkey_i *u = packed_to_bkey(k); + + if (u) { + u->k.p.snapshot = write + ? 0 : U32_MAX; + } else { + u64 min_packed = f->field_offset[BKEY_FIELD_SNAPSHOT]; + u64 max_packed = min_packed + + ~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]); + + uk = __bch2_bkey_unpack_key(f, k); + uk.p.snapshot = write + ? min_packed : min_t(u64, U32_MAX, max_packed); + + BUG_ON(!bch2_bkey_pack_key(k, &uk, f)); + } + } + + break; + case 4: if (!bkey_packed(k)) { u = bkey_i_to_s(packed_to_bkey(k)); } else { diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c index 5746199dfafb..de4dc2fac1d6 100644 --- a/fs/bcachefs/bset.c +++ b/fs/bcachefs/bset.c @@ -1438,7 +1438,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter, * to the search key is going to have 0 sectors after the search key. 
* * But this does mean that we can't just search for - * bkey_successor(start_of_range) to get the first extent that overlaps with + * bpos_successor(start_of_range) to get the first extent that overlaps with * the range we want - if we're unlucky and there's an extent that ends * exactly where we searched, then there could be a deleted key at the same * position and we'd get that when we search instead of the preceding extent diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 63b8423fa87c..85ac08b9270a 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -1018,7 +1018,7 @@ out: if (sib != btree_prev_sib) swap(n1, n2); - if (bpos_cmp(bkey_successor(n1->key.k.p), + if (bpos_cmp(bpos_successor(n1->key.k.p), n2->data->min_key)) { char buf1[200], buf2[200]; diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 2710e4b35da3..842840664562 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -64,7 +64,7 @@ static int bch2_gc_check_topology(struct bch_fs *c, struct bpos node_end = b->data->max_key; struct bpos expected_start = bkey_deleted(&prev->k->k) ? node_start - : bkey_successor(prev->k->k.p); + : bpos_successor(prev->k->k.p); char buf1[200], buf2[200]; bool update_min = false; bool update_max = false; @@ -1187,7 +1187,9 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id) bch2_trans_init(&trans, c, 0, 0); iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN, - BTREE_ITER_PREFETCH); + BTREE_ITER_PREFETCH| + BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_ALL_SNAPSHOTS); while ((k = bch2_btree_iter_peek(iter)).k && !(ret = bkey_err(k))) { @@ -1405,7 +1407,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter, n1->key.k.p = n1->data->max_key = bkey_unpack_pos(n1, last); - n2->data->min_key = bkey_successor(n1->data->max_key); + n2->data->min_key = bpos_successor(n1->data->max_key); memcpy_u64s(vstruct_last(s1), s2->start, u64s); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 468b1a294ce9..bc09f9377425 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -612,12 +612,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca, BTREE_ERR_MUST_RETRY, c, ca, b, i, "incorrect level"); - if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) { - u64 *p = (u64 *) &bn->ptr; - - *p = swab64(*p); - } - if (!write) compat_btree_node(b->c.level, b->c.btree_id, version, BSET_BIG_ENDIAN(i), write, bn); @@ -1328,8 +1322,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b, if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_btree)) return -1; - ret = validate_bset(c, NULL, b, i, sectors, WRITE, false) ?: - validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false); + ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?: + validate_bset(c, NULL, b, i, sectors, WRITE, false); if (ret) { bch2_inconsistent_error(c); dump_stack(); @@ -1482,7 +1476,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, validate_before_checksum = true; /* validate_bset will be modifying: */ - if (le16_to_cpu(i->version) <= bcachefs_metadata_version_inode_btree_change) + if (le16_to_cpu(i->version) < bcachefs_metadata_version_current) validate_before_checksum = true; /* if we're going to be encrypting, check metadata validity first: */ diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index f155a6cc1755..9c14cd30a09e 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -189,8 +189,8 @@ void 
bch2_btree_flush_all_writes(struct bch_fs *); void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *); static inline void compat_bformat(unsigned level, enum btree_id btree_id, - unsigned version, unsigned big_endian, - int write, struct bkey_format *f) + unsigned version, unsigned big_endian, + int write, struct bkey_format *f) { if (version < bcachefs_metadata_version_inode_btree_change && btree_id == BTREE_ID_inodes) { @@ -199,6 +199,16 @@ static inline void compat_bformat(unsigned level, enum btree_id btree_id, swap(f->field_offset[BKEY_FIELD_INODE], f->field_offset[BKEY_FIELD_OFFSET]); } + + if (version < bcachefs_metadata_version_snapshot && + (level || btree_type_has_snapshots(btree_id))) { + u64 max_packed = + ~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]); + + f->field_offset[BKEY_FIELD_SNAPSHOT] = write + ? 0 + : U32_MAX - max_packed; + } } static inline void compat_bpos(unsigned level, enum btree_id btree_id, @@ -222,16 +232,24 @@ static inline void compat_btree_node(unsigned level, enum btree_id btree_id, btree_node_type_is_extents(btree_id) && bpos_cmp(bn->min_key, POS_MIN) && write) - bn->min_key = bkey_predecessor(bn->min_key); + bn->min_key = bpos_nosnap_predecessor(bn->min_key); + + if (version < bcachefs_metadata_version_snapshot && + write) + bn->max_key.snapshot = 0; compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key); compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key); + if (version < bcachefs_metadata_version_snapshot && + !write) + bn->max_key.snapshot = U32_MAX; + if (version < bcachefs_metadata_version_inode_btree_change && btree_node_type_is_extents(btree_id) && bpos_cmp(bn->min_key, POS_MIN) && !write) - bn->min_key = bkey_successor(bn->min_key); + bn->min_key = bpos_nosnap_successor(bn->min_key); } #endif /* _BCACHEFS_BTREE_IO_H */ diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 8c923aa01ea1..972486a1f724 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -18,6 +18,36 @@ static void btree_iter_set_search_pos(struct btree_iter *, struct bpos); +static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p) +{ + EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES); + + /* Are we iterating over keys in all snapshots? */ + if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) { + p = bpos_successor(p); + } else { + p = bpos_nosnap_successor(p); + p.snapshot = iter->snapshot; + } + + return p; +} + +static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p) +{ + EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES); + + /* Are we iterating over keys in all snapshots? 
*/ + if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) { + p = bpos_predecessor(p); + } else { + p = bpos_nosnap_predecessor(p); + p.snapshot = iter->snapshot; + } + + return p; +} + static inline bool is_btree_node(struct btree_iter *iter, unsigned l) { return l < BTREE_MAX_DEPTH && @@ -30,7 +60,7 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter) if ((iter->flags & BTREE_ITER_IS_EXTENTS) && bkey_cmp(pos, POS_MAX)) - pos = bkey_successor(pos); + pos = bkey_successor(iter, pos); return pos; } @@ -591,10 +621,24 @@ err: static void bch2_btree_iter_verify(struct btree_iter *iter) { + enum btree_iter_type type = btree_iter_type(iter); unsigned i; EBUG_ON(iter->btree_id >= BTREE_ID_NR); + BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && + iter->pos.snapshot != iter->snapshot); + + BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) && + (iter->flags & BTREE_ITER_ALL_SNAPSHOTS)); + + BUG_ON(type == BTREE_ITER_NODES && + !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)); + + BUG_ON(type != BTREE_ITER_NODES && + (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && + !btree_type_has_snapshots(iter->btree_id)); + bch2_btree_iter_verify_locks(iter); for (i = 0; i < BTREE_MAX_DEPTH; i++) @@ -605,6 +649,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter) { enum btree_iter_type type = btree_iter_type(iter); + BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) && + iter->pos.snapshot != iter->snapshot); + BUG_ON((type == BTREE_ITER_KEYS || type == BTREE_ITER_CACHED) && (bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 || @@ -1434,7 +1481,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter) * Haven't gotten to the end of the parent node: go back down to * the next child node */ - btree_iter_set_search_pos(iter, bkey_successor(iter->pos)); + btree_iter_set_search_pos(iter, bpos_successor(iter->pos)); /* Unlock to avoid screwing up our lock invariants: */ btree_node_unlock(iter, iter->level); @@ -1508,7 +1555,7 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter) bool ret = bpos_cmp(pos, POS_MAX) != 0; if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) - pos = bkey_successor(pos); + pos = bkey_successor(iter, pos); bch2_btree_iter_set_pos(iter, pos); return ret; } @@ -1519,7 +1566,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter) bool ret = bpos_cmp(pos, POS_MIN) != 0; if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) - pos = bkey_predecessor(pos); + pos = bkey_predecessor(iter, pos); bch2_btree_iter_set_pos(iter, pos); return ret; } @@ -1535,7 +1582,7 @@ static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter) * btree, in that case we want iter->pos to reflect that: */ if (ret) - btree_iter_set_search_pos(iter, bkey_successor(next_pos)); + btree_iter_set_search_pos(iter, bpos_successor(next_pos)); else bch2_btree_iter_set_pos(iter, POS_MAX); @@ -1548,7 +1595,7 @@ static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter) bool ret = bpos_cmp(next_pos, POS_MIN) != 0; if (ret) - btree_iter_set_search_pos(iter, bkey_predecessor(next_pos)); + btree_iter_set_search_pos(iter, bpos_predecessor(next_pos)); else bch2_btree_iter_set_pos(iter, POS_MIN); @@ -1594,13 +1641,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bool wi k = btree_iter_level_peek(iter, &iter->l[0]); if (next_update && - bkey_cmp(next_update->k.p, iter->real_pos) <= 0) + bpos_cmp(next_update->k.p, iter->real_pos) <= 0) k = bkey_i_to_s_c(next_update); if (likely(k.k)) { if (bkey_deleted(k.k)) { 
btree_iter_set_search_pos(iter, - bkey_successor(k.k->p)); + bkey_successor(iter, k.k->p)); continue; } @@ -1739,7 +1786,7 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter) if (iter->pos.inode == KEY_INODE_MAX) return bkey_s_c_null; - bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos)); + bch2_btree_iter_set_pos(iter, bkey_successor(iter, iter->pos)); } pos = iter->pos; @@ -1973,6 +2020,14 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans, { struct btree_iter *iter, *best = NULL; + if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES && + !btree_type_has_snapshots(btree_id)) + flags &= ~BTREE_ITER_ALL_SNAPSHOTS; + + if (!(flags & BTREE_ITER_ALL_SNAPSHOTS)) + pos.snapshot = btree_type_has_snapshots(btree_id) + ? U32_MAX : 0; + /* We always want a fresh iterator for node iterators: */ if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES) goto alloc_iter; @@ -2007,11 +2062,14 @@ alloc_iter: if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES && btree_node_type_is_extents(btree_id) && - !(flags & BTREE_ITER_NOT_EXTENTS)) + !(flags & BTREE_ITER_NOT_EXTENTS) && + !(flags & BTREE_ITER_ALL_SNAPSHOTS)) flags |= BTREE_ITER_IS_EXTENTS; iter->flags = flags; + iter->snapshot = pos.snapshot; + if (!(iter->flags & BTREE_ITER_INTENT)) bch2_btree_iter_downgrade(iter); else if (!iter->locks_want) @@ -2034,6 +2092,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans, __bch2_trans_get_iter(trans, btree_id, pos, BTREE_ITER_NODES| BTREE_ITER_NOT_EXTENTS| + BTREE_ITER_ALL_SNAPSHOTS| flags); unsigned i; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 176661b3b879..7585f989ad50 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -172,6 +172,9 @@ bool bch2_btree_iter_rewind(struct btree_iter *); static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos) { + if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)) + new_pos.snapshot = iter->snapshot; + bkey_init(&iter->k); iter->k.p = iter->pos = new_pos; } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index bcd8db34d7ee..0bcf17159744 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -216,6 +216,7 @@ enum btree_iter_type { #define BTREE_ITER_CACHED_NOFILL (1 << 9) #define BTREE_ITER_CACHED_NOCREATE (1 << 10) #define BTREE_ITER_NOT_EXTENTS (1 << 11) +#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12) enum btree_iter_uptodate { BTREE_ITER_UPTODATE = 0, @@ -245,6 +246,8 @@ struct btree_iter { /* what we're searching for/what the iterator actually points to: */ struct bpos real_pos; struct bpos pos_after_commit; + /* When we're filtering by snapshot, the snapshot ID we're looking for: */ + unsigned snapshot; u16 flags; u8 idx; @@ -329,7 +332,7 @@ struct bkey_cached { struct btree_insert_entry { unsigned trigger_flags; u8 bkey_type; - u8 btree_id; + enum btree_id btree_id:8; u8 level; unsigned trans_triggers_run:1; unsigned is_extent:1; @@ -610,6 +613,17 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter) (BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \ BTREE_NODE_TYPE_HAS_MEM_TRIGGERS) +#define BTREE_ID_HAS_SNAPSHOTS \ + ((1U << BTREE_ID_extents)| \ + (1U << BTREE_ID_inodes)| \ + (1U << BTREE_ID_dirents)| \ + (1U << BTREE_ID_xattrs)) + +static inline bool btree_type_has_snapshots(enum btree_id id) +{ + return (1 << id) & BTREE_ID_HAS_SNAPSHOTS; +} + enum btree_trigger_flags { __BTREE_TRIGGER_NORUN, /* Don't run triggers at all */ diff --git a/fs/bcachefs/btree_update_interior.c 
b/fs/bcachefs/btree_update_interior.c index ddb0d03e268c..aad262937645 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -69,7 +69,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b) break; } - next_node = bkey_successor(k.k->p); + next_node = bpos_successor(k.k->p); } #endif } @@ -289,7 +289,6 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev b->data->flags = 0; SET_BTREE_NODE_ID(b->data, as->btree_id); SET_BTREE_NODE_LEVEL(b->data, level); - b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr; if (b->key.k.type == KEY_TYPE_btree_ptr_v2) { struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key); @@ -1100,6 +1099,7 @@ static struct btree *__btree_split_node(struct btree_update *as, struct btree *n2; struct bset *set1, *set2; struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL; + struct bpos n1_pos; n2 = bch2_btree_node_alloc(as, n1->c.level); bch2_btree_update_add_new_node(as, n2); @@ -1146,8 +1146,12 @@ static struct btree *__btree_split_node(struct btree_update *as, n1->nr.packed_keys = nr_packed; n1->nr.unpacked_keys = nr_unpacked; - btree_set_max(n1, bkey_unpack_pos(n1, prev)); - btree_set_min(n2, bkey_successor(n1->key.k.p)); + n1_pos = bkey_unpack_pos(n1, prev); + if (as->c->sb.version < bcachefs_metadata_version_snapshot) + n1_pos.snapshot = U32_MAX; + + btree_set_max(n1, n1_pos); + btree_set_min(n2, bpos_successor(n1->key.k.p)); bch2_bkey_format_init(&s); bch2_bkey_format_add_pos(&s, n2->data->min_key); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index a32c8f34039c..88da89e8b170 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -223,9 +223,17 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans, { struct bch_fs *c = trans->c; - BUG_ON(bch2_debug_check_bkeys && - bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type)); - BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos)); + if (bch2_debug_check_bkeys) { + const char *invalid = bch2_bkey_invalid(c, + bkey_i_to_s_c(i->k), i->bkey_type); + if (invalid) { + char buf[200]; + + bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k)); + panic("invalid bkey %s on insert: %s\n", buf, invalid); + } + } + BUG_ON(!i->is_extent && bpos_cmp(i->k->k.p, i->iter->real_pos)); BUG_ON(i->level != i->iter->level); BUG_ON(i->btree_id != i->iter->btree_id); } diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 059972e5a124..111310344cec 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -222,7 +222,9 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, bch2_trans_init(&trans, i->c, 0, 0); - iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH); + iter = bch2_trans_get_iter(&trans, i->id, i->from, + BTREE_ITER_PREFETCH| + BTREE_ITER_ALL_SNAPSHOTS); k = bch2_btree_iter_peek(iter); while (k.k && !(err = bkey_err(k))) { @@ -290,7 +292,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf, * all nodes, meh */ i->from = bpos_cmp(POS_MAX, b->key.k.p) - ? bkey_successor(b->key.k.p) + ? 
bpos_successor(b->key.k.p) : b->key.k.p; if (!i->size) diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 7ac3d7587655..1f28dea26ca2 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -179,7 +179,8 @@ const char *bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k) if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) return "value too big"; - if (bp.v->min_key.snapshot) + if (c->sb.version < bcachefs_metadata_version_snapshot && + bp.v->min_key.snapshot) return "invalid min_key.snapshot"; return bch2_bkey_ptrs_invalid(c, k); @@ -211,8 +212,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version, btree_node_type_is_extents(btree_id) && bkey_cmp(bp.v->min_key, POS_MIN)) bp.v->min_key = write - ? bkey_predecessor(bp.v->min_key) - : bkey_successor(bp.v->min_key); + ? bpos_nosnap_predecessor(bp.v->min_key) + : bpos_nosnap_successor(bp.v->min_key); } /* KEY_TYPE_extent: */ diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index ffb30ef7ef00..a3acae0ddfa9 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1318,6 +1318,7 @@ static int check_inode(struct btree_trans *trans, struct bkey_inode_buf p; bch2_inode_pack(c, &p, &u); + p.inode.k.p = iter->pos; ret = __bch2_trans_do(trans, NULL, NULL, BTREE_INSERT_NOFAIL| diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index f676daf404a2..7044ab73831c 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -332,6 +332,7 @@ int bch2_inode_write(struct btree_trans *trans, return PTR_ERR(inode_p); bch2_inode_pack(trans->c, inode_p, inode); + inode_p->inode.k.p.snapshot = iter->snapshot; bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0); return 0; } diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 5ee9a6c2f4fd..9c46f67c0d8e 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -332,6 +332,9 @@ int bch2_extent_update(struct btree_trans *trans, if (i_sectors_delta || new_i_size) { bch2_inode_pack(trans->c, &inode_p, &inode_u); + + inode_p.inode.k.p.snapshot = iter->snapshot; + bch2_trans_update(trans, inode_iter, &inode_p.inode.k_i, 0); } @@ -447,6 +450,8 @@ int bch2_write_index_default(struct bch_write_op *op) k = bch2_keylist_front(keys); + k->k.p.snapshot = iter->snapshot; + bch2_bkey_buf_realloc(&sk, c, k->k.u64s); bkey_copy(sk.k, k); bch2_cut_front(iter->pos, sk.k); diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 7783a874640a..4ab9cebee218 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1449,7 +1449,7 @@ void bch2_journal_write(struct closure *cl) if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) validate_before_checksum = true; - if (le32_to_cpu(jset->version) <= bcachefs_metadata_version_inode_btree_change) + if (le32_to_cpu(jset->version) < bcachefs_metadata_version_current) validate_before_checksum = true; if (validate_before_checksum && diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 596f7c1e4245..a3a6abb88d6f 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -998,6 +998,13 @@ int bch2_fs_recovery(struct bch_fs *c) goto err; } + if (!(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) { + bch_err(c, "filesystem may have incompatible bkey formats; run fsck from the compat branch to fix"); + ret = -EINVAL; + goto err; + + } + if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) { bch_info(c, "alloc_v2 feature bit not set, fsck required"); c->opts.fsck = true; @@ -1340,6 +1347,7 @@ int bch2_fs_initialize(struct bch_fs *c) 
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL); root_inode.bi_inum = BCACHEFS_ROOT_INO; bch2_inode_pack(c, &packed_inode, &root_inode); + packed_inode.inode.k.p.snapshot = U32_MAX; err = "error creating root directory"; ret = bch2_btree_insert(c, BTREE_ID_inodes, diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c index 286587a118fe..3de48c593963 100644 --- a/fs/bcachefs/tests.c +++ b/fs/bcachefs/tests.c @@ -483,6 +483,7 @@ static int rand_insert(struct bch_fs *c, u64 nr) for (i = 0; i < nr; i++) { bkey_cookie_init(&k.k_i); k.k.p.offset = test_rand(); + k.k.p.snapshot = U32_MAX; ret = __bch2_trans_do(&trans, NULL, NULL, 0, __bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i)); |