summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@gmail.com>2021-03-24 18:02:16 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2023-10-22 17:08:57 -0400
commite751c01a8ee1ca934cc0953e2e77ad4ea3e64d5e (patch)
tree9930602caa160b05f2e62925c86192ae1ab9bc31
parent4cf91b0270dc16a6637db4c200c7fb745b941065 (diff)
bcachefs: Start using bpos.snapshot field
This patch starts treating the bpos.snapshot field like part of the key in the btree code: * bpos_successor() and bpos_predecessor() now include the snapshot field * Keys in btrees that will be using snapshots (extents, inodes, dirents and xattrs) now always have their snapshot field set to U32_MAX The btree iterator code gets a new flag, BTREE_ITER_ALL_SNAPSHOTS, that determines whether we're iterating over keys in all snapshots or not - internally, this controlls whether bkey_(successor|predecessor) increment/decrement the snapshot field, or only the higher bits of the key. We add a new member to struct btree_iter, iter->snapshot: when BTREE_ITER_ALL_SNAPSHOTS is not set, iter->pos.snapshot should always equal iter->snapshot, which will be 0 for btrees that don't use snapshots, and alsways U32_MAX for btrees that will use snapshots (until we enable snapshot creation). This patch also introduces a new metadata version number, and compat code for reading from/writing to older versions - this isn't a forced upgrade (yet). Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com> Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--fs/bcachefs/bcachefs_format.h24
-rw-r--r--fs/bcachefs/bkey.c17
-rw-r--r--fs/bcachefs/bkey.h42
-rw-r--r--fs/bcachefs/bkey_methods.c36
-rw-r--r--fs/bcachefs/bset.c2
-rw-r--r--fs/bcachefs/btree_cache.c2
-rw-r--r--fs/bcachefs/btree_gc.c8
-rw-r--r--fs/bcachefs/btree_io.c12
-rw-r--r--fs/bcachefs/btree_io.h26
-rw-r--r--fs/bcachefs/btree_iter.c79
-rw-r--r--fs/bcachefs/btree_iter.h3
-rw-r--r--fs/bcachefs/btree_types.h16
-rw-r--r--fs/bcachefs/btree_update_interior.c12
-rw-r--r--fs/bcachefs/btree_update_leaf.c14
-rw-r--r--fs/bcachefs/debug.c6
-rw-r--r--fs/bcachefs/extents.c7
-rw-r--r--fs/bcachefs/fsck.c1
-rw-r--r--fs/bcachefs/inode.c1
-rw-r--r--fs/bcachefs/io.c5
-rw-r--r--fs/bcachefs/journal_io.c2
-rw-r--r--fs/bcachefs/recovery.c8
-rw-r--r--fs/bcachefs/tests.c1
22 files changed, 251 insertions, 73 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 111f7d3c312e..2172d3cf3680 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -142,19 +142,18 @@ struct bpos {
#define KEY_SNAPSHOT_MAX ((__u32)~0U)
#define KEY_SIZE_MAX ((__u32)~0U)
-static inline struct bpos POS(__u64 inode, __u64 offset)
+static inline struct bpos SPOS(__u64 inode, __u64 offset, __u32 snapshot)
{
- struct bpos ret;
-
- ret.inode = inode;
- ret.offset = offset;
- ret.snapshot = 0;
-
- return ret;
+ return (struct bpos) {
+ .inode = inode,
+ .offset = offset,
+ .snapshot = snapshot,
+ };
}
-#define POS_MIN POS(0, 0)
-#define POS_MAX POS(KEY_INODE_MAX, KEY_OFFSET_MAX)
+#define POS_MIN SPOS(0, 0, 0)
+#define POS_MAX SPOS(KEY_INODE_MAX, KEY_OFFSET_MAX, KEY_SNAPSHOT_MAX)
+#define POS(_inode, _offset) SPOS(_inode, _offset, 0)
/* Empty placeholder struct, for container_of() */
struct bch_val {
@@ -1208,7 +1207,8 @@ enum bcachefs_metadata_version {
bcachefs_metadata_version_new_versioning = 10,
bcachefs_metadata_version_bkey_renumber = 10,
bcachefs_metadata_version_inode_btree_change = 11,
- bcachefs_metadata_version_max = 12,
+ bcachefs_metadata_version_snapshot = 12,
+ bcachefs_metadata_version_max = 13,
};
#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1)
@@ -1749,7 +1749,7 @@ struct btree_node {
/* Closed interval: */
struct bpos min_key;
struct bpos max_key;
- struct bch_extent_ptr ptr;
+ struct bch_extent_ptr _ptr; /* not used anymore */
struct bkey_format format;
union {
diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c
index 8b2befac95d4..a0379f980f7e 100644
--- a/fs/bcachefs/bkey.c
+++ b/fs/bcachefs/bkey.c
@@ -617,15 +617,19 @@ const char *bch2_bkey_format_validate(struct bkey_format *f)
return "incorrect number of fields";
for (i = 0; i < f->nr_fields; i++) {
+ unsigned unpacked_bits = bch2_bkey_format_current.bits_per_field[i];
+ u64 unpacked_mask = ~((~0ULL << 1) << (unpacked_bits - 1));
u64 field_offset = le64_to_cpu(f->field_offset[i]);
- if (f->bits_per_field[i] > 64)
+ if (f->bits_per_field[i] > unpacked_bits)
return "field too large";
- if (field_offset &&
- (f->bits_per_field[i] == 64 ||
- (field_offset + ((1ULL << f->bits_per_field[i]) - 1) <
- field_offset)))
+ if ((f->bits_per_field[i] == unpacked_bits) && field_offset)
+ return "offset + bits overflow";
+
+ if (((field_offset + ((1ULL << f->bits_per_field[i]) - 1)) &
+ unpacked_mask) <
+ field_offset)
return "offset + bits overflow";
bits += f->bits_per_field[i];
@@ -1126,11 +1130,12 @@ void bch2_bkey_pack_test(void)
struct bkey_packed p;
struct bkey_format test_format = {
- .key_u64s = 2,
+ .key_u64s = 3,
.nr_fields = BKEY_NR_FIELDS,
.bits_per_field = {
13,
64,
+ 32,
},
};
diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h
index df23c5b48969..72b4267031d8 100644
--- a/fs/bcachefs/bkey.h
+++ b/fs/bcachefs/bkey.h
@@ -258,24 +258,46 @@ static inline unsigned bkey_format_key_bits(const struct bkey_format *format)
format->bits_per_field[BKEY_FIELD_SNAPSHOT];
}
-static inline struct bpos bkey_successor(struct bpos p)
+static inline struct bpos bpos_successor(struct bpos p)
{
- struct bpos ret = p;
+ if (!++p.snapshot &&
+ !++p.offset &&
+ !++p.inode)
+ BUG();
- if (!++ret.offset)
- BUG_ON(!++ret.inode);
+ return p;
+}
- return ret;
+static inline struct bpos bpos_predecessor(struct bpos p)
+{
+ if (!p.snapshot-- &&
+ !p.offset-- &&
+ !p.inode--)
+ BUG();
+
+ return p;
}
-static inline struct bpos bkey_predecessor(struct bpos p)
+static inline struct bpos bpos_nosnap_successor(struct bpos p)
{
- struct bpos ret = p;
+ p.snapshot = 0;
- if (!ret.offset--)
- BUG_ON(!ret.inode--);
+ if (!++p.offset &&
+ !++p.inode)
+ BUG();
- return ret;
+ return p;
+}
+
+static inline struct bpos bpos_nosnap_predecessor(struct bpos p)
+{
+ p.snapshot = 0;
+
+ if (!p.offset-- &&
+ !p.inode--)
+ BUG();
+
+ return p;
}
static inline u64 bkey_start_offset(const struct bkey *k)
diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c
index 5e7eadeb3b57..6fe95b802e13 100644
--- a/fs/bcachefs/bkey_methods.c
+++ b/fs/bcachefs/bkey_methods.c
@@ -119,10 +119,17 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k,
return "nonzero size field";
}
- if (k.k->p.snapshot)
+ if (type != BKEY_TYPE_btree &&
+ !btree_type_has_snapshots(type) &&
+ k.k->p.snapshot)
return "nonzero snapshot";
if (type != BKEY_TYPE_btree &&
+ btree_type_has_snapshots(type) &&
+ k.k->p.snapshot != U32_MAX)
+ return "invalid snapshot field";
+
+ if (type != BKEY_TYPE_btree &&
!bkey_cmp(k.k->p, POS_MAX))
return "POS_MAX key";
@@ -310,14 +317,15 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
const struct bkey_ops *ops;
struct bkey uk;
struct bkey_s u;
+ unsigned nr_compat = 5;
int i;
/*
* Do these operations in reverse order in the write path:
*/
- for (i = 0; i < 4; i++)
- switch (!write ? i : 3 - i) {
+ for (i = 0; i < nr_compat; i++)
+ switch (!write ? i : nr_compat - 1 - i) {
case 0:
if (big_endian != CPU_BIG_ENDIAN)
bch2_bkey_swab_key(f, k);
@@ -351,6 +359,28 @@ void __bch2_bkey_compat(unsigned level, enum btree_id btree_id,
}
break;
case 3:
+ if (version < bcachefs_metadata_version_snapshot &&
+ (level || btree_type_has_snapshots(btree_id))) {
+ struct bkey_i *u = packed_to_bkey(k);
+
+ if (u) {
+ u->k.p.snapshot = write
+ ? 0 : U32_MAX;
+ } else {
+ u64 min_packed = f->field_offset[BKEY_FIELD_SNAPSHOT];
+ u64 max_packed = min_packed +
+ ~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
+
+ uk = __bch2_bkey_unpack_key(f, k);
+ uk.p.snapshot = write
+ ? min_packed : min_t(u64, U32_MAX, max_packed);
+
+ BUG_ON(!bch2_bkey_pack_key(k, &uk, f));
+ }
+ }
+
+ break;
+ case 4:
if (!bkey_packed(k)) {
u = bkey_i_to_s(packed_to_bkey(k));
} else {
diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
index 5746199dfafb..de4dc2fac1d6 100644
--- a/fs/bcachefs/bset.c
+++ b/fs/bcachefs/bset.c
@@ -1438,7 +1438,7 @@ static void btree_node_iter_init_pack_failed(struct btree_node_iter *iter,
* to the search key is going to have 0 sectors after the search key.
*
* But this does mean that we can't just search for
- * bkey_successor(start_of_range) to get the first extent that overlaps with
+ * bpos_successor(start_of_range) to get the first extent that overlaps with
* the range we want - if we're unlucky and there's an extent that ends
* exactly where we searched, then there could be a deleted key at the same
* position and we'd get that when we search instead of the preceding extent
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 63b8423fa87c..85ac08b9270a 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -1018,7 +1018,7 @@ out:
if (sib != btree_prev_sib)
swap(n1, n2);
- if (bpos_cmp(bkey_successor(n1->key.k.p),
+ if (bpos_cmp(bpos_successor(n1->key.k.p),
n2->data->min_key)) {
char buf1[200], buf2[200];
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 2710e4b35da3..842840664562 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -64,7 +64,7 @@ static int bch2_gc_check_topology(struct bch_fs *c,
struct bpos node_end = b->data->max_key;
struct bpos expected_start = bkey_deleted(&prev->k->k)
? node_start
- : bkey_successor(prev->k->k.p);
+ : bpos_successor(prev->k->k.p);
char buf1[200], buf2[200];
bool update_min = false;
bool update_max = false;
@@ -1187,7 +1187,9 @@ static int bch2_gc_btree_gens(struct bch_fs *c, enum btree_id btree_id)
bch2_trans_init(&trans, c, 0, 0);
iter = bch2_trans_get_iter(&trans, btree_id, POS_MIN,
- BTREE_ITER_PREFETCH);
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_ALL_SNAPSHOTS);
while ((k = bch2_btree_iter_peek(iter)).k &&
!(ret = bkey_err(k))) {
@@ -1405,7 +1407,7 @@ static void bch2_coalesce_nodes(struct bch_fs *c, struct btree_iter *iter,
n1->key.k.p = n1->data->max_key =
bkey_unpack_pos(n1, last);
- n2->data->min_key = bkey_successor(n1->data->max_key);
+ n2->data->min_key = bpos_successor(n1->data->max_key);
memcpy_u64s(vstruct_last(s1),
s2->start, u64s);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 468b1a294ce9..bc09f9377425 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -612,12 +612,6 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
BTREE_ERR_MUST_RETRY, c, ca, b, i,
"incorrect level");
- if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) {
- u64 *p = (u64 *) &bn->ptr;
-
- *p = swab64(*p);
- }
-
if (!write)
compat_btree_node(b->c.level, b->c.btree_id, version,
BSET_BIG_ENDIAN(i), write, bn);
@@ -1328,8 +1322,8 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_btree))
return -1;
- ret = validate_bset(c, NULL, b, i, sectors, WRITE, false) ?:
- validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false);
+ ret = validate_bset_keys(c, b, i, &whiteout_u64s, WRITE, false) ?:
+ validate_bset(c, NULL, b, i, sectors, WRITE, false);
if (ret) {
bch2_inconsistent_error(c);
dump_stack();
@@ -1482,7 +1476,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
validate_before_checksum = true;
/* validate_bset will be modifying: */
- if (le16_to_cpu(i->version) <= bcachefs_metadata_version_inode_btree_change)
+ if (le16_to_cpu(i->version) < bcachefs_metadata_version_current)
validate_before_checksum = true;
/* if we're going to be encrypting, check metadata validity first: */
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index f155a6cc1755..9c14cd30a09e 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -189,8 +189,8 @@ void bch2_btree_flush_all_writes(struct bch_fs *);
void bch2_dirty_btree_nodes_to_text(struct printbuf *, struct bch_fs *);
static inline void compat_bformat(unsigned level, enum btree_id btree_id,
- unsigned version, unsigned big_endian,
- int write, struct bkey_format *f)
+ unsigned version, unsigned big_endian,
+ int write, struct bkey_format *f)
{
if (version < bcachefs_metadata_version_inode_btree_change &&
btree_id == BTREE_ID_inodes) {
@@ -199,6 +199,16 @@ static inline void compat_bformat(unsigned level, enum btree_id btree_id,
swap(f->field_offset[BKEY_FIELD_INODE],
f->field_offset[BKEY_FIELD_OFFSET]);
}
+
+ if (version < bcachefs_metadata_version_snapshot &&
+ (level || btree_type_has_snapshots(btree_id))) {
+ u64 max_packed =
+ ~(~0ULL << f->bits_per_field[BKEY_FIELD_SNAPSHOT]);
+
+ f->field_offset[BKEY_FIELD_SNAPSHOT] = write
+ ? 0
+ : U32_MAX - max_packed;
+ }
}
static inline void compat_bpos(unsigned level, enum btree_id btree_id,
@@ -222,16 +232,24 @@ static inline void compat_btree_node(unsigned level, enum btree_id btree_id,
btree_node_type_is_extents(btree_id) &&
bpos_cmp(bn->min_key, POS_MIN) &&
write)
- bn->min_key = bkey_predecessor(bn->min_key);
+ bn->min_key = bpos_nosnap_predecessor(bn->min_key);
+
+ if (version < bcachefs_metadata_version_snapshot &&
+ write)
+ bn->max_key.snapshot = 0;
compat_bpos(level, btree_id, version, big_endian, write, &bn->min_key);
compat_bpos(level, btree_id, version, big_endian, write, &bn->max_key);
+ if (version < bcachefs_metadata_version_snapshot &&
+ !write)
+ bn->max_key.snapshot = U32_MAX;
+
if (version < bcachefs_metadata_version_inode_btree_change &&
btree_node_type_is_extents(btree_id) &&
bpos_cmp(bn->min_key, POS_MIN) &&
!write)
- bn->min_key = bkey_successor(bn->min_key);
+ bn->min_key = bpos_nosnap_successor(bn->min_key);
}
#endif /* _BCACHEFS_BTREE_IO_H */
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 8c923aa01ea1..972486a1f724 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -18,6 +18,36 @@
static void btree_iter_set_search_pos(struct btree_iter *, struct bpos);
+static inline struct bpos bkey_successor(struct btree_iter *iter, struct bpos p)
+{
+ EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);
+
+ /* Are we iterating over keys in all snapshots? */
+ if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
+ p = bpos_successor(p);
+ } else {
+ p = bpos_nosnap_successor(p);
+ p.snapshot = iter->snapshot;
+ }
+
+ return p;
+}
+
+static inline struct bpos bkey_predecessor(struct btree_iter *iter, struct bpos p)
+{
+ EBUG_ON(btree_iter_type(iter) == BTREE_ITER_NODES);
+
+ /* Are we iterating over keys in all snapshots? */
+ if (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) {
+ p = bpos_predecessor(p);
+ } else {
+ p = bpos_nosnap_predecessor(p);
+ p.snapshot = iter->snapshot;
+ }
+
+ return p;
+}
+
static inline bool is_btree_node(struct btree_iter *iter, unsigned l)
{
return l < BTREE_MAX_DEPTH &&
@@ -30,7 +60,7 @@ static inline struct bpos btree_iter_search_key(struct btree_iter *iter)
if ((iter->flags & BTREE_ITER_IS_EXTENTS) &&
bkey_cmp(pos, POS_MAX))
- pos = bkey_successor(pos);
+ pos = bkey_successor(iter, pos);
return pos;
}
@@ -591,10 +621,24 @@ err:
static void bch2_btree_iter_verify(struct btree_iter *iter)
{
+ enum btree_iter_type type = btree_iter_type(iter);
unsigned i;
EBUG_ON(iter->btree_id >= BTREE_ID_NR);
+ BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
+ iter->pos.snapshot != iter->snapshot);
+
+ BUG_ON((iter->flags & BTREE_ITER_IS_EXTENTS) &&
+ (iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
+
+ BUG_ON(type == BTREE_ITER_NODES &&
+ !(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
+
+ BUG_ON(type != BTREE_ITER_NODES &&
+ (iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
+ !btree_type_has_snapshots(iter->btree_id));
+
bch2_btree_iter_verify_locks(iter);
for (i = 0; i < BTREE_MAX_DEPTH; i++)
@@ -605,6 +649,9 @@ static void bch2_btree_iter_verify_entry_exit(struct btree_iter *iter)
{
enum btree_iter_type type = btree_iter_type(iter);
+ BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS) &&
+ iter->pos.snapshot != iter->snapshot);
+
BUG_ON((type == BTREE_ITER_KEYS ||
type == BTREE_ITER_CACHED) &&
(bkey_cmp(iter->pos, bkey_start_pos(&iter->k)) < 0 ||
@@ -1434,7 +1481,7 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
* Haven't gotten to the end of the parent node: go back down to
* the next child node
*/
- btree_iter_set_search_pos(iter, bkey_successor(iter->pos));
+ btree_iter_set_search_pos(iter, bpos_successor(iter->pos));
/* Unlock to avoid screwing up our lock invariants: */
btree_node_unlock(iter, iter->level);
@@ -1508,7 +1555,7 @@ inline bool bch2_btree_iter_advance(struct btree_iter *iter)
bool ret = bpos_cmp(pos, POS_MAX) != 0;
if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
- pos = bkey_successor(pos);
+ pos = bkey_successor(iter, pos);
bch2_btree_iter_set_pos(iter, pos);
return ret;
}
@@ -1519,7 +1566,7 @@ inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
bool ret = bpos_cmp(pos, POS_MIN) != 0;
if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
- pos = bkey_predecessor(pos);
+ pos = bkey_predecessor(iter, pos);
bch2_btree_iter_set_pos(iter, pos);
return ret;
}
@@ -1535,7 +1582,7 @@ static inline bool btree_iter_set_pos_to_next_leaf(struct btree_iter *iter)
* btree, in that case we want iter->pos to reflect that:
*/
if (ret)
- btree_iter_set_search_pos(iter, bkey_successor(next_pos));
+ btree_iter_set_search_pos(iter, bpos_successor(next_pos));
else
bch2_btree_iter_set_pos(iter, POS_MAX);
@@ -1548,7 +1595,7 @@ static inline bool btree_iter_set_pos_to_prev_leaf(struct btree_iter *iter)
bool ret = bpos_cmp(next_pos, POS_MIN) != 0;
if (ret)
- btree_iter_set_search_pos(iter, bkey_predecessor(next_pos));
+ btree_iter_set_search_pos(iter, bpos_predecessor(next_pos));
else
bch2_btree_iter_set_pos(iter, POS_MIN);
@@ -1594,13 +1641,13 @@ static inline struct bkey_s_c __btree_iter_peek(struct btree_iter *iter, bool wi
k = btree_iter_level_peek(iter, &iter->l[0]);
if (next_update &&
- bkey_cmp(next_update->k.p, iter->real_pos) <= 0)
+ bpos_cmp(next_update->k.p, iter->real_pos) <= 0)
k = bkey_i_to_s_c(next_update);
if (likely(k.k)) {
if (bkey_deleted(k.k)) {
btree_iter_set_search_pos(iter,
- bkey_successor(k.k->p));
+ bkey_successor(iter, k.k->p));
continue;
}
@@ -1739,7 +1786,7 @@ __bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
if (iter->pos.inode == KEY_INODE_MAX)
return bkey_s_c_null;
- bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos));
+ bch2_btree_iter_set_pos(iter, bkey_successor(iter, iter->pos));
}
pos = iter->pos;
@@ -1973,6 +2020,14 @@ struct btree_iter *__bch2_trans_get_iter(struct btree_trans *trans,
{
struct btree_iter *iter, *best = NULL;
+ if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
+ !btree_type_has_snapshots(btree_id))
+ flags &= ~BTREE_ITER_ALL_SNAPSHOTS;
+
+ if (!(flags & BTREE_ITER_ALL_SNAPSHOTS))
+ pos.snapshot = btree_type_has_snapshots(btree_id)
+ ? U32_MAX : 0;
+
/* We always want a fresh iterator for node iterators: */
if ((flags & BTREE_ITER_TYPE) == BTREE_ITER_NODES)
goto alloc_iter;
@@ -2007,11 +2062,14 @@ alloc_iter:
if ((flags & BTREE_ITER_TYPE) != BTREE_ITER_NODES &&
btree_node_type_is_extents(btree_id) &&
- !(flags & BTREE_ITER_NOT_EXTENTS))
+ !(flags & BTREE_ITER_NOT_EXTENTS) &&
+ !(flags & BTREE_ITER_ALL_SNAPSHOTS))
flags |= BTREE_ITER_IS_EXTENTS;
iter->flags = flags;
+ iter->snapshot = pos.snapshot;
+
if (!(iter->flags & BTREE_ITER_INTENT))
bch2_btree_iter_downgrade(iter);
else if (!iter->locks_want)
@@ -2034,6 +2092,7 @@ struct btree_iter *bch2_trans_get_node_iter(struct btree_trans *trans,
__bch2_trans_get_iter(trans, btree_id, pos,
BTREE_ITER_NODES|
BTREE_ITER_NOT_EXTENTS|
+ BTREE_ITER_ALL_SNAPSHOTS|
flags);
unsigned i;
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 176661b3b879..7585f989ad50 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -172,6 +172,9 @@ bool bch2_btree_iter_rewind(struct btree_iter *);
static inline void bch2_btree_iter_set_pos(struct btree_iter *iter, struct bpos new_pos)
{
+ if (!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS))
+ new_pos.snapshot = iter->snapshot;
+
bkey_init(&iter->k);
iter->k.p = iter->pos = new_pos;
}
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index bcd8db34d7ee..0bcf17159744 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -216,6 +216,7 @@ enum btree_iter_type {
#define BTREE_ITER_CACHED_NOFILL (1 << 9)
#define BTREE_ITER_CACHED_NOCREATE (1 << 10)
#define BTREE_ITER_NOT_EXTENTS (1 << 11)
+#define BTREE_ITER_ALL_SNAPSHOTS (1 << 12)
enum btree_iter_uptodate {
BTREE_ITER_UPTODATE = 0,
@@ -245,6 +246,8 @@ struct btree_iter {
/* what we're searching for/what the iterator actually points to: */
struct bpos real_pos;
struct bpos pos_after_commit;
+ /* When we're filtering by snapshot, the snapshot ID we're looking for: */
+ unsigned snapshot;
u16 flags;
u8 idx;
@@ -329,7 +332,7 @@ struct bkey_cached {
struct btree_insert_entry {
unsigned trigger_flags;
u8 bkey_type;
- u8 btree_id;
+ enum btree_id btree_id:8;
u8 level;
unsigned trans_triggers_run:1;
unsigned is_extent:1;
@@ -610,6 +613,17 @@ static inline bool btree_iter_is_extents(struct btree_iter *iter)
(BTREE_NODE_TYPE_HAS_TRANS_TRIGGERS| \
BTREE_NODE_TYPE_HAS_MEM_TRIGGERS)
+#define BTREE_ID_HAS_SNAPSHOTS \
+ ((1U << BTREE_ID_extents)| \
+ (1U << BTREE_ID_inodes)| \
+ (1U << BTREE_ID_dirents)| \
+ (1U << BTREE_ID_xattrs))
+
+static inline bool btree_type_has_snapshots(enum btree_id id)
+{
+ return (1 << id) & BTREE_ID_HAS_SNAPSHOTS;
+}
+
enum btree_trigger_flags {
__BTREE_TRIGGER_NORUN, /* Don't run triggers at all */
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index ddb0d03e268c..aad262937645 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -69,7 +69,7 @@ static void btree_node_interior_verify(struct bch_fs *c, struct btree *b)
break;
}
- next_node = bkey_successor(k.k->p);
+ next_node = bpos_successor(k.k->p);
}
#endif
}
@@ -289,7 +289,6 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
b->data->flags = 0;
SET_BTREE_NODE_ID(b->data, as->btree_id);
SET_BTREE_NODE_LEVEL(b->data, level);
- b->data->ptr = bch2_bkey_ptrs_c(bkey_i_to_s_c(&b->key)).start->ptr;
if (b->key.k.type == KEY_TYPE_btree_ptr_v2) {
struct bkey_i_btree_ptr_v2 *bp = bkey_i_to_btree_ptr_v2(&b->key);
@@ -1100,6 +1099,7 @@ static struct btree *__btree_split_node(struct btree_update *as,
struct btree *n2;
struct bset *set1, *set2;
struct bkey_packed *k, *set2_start, *set2_end, *out, *prev = NULL;
+ struct bpos n1_pos;
n2 = bch2_btree_node_alloc(as, n1->c.level);
bch2_btree_update_add_new_node(as, n2);
@@ -1146,8 +1146,12 @@ static struct btree *__btree_split_node(struct btree_update *as,
n1->nr.packed_keys = nr_packed;
n1->nr.unpacked_keys = nr_unpacked;
- btree_set_max(n1, bkey_unpack_pos(n1, prev));
- btree_set_min(n2, bkey_successor(n1->key.k.p));
+ n1_pos = bkey_unpack_pos(n1, prev);
+ if (as->c->sb.version < bcachefs_metadata_version_snapshot)
+ n1_pos.snapshot = U32_MAX;
+
+ btree_set_max(n1, n1_pos);
+ btree_set_min(n2, bpos_successor(n1->key.k.p));
bch2_bkey_format_init(&s);
bch2_bkey_format_add_pos(&s, n2->data->min_key);
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index a32c8f34039c..88da89e8b170 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -223,9 +223,17 @@ static inline void btree_insert_entry_checks(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
- BUG_ON(bch2_debug_check_bkeys &&
- bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), i->bkey_type));
- BUG_ON(bpos_cmp(i->k->k.p, i->iter->real_pos));
+ if (bch2_debug_check_bkeys) {
+ const char *invalid = bch2_bkey_invalid(c,
+ bkey_i_to_s_c(i->k), i->bkey_type);
+ if (invalid) {
+ char buf[200];
+
+ bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(i->k));
+ panic("invalid bkey %s on insert: %s\n", buf, invalid);
+ }
+ }
+ BUG_ON(!i->is_extent && bpos_cmp(i->k->k.p, i->iter->real_pos));
BUG_ON(i->level != i->iter->level);
BUG_ON(i->btree_id != i->iter->btree_id);
}
diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c
index 059972e5a124..111310344cec 100644
--- a/fs/bcachefs/debug.c
+++ b/fs/bcachefs/debug.c
@@ -222,7 +222,9 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf,
bch2_trans_init(&trans, i->c, 0, 0);
- iter = bch2_trans_get_iter(&trans, i->id, i->from, BTREE_ITER_PREFETCH);
+ iter = bch2_trans_get_iter(&trans, i->id, i->from,
+ BTREE_ITER_PREFETCH|
+ BTREE_ITER_ALL_SNAPSHOTS);
k = bch2_btree_iter_peek(iter);
while (k.k && !(err = bkey_err(k))) {
@@ -290,7 +292,7 @@ static ssize_t bch2_read_btree_formats(struct file *file, char __user *buf,
* all nodes, meh
*/
i->from = bpos_cmp(POS_MAX, b->key.k.p)
- ? bkey_successor(b->key.k.p)
+ ? bpos_successor(b->key.k.p)
: b->key.k.p;
if (!i->size)
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 7ac3d7587655..1f28dea26ca2 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -179,7 +179,8 @@ const char *bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k)
if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX)
return "value too big";
- if (bp.v->min_key.snapshot)
+ if (c->sb.version < bcachefs_metadata_version_snapshot &&
+ bp.v->min_key.snapshot)
return "invalid min_key.snapshot";
return bch2_bkey_ptrs_invalid(c, k);
@@ -211,8 +212,8 @@ void bch2_btree_ptr_v2_compat(enum btree_id btree_id, unsigned version,
btree_node_type_is_extents(btree_id) &&
bkey_cmp(bp.v->min_key, POS_MIN))
bp.v->min_key = write
- ? bkey_predecessor(bp.v->min_key)
- : bkey_successor(bp.v->min_key);
+ ? bpos_nosnap_predecessor(bp.v->min_key)
+ : bpos_nosnap_successor(bp.v->min_key);
}
/* KEY_TYPE_extent: */
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index ffb30ef7ef00..a3acae0ddfa9 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -1318,6 +1318,7 @@ static int check_inode(struct btree_trans *trans,
struct bkey_inode_buf p;
bch2_inode_pack(c, &p, &u);
+ p.inode.k.p = iter->pos;
ret = __bch2_trans_do(trans, NULL, NULL,
BTREE_INSERT_NOFAIL|
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index f676daf404a2..7044ab73831c 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -332,6 +332,7 @@ int bch2_inode_write(struct btree_trans *trans,
return PTR_ERR(inode_p);
bch2_inode_pack(trans->c, inode_p, inode);
+ inode_p->inode.k.p.snapshot = iter->snapshot;
bch2_trans_update(trans, iter, &inode_p->inode.k_i, 0);
return 0;
}
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 5ee9a6c2f4fd..9c46f67c0d8e 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -332,6 +332,9 @@ int bch2_extent_update(struct btree_trans *trans,
if (i_sectors_delta || new_i_size) {
bch2_inode_pack(trans->c, &inode_p, &inode_u);
+
+ inode_p.inode.k.p.snapshot = iter->snapshot;
+
bch2_trans_update(trans, inode_iter,
&inode_p.inode.k_i, 0);
}
@@ -447,6 +450,8 @@ int bch2_write_index_default(struct bch_write_op *op)
k = bch2_keylist_front(keys);
+ k->k.p.snapshot = iter->snapshot;
+
bch2_bkey_buf_realloc(&sk, c, k->k.u64s);
bkey_copy(sk.k, k);
bch2_cut_front(iter->pos, sk.k);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 7783a874640a..4ab9cebee218 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1449,7 +1449,7 @@ void bch2_journal_write(struct closure *cl)
if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)))
validate_before_checksum = true;
- if (le32_to_cpu(jset->version) <= bcachefs_metadata_version_inode_btree_change)
+ if (le32_to_cpu(jset->version) < bcachefs_metadata_version_current)
validate_before_checksum = true;
if (validate_before_checksum &&
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 596f7c1e4245..a3a6abb88d6f 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -998,6 +998,13 @@ int bch2_fs_recovery(struct bch_fs *c)
goto err;
}
+ if (!(c->sb.compat & (1ULL << BCH_COMPAT_bformat_overflow_done))) {
+ bch_err(c, "filesystem may have incompatible bkey formats; run fsck from the compat branch to fix");
+ ret = -EINVAL;
+ goto err;
+
+ }
+
if (!(c->sb.features & (1ULL << BCH_FEATURE_alloc_v2))) {
bch_info(c, "alloc_v2 feature bit not set, fsck required");
c->opts.fsck = true;
@@ -1340,6 +1347,7 @@ int bch2_fs_initialize(struct bch_fs *c)
S_IFDIR|S_IRWXU|S_IRUGO|S_IXUGO, 0, NULL);
root_inode.bi_inum = BCACHEFS_ROOT_INO;
bch2_inode_pack(c, &packed_inode, &root_inode);
+ packed_inode.inode.k.p.snapshot = U32_MAX;
err = "error creating root directory";
ret = bch2_btree_insert(c, BTREE_ID_inodes,
diff --git a/fs/bcachefs/tests.c b/fs/bcachefs/tests.c
index 286587a118fe..3de48c593963 100644
--- a/fs/bcachefs/tests.c
+++ b/fs/bcachefs/tests.c
@@ -483,6 +483,7 @@ static int rand_insert(struct bch_fs *c, u64 nr)
for (i = 0; i < nr; i++) {
bkey_cookie_init(&k.k_i);
k.k.p.offset = test_rand();
+ k.k.p.snapshot = U32_MAX;
ret = __bch2_trans_do(&trans, NULL, NULL, 0,
__bch2_btree_insert(&trans, BTREE_ID_xattrs, &k.k_i));