From 26609b619fa2301eb7eb5855a7005d99f8a07a73 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 1 Nov 2018 15:10:01 -0400 Subject: bcachefs: Make bkey types globally unique This lets us get rid of a lot of extra switch statements: in a lot of places we dispatch first on the btree node type and then on the key type, so this is a nice cleanup across a lot of code. Also improve the on-disk format versioning. Signed-off-by: Kent Overstreet --- fs/bcachefs/acl.c | 6 +- fs/bcachefs/alloc_background.c | 28 +- fs/bcachefs/alloc_background.h | 2 +- fs/bcachefs/alloc_foreground.c | 11 +- fs/bcachefs/alloc_foreground.h | 2 +- fs/bcachefs/bcachefs.h | 1 + fs/bcachefs/bcachefs_format.h | 184 +++---- fs/bcachefs/bkey.c | 2 +- fs/bcachefs/bkey.h | 88 ++- fs/bcachefs/bkey_methods.c | 218 +++++--- fs/bcachefs/bkey_methods.h | 65 +-- fs/bcachefs/bkey_sort.c | 21 +- fs/bcachefs/bkey_sort.h | 9 +- fs/bcachefs/bset.h | 6 +- fs/bcachefs/btree_cache.c | 18 +- fs/bcachefs/btree_cache.h | 6 +- fs/bcachefs/btree_gc.c | 197 ++----- fs/bcachefs/btree_gc.h | 8 +- fs/bcachefs/btree_io.c | 97 ++-- fs/bcachefs/btree_iter.c | 4 +- fs/bcachefs/btree_types.h | 35 +- fs/bcachefs/btree_update.h | 2 +- fs/bcachefs/btree_update_interior.c | 56 +- fs/bcachefs/btree_update_leaf.c | 10 +- fs/bcachefs/buckets.c | 222 ++++---- fs/bcachefs/buckets.h | 4 +- fs/bcachefs/debug.c | 6 +- fs/bcachefs/dirent.c | 72 +-- fs/bcachefs/dirent.h | 2 +- fs/bcachefs/ec.c | 60 +- fs/bcachefs/ec.h | 10 +- fs/bcachefs/extents.c | 1031 ++++++++++++++++------------------- fs/bcachefs/extents.h | 529 ++++++++++-------- fs/bcachefs/fs-io.c | 16 +- fs/bcachefs/fs.c | 12 +- fs/bcachefs/fsck.c | 28 +- fs/bcachefs/inode.c | 108 ++-- fs/bcachefs/inode.h | 12 +- fs/bcachefs/io.c | 18 +- fs/bcachefs/journal_io.c | 58 +- fs/bcachefs/migrate.c | 30 +- fs/bcachefs/move.c | 58 +- fs/bcachefs/move.h | 2 +- fs/bcachefs/movinggc.c | 34 +- fs/bcachefs/opts.h | 3 + fs/bcachefs/quota.c | 56 +- fs/bcachefs/quota.h | 8 +- fs/bcachefs/rebalance.c | 36 +- fs/bcachefs/recovery.c | 23 +- fs/bcachefs/replicas.c | 96 ++-- fs/bcachefs/replicas.h | 5 +- fs/bcachefs/str_hash.h | 9 +- fs/bcachefs/super-io.c | 51 +- fs/bcachefs/super-io.h | 2 + fs/bcachefs/super.c | 2 +- fs/bcachefs/sysfs.c | 2 +- fs/bcachefs/trace.h | 2 +- fs/bcachefs/xattr.c | 102 ++-- fs/bcachefs/xattr.h | 2 +- 59 files changed, 1777 insertions(+), 2010 deletions(-) diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index eb6fa4d7c1f6..bcfc9fdce35e 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -24,9 +24,9 @@ static inline int acl_to_xattr_type(int type) { switch (type) { case ACL_TYPE_ACCESS: - return BCH_XATTR_INDEX_POSIX_ACL_ACCESS; + return KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS; case ACL_TYPE_DEFAULT: - return BCH_XATTR_INDEX_POSIX_ACL_DEFAULT; + return KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT; default: BUG(); } @@ -355,7 +355,7 @@ int bch2_acl_chmod(struct btree_trans *trans, iter = bch2_hash_lookup(trans, bch2_xattr_hash_desc, &inode->ei_str_hash, inode->v.i_ino, - &X_SEARCH(BCH_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), + &X_SEARCH(KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS, "", 0), BTREE_ITER_INTENT); if (IS_ERR(iter)) return PTR_ERR(iter) != -ENOENT ?
PTR_ERR(iter) : 0; diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 390b008b0200..885aff511f97 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -76,22 +76,15 @@ static unsigned bch_alloc_val_u64s(const struct bch_alloc *a) const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) { + struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); + if (k.k->p.inode >= c->sb.nr_devices || !c->devs[k.k->p.inode]) return "invalid device"; - switch (k.k->type) { - case BCH_ALLOC: { - struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); - - /* allow for unknown fields */ - if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v)) - return "incorrect value size"; - break; - } - default: - return "invalid type"; - } + /* allow for unknown fields */ + if (bkey_val_u64s(a.k) < bch_alloc_val_u64s(a.v)) + return "incorrect value size"; return NULL; } @@ -99,14 +92,9 @@ const char *bch2_alloc_invalid(const struct bch_fs *c, struct bkey_s_c k) void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - switch (k.k->type) { - case BCH_ALLOC: { - struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); + struct bkey_s_c_alloc a = bkey_s_c_to_alloc(k); - pr_buf(out, "gen %u", a.v->gen); - break; - } - } + pr_buf(out, "gen %u", a.v->gen); } static inline unsigned get_alloc_field(const u8 **p, unsigned bytes) @@ -158,7 +146,7 @@ static void bch2_alloc_read_key(struct bch_fs *c, struct bkey_s_c k) struct bucket *g; const u8 *d; - if (k.k->type != BCH_ALLOC) + if (k.k->type != KEY_TYPE_alloc) return; a = bkey_s_c_to_alloc(k); diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 59b6a5f2f890..8ced4e845281 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -11,7 +11,7 @@ const char *bch2_alloc_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_alloc_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_alloc_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_alloc (struct bkey_ops) { \ .key_invalid = bch2_alloc_invalid, \ .val_to_text = bch2_alloc_to_text, \ } diff --git a/fs/bcachefs/alloc_foreground.c b/fs/bcachefs/alloc_foreground.c index 6e5f6e57da56..ddcf2c407764 100644 --- a/fs/bcachefs/alloc_foreground.c +++ b/fs/bcachefs/alloc_foreground.c @@ -923,7 +923,8 @@ err: * as allocated out of @ob */ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, - struct bkey_i_extent *e, unsigned sectors) + struct bkey_i *k, unsigned sectors) + { struct open_bucket *ob; unsigned i; @@ -935,13 +936,11 @@ void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp, struct bch_dev *ca = bch_dev_bkey_exists(c, ob->ptr.dev); struct bch_extent_ptr tmp = ob->ptr; - EBUG_ON(bch2_extent_has_device(extent_i_to_s_c(e), ob->ptr.dev)); - - tmp.cached = bkey_extent_is_cached(&e->k) || - (!ca->mi.durability && wp->type == BCH_DATA_USER); + tmp.cached = !ca->mi.durability && + wp->type == BCH_DATA_USER; tmp.offset += ca->mi.bucket_size - ob->sectors_free; - extent_ptr_append(e, tmp); + bch2_bkey_append_ptr(k, tmp); BUG_ON(sectors > ob->sectors_free); ob->sectors_free -= sectors; diff --git a/fs/bcachefs/alloc_foreground.h b/fs/bcachefs/alloc_foreground.h index c71cf7381729..94389052fa94 100644 --- a/fs/bcachefs/alloc_foreground.h +++ b/fs/bcachefs/alloc_foreground.h @@ -101,7 +101,7 @@ struct write_point *bch2_alloc_sectors_start(struct bch_fs *, struct closure *); void bch2_alloc_sectors_append_ptrs(struct bch_fs *, struct 
write_point *, - struct bkey_i_extent *, unsigned); + struct bkey_i *, unsigned); void bch2_alloc_sectors_done(struct bch_fs *, struct write_point *); void bch2_open_buckets_stop_dev(struct bch_fs *, struct bch_dev *, diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index 258a67d4437b..cd2fff851bbe 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -541,6 +541,7 @@ struct bch_fs { __uuid_t uuid; __uuid_t user_uuid; + u16 version; u16 encoded_extent_max; u8 nr_devices; diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index a00e77fa1d37..801156b74335 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -307,15 +307,6 @@ static inline void bkey_init(struct bkey *k) #define __BKEY_PADDED(key, pad) \ struct { struct bkey_i key; __u64 key ## _pad[pad]; } -#define BKEY_VAL_TYPE(name, nr) \ -struct bkey_i_##name { \ - union { \ - struct bkey k; \ - struct bkey_i k_i; \ - }; \ - struct bch_##name v; \ -} - /* * - DELETED keys are used internally to mark keys that should be ignored but * override keys in composition order. Their version number is ignored. @@ -330,19 +321,37 @@ struct bkey_i_##name { \ * by new writes or cluster-wide GC. Node repair can also overwrite them with * the same or a more recent version number, but not with an older version * number. + * + * - WHITEOUT: for hash table btrees */ -#define KEY_TYPE_DELETED 0 -#define KEY_TYPE_DISCARD 1 -#define KEY_TYPE_ERROR 2 -#define KEY_TYPE_COOKIE 3 -#define KEY_TYPE_PERSISTENT_DISCARD 4 -#define KEY_TYPE_GENERIC_NR 128 +#define BCH_BKEY_TYPES() \ + x(deleted, 0) \ + x(discard, 1) \ + x(error, 2) \ + x(cookie, 3) \ + x(whiteout, 4) \ + x(btree_ptr, 5) \ + x(extent, 6) \ + x(reservation, 7) \ + x(inode, 8) \ + x(inode_generation, 9) \ + x(dirent, 10) \ + x(xattr, 11) \ + x(alloc, 12) \ + x(quota, 13) \ + x(stripe, 14) + +enum bch_bkey_type { +#define x(name, nr) KEY_TYPE_##name = nr, + BCH_BKEY_TYPES() +#undef x + KEY_TYPE_MAX, +}; struct bch_cookie { struct bch_val v; __le64 cookie; }; -BKEY_VAL_TYPE(cookie, KEY_TYPE_COOKIE); /* Extents */ @@ -620,21 +629,12 @@ union bch_extent_entry { #undef x }; -enum { - BCH_EXTENT = 128, - - /* - * This is kind of a hack, we're overloading the type for a boolean that - * really should be part of the value - BCH_EXTENT and BCH_EXTENT_CACHED - * have the same value type: - */ - BCH_EXTENT_CACHED = 129, +struct bch_btree_ptr { + struct bch_val v; - /* - * Persistent reservation: - */ - BCH_RESERVATION = 130, -}; + __u64 _data[0]; + struct bch_extent_ptr start[]; +} __attribute__((packed, aligned(8))); struct bch_extent { struct bch_val v; @@ -642,7 +642,6 @@ struct bch_extent { __u64 _data[0]; union bch_extent_entry start[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(extent, BCH_EXTENT); struct bch_reservation { struct bch_val v; @@ -651,7 +650,6 @@ struct bch_reservation { __u8 nr_replicas; __u8 pad[3]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(reservation, BCH_RESERVATION); /* Maximum size (in u64s) a single pointer could be: */ #define BKEY_EXTENT_PTR_U64s_MAX\ @@ -679,12 +677,6 @@ BKEY_VAL_TYPE(reservation, BCH_RESERVATION); #define BCACHEFS_ROOT_INO 4096 -enum bch_inode_types { - BCH_INODE_FS = 128, - BCH_INODE_BLOCKDEV = 129, - BCH_INODE_GENERATION = 130, -}; - struct bch_inode { struct bch_val v; @@ -693,7 +685,6 @@ struct bch_inode { __le16 bi_mode; __u8 fields[0]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(inode, BCH_INODE_FS); struct bch_inode_generation { struct bch_val v; @@ 
-701,7 +692,6 @@ struct bch_inode_generation { __le32 bi_generation; __le32 pad; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(inode_generation, BCH_INODE_GENERATION); #define BCH_INODE_FIELDS() \ BCH_INODE_FIELD(bi_atime, 64) \ @@ -766,24 +756,6 @@ enum { LE32_BITMASK(INODE_STR_HASH, struct bch_inode, bi_flags, 20, 24); LE32_BITMASK(INODE_NR_FIELDS, struct bch_inode, bi_flags, 24, 32); -struct bch_inode_blockdev { - struct bch_val v; - - __le64 i_size; - __le64 i_flags; - - /* Seconds: */ - __le64 i_ctime; - __le64 i_mtime; - - __uuid_t i_uuid; - __u8 i_label[32]; -} __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV); - -/* Thin provisioned volume, or cache for another block device? */ -LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1) - /* Dirents */ /* @@ -797,11 +769,6 @@ LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1) * collision: */ -enum { - BCH_DIRENT = 128, - BCH_DIRENT_WHITEOUT = 129, -}; - struct bch_dirent { struct bch_val v; @@ -816,7 +783,6 @@ struct bch_dirent { __u8 d_name[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(dirent, BCH_DIRENT); #define BCH_NAME_MAX (U8_MAX * sizeof(u64) - \ sizeof(struct bkey) - \ @@ -825,16 +791,11 @@ BKEY_VAL_TYPE(dirent, BCH_DIRENT); /* Xattrs */ -enum { - BCH_XATTR = 128, - BCH_XATTR_WHITEOUT = 129, -}; - -#define BCH_XATTR_INDEX_USER 0 -#define BCH_XATTR_INDEX_POSIX_ACL_ACCESS 1 -#define BCH_XATTR_INDEX_POSIX_ACL_DEFAULT 2 -#define BCH_XATTR_INDEX_TRUSTED 3 -#define BCH_XATTR_INDEX_SECURITY 4 +#define KEY_TYPE_XATTR_INDEX_USER 0 +#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS 1 +#define KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT 2 +#define KEY_TYPE_XATTR_INDEX_TRUSTED 3 +#define KEY_TYPE_XATTR_INDEX_SECURITY 4 struct bch_xattr { struct bch_val v; @@ -843,14 +804,9 @@ struct bch_xattr { __le16 x_val_len; __u8 x_name[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(xattr, BCH_XATTR); /* Bucket/allocation information: */ -enum { - BCH_ALLOC = 128, -}; - enum { BCH_ALLOC_FIELD_READ_TIME = 0, BCH_ALLOC_FIELD_WRITE_TIME = 1, @@ -862,14 +818,9 @@ struct bch_alloc { __u8 gen; __u8 data[]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(alloc, BCH_ALLOC); /* Quotas: */ -enum { - BCH_QUOTA = 128, -}; - enum quota_types { QTYP_USR = 0, QTYP_GRP = 1, @@ -892,14 +843,9 @@ struct bch_quota { struct bch_val v; struct bch_quota_counter c[Q_COUNTERS]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(quota, BCH_QUOTA); /* Erasure coding */ -enum { - BCH_STRIPE = 128, -}; - struct bch_stripe { struct bch_val v; __le16 sectors; @@ -913,7 +859,6 @@ struct bch_stripe { struct bch_extent_ptr ptrs[0]; } __attribute__((packed, aligned(8))); -BKEY_VAL_TYPE(stripe, BCH_STRIPE); /* Optional/variable size superblock sections: */ @@ -1149,15 +1094,21 @@ struct bch_sb_field_clean { /* Superblock: */ /* - * Version 8: BCH_SB_ENCODED_EXTENT_MAX_BITS - * BCH_MEMBER_DATA_ALLOWED - * Version 9: incompatible extent nonce change + * New versioning scheme: + * One common version number for all on disk data structures - superblock, btree + * nodes, journal entries */ +#define BCH_JSET_VERSION_OLD 2 +#define BCH_BSET_VERSION_OLD 3 + +enum bcachefs_metadata_version { + bcachefs_metadata_version_min = 9, + bcachefs_metadata_version_new_versioning = 10, + bcachefs_metadata_version_bkey_renumber = 10, + bcachefs_metadata_version_max = 11, +}; -#define BCH_SB_VERSION_MIN 7 -#define BCH_SB_VERSION_EXTENT_MAX 8 -#define BCH_SB_VERSION_EXTENT_NONCE_V1 9 -#define BCH_SB_VERSION_MAX 9 
+#define bcachefs_metadata_version_current (bcachefs_metadata_version_max - 1) #define BCH_SB_SECTOR 8 #define BCH_SB_MEMBERS_MAX 64 /* XXX kill */ @@ -1176,6 +1127,9 @@ struct bch_sb_layout { /* * @offset - sector where this sb was written * @version - on disk format version + * @version_min - Oldest metadata version this filesystem contains; so we can + * safely drop compatibility code and refuse to mount filesystems + * we'd need it for * @magic - identifies as a bcachefs superblock (BCACHE_MAGIC) * @seq - incremented each time superblock is written * @uuid - used for generating various magic numbers and identifying @@ -1369,11 +1323,6 @@ static inline __u64 __bset_magic(struct bch_sb *sb) /* Journal */ -#define BCACHE_JSET_VERSION_UUIDv1 1 -#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ -#define BCACHE_JSET_VERSION_JKEYS 2 -#define BCACHE_JSET_VERSION 2 - #define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) #define BCH_JSET_ENTRY_TYPES() \ @@ -1453,35 +1402,26 @@ LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); /* Btree: */ -#define DEFINE_BCH_BTREE_IDS() \ - DEF_BTREE_ID(EXTENTS, 0, "extents") \ - DEF_BTREE_ID(INODES, 1, "inodes") \ - DEF_BTREE_ID(DIRENTS, 2, "dirents") \ - DEF_BTREE_ID(XATTRS, 3, "xattrs") \ - DEF_BTREE_ID(ALLOC, 4, "alloc") \ - DEF_BTREE_ID(QUOTAS, 5, "quotas") \ - DEF_BTREE_ID(EC, 6, "erasure_coding") - -#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val, +#define BCH_BTREE_IDS() \ + x(EXTENTS, 0, "extents") \ + x(INODES, 1, "inodes") \ + x(DIRENTS, 2, "dirents") \ + x(XATTRS, 3, "xattrs") \ + x(ALLOC, 4, "alloc") \ + x(QUOTAS, 5, "quotas") \ + x(EC, 6, "erasure_coding") enum btree_id { - DEFINE_BCH_BTREE_IDS() +#define x(kwd, val, name) BTREE_ID_##kwd = val, + BCH_BTREE_IDS() +#undef x BTREE_ID_NR }; -#undef DEF_BTREE_ID - #define BTREE_MAX_DEPTH 4U /* Btree nodes */ -/* Version 1: Seed pointer into btree node checksum - */ -#define BCACHE_BSET_CSUM 1 -#define BCACHE_BSET_KEY_v1 2 -#define BCACHE_BSET_JOURNAL_SEQ 3 -#define BCACHE_BSET_VERSION 3 - /* * Btree nodes * diff --git a/fs/bcachefs/bkey.c b/fs/bcachefs/bkey.c index d7e022ba2027..d35cdde299c4 100644 --- a/fs/bcachefs/bkey.c +++ b/fs/bcachefs/bkey.c @@ -488,7 +488,7 @@ enum bkey_pack_pos_ret bch2_bkey_pack_pos_lossy(struct bkey_packed *out, pack_state_finish(&state, out); out->u64s = f->key_u64s; out->format = KEY_FORMAT_LOCAL_BTREE; - out->type = KEY_TYPE_DELETED; + out->type = KEY_TYPE_deleted; #ifdef CONFIG_BCACHEFS_DEBUG if (exact) { diff --git a/fs/bcachefs/bkey.h b/fs/bcachefs/bkey.h index 9679631a7e89..44044fcd6f9f 100644 --- a/fs/bcachefs/bkey.h +++ b/fs/bcachefs/bkey.h @@ -61,10 +61,12 @@ static inline void set_bkey_val_bytes(struct bkey *k, unsigned bytes) k->u64s = BKEY_U64s + DIV_ROUND_UP(bytes, sizeof(u64)); } -#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_DELETED) +#define bkey_val_end(_k) vstruct_idx((_k).v, bkey_val_u64s((_k).k)) + +#define bkey_deleted(_k) ((_k)->type == KEY_TYPE_deleted) #define bkey_whiteout(_k) \ - ((_k)->type == KEY_TYPE_DELETED || (_k)->type == KEY_TYPE_DISCARD) + ((_k)->type == KEY_TYPE_deleted || (_k)->type == KEY_TYPE_discard) #define bkey_packed_typecheck(_k) \ ({ \ @@ -439,7 +441,15 @@ static inline struct bkey_s_c bkey_i_to_s_c(const struct bkey_i *k) * bkey_i_extent to a bkey_i - since that's always safe, instead of conversion * functions. 
*/ -#define __BKEY_VAL_ACCESSORS(name, nr, _assert) \ +#define BKEY_VAL_ACCESSORS(name) \ +struct bkey_i_##name { \ + union { \ + struct bkey k; \ + struct bkey_i k_i; \ + }; \ + struct bch_##name v; \ +}; \ + \ struct bkey_s_c_##name { \ union { \ struct { \ @@ -464,20 +474,20 @@ struct bkey_s_##name { \ \ static inline struct bkey_i_##name *bkey_i_to_##name(struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return container_of(&k->k, struct bkey_i_##name, k); \ } \ \ static inline const struct bkey_i_##name * \ bkey_i_to_##name##_c(const struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return container_of(&k->k, struct bkey_i_##name, k); \ } \ \ static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \ { \ - _assert(k.k->type, nr); \ + EBUG_ON(k.k->type != KEY_TYPE_##name); \ return (struct bkey_s_##name) { \ .k = k.k, \ .v = container_of(k.v, struct bch_##name, v), \ @@ -486,7 +496,7 @@ static inline struct bkey_s_##name bkey_s_to_##name(struct bkey_s k) \ \ static inline struct bkey_s_c_##name bkey_s_c_to_##name(struct bkey_s_c k)\ { \ - _assert(k.k->type, nr); \ + EBUG_ON(k.k->type != KEY_TYPE_##name); \ return (struct bkey_s_c_##name) { \ .k = k.k, \ .v = container_of(k.v, struct bch_##name, v), \ @@ -512,7 +522,7 @@ name##_i_to_s_c(const struct bkey_i_##name *k) \ \ static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return (struct bkey_s_##name) { \ .k = &k->k, \ .v = container_of(&k->v, struct bch_##name, v), \ @@ -522,27 +532,13 @@ static inline struct bkey_s_##name bkey_i_to_s_##name(struct bkey_i *k) \ static inline struct bkey_s_c_##name \ bkey_i_to_s_c_##name(const struct bkey_i *k) \ { \ - _assert(k->k.type, nr); \ + EBUG_ON(k->k.type != KEY_TYPE_##name); \ return (struct bkey_s_c_##name) { \ .k = &k->k, \ .v = container_of(&k->v, struct bch_##name, v), \ }; \ } \ \ -static inline struct bch_##name * \ -bkey_p_##name##_val(const struct bkey_format *f, \ - struct bkey_packed *k) \ -{ \ - return container_of(bkeyp_val(f, k), struct bch_##name, v); \ -} \ - \ -static inline const struct bch_##name * \ -bkey_p_c_##name##_val(const struct bkey_format *f, \ - const struct bkey_packed *k) \ -{ \ - return container_of(bkeyp_val(f, k), struct bch_##name, v); \ -} \ - \ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ { \ struct bkey_i_##name *k = \ @@ -550,45 +546,23 @@ static inline struct bkey_i_##name *bkey_##name##_init(struct bkey_i *_k)\ \ bkey_init(&k->k); \ memset(&k->v, 0, sizeof(k->v)); \ - k->k.type = nr; \ + k->k.type = KEY_TYPE_##name; \ set_bkey_val_bytes(&k->k, sizeof(k->v)); \ \ return k; \ } -#define __BKEY_VAL_ASSERT(_type, _nr) EBUG_ON(_type != _nr) - -#define BKEY_VAL_ACCESSORS(name, _nr) \ - static inline void __bch_##name##_assert(u8 type, u8 nr) \ - { \ - EBUG_ON(type != _nr); \ - } \ - \ - __BKEY_VAL_ACCESSORS(name, _nr, __bch_##name##_assert) - -BKEY_VAL_ACCESSORS(cookie, KEY_TYPE_COOKIE); - -static inline void __bch2_extent_assert(u8 type, u8 nr) -{ - EBUG_ON(type != BCH_EXTENT && type != BCH_EXTENT_CACHED); -} - -__BKEY_VAL_ACCESSORS(extent, BCH_EXTENT, __bch2_extent_assert); -BKEY_VAL_ACCESSORS(reservation, BCH_RESERVATION); - -BKEY_VAL_ACCESSORS(inode, BCH_INODE_FS); -BKEY_VAL_ACCESSORS(inode_blockdev, BCH_INODE_BLOCKDEV); -BKEY_VAL_ACCESSORS(inode_generation, BCH_INODE_GENERATION); - -BKEY_VAL_ACCESSORS(dirent, BCH_DIRENT); - 
-BKEY_VAL_ACCESSORS(xattr, BCH_XATTR); - -BKEY_VAL_ACCESSORS(alloc, BCH_ALLOC); - -BKEY_VAL_ACCESSORS(quota, BCH_QUOTA); - -BKEY_VAL_ACCESSORS(stripe, BCH_STRIPE); +BKEY_VAL_ACCESSORS(cookie); +BKEY_VAL_ACCESSORS(btree_ptr); +BKEY_VAL_ACCESSORS(extent); +BKEY_VAL_ACCESSORS(reservation); +BKEY_VAL_ACCESSORS(inode); +BKEY_VAL_ACCESSORS(inode_generation); +BKEY_VAL_ACCESSORS(dirent); +BKEY_VAL_ACCESSORS(xattr); +BKEY_VAL_ACCESSORS(alloc); +BKEY_VAL_ACCESSORS(quota); +BKEY_VAL_ACCESSORS(stripe); /* byte order helpers */ diff --git a/fs/bcachefs/bkey_methods.c b/fs/bcachefs/bkey_methods.c index 81c66950668c..f518062d896b 100644 --- a/fs/bcachefs/bkey_methods.c +++ b/fs/bcachefs/bkey_methods.c @@ -12,66 +12,84 @@ #include "quota.h" #include "xattr.h" -const struct bkey_ops bch2_bkey_ops[] = { - [BKEY_TYPE_EXTENTS] = bch2_bkey_extent_ops, - [BKEY_TYPE_INODES] = bch2_bkey_inode_ops, - [BKEY_TYPE_DIRENTS] = bch2_bkey_dirent_ops, - [BKEY_TYPE_XATTRS] = bch2_bkey_xattr_ops, - [BKEY_TYPE_ALLOC] = bch2_bkey_alloc_ops, - [BKEY_TYPE_QUOTAS] = bch2_bkey_quota_ops, - [BKEY_TYPE_EC] = bch2_bkey_ec_ops, - [BKEY_TYPE_BTREE] = bch2_bkey_btree_ops, +const char * const bch_bkey_types[] = { +#define x(name, nr) #name, + BCH_BKEY_TYPES() +#undef x + NULL }; -const char *bch2_bkey_val_invalid(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) +static const char *deleted_key_invalid(const struct bch_fs *c, + struct bkey_s_c k) { - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + return NULL; +} + +const struct bkey_ops bch2_bkey_ops_deleted = { + .key_invalid = deleted_key_invalid, +}; + +const struct bkey_ops bch2_bkey_ops_discard = { + .key_invalid = deleted_key_invalid, +}; - switch (k.k->type) { - case KEY_TYPE_DELETED: - case KEY_TYPE_DISCARD: - return NULL; +static const char *empty_val_key_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + if (bkey_val_bytes(k.k)) + return "value size should be zero"; - case KEY_TYPE_ERROR: - return bkey_val_bytes(k.k) != 0 - ? "value size should be zero" - : NULL; + return NULL; +} - case KEY_TYPE_COOKIE: - return bkey_val_bytes(k.k) != sizeof(struct bch_cookie) - ? 
"incorrect value size" - : NULL; +const struct bkey_ops bch2_bkey_ops_error = { + .key_invalid = empty_val_key_invalid, +}; - default: - if (k.k->type < KEY_TYPE_GENERIC_NR) - return "invalid type"; +static const char *key_type_cookie_invalid(const struct bch_fs *c, + struct bkey_s_c k) +{ + if (bkey_val_bytes(k.k) != sizeof(struct bch_cookie)) + return "incorrect value size"; - return ops->key_invalid(c, k); - } + return NULL; } -const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) +const struct bkey_ops bch2_bkey_ops_cookie = { + .key_invalid = key_type_cookie_invalid, +}; + +const struct bkey_ops bch2_bkey_ops_whiteout = { + .key_invalid = empty_val_key_invalid, +}; + +static const struct bkey_ops bch2_bkey_ops[] = { +#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name, + BCH_BKEY_TYPES() +#undef x +}; + +const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k) { - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + if (k.k->type >= KEY_TYPE_MAX) + return "invalid type"; + + return bch2_bkey_ops[k.k->type].key_invalid(c, k); +} +const char *__bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type) +{ if (k.k->u64s < BKEY_U64s) return "u64s too small"; - if (!ops->is_extents) { - if (k.k->size) - return "nonzero size field"; - } else { + if (btree_node_type_is_extents(type)) { if ((k.k->size == 0) != bkey_deleted(k.k)) return "bad size field"; + } else { + if (k.k->size) + return "nonzero size field"; } - if (ops->is_extents && - !k.k->size && - !bkey_deleted(k.k)) - return "zero size field"; - if (k.k->p.snapshot) return "nonzero snapshot"; @@ -82,11 +100,11 @@ const char *__bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type, return NULL; } -const char *bch2_bkey_invalid(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) +const char *bch2_bkey_invalid(struct bch_fs *c, struct bkey_s_c k, + enum btree_node_type type) { - return __bch2_bkey_invalid(c, type, k) ?: - bch2_bkey_val_invalid(c, type, k); + return __bch2_bkey_invalid(c, k, type) ?: + bch2_bkey_val_invalid(c, k); } const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) @@ -102,24 +120,22 @@ const char *bch2_bkey_in_btree_node(struct btree *b, struct bkey_s_c k) void bch2_bkey_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { - enum bkey_type type = btree_node_type(b); - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; const char *invalid; BUG_ON(!k.k->u64s); - invalid = bch2_bkey_invalid(c, type, k) ?: + invalid = bch2_bkey_invalid(c, k, btree_node_type(b)) ?: bch2_bkey_in_btree_node(b, k); if (invalid) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, type, k); + bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_fs_bug(c, "invalid bkey %s: %s", buf, invalid); return; } - if (k.k->type >= KEY_TYPE_GENERIC_NR && - ops->key_debugcheck) + if (ops->key_debugcheck) ops->key_debugcheck(c, b, k); } @@ -144,46 +160,90 @@ void bch2_bkey_to_text(struct printbuf *out, const struct bkey *k) } void bch2_val_to_text(struct printbuf *out, struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k) -{ - const struct bkey_ops *ops = &bch2_bkey_ops[type]; - - switch (k.k->type) { - case KEY_TYPE_DELETED: - pr_buf(out, " deleted"); - break; - case KEY_TYPE_DISCARD: - pr_buf(out, " discard"); - break; - case KEY_TYPE_ERROR: - pr_buf(out, " error"); - break; - case KEY_TYPE_COOKIE: - pr_buf(out, " cookie"); - break; - default: - if (k.k->type >= 
KEY_TYPE_GENERIC_NR && ops->val_to_text) - ops->val_to_text(out, c, k); - break; - } + struct bkey_s_c k) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; + + if (likely(ops->val_to_text)) + ops->val_to_text(out, c, k); + else + pr_buf(out, " %s", bch_bkey_types[k.k->type]); } void bch2_bkey_val_to_text(struct printbuf *out, struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k) + struct bkey_s_c k) { bch2_bkey_to_text(out, k.k); pr_buf(out, ": "); - bch2_val_to_text(out, c, type, k); + bch2_val_to_text(out, c, k); } -void bch2_bkey_swab(enum bkey_type type, - const struct bkey_format *f, - struct bkey_packed *k) +void bch2_bkey_swab(const struct bkey_format *f, + struct bkey_packed *k) { - const struct bkey_ops *ops = &bch2_bkey_ops[type]; + const struct bkey_ops *ops = &bch2_bkey_ops[k->type]; bch2_bkey_swab_key(f, k); if (ops->swab) ops->swab(f, k); } + +bool bch2_bkey_normalize(struct bch_fs *c, struct bkey_s k) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[k.k->type]; + + return ops->key_normalize + ? ops->key_normalize(c, k) + : false; +} + +enum merge_result bch2_bkey_merge(struct bch_fs *c, + struct bkey_i *l, struct bkey_i *r) +{ + const struct bkey_ops *ops = &bch2_bkey_ops[l->k.type]; + + if (!key_merging_disabled(c) && + ops->key_merge && + l->k.type == r->k.type && + !bversion_cmp(l->k.version, r->k.version) && + !bkey_cmp(l->k.p, bkey_start_pos(&r->k))) + return ops->key_merge(c, l, r); + + return BCH_MERGE_NOMERGE; +} + +static const struct old_bkey_type { + u8 btree_node_type; + u8 old; + u8 new; +} bkey_renumber_table[] = { + {BKEY_TYPE_BTREE, 128, KEY_TYPE_btree_ptr }, + {BKEY_TYPE_EXTENTS, 128, KEY_TYPE_extent }, + {BKEY_TYPE_EXTENTS, 129, KEY_TYPE_extent }, + {BKEY_TYPE_EXTENTS, 130, KEY_TYPE_reservation }, + {BKEY_TYPE_INODES, 128, KEY_TYPE_inode }, + {BKEY_TYPE_INODES, 130, KEY_TYPE_inode_generation }, + {BKEY_TYPE_DIRENTS, 128, KEY_TYPE_dirent }, + {BKEY_TYPE_DIRENTS, 129, KEY_TYPE_whiteout }, + {BKEY_TYPE_XATTRS, 128, KEY_TYPE_xattr }, + {BKEY_TYPE_XATTRS, 129, KEY_TYPE_whiteout }, + {BKEY_TYPE_ALLOC, 128, KEY_TYPE_alloc }, + {BKEY_TYPE_QUOTAS, 128, KEY_TYPE_quota }, +}; + +void bch2_bkey_renumber(enum btree_node_type btree_node_type, + struct bkey_packed *k, + int write) +{ + const struct old_bkey_type *i; + + for (i = bkey_renumber_table; + i < bkey_renumber_table + ARRAY_SIZE(bkey_renumber_table); + i++) + if (btree_node_type == i->btree_node_type && + k->type == (write ? i->new : i->old)) { + k->type = write ? i->old : i->new; + break; + } +} diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h index 62b86a8e2ba8..a4bfd2aef5bf 100644 --- a/fs/bcachefs/bkey_methods.h +++ b/fs/bcachefs/bkey_methods.h @@ -4,24 +4,12 @@ #include "bkey.h" -#define DEF_BTREE_ID(kwd, val, name) BKEY_TYPE_##kwd = val, - -enum bkey_type { - DEFINE_BCH_BTREE_IDS() - BKEY_TYPE_BTREE, -}; - -#undef DEF_BTREE_ID - -/* Type of a key in btree @id at level @level: */ -static inline enum bkey_type bkey_type(unsigned level, enum btree_id id) -{ - return level ? 
BKEY_TYPE_BTREE : (enum bkey_type) id; -} - struct bch_fs; struct btree; struct bkey; +enum btree_node_type; + +extern const char * const bch_bkey_types[]; enum merge_result { BCH_MERGE_NOMERGE, @@ -34,12 +22,6 @@ enum merge_result { BCH_MERGE_MERGE, }; -typedef bool (*key_filter_fn)(struct bch_fs *, struct btree *, - struct bkey_s); -typedef enum merge_result (*key_merge_fn)(struct bch_fs *, - struct btree *, - struct bkey_i *, struct bkey_i *); - struct bkey_ops { /* Returns reason for being invalid if invalid, else NULL: */ const char * (*key_invalid)(const struct bch_fs *, @@ -49,41 +31,34 @@ struct bkey_ops { void (*val_to_text)(struct printbuf *, struct bch_fs *, struct bkey_s_c); void (*swab)(const struct bkey_format *, struct bkey_packed *); - key_filter_fn key_normalize; - key_merge_fn key_merge; - bool is_extents; + bool (*key_normalize)(struct bch_fs *, struct bkey_s); + enum merge_result (*key_merge)(struct bch_fs *, + struct bkey_i *, struct bkey_i *); }; -static inline bool bkey_type_needs_gc(enum bkey_type type) -{ - switch (type) { - case BKEY_TYPE_BTREE: - case BKEY_TYPE_EXTENTS: - case BKEY_TYPE_EC: - return true; - default: - return false; - } -} - -const char *bch2_bkey_val_invalid(struct bch_fs *, enum bkey_type, - struct bkey_s_c); -const char *__bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c); -const char *bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c); +const char *bch2_bkey_val_invalid(struct bch_fs *, struct bkey_s_c); +const char *__bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, + enum btree_node_type); +const char *bch2_bkey_invalid(struct bch_fs *, struct bkey_s_c, + enum btree_node_type); const char *bch2_bkey_in_btree_node(struct btree *, struct bkey_s_c); void bch2_bkey_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); void bch2_bpos_to_text(struct printbuf *, struct bpos); void bch2_bkey_to_text(struct printbuf *, const struct bkey *); -void bch2_val_to_text(struct printbuf *, struct bch_fs *, enum bkey_type, +void bch2_val_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); void bch2_bkey_val_to_text(struct printbuf *, struct bch_fs *, - enum bkey_type, struct bkey_s_c); + struct bkey_s_c); + +void bch2_bkey_swab(const struct bkey_format *, struct bkey_packed *); + +bool bch2_bkey_normalize(struct bch_fs *, struct bkey_s); -void bch2_bkey_swab(enum bkey_type, const struct bkey_format *, - struct bkey_packed *); +enum merge_result bch2_bkey_merge(struct bch_fs *, + struct bkey_i *, struct bkey_i *); -extern const struct bkey_ops bch2_bkey_ops[]; +void bch2_bkey_renumber(enum btree_node_type, struct bkey_packed *, int); #endif /* _BCACHEFS_BKEY_METHODS_H */ diff --git a/fs/bcachefs/bkey_sort.c b/fs/bcachefs/bkey_sort.c index 706ca77d4b17..12825c1b292f 100644 --- a/fs/bcachefs/bkey_sort.c +++ b/fs/bcachefs/bkey_sort.c @@ -257,7 +257,7 @@ static void extent_sort_append(struct bch_fs *c, bch2_bkey_unpack(b, &tmp.k, k); if (*prev && - bch2_extent_merge(c, b, (void *) *prev, &tmp.k)) + bch2_bkey_merge(c, (void *) *prev, &tmp.k)) return; if (*prev) { @@ -375,7 +375,7 @@ struct btree_nr_keys bch2_extent_sort_fix_overlapping(struct bch_fs *c, } /* Sort + repack in a new format: */ -static struct btree_nr_keys +struct btree_nr_keys bch2_sort_repack(struct bset *dst, struct btree *src, struct btree_node_iter *src_iter, struct bkey_format *out_f, @@ -411,18 +411,12 @@ bch2_sort_repack_merge(struct bch_fs *c, struct bset *dst, struct btree *src, struct btree_node_iter *iter, struct bkey_format *out_f, - 
bool filter_whiteouts, - key_filter_fn filter, - key_merge_fn merge) + bool filter_whiteouts) { struct bkey_packed *k, *prev = NULL, *out; struct btree_nr_keys nr; BKEY_PADDED(k) tmp; - if (!filter && !merge) - return bch2_sort_repack(dst, src, iter, out_f, - filter_whiteouts); - memset(&nr, 0, sizeof(nr)); while ((k = bch2_btree_node_iter_next_all(iter, src))) { @@ -435,14 +429,15 @@ bch2_sort_repack_merge(struct bch_fs *c, */ bch2_bkey_unpack(src, &tmp.k, k); - if (filter && filter(c, src, bkey_i_to_s(&tmp.k))) + if (filter_whiteouts && + bch2_bkey_normalize(c, bkey_i_to_s(&tmp.k))) continue; /* prev is always unpacked, for key merging: */ if (prev && - merge && - merge(c, src, (void *) prev, &tmp.k) == BCH_MERGE_MERGE) + bch2_bkey_merge(c, (void *) prev, &tmp.k) == + BCH_MERGE_MERGE) continue; /* @@ -606,7 +601,7 @@ unsigned bch2_sort_extent_whiteouts(struct bkey_packed *dst, continue; EBUG_ON(bkeyp_val_u64s(f, in)); - EBUG_ON(in->type != KEY_TYPE_DISCARD); + EBUG_ON(in->type != KEY_TYPE_discard); r.k = bkey_unpack_key(iter->b, in); diff --git a/fs/bcachefs/bkey_sort.h b/fs/bcachefs/bkey_sort.h index 6b1661dd221a..397009181eae 100644 --- a/fs/bcachefs/bkey_sort.h +++ b/fs/bcachefs/bkey_sort.h @@ -47,13 +47,14 @@ bch2_extent_sort_fix_overlapping(struct bch_fs *, struct bset *, struct btree_node_iter_large *); struct btree_nr_keys +bch2_sort_repack(struct bset *, struct btree *, + struct btree_node_iter *, + struct bkey_format *, bool); +struct btree_nr_keys bch2_sort_repack_merge(struct bch_fs *, struct bset *, struct btree *, struct btree_node_iter *, - struct bkey_format *, - bool, - key_filter_fn, - key_merge_fn); + struct bkey_format *, bool); unsigned bch2_sort_keys(struct bkey_packed *, struct sort_iter *, bool); diff --git a/fs/bcachefs/bset.h b/fs/bcachefs/bset.h index 5d03036620b9..329ffb0b6b3d 100644 --- a/fs/bcachefs/bset.h +++ b/fs/bcachefs/bset.h @@ -397,7 +397,7 @@ bch2_bkey_prev_all(struct btree *b, struct bset_tree *t, struct bkey_packed *k) static inline struct bkey_packed * bch2_bkey_prev(struct btree *b, struct bset_tree *t, struct bkey_packed *k) { - return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_DISCARD + 1); + return bch2_bkey_prev_filter(b, t, k, KEY_TYPE_discard + 1); } enum bch_extent_overlap { @@ -529,7 +529,7 @@ bch2_btree_node_iter_peek_all(struct btree_node_iter *iter, static inline struct bkey_packed * bch2_btree_node_iter_peek(struct btree_node_iter *iter, struct btree *b) { - return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_DISCARD + 1); + return bch2_btree_node_iter_peek_filter(iter, b, KEY_TYPE_discard + 1); } static inline struct bkey_packed * @@ -555,7 +555,7 @@ bch2_btree_node_iter_prev_all(struct btree_node_iter *iter, struct btree *b) static inline struct bkey_packed * bch2_btree_node_iter_prev(struct btree_node_iter *iter, struct btree *b) { - return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_DISCARD + 1); + return bch2_btree_node_iter_prev_filter(iter, b, KEY_TYPE_discard + 1); } struct bkey_s_c bch2_btree_node_iter_peek_unpack(struct btree_node_iter *, diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c index 846d5e816aa2..b748afc778f4 100644 --- a/fs/bcachefs/btree_cache.c +++ b/fs/bcachefs/btree_cache.c @@ -6,20 +6,17 @@ #include "btree_iter.h" #include "btree_locking.h" #include "debug.h" -#include "extents.h" #include "trace.h" #include -#define DEF_BTREE_ID(kwd, val, name) name, - const char * const bch2_btree_ids[] = { - DEFINE_BCH_BTREE_IDS() +#define x(kwd, val, name) name, + BCH_BTREE_IDS() +#undef x NULL 
}; -#undef DEF_BTREE_ID - void bch2_recalc_btree_reserve(struct bch_fs *c) { unsigned i, reserve = 16; @@ -100,7 +97,7 @@ static struct btree *btree_node_mem_alloc(struct bch_fs *c, gfp_t gfp) if (!b) return NULL; - bkey_extent_init(&b->key); + bkey_btree_ptr_init(&b->key); six_lock_init(&b->lock); lockdep_set_novalidate_class(&b->lock); INIT_LIST_HEAD(&b->list); @@ -117,7 +114,7 @@ void bch2_btree_node_hash_remove(struct btree_cache *bc, struct btree *b) rhashtable_remove_fast(&bc->table, &b->hash, bch_btree_cache_params); /* Cause future lookups for this node to fail: */ - bkey_i_to_extent(&b->key)->v._data[0] = 0; + PTR_HASH(&b->key) = 0; } int __bch2_btree_node_hash_insert(struct btree_cache *bc, struct btree *b) @@ -604,7 +601,7 @@ static noinline struct btree *bch2_btree_node_fill(struct bch_fs *c, /* raced with another fill: */ /* mark as unhashed... */ - bkey_i_to_extent(&b->key)->v._data[0] = 0; + PTR_HASH(&b->key) = 0; mutex_lock(&bc->lock); list_add(&b->list, &bc->freeable); @@ -906,8 +903,7 @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c, b->data->min_key.offset, b->data->max_key.inode, b->data->max_key.offset); - bch2_val_to_text(out, c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + bch2_val_to_text(out, c, bkey_i_to_s_c(&b->key)); pr_buf(out, "\n" " format: u64s %u fields %u %u %u %u %u\n" " unpack fn len: %u\n" diff --git a/fs/bcachefs/btree_cache.h b/fs/bcachefs/btree_cache.h index cb7f66fc8bd4..7bd2bc84160d 100644 --- a/fs/bcachefs/btree_cache.h +++ b/fs/bcachefs/btree_cache.h @@ -4,7 +4,6 @@ #include "bcachefs.h" #include "btree_types.h" -#include "extents.h" struct btree_iter; @@ -37,12 +36,13 @@ void bch2_fs_btree_cache_exit(struct bch_fs *); int bch2_fs_btree_cache_init(struct bch_fs *); void bch2_fs_btree_cache_init_early(struct btree_cache *); -#define PTR_HASH(_k) (bkey_i_to_extent_c(_k)->v._data[0]) +#define PTR_HASH(_k) *((u64 *) &bkey_i_to_btree_ptr_c(_k)->v) /* is btree node in hash table? 
*/ static inline bool btree_node_hashed(struct btree *b) { - return bkey_extent_is_data(&b->key.k) && PTR_HASH(&b->key); + return b->key.k.type == KEY_TYPE_btree_ptr && + PTR_HASH(&b->key); } #define for_each_cached_btree(_b, _c, _tbl, _iter, _pos) \ diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index a849f9e320b3..85fc181e76a8 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -112,137 +112,11 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b, /* marking of btree keys/nodes: */ -static void ptr_gen_recalc_oldest(struct bch_fs *c, - const struct bch_extent_ptr *ptr, - u8 *max_stale) -{ - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - size_t b = PTR_BUCKET_NR(ca, ptr); - - if (gen_after(ca->oldest_gens[b], ptr->gen)) - ca->oldest_gens[b] = ptr->gen; - - *max_stale = max(*max_stale, ptr_stale(ca, ptr)); -} - -static void ptr_gens_recalc_oldest(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k, u8 *max_stale) -{ - const struct bch_extent_ptr *ptr; - - switch (type) { - case BKEY_TYPE_BTREE: - case BKEY_TYPE_EXTENTS: - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - - extent_for_each_ptr(e, ptr) - ptr_gen_recalc_oldest(c, ptr, max_stale); - break; - } - } - break; - case BKEY_TYPE_EC: - switch (k.k->type) { - case BCH_STRIPE: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - - for (ptr = s.v->ptrs; - ptr < s.v->ptrs + s.v->nr_blocks; - ptr++) - ptr_gen_recalc_oldest(c, ptr, max_stale); - } - } - default: - break; - } -} - -static int ptr_gen_check(struct bch_fs *c, - enum bkey_type type, - const struct bch_extent_ptr *ptr) -{ - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - size_t b = PTR_BUCKET_NR(ca, ptr); - struct bucket *g = PTR_BUCKET(ca, ptr); - int ret = 0; - - if (mustfix_fsck_err_on(!g->mark.gen_valid, c, - "found ptr with missing gen in alloc btree,\n" - "type %u gen %u", - type, ptr->gen)) { - g->_mark.gen = ptr->gen; - g->_mark.gen_valid = 1; - set_bit(b, ca->buckets_dirty); - } - - if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, - "%u ptr gen in the future: %u > %u", - type, ptr->gen, g->mark.gen)) { - g->_mark.gen = ptr->gen; - g->_mark.gen_valid = 1; - set_bit(b, ca->buckets_dirty); - set_bit(BCH_FS_FIXED_GENS, &c->flags); - } -fsck_err: - return ret; -} - -static int ptr_gens_check(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k) -{ - const struct bch_extent_ptr *ptr; - int ret = 0; - - switch (type) { - case BKEY_TYPE_BTREE: - case BKEY_TYPE_EXTENTS: - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - - extent_for_each_ptr(e, ptr) { - ret = ptr_gen_check(c, type, ptr); - if (ret) - return ret; - - } - break; - } - } - break; - case BKEY_TYPE_EC: - switch (k.k->type) { - case BCH_STRIPE: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - - for (ptr = s.v->ptrs; - ptr < s.v->ptrs + s.v->nr_blocks; - ptr++) { - ret = ptr_gen_check(c, type, ptr); - if (ret) - return ret; - } - } - } - break; - default: - break; - } - - return ret; -} - -/* - * For runtime mark and sweep: - */ -static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type, - struct bkey_s_c k, +static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k, u8 *max_stale, bool initial) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; struct gc_pos pos = { 0 }; unsigned flags = BCH_BUCKET_MARK_GC| @@ -257,23 
+131,50 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type, atomic64_set(&c->key_version, k.k->version.lo); if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) || - fsck_err_on(!bch2_bkey_replicas_marked(c, type, k, - false), c, + fsck_err_on(!bch2_bkey_replicas_marked(c, k, false), c, "superblock not marked as containing replicas (type %u)", - type)) { - ret = bch2_mark_bkey_replicas(c, type, k); + k.k->type)) { + ret = bch2_mark_bkey_replicas(c, k); if (ret) return ret; } - ret = ptr_gens_check(c, type, k); - if (ret) - return ret; + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + size_t b = PTR_BUCKET_NR(ca, ptr); + struct bucket *g = PTR_BUCKET(ca, ptr); + + if (mustfix_fsck_err_on(!g->mark.gen_valid, c, + "found ptr with missing gen in alloc btree,\n" + "type %u gen %u", + k.k->type, ptr->gen)) { + g->_mark.gen = ptr->gen; + g->_mark.gen_valid = 1; + set_bit(b, ca->buckets_dirty); + } + + if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c, + "%u ptr gen in the future: %u > %u", + k.k->type, ptr->gen, g->mark.gen)) { + g->_mark.gen = ptr->gen; + g->_mark.gen_valid = 1; + set_bit(b, ca->buckets_dirty); + set_bit(BCH_FS_FIXED_GENS, &c->flags); + } + } } - bch2_mark_key(c, type, k, true, k.k->size, pos, NULL, 0, flags); + bkey_for_each_ptr(ptrs, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + size_t b = PTR_BUCKET_NR(ca, ptr); + + if (gen_after(ca->oldest_gens[b], ptr->gen)) + ca->oldest_gens[b] = ptr->gen; + + *max_stale = max(*max_stale, ptr_stale(ca, ptr)); + } - ptr_gens_recalc_oldest(c, type, k, max_stale); + bch2_mark_key(c, k, true, k.k->size, pos, NULL, 0, flags); fsck_err: return ret; } @@ -281,7 +182,6 @@ fsck_err: static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, u8 *max_stale, bool initial) { - enum bkey_type type = btree_node_type(b); struct btree_node_iter iter; struct bkey unpacked; struct bkey_s_c k; @@ -289,14 +189,14 @@ static int btree_gc_mark_node(struct bch_fs *c, struct btree *b, *max_stale = 0; - if (!bkey_type_needs_gc(type)) + if (!btree_node_type_needs_gc(btree_node_type(b))) return 0; for_each_btree_node_key_unpack(b, k, &iter, &unpacked) { bch2_bkey_debugcheck(c, b, k); - ret = bch2_gc_mark_key(c, type, k, max_stale, initial); + ret = bch2_gc_mark_key(c, k, max_stale, initial); if (ret) break; } @@ -310,7 +210,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, struct btree_iter iter; struct btree *b; struct range_checks r; - unsigned depth = bkey_type_needs_gc(btree_id) ? 0 : 1; + unsigned depth = btree_node_type_needs_gc(btree_id) ? 
0 : 1; u8 max_stale; int ret = 0; @@ -364,7 +264,7 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id, b = c->btree_roots[btree_id].b; if (!btree_node_fake(b)) - bch2_gc_mark_key(c, BKEY_TYPE_BTREE, bkey_i_to_s_c(&b->key), + bch2_gc_mark_key(c, bkey_i_to_s_c(&b->key), &max_stale, initial); gc_pos_set(c, gc_pos_btree_root(b->btree_id)); @@ -391,13 +291,13 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal, for (i = 0; i < BTREE_ID_NR; i++) { enum btree_id id = ids[i]; - enum bkey_type type = bkey_type(0, id); + enum btree_node_type type = __btree_node_type(0, id); int ret = bch2_gc_btree(c, id, initial); if (ret) return ret; - if (journal && bkey_type_needs_gc(type)) { + if (journal && btree_node_type_needs_gc(type)) { struct bkey_i *k, *n; struct jset_entry *j; struct journal_replay *r; @@ -405,8 +305,8 @@ static int bch2_gc_btrees(struct bch_fs *c, struct list_head *journal, list_for_each_entry(r, journal, list) for_each_jset_key(k, n, j, &r->j) { - if (type == bkey_type(j->level, j->btree_id)) { - ret = bch2_gc_mark_key(c, type, + if (type == __btree_node_type(j->level, j->btree_id)) { + ret = bch2_gc_mark_key(c, bkey_i_to_s_c(k), &max_stale, initial); if (ret) @@ -507,8 +407,7 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c) for_each_pending_btree_node_free(c, as, d) if (d->index_update_done) - bch2_mark_key(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&d->key), + bch2_mark_key(c, bkey_i_to_s_c(&d->key), true, 0, pos, NULL, 0, BCH_BUCKET_MARK_GC); diff --git a/fs/bcachefs/btree_gc.h b/fs/bcachefs/btree_gc.h index bb77564b9463..89ee72ac49f6 100644 --- a/fs/bcachefs/btree_gc.h +++ b/fs/bcachefs/btree_gc.h @@ -4,8 +4,6 @@ #include "btree_types.h" -enum bkey_type; - void bch2_coalesce(struct bch_fs *); int bch2_gc(struct bch_fs *, struct list_head *, bool); void bch2_gc_thread_stop(struct bch_fs *); @@ -58,9 +56,9 @@ static inline int gc_pos_cmp(struct gc_pos l, struct gc_pos r) static inline enum gc_phase btree_id_to_gc_phase(enum btree_id id) { switch (id) { -#define DEF_BTREE_ID(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n; - DEFINE_BCH_BTREE_IDS() -#undef DEF_BTREE_ID +#define x(n, v, s) case BTREE_ID_##n: return GC_PHASE_BTREE_##n; + BCH_BTREE_IDS() +#undef x default: BUG(); } diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index 506bf9e8df38..f205bddd814d 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -392,12 +392,16 @@ void bch2_btree_sort_into(struct bch_fs *c, bch2_btree_node_iter_init_from_start(&src_iter, src); - nr = bch2_sort_repack_merge(c, btree_bset_first(dst), - src, &src_iter, - &dst->format, - true, - btree_node_ops(src)->key_normalize, - btree_node_ops(src)->key_merge); + if (btree_node_is_extents(src)) + nr = bch2_sort_repack_merge(c, btree_bset_first(dst), + src, &src_iter, + &dst->format, + true); + else + nr = bch2_sort_repack(btree_bset_first(dst), + src, &src_iter, + &dst->format, + true); bch2_time_stats_update(&c->times[BCH_TIME_btree_sort], start_time); @@ -598,8 +602,8 @@ static int validate_bset(struct bch_fs *c, struct btree *b, { struct bkey_packed *k, *prev = NULL; struct bpos prev_pos = POS_MIN; - enum bkey_type type = btree_node_type(b); bool seen_non_whiteout = false; + unsigned version; const char *err; int ret = 0; @@ -645,13 +649,12 @@ static int validate_bset(struct bch_fs *c, struct btree *b, "invalid bkey format: %s", err); } - if (btree_err_on(le16_to_cpu(i->version) != BCACHE_BSET_VERSION, - BTREE_ERR_FIXABLE, c, b, i, - "unsupported bset version")) { - 
i->version = cpu_to_le16(BCACHE_BSET_VERSION); - i->u64s = 0; - return 0; - } + version = le16_to_cpu(i->version); + btree_err_on((version != BCH_BSET_VERSION_OLD && + version < bcachefs_metadata_version_min) || + version >= bcachefs_metadata_version_max, + BTREE_ERR_FATAL, c, b, i, + "unsupported bset version"); if (btree_err_on(b->written + sectors > c->opts.btree_node_size, BTREE_ERR_FIXABLE, c, b, i, @@ -700,17 +703,21 @@ static int validate_bset(struct bch_fs *c, struct btree *b, } if (BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN) - bch2_bkey_swab(type, &b->format, k); + bch2_bkey_swab(&b->format, k); + + if (!write && + version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(btree_node_type(b), k, write); u = bkey_disassemble(b, k, &tmp); - invalid = __bch2_bkey_invalid(c, type, u) ?: + invalid = __bch2_bkey_invalid(c, u, btree_node_type(b)) ?: bch2_bkey_in_btree_node(b, u) ?: - (write ? bch2_bkey_val_invalid(c, type, u) : NULL); + (write ? bch2_bkey_val_invalid(c, u) : NULL); if (invalid) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, type, u); + bch2_bkey_val_to_text(&PBUF(buf), c, u); btree_err(BTREE_ERR_FIXABLE, c, b, i, "invalid bkey:\n%s\n%s", invalid, buf); @@ -720,6 +727,10 @@ static int validate_bset(struct bch_fs *c, struct btree *b, continue; } + if (write && + version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(btree_node_type(b), k, write); + /* * with the separate whiteouts thing (used for extents), the * second set of keys actually can have whiteouts too, so we @@ -885,17 +896,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry i = &b->data->keys; for (k = i->start; k != vstruct_last(i);) { - enum bkey_type type = btree_node_type(b); struct bkey tmp; struct bkey_s_c u = bkey_disassemble(b, k, &tmp); - const char *invalid = bch2_bkey_val_invalid(c, type, u); + const char *invalid = bch2_bkey_val_invalid(c, u); if (invalid || (inject_invalid_keys(c) && !bversion_cmp(u.k->version, MAX_VERSION))) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, type, u); + bch2_bkey_val_to_text(&PBUF(buf), c, u); btree_err(BTREE_ERR_FIXABLE, c, b, i, "invalid bkey %s: %s", buf, invalid); @@ -964,7 +974,9 @@ start: bch2_mark_io_failure(&failed, &rb->pick); - can_retry = bch2_btree_pick_ptr(c, b, &failed, &rb->pick) > 0; + can_retry = bch2_bkey_pick_read_device(c, + bkey_i_to_s_c(&b->key), + &failed, &rb->pick) > 0; if (!bio->bi_status && !bch2_btree_node_read_done(c, b, can_retry)) @@ -1007,7 +1019,8 @@ void bch2_btree_node_read(struct bch_fs *c, struct btree *b, trace_btree_read(c, b); - ret = bch2_btree_pick_ptr(c, b, NULL, &pick); + ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), + NULL, &pick); if (bch2_fs_fatal_err_on(ret <= 0, c, "btree node read error: no device to read from")) { set_btree_node_read_error(b); @@ -1135,8 +1148,8 @@ static void bch2_btree_node_write_error(struct bch_fs *c, { struct btree *b = wbio->wbio.bio.bi_private; __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; - struct bkey_i_extent *new_key; - struct bkey_s_extent e; + struct bkey_i_btree_ptr *new_key; + struct bkey_s_btree_ptr bp; struct bch_extent_ptr *ptr; struct btree_iter iter; int ret; @@ -1160,13 +1173,13 @@ retry: bkey_copy(&tmp.k, &b->key); - new_key = bkey_i_to_extent(&tmp.k); - e = extent_i_to_s(new_key); + new_key = bkey_i_to_btree_ptr(&tmp.k); + bp = btree_ptr_i_to_s(new_key); - bch2_extent_drop_ptrs(e, ptr, + bch2_bkey_drop_ptrs(bkey_i_to_s(&tmp.k), ptr, bch2_dev_list_has_dev(wbio->wbio.failed, ptr->dev)); 
- if (!bch2_extent_nr_ptrs(e.c)) + if (!bch2_bkey_nr_ptrs(bp.s_c)) goto err; ret = bch2_btree_node_update_key(c, &iter, b, new_key); @@ -1269,12 +1282,11 @@ static void btree_node_write_endio(struct bio *bio) static int validate_bset_for_write(struct bch_fs *c, struct btree *b, struct bset *i, unsigned sectors) { - const struct bch_extent_ptr *ptr; unsigned whiteout_u64s = 0; int ret; - extent_for_each_ptr(bkey_i_to_s_c_extent(&b->key), ptr) - break; + if (bch2_bkey_invalid(c, bkey_i_to_s_c(&b->key), BKEY_TYPE_BTREE)) + return -1; ret = validate_bset(c, b, i, sectors, &whiteout_u64s, WRITE, false); if (ret) @@ -1292,7 +1304,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, struct btree_node *bn = NULL; struct btree_node_entry *bne = NULL; BKEY_PADDED(key) k; - struct bkey_s_extent e; struct bch_extent_ptr *ptr; struct sort_iter sort_iter; struct nonce nonce; @@ -1300,6 +1311,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, u64 seq = 0; bool used_mempool; unsigned long old, new; + bool validate_before_checksum = false; void *data; if (test_bit(BCH_FS_HOLD_BTREE_WRITES, &c->flags)) @@ -1433,11 +1445,21 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, BUG_ON(BSET_BIG_ENDIAN(i) != CPU_BIG_ENDIAN); BUG_ON(i->seq != b->data->keys.seq); - i->version = cpu_to_le16(BCACHE_BSET_VERSION); + i->version = c->sb.version < bcachefs_metadata_version_new_versioning + ? cpu_to_le16(BCH_BSET_VERSION_OLD) + : cpu_to_le16(c->sb.version); SET_BSET_CSUM_TYPE(i, bch2_meta_checksum_type(c)); + if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i))) + validate_before_checksum = true; + + /* validate_bset will be modifying: */ + if (le16_to_cpu(i->version) < + bcachefs_metadata_version_bkey_renumber) + validate_before_checksum = true; + /* if we're going to be encrypting, check metadata validity first: */ - if (bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) && + if (validate_before_checksum && validate_bset_for_write(c, b, i, sectors_to_write)) goto err; @@ -1451,7 +1473,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, bne->csum = csum_vstruct(c, BSET_CSUM_TYPE(i), nonce, bne); /* if we're not encrypting, check metadata after checksumming: */ - if (!bch2_csum_type_is_encryption(BSET_CSUM_TYPE(i)) && + if (!validate_before_checksum && validate_bset_for_write(c, b, i, sectors_to_write)) goto err; @@ -1506,9 +1528,8 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, */ bkey_copy(&k.key, &b->key); - e = bkey_i_to_s_extent(&k.key); - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&k.key)), ptr) ptr->offset += b->written; b->written += sectors_to_write; diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index afc43722c1fc..4720061e9562 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -433,7 +433,7 @@ static void __bch2_btree_iter_verify(struct btree_iter *iter, * whiteouts) */ k = b->level || iter->flags & BTREE_ITER_IS_EXTENTS - ? bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_DISCARD) + ? 
bch2_btree_node_iter_prev_filter(&tmp, b, KEY_TYPE_discard) : bch2_btree_node_iter_prev_all(&tmp, b); if (k && btree_iter_pos_cmp(iter, b, k) > 0) { char buf[100]; @@ -622,7 +622,7 @@ static inline struct bkey_s_c __btree_iter_unpack(struct btree_iter *iter, * signal to bch2_btree_iter_peek_slot() that we're currently at * a hole */ - u->type = KEY_TYPE_DELETED; + u->type = KEY_TYPE_deleted; return bkey_s_c_null; } diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 7eecaa6cd5a2..b4a826369a57 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -405,20 +405,45 @@ static inline unsigned bset_byte_offset(struct btree *b, void *i) return i - (void *) b->data; } +enum btree_node_type { +#define x(kwd, val, name) BKEY_TYPE_##kwd = val, + BCH_BTREE_IDS() +#undef x + BKEY_TYPE_BTREE, +}; + +/* Type of a key in btree @id at level @level: */ +static inline enum btree_node_type __btree_node_type(unsigned level, enum btree_id id) +{ + return level ? BKEY_TYPE_BTREE : (enum btree_node_type) id; +} + /* Type of keys @b contains: */ -static inline enum bkey_type btree_node_type(struct btree *b) +static inline enum btree_node_type btree_node_type(struct btree *b) { - return b->level ? BKEY_TYPE_BTREE : b->btree_id; + return __btree_node_type(b->level, b->btree_id); } -static inline const struct bkey_ops *btree_node_ops(struct btree *b) +static inline bool btree_node_type_is_extents(enum btree_node_type type) { - return &bch2_bkey_ops[btree_node_type(b)]; + return type == BKEY_TYPE_EXTENTS; } static inline bool btree_node_is_extents(struct btree *b) { - return btree_node_type(b) == BKEY_TYPE_EXTENTS; + return btree_node_type_is_extents(btree_node_type(b)); +} + +static inline bool btree_node_type_needs_gc(enum btree_node_type type) +{ + switch (type) { + case BKEY_TYPE_BTREE: + case BKEY_TYPE_EXTENTS: + case BKEY_TYPE_EC: + return true; + default: + return false; + } } struct btree_root { diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index f6b0082235af..d1647f6eb476 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -120,7 +120,7 @@ int bch2_btree_delete_range(struct bch_fs *, enum btree_id, int bch2_btree_node_rewrite(struct bch_fs *c, struct btree_iter *, __le64, unsigned); int bch2_btree_node_update_key(struct bch_fs *, struct btree_iter *, - struct btree *, struct bkey_i_extent *); + struct btree *, struct bkey_i_btree_ptr *); /* new transactional interface: */ diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 7d7a021416f3..22f087098776 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -132,13 +132,15 @@ bool bch2_btree_node_format_fits(struct bch_fs *c, struct btree *b, /* Btree node freeing/allocation: */ static bool btree_key_matches(struct bch_fs *c, - struct bkey_s_c_extent l, - struct bkey_s_c_extent r) + struct bkey_s_c l, + struct bkey_s_c r) { + struct bkey_ptrs_c ptrs1 = bch2_bkey_ptrs_c(l); + struct bkey_ptrs_c ptrs2 = bch2_bkey_ptrs_c(r); const struct bch_extent_ptr *ptr1, *ptr2; - extent_for_each_ptr(l, ptr1) - extent_for_each_ptr(r, ptr2) + bkey_for_each_ptr(ptrs1, ptr1) + bkey_for_each_ptr(ptrs2, ptr2) if (ptr1->dev == ptr2->dev && ptr1->gen == ptr2->gen && ptr1->offset == ptr2->offset) @@ -164,8 +166,7 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b, for (d = as->pending; d < as->pending + as->nr_pending; d++) if (!bkey_cmp(k.k->p, d->key.k.p) && - btree_key_matches(c, 
bkey_s_c_to_extent(k), - bkey_i_to_s_c_extent(&d->key))) + btree_key_matches(c, k, bkey_i_to_s_c(&d->key))) goto found; BUG(); found: @@ -197,7 +198,7 @@ found: ? gc_pos_btree_node(b) : gc_pos_btree_root(as->btree_id)) >= 0 && gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) - bch2_mark_key_locked(c, BKEY_TYPE_BTREE, + bch2_mark_key_locked(c, bkey_i_to_s_c(&d->key), false, 0, pos, NULL, 0, BCH_BUCKET_MARK_GC); @@ -270,8 +271,7 @@ static void bch2_btree_node_free_ondisk(struct bch_fs *c, { BUG_ON(!pending->index_update_done); - bch2_mark_key(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&pending->key), + bch2_mark_key(c, bkey_i_to_s_c(&pending->key), false, 0, gc_phase(GC_PHASE_PENDING_DELETE), NULL, 0, 0); @@ -285,7 +285,6 @@ static struct btree *__bch2_btree_node_alloc(struct bch_fs *c, struct write_point *wp; struct btree *b; BKEY_PADDED(k) tmp; - struct bkey_i_extent *e; struct open_buckets ob = { .nr = 0 }; struct bch_devs_list devs_have = (struct bch_devs_list) { 0 }; unsigned nr_reserve; @@ -336,8 +335,8 @@ retry: goto retry; } - e = bkey_extent_init(&tmp.k); - bch2_alloc_sectors_append_ptrs(c, wp, e, c->opts.btree_node_size); + bkey_btree_ptr_init(&tmp.k); + bch2_alloc_sectors_append_ptrs(c, wp, &tmp.k, c->opts.btree_node_size); bch2_open_bucket_get(c, wp, &ob); bch2_alloc_sectors_done(c, wp); @@ -375,7 +374,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev b->data->flags = 0; SET_BTREE_NODE_ID(b->data, as->btree_id); SET_BTREE_NODE_LEVEL(b->data, level); - b->data->ptr = bkey_i_to_extent(&b->key)->v.start->ptr; + b->data->ptr = bkey_i_to_btree_ptr(&b->key)->v.start[0]; bch2_btree_build_aux_trees(b); @@ -528,8 +527,7 @@ static struct btree_reserve *bch2_btree_reserve_get(struct bch_fs *c, goto err_free; } - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); if (ret) goto err_free; @@ -1072,8 +1070,7 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) mutex_lock(&c->btree_interior_update_lock); percpu_down_read(&c->usage_lock); - bch2_mark_key_locked(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key), + bch2_mark_key_locked(c, bkey_i_to_s_c(&b->key), true, 0, gc_pos_btree_root(b->btree_id), &stats, 0, 0); @@ -1166,11 +1163,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b mutex_lock(&c->btree_interior_update_lock); percpu_down_read(&c->usage_lock); - if (bkey_extent_is_data(&insert->k)) - bch2_mark_key_locked(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(insert), - true, 0, - gc_pos_btree_node(b), &stats, 0, 0); + bch2_mark_key_locked(c, bkey_i_to_s_c(insert), + true, 0, + gc_pos_btree_node(b), &stats, 0, 0); while ((k = bch2_btree_node_iter_peek_all(node_iter, b)) && bkey_iter_pos_cmp(b, &insert->k.p, k) > 0) @@ -1893,7 +1888,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, struct btree_update *as, struct btree_iter *iter, struct btree *b, struct btree *new_hash, - struct bkey_i_extent *new_key) + struct bkey_i_btree_ptr *new_key) { struct btree *parent; int ret; @@ -1938,7 +1933,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, */ ret = bch2_disk_reservation_add(c, &as->reserve->disk_res, c->opts.btree_node_size * - bch2_extent_nr_ptrs(extent_i_to_s_c(new_key)), + bch2_bkey_nr_ptrs(bkey_i_to_s_c(&new_key->k_i)), BCH_DISK_RESERVATION_NOFAIL| BCH_DISK_RESERVATION_GC_LOCK_HELD); BUG_ON(ret); @@ -1978,8 +1973,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, 
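[All of these call sites can drop the explicit BKEY_TYPE_BTREE argument because key types are now globally unique: every helper can dispatch on k.k->type alone. A sketch of the ops-table pattern that the bch2_bkey_ops_* initializers elsewhere in this patch plug into; the BCH_BKEY_TYPES() x-macro name is an assumption here, mirroring how BCH_BTREE_IDS() is used above, and real callers would also bounds-check the type:

	const struct bkey_ops bch2_bkey_ops[] = {
	#define x(name, nr) [KEY_TYPE_##name] = bch2_bkey_ops_##name,
		BCH_BKEY_TYPES()
	#undef x
	};

	const char *bch2_bkey_val_invalid(struct bch_fs *c, struct bkey_s_c k)
	{
		return bch2_bkey_ops[k.k->type].key_invalid(c, k);
	}
]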
mutex_lock(&c->btree_interior_update_lock); percpu_down_read(&c->usage_lock); - bch2_mark_key_locked(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&new_key->k_i), + bch2_mark_key_locked(c, bkey_i_to_s_c(&new_key->k_i), true, 0, gc_pos_btree_root(b->btree_id), &stats, 0, 0); @@ -2012,7 +2006,8 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, } int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, - struct btree *b, struct bkey_i_extent *new_key) + struct btree *b, + struct bkey_i_btree_ptr *new_key) { struct btree *parent = btree_node_parent(iter, b); struct btree_update *as = NULL; @@ -2078,8 +2073,7 @@ int bch2_btree_node_update_key(struct bch_fs *c, struct btree_iter *iter, goto err; } - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - extent_i_to_s_c(new_key).s_c); + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&new_key->k_i)); if (ret) goto err_free_update; @@ -2137,9 +2131,9 @@ void bch2_btree_root_alloc(struct bch_fs *c, enum btree_id id) b->level = 0; b->btree_id = id; - bkey_extent_init(&b->key); + bkey_btree_ptr_init(&b->key); b->key.k.p = POS_MAX; - bkey_i_to_extent(&b->key)->v._data[0] = U64_MAX - id; + PTR_HASH(&b->key) = U64_MAX - id; bch2_bset_init_first(b, &b->data->keys); bch2_btree_build_aux_trees(b); diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index 4b0d674472db..fd27334cf2a4 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -71,7 +71,7 @@ bool bch2_btree_bset_insert_key(struct btree_iter *iter, goto overwrite; } - k->type = KEY_TYPE_DELETED; + k->type = KEY_TYPE_deleted; bch2_btree_node_iter_fix(iter, b, node_iter, k, k->u64s, k->u64s); bch2_btree_iter_verify(iter, b); @@ -312,7 +312,6 @@ btree_key_can_insert(struct btree_insert *trans, return BTREE_INSERT_BTREE_NODE_FULL; if (!bch2_bkey_replicas_marked(c, - insert->iter->btree_id, bkey_i_to_s_c(insert->k), true)) return BTREE_INSERT_NEED_MARK_REPLICAS; @@ -449,8 +448,8 @@ static inline void btree_insert_entry_checks(struct bch_fs *c, BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos)); BUG_ON(debug_check_bkeys(c) && !bkey_deleted(&i->k->k) && - bch2_bkey_invalid(c, (enum bkey_type) i->iter->btree_id, - bkey_i_to_s_c(i->k))); + bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), + i->iter->btree_id)); } /** @@ -585,8 +584,7 @@ err: } bch2_btree_iter_unlock(trans->entries[0].iter); - ret = bch2_mark_bkey_replicas(c, i->iter->btree_id, - bkey_i_to_s_c(i->k)) + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k)) ?: -EINTR; break; default: diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index 3f4bbf280a78..d08e95020cef 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -567,7 +567,7 @@ static int __disk_sectors(struct bch_extent_crc_unpacked crc, unsigned sectors) crc.uncompressed_size)); } -static s64 ptr_disk_sectors(struct bkey_s_c_extent e, +static s64 ptr_disk_sectors(const struct bkey *k, struct extent_ptr_decoded p, s64 sectors) { @@ -579,8 +579,8 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e, old_sectors = 0; new_sectors = sectors; } else { - old_sectors = e.k->size; - new_sectors = e.k->size + sectors; + old_sectors = k->size; + new_sectors = k->size + sectors; } sectors = -__disk_sectors(p.crc, old_sectors) @@ -596,7 +596,6 @@ static s64 ptr_disk_sectors(struct bkey_s_c_extent e, * that with the gc pos seqlock held. 
*/ static void bch2_mark_pointer(struct bch_fs *c, - struct bkey_s_c_extent e, struct extent_ptr_decoded p, s64 sectors, enum bch_data_type data_type, struct bch_fs_usage *fs_usage, @@ -709,70 +708,54 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k, u64 journal_seq, unsigned flags, bool gc) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + s64 cached_sectors = 0; + s64 dirty_sectors = 0; + s64 ec_sectors = 0; + unsigned replicas = 0; + unsigned ec_redundancy = 0; + unsigned i; + int ret; + BUG_ON(!sectors); - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - s64 cached_sectors = 0; - s64 dirty_sectors = 0; - s64 ec_sectors = 0; - unsigned replicas = 0; - unsigned ec_redundancy = 0; - unsigned i; - int ret; - - extent_for_each_ptr_decode(e, p, entry) { - s64 disk_sectors = ptr_disk_sectors(e, p, sectors); - s64 adjusted_disk_sectors = disk_sectors; - - bch2_mark_pointer(c, e, p, disk_sectors, data_type, - stats, journal_seq, flags, gc); - - if (!p.ptr.cached) - for (i = 0; i < p.ec_nr; i++) { - ret = bch2_mark_stripe_ptr(c, p.ec[i], - disk_sectors, flags, - &adjusted_disk_sectors, - &ec_redundancy, gc); - if (ret) - return ret; - } - if (!p.ptr.cached) - replicas++; - - if (p.ptr.cached) - cached_sectors += adjusted_disk_sectors; - else if (!p.ec_nr) - dirty_sectors += adjusted_disk_sectors; - else - ec_sectors += adjusted_disk_sectors; - } + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + s64 disk_sectors = ptr_disk_sectors(k.k, p, sectors); + s64 adjusted_disk_sectors = disk_sectors; - replicas = clamp_t(unsigned, replicas, - 1, ARRAY_SIZE(stats->replicas)); - ec_redundancy = clamp_t(unsigned, ec_redundancy, - 1, ARRAY_SIZE(stats->replicas)); + bch2_mark_pointer(c, p, disk_sectors, data_type, + stats, journal_seq, flags, gc); - stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors; - stats->replicas[replicas - 1].data[data_type] += dirty_sectors; - stats->replicas[ec_redundancy - 1].ec_data += ec_sectors; - break; + if (!p.ptr.cached) + for (i = 0; i < p.ec_nr; i++) { + ret = bch2_mark_stripe_ptr(c, p.ec[i], + disk_sectors, flags, + &adjusted_disk_sectors, + &ec_redundancy, gc); + if (ret) + return ret; + } + if (!p.ptr.cached) + replicas++; + + if (p.ptr.cached) + cached_sectors += adjusted_disk_sectors; + else if (!p.ec_nr) + dirty_sectors += adjusted_disk_sectors; + else + ec_sectors += adjusted_disk_sectors; } - case BCH_RESERVATION: { - unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; - sectors *= replicas; - replicas = clamp_t(unsigned, replicas, - 1, ARRAY_SIZE(stats->replicas)); + replicas = clamp_t(unsigned, replicas, + 1, ARRAY_SIZE(stats->replicas)); + ec_redundancy = clamp_t(unsigned, ec_redundancy, + 1, ARRAY_SIZE(stats->replicas)); - stats->replicas[replicas - 1].persistent_reserved += sectors; - break; - } - } + stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors; + stats->replicas[replicas - 1].data[data_type] += dirty_sectors; + stats->replicas[ec_redundancy - 1].ec_data += ec_sectors; return 0; } @@ -813,56 +796,49 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k, u64 journal_seq, unsigned flags, bool gc) { - switch (k.k->type) { - case BCH_STRIPE: { - struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); - size_t idx = s.k->p.offset; - struct stripe *m = genradix_ptr(&c->stripes[gc], idx); - 
unsigned i; - - if (!m || (!inserting && !m->alive)) { - bch_err_ratelimited(c, "error marking nonexistent stripe %zu", - idx); - return -1; - } - - if (inserting && m->alive) { - bch_err_ratelimited(c, "error marking stripe %zu: already exists", - idx); - return -1; - } + struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + size_t idx = s.k->p.offset; + struct stripe *m = genradix_ptr(&c->stripes[gc], idx); + unsigned i; - BUG_ON(atomic_read(&m->blocks_nonempty)); + if (!m || (!inserting && !m->alive)) { + bch_err_ratelimited(c, "error marking nonexistent stripe %zu", + idx); + return -1; + } - for (i = 0; i < EC_STRIPE_MAX; i++) - BUG_ON(atomic_read(&m->block_sectors[i])); + if (inserting && m->alive) { + bch_err_ratelimited(c, "error marking stripe %zu: already exists", + idx); + return -1; + } - if (inserting) { - m->sectors = le16_to_cpu(s.v->sectors); - m->algorithm = s.v->algorithm; - m->nr_blocks = s.v->nr_blocks; - m->nr_redundant = s.v->nr_redundant; - } + BUG_ON(atomic_read(&m->blocks_nonempty)); - if (!gc) { - if (inserting) - bch2_stripes_heap_insert(c, m, idx); - else - bch2_stripes_heap_del(c, m, idx); - } else { - m->alive = inserting; - } + for (i = 0; i < EC_STRIPE_MAX; i++) + BUG_ON(atomic_read(&m->block_sectors[i])); - bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc); - break; + if (inserting) { + m->sectors = le16_to_cpu(s.v->sectors); + m->algorithm = s.v->algorithm; + m->nr_blocks = s.v->nr_blocks; + m->nr_redundant = s.v->nr_redundant; } + + if (!gc) { + if (inserting) + bch2_stripes_heap_insert(c, m, idx); + else + bch2_stripes_heap_del(c, m, idx); + } else { + m->alive = inserting; } + bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc); return 0; } -static int __bch2_mark_key(struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k, +static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, bool inserting, s64 sectors, struct bch_fs_usage *stats, u64 journal_seq, unsigned flags, @@ -870,22 +846,32 @@ static int __bch2_mark_key(struct bch_fs *c, { int ret = 0; - switch (type) { - case BKEY_TYPE_BTREE: + switch (k.k->type) { + case KEY_TYPE_btree_ptr: ret = bch2_mark_extent(c, k, inserting ? 
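[To make the accounting in bch2_mark_extent() above concrete: a 16-sector uncompressed extent with two dirty pointers and one cached pointer ends up with replicas == 2, adding 32 sectors to replicas[1].data[BCH_DATA_USER] and 16 sectors to replicas[0].data[BCH_DATA_CACHED]. Stripped of the compression and erasure-coding handling, the bookkeeping reduces to this sketch:

	unsigned replicas = 0;
	s64 dirty_sectors = 0, cached_sectors = 0;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.ptr.cached) {
			cached_sectors += sectors;
		} else {
			replicas++;
			dirty_sectors += sectors;
		}
	}

	replicas = clamp_t(unsigned, replicas,
			   1, ARRAY_SIZE(stats->replicas));

	stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors;
	stats->replicas[replicas - 1].data[data_type] += dirty_sectors;
]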
c->opts.btree_node_size : -c->opts.btree_node_size, BCH_DATA_BTREE, stats, journal_seq, flags, gc); break; - case BKEY_TYPE_EXTENTS: + case KEY_TYPE_extent: ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER, stats, journal_seq, flags, gc); break; - case BKEY_TYPE_EC: + case KEY_TYPE_stripe: ret = bch2_mark_stripe(c, k, inserting, stats, journal_seq, flags, gc); break; + case KEY_TYPE_reservation: { + unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas; + + sectors *= replicas; + replicas = clamp_t(unsigned, replicas, + 1, ARRAY_SIZE(stats->replicas)); + + stats->replicas[replicas - 1].persistent_reserved += sectors; + break; + } default: break; } @@ -894,7 +880,7 @@ static int __bch2_mark_key(struct bch_fs *c, } int bch2_mark_key_locked(struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k, + struct bkey_s_c k, bool inserting, s64 sectors, struct gc_pos pos, struct bch_fs_usage *stats, @@ -906,7 +892,7 @@ int bch2_mark_key_locked(struct bch_fs *c, if (!stats) stats = this_cpu_ptr(c->usage[0]); - ret = __bch2_mark_key(c, type, k, inserting, sectors, + ret = __bch2_mark_key(c, k, inserting, sectors, stats, journal_seq, flags, false); if (ret) return ret; @@ -914,7 +900,7 @@ int bch2_mark_key_locked(struct bch_fs *c, if ((flags & BCH_BUCKET_MARK_GC) || gc_visited(c, pos)) { - ret = __bch2_mark_key(c, type, k, inserting, sectors, + ret = __bch2_mark_key(c, k, inserting, sectors, this_cpu_ptr(c->usage[1]), journal_seq, flags, true); if (ret) @@ -924,8 +910,7 @@ int bch2_mark_key_locked(struct bch_fs *c, return 0; } -int bch2_mark_key(struct bch_fs *c, - enum bkey_type type, struct bkey_s_c k, +int bch2_mark_key(struct bch_fs *c, struct bkey_s_c k, bool inserting, s64 sectors, struct gc_pos pos, struct bch_fs_usage *stats, @@ -934,7 +919,7 @@ int bch2_mark_key(struct bch_fs *c, int ret; percpu_down_read(&c->usage_lock); - ret = bch2_mark_key_locked(c, type, k, inserting, sectors, + ret = bch2_mark_key_locked(c, k, inserting, sectors, pos, stats, journal_seq, flags); percpu_up_read(&c->usage_lock); @@ -952,20 +937,19 @@ void bch2_mark_update(struct btree_insert *trans, struct gc_pos pos = gc_pos_btree_node(b); struct bkey_packed *_k; - if (!bkey_type_needs_gc(iter->btree_id)) + if (!btree_node_type_needs_gc(iter->btree_id)) return; percpu_down_read(&c->usage_lock); if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) - bch2_mark_key_locked(c, btree_node_type(b), - bkey_i_to_s_c(insert->k), true, + bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true, bpos_min(insert->k->k.p, b->key.k.p).offset - bkey_start_offset(&insert->k->k), pos, &stats, trans->journal_res.seq, 0); while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b, - KEY_TYPE_DISCARD))) { + KEY_TYPE_discard))) { struct bkey unpacked; struct bkey_s_c k; s64 sectors = 0; @@ -994,9 +978,8 @@ void bch2_mark_update(struct btree_insert *trans, sectors = k.k->p.offset - insert->k->k.p.offset; BUG_ON(sectors <= 0); - bch2_mark_key_locked(c, btree_node_type(b), - k, true, sectors, pos, &stats, - trans->journal_res.seq, 0); + bch2_mark_key_locked(c, k, true, sectors, + pos, &stats, trans->journal_res.seq, 0); sectors = bkey_start_offset(&insert->k->k) - k.k->p.offset; @@ -1006,9 +989,8 @@ void bch2_mark_update(struct btree_insert *trans, BUG_ON(sectors >= 0); } - bch2_mark_key_locked(c, btree_node_type(b), - k, false, sectors, pos, &stats, - trans->journal_res.seq, 0); + bch2_mark_key_locked(c, k, false, sectors, + pos, &stats, trans->journal_res.seq, 0); bch2_btree_node_iter_advance(&node_iter, b); } diff --git 
a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h index 884041b53eb9..c584ad1b4375 100644 --- a/fs/bcachefs/buckets.h +++ b/fs/bcachefs/buckets.h @@ -220,10 +220,10 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *, #define BCH_BUCKET_MARK_NOATOMIC (1 << 0) #define BCH_BUCKET_MARK_GC (1 << 1) -int bch2_mark_key_locked(struct bch_fs *, enum bkey_type, struct bkey_s_c, +int bch2_mark_key_locked(struct bch_fs *, struct bkey_s_c, bool, s64, struct gc_pos, struct bch_fs_usage *, u64, unsigned); -int bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c, +int bch2_mark_key(struct bch_fs *, struct bkey_s_c, bool, s64, struct gc_pos, struct bch_fs_usage *, u64, unsigned); void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *); diff --git a/fs/bcachefs/debug.c b/fs/bcachefs/debug.c index 0a9efe57d5a9..f15c29878a9e 100644 --- a/fs/bcachefs/debug.c +++ b/fs/bcachefs/debug.c @@ -56,7 +56,8 @@ void __bch2_btree_verify(struct bch_fs *c, struct btree *b) v->btree_id = b->btree_id; bch2_btree_keys_init(v, &c->expensive_debug_checks); - if (bch2_btree_pick_ptr(c, b, NULL, &pick) <= 0) + if (bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), + NULL, &pick) <= 0) return; ca = bch_dev_bkey_exists(c, pick.ptr.dev); @@ -223,8 +224,7 @@ static ssize_t bch2_read_btree(struct file *file, char __user *buf, k = bch2_btree_iter_peek(&iter); while (k.k && !(err = btree_iter_err(k))) { - bch2_bkey_val_to_text(&PBUF(i->buf), i->c, - bkey_type(0, i->id), k); + bch2_bkey_val_to_text(&PBUF(i->buf), i->c, k); i->bytes = strlen(i->buf); BUG_ON(i->bytes >= PAGE_SIZE); i->buf[i->bytes] = '\n'; diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c index c1a611b4d9ec..80d37c568272 100644 --- a/fs/bcachefs/dirent.c +++ b/fs/bcachefs/dirent.c @@ -65,8 +65,7 @@ static bool dirent_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r) const struct bch_hash_desc bch2_dirent_hash_desc = { .btree_id = BTREE_ID_DIRENTS, - .key_type = BCH_DIRENT, - .whiteout_type = BCH_DIRENT_WHITEOUT, + .key_type = KEY_TYPE_dirent, .hash_key = dirent_hash_key, .hash_bkey = dirent_hash_bkey, .cmp_key = dirent_cmp_key, @@ -75,58 +74,37 @@ const struct bch_hash_desc bch2_dirent_hash_desc = { const char *bch2_dirent_invalid(const struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_dirent d; + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); unsigned len; - switch (k.k->type) { - case BCH_DIRENT: - if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) - return "value too small"; - - d = bkey_s_c_to_dirent(k); - len = bch2_dirent_name_bytes(d); - - if (!len) - return "empty name"; + if (bkey_val_bytes(k.k) < sizeof(struct bch_dirent)) + return "value too small"; - /* - * older versions of bcachefs were buggy and creating dirent - * keys that were bigger than necessary: - */ - if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7)) - return "value too big"; + len = bch2_dirent_name_bytes(d); + if (!len) + return "empty name"; - if (len > BCH_NAME_MAX) - return "dirent name too big"; + /* + * older versions of bcachefs were buggy and creating dirent + * keys that were bigger than necessary: + */ + if (bkey_val_u64s(k.k) > dirent_val_u64s(len + 7)) + return "value too big"; - return NULL; - case BCH_DIRENT_WHITEOUT: - return bkey_val_bytes(k.k) != 0 - ? 
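[As the debug.c hunk above shows, printing a key's value no longer needs a btree type plumbed through: bch2_bkey_val_to_text() takes just the printbuf, the filesystem, and the key. A minimal usage sketch; PBUF and bch_info come from bcachefs's util/logging headers, and the buffer size is arbitrary:

	char buf[200];

	bch2_bkey_val_to_text(&PBUF(buf), c, k);
	bch_info(c, "key: %s", buf);
]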
"value size should be zero" - : NULL; + if (len > BCH_NAME_MAX) + return "dirent name too big"; - default: - return "invalid type"; - } + return NULL; } void bch2_dirent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_dirent d; - - switch (k.k->type) { - case BCH_DIRENT: - d = bkey_s_c_to_dirent(k); - - bch_scnmemcpy(out, d.v->d_name, - bch2_dirent_name_bytes(d)); - pr_buf(out, " -> %llu", d.v->d_inum); - break; - case BCH_DIRENT_WHITEOUT: - pr_buf(out, "whiteout"); - break; - } + struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); + + bch_scnmemcpy(out, d.v->d_name, + bch2_dirent_name_bytes(d)); + pr_buf(out, " -> %llu", d.v->d_inum); } static struct bkey_i_dirent *dirent_create_key(struct btree_trans *trans, @@ -287,7 +265,7 @@ int bch2_dirent_rename(struct btree_trans *trans, * overwrite old_dst - just make sure to use a * whiteout when deleting src: */ - new_src->k.type = BCH_DIRENT_WHITEOUT; + new_src->k.type = KEY_TYPE_whiteout; } } else { /* Check if we need a whiteout to delete src: */ @@ -298,7 +276,7 @@ int bch2_dirent_rename(struct btree_trans *trans, return ret; if (ret) - new_src->k.type = BCH_DIRENT_WHITEOUT; + new_src->k.type = KEY_TYPE_whiteout; } } @@ -361,7 +339,7 @@ int bch2_empty_dir(struct bch_fs *c, u64 dir_inum) if (k.k->p.inode > dir_inum) break; - if (k.k->type == BCH_DIRENT) { + if (k.k->type == KEY_TYPE_dirent) { ret = -ENOTEMPTY; break; } @@ -385,7 +363,7 @@ int bch2_readdir(struct bch_fs *c, struct file *file, for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS(inode->v.i_ino, ctx->pos), 0, k) { - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; dirent = bkey_s_c_to_dirent(k); diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h index 2afb0baed11a..7b47573dcc46 100644 --- a/fs/bcachefs/dirent.h +++ b/fs/bcachefs/dirent.h @@ -9,7 +9,7 @@ extern const struct bch_hash_desc bch2_dirent_hash_desc; const char *bch2_dirent_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_dirent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_dirent_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_dirent (struct bkey_ops) { \ .key_invalid = bch2_dirent_invalid, \ .val_to_text = bch2_dirent_to_text, \ } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index 091a1f0a0432..010b9b90f2fc 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -123,49 +123,39 @@ static void *stripe_csum(struct bch_stripe *s, unsigned dev, unsigned csum_idx) return csums + (dev * stripe_csums_per_device(s) + csum_idx) * csum_bytes; } -const char *bch2_ec_key_invalid(const struct bch_fs *c, struct bkey_s_c k) +const char *bch2_stripe_invalid(const struct bch_fs *c, struct bkey_s_c k) { + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + if (k.k->p.inode) return "invalid stripe key"; - switch (k.k->type) { - case BCH_STRIPE: { - const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; - - if (bkey_val_bytes(k.k) < sizeof(*s)) - return "incorrect value size"; + if (bkey_val_bytes(k.k) < sizeof(*s)) + return "incorrect value size"; - if (bkey_val_u64s(k.k) != stripe_val_u64s(s)) - return "incorrect value size"; + if (bkey_val_u64s(k.k) != stripe_val_u64s(s)) + return "incorrect value size"; - return NULL; - } - default: - return "invalid type"; - } + return NULL; } -void bch2_ec_key_to_text(struct printbuf *out, struct bch_fs *c, +void bch2_stripe_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - switch (k.k->type) { - case BCH_STRIPE: { - const struct bch_stripe *s = 
bkey_s_c_to_stripe(k).v; - unsigned i; - - pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", - s->algorithm, - le16_to_cpu(s->sectors), - s->nr_blocks - s->nr_redundant, - s->nr_redundant, - s->csum_type, - 1U << s->csum_granularity_bits); - - for (i = 0; i < s->nr_blocks; i++) - pr_buf(out, " %u:%llu", s->ptrs[i].dev, - (u64) s->ptrs[i].offset); - } - } + const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; + unsigned i; + + pr_buf(out, "algo %u sectors %u blocks %u:%u csum %u gran %u", + s->algorithm, + le16_to_cpu(s->sectors), + s->nr_blocks - s->nr_redundant, + s->nr_redundant, + s->csum_type, + 1U << s->csum_granularity_bits); + + for (i = 0; i < s->nr_blocks; i++) + pr_buf(out, " %u:%llu", s->ptrs[i].dev, + (u64) s->ptrs[i].offset); } static int ptr_matches_stripe(struct bch_fs *c, @@ -454,7 +444,7 @@ int bch2_ec_read_extent(struct bch_fs *c, struct bch_read_bio *rbio) POS(0, stripe_idx), BTREE_ITER_SLOTS); k = bch2_btree_iter_peek_slot(&iter); - if (btree_iter_err(k) || k.k->type != BCH_STRIPE) { + if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) { __bcache_io_error(c, "error doing reconstruct read: stripe not found"); kfree(buf); @@ -695,7 +685,7 @@ static void ec_stripe_delete(struct bch_fs *c, size_t idx) POS(0, idx), BTREE_ITER_SLOTS|BTREE_ITER_INTENT); k = bch2_btree_iter_peek_slot(&iter); - if (btree_iter_err(k) || k.k->type != BCH_STRIPE) + if (btree_iter_err(k) || k.k->type != KEY_TYPE_stripe) goto out; v = kmalloc(bkey_val_bytes(k.k), GFP_KERNEL); diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h index c35de8b1ef64..4a8cade37c7a 100644 --- a/fs/bcachefs/ec.h +++ b/fs/bcachefs/ec.h @@ -5,13 +5,13 @@ #include "ec_types.h" #include "keylist_types.h" -const char *bch2_ec_key_invalid(const struct bch_fs *, struct bkey_s_c); -void bch2_ec_key_to_text(struct printbuf *, struct bch_fs *, +const char *bch2_stripe_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_stripe_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_ec_ops (struct bkey_ops) { \ - .key_invalid = bch2_ec_key_invalid, \ - .val_to_text = bch2_ec_key_to_text, \ +#define bch2_bkey_ops_stripe (struct bkey_ops) { \ + .key_invalid = bch2_stripe_invalid, \ + .val_to_text = bch2_stripe_to_text, \ } struct bch_read_bio; diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index 582499b08f31..c9a6f6e4a165 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -27,84 +27,34 @@ #include "util.h" #include "xattr.h" -/* Common among btree and extent ptrs */ - -const struct bch_extent_ptr * -bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev) -{ - const struct bch_extent_ptr *ptr; - - extent_for_each_ptr(e, ptr) - if (ptr->dev == dev) - return ptr; - - return NULL; -} - -void bch2_extent_drop_device(struct bkey_s_extent e, unsigned dev) -{ - struct bch_extent_ptr *ptr; - - bch2_extent_drop_ptrs(e, ptr, ptr->dev == dev); -} - -const struct bch_extent_ptr * -bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group) -{ - const struct bch_extent_ptr *ptr; - - extent_for_each_ptr(e, ptr) { - struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); - - if (ca->mi.group && - ca->mi.group - 1 == group) - return ptr; - } - - return NULL; -} - -const struct bch_extent_ptr * -bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target) -{ - const struct bch_extent_ptr *ptr; - - extent_for_each_ptr(e, ptr) - if (bch2_dev_in_target(c, ptr->dev, target) && - (!ptr->cached || - !ptr_stale(bch_dev_bkey_exists(c, 
ptr->dev), ptr))) - return ptr; - - return NULL; -} - -unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent e) +unsigned bch2_bkey_nr_ptrs(struct bkey_s_c k) { + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; unsigned nr_ptrs = 0; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) nr_ptrs++; return nr_ptrs; } -unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c k) +unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c k) { - struct bkey_s_c_extent e; - const struct bch_extent_ptr *ptr; unsigned nr_ptrs = 0; switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - e = bkey_s_c_to_extent(k); + case KEY_TYPE_btree_ptr: + case KEY_TYPE_extent: { + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) nr_ptrs += !ptr->cached; + BUG_ON(!nr_ptrs); break; - - case BCH_RESERVATION: + } + case KEY_TYPE_reservation: nr_ptrs = bkey_s_c_to_reservation(k).v->nr_replicas; break; } @@ -139,25 +89,216 @@ static unsigned bch2_extent_ptr_durability(struct bch_fs *c, return durability; } -unsigned bch2_extent_durability(struct bch_fs *c, struct bkey_s_c_extent e) +unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; unsigned durability = 0; - extent_for_each_ptr_decode(e, p, entry) + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) durability += bch2_extent_ptr_durability(c, p); return durability; } +static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, + unsigned dev) +{ + struct bch_dev_io_failures *i; + + for (i = f->devs; i < f->devs + f->nr; i++) + if (i->dev == dev) + return i; + + return NULL; +} + +void bch2_mark_io_failure(struct bch_io_failures *failed, + struct extent_ptr_decoded *p) +{ + struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev); + + if (!f) { + BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); + + f = &failed->devs[failed->nr++]; + f->dev = p->ptr.dev; + f->idx = p->idx; + f->nr_failed = 1; + f->nr_retries = 0; + } else if (p->idx != f->idx) { + f->idx = p->idx; + f->nr_failed = 1; + f->nr_retries = 0; + } else { + f->nr_failed++; + } +} + +/* + * returns true if p1 is better than p2: + */ +static inline bool ptr_better(struct bch_fs *c, + const struct extent_ptr_decoded p1, + const struct extent_ptr_decoded p2) +{ + if (likely(!p1.idx && !p2.idx)) { + struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev); + struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev); + + u64 l1 = atomic64_read(&dev1->cur_latency[READ]); + u64 l2 = atomic64_read(&dev2->cur_latency[READ]); + + /* Pick at random, biased in favor of the faster device: */ + + return bch2_rand_range(l1 + l2) > l1; + } + + if (force_reconstruct_read(c)) + return p1.idx > p2.idx; + + return p1.idx < p2.idx; +} + +/* + * This picks a non-stale pointer, preferably from a device other than @avoid. + * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to + * other devices, it will still pick a pointer from avoid. 
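[bch2_bkey_pick_read_device() and bch2_mark_io_failure() above are designed to be used together in a read-retry loop: each recorded failure bumps the pointer's idx, so the next pick advances to the following replica (or, with force_reconstruct_read, to erasure-coded reconstruction). A sketch of the intended caller, with read_one() standing in as a hypothetical I/O helper:

	struct bch_io_failures failed = { .nr = 0 };
	struct extent_ptr_decoded pick;
	int ret;

	while ((ret = bch2_bkey_pick_read_device(c, k, &failed, &pick)) > 0) {
		if (!read_one(c, &pick))	/* hypothetical I/O path */
			return 0;		/* success */

		bch2_mark_io_failure(&failed, &pick);
	}

	/* ret == 0: nothing to read; ret < 0: dirty pointers, all failed */
	return ret;
]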
+ */ +int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, + struct bch_io_failures *failed, + struct extent_ptr_decoded *pick) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + struct extent_ptr_decoded p; + struct bch_dev_io_failures *f; + struct bch_dev *ca; + int ret = 0; + + if (k.k->type == KEY_TYPE_error) + return -EIO; + + bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { + ca = bch_dev_bkey_exists(c, p.ptr.dev); + + /* + * If there are any dirty pointers it's an error if we can't + * read: + */ + if (!ret && !p.ptr.cached) + ret = -EIO; + + if (p.ptr.cached && ptr_stale(ca, &p.ptr)) + continue; + + f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; + if (f) + p.idx = f->nr_failed < f->nr_retries + ? f->idx + : f->idx + 1; + + if (!p.idx && + !bch2_dev_is_readable(ca)) + p.idx++; + + if (force_reconstruct_read(c) && + !p.idx && p.ec_nr) + p.idx++; + + if (p.idx >= p.ec_nr + 1) + continue; + + if (ret > 0 && !ptr_better(c, p, *pick)) + continue; + + *pick = p; + ret = 1; + } + + return ret; +} + +void bch2_bkey_append_ptr(struct bkey_i *k, + struct bch_extent_ptr ptr) +{ + EBUG_ON(bch2_bkey_has_device(bkey_i_to_s_c(k), ptr.dev)); + + switch (k->k.type) { + case KEY_TYPE_btree_ptr: + case KEY_TYPE_extent: + EBUG_ON(bkey_val_u64s(&k->k) >= BKEY_EXTENT_VAL_U64s_MAX); + + ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; + + memcpy((void *) &k->v + bkey_val_bytes(&k->k), + &ptr, + sizeof(ptr)); + k->u64s++; + break; + default: + BUG(); + } +} + +void bch2_bkey_drop_device(struct bkey_s k, unsigned dev) +{ + struct bch_extent_ptr *ptr; + + bch2_bkey_drop_ptrs(k, ptr, ptr->dev == dev); +} + +/* extent specific utility code */ + +const struct bch_extent_ptr * +bch2_extent_has_device(struct bkey_s_c_extent e, unsigned dev) +{ + const struct bch_extent_ptr *ptr; + + extent_for_each_ptr(e, ptr) + if (ptr->dev == dev) + return ptr; + + return NULL; +} + +const struct bch_extent_ptr * +bch2_extent_has_group(struct bch_fs *c, struct bkey_s_c_extent e, unsigned group) +{ + const struct bch_extent_ptr *ptr; + + extent_for_each_ptr(e, ptr) { + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); + + if (ca->mi.group && + ca->mi.group - 1 == group) + return ptr; + } + + return NULL; +} + +const struct bch_extent_ptr * +bch2_extent_has_target(struct bch_fs *c, struct bkey_s_c_extent e, unsigned target) +{ + const struct bch_extent_ptr *ptr; + + extent_for_each_ptr(e, ptr) + if (bch2_dev_in_target(c, ptr->dev, target) && + (!ptr->cached || + !ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr))) + return ptr; + + return NULL; +} + unsigned bch2_extent_is_compressed(struct bkey_s_c k) { unsigned ret = 0; switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { + case KEY_TYPE_extent: { struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const union bch_extent_entry *entry; struct extent_ptr_decoded p; @@ -189,10 +330,10 @@ bool bch2_extent_matches_ptr(struct bch_fs *c, struct bkey_s_c_extent e, return false; } -static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e, +static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs, union bch_extent_entry *entry) { - union bch_extent_entry *i = e.v->start; + union bch_extent_entry *i = ptrs.start; if (i == entry) return NULL; @@ -202,23 +343,24 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_s_extent e, return i; } -union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e, - struct bch_extent_ptr *ptr) +union bch_extent_entry 
*bch2_bkey_drop_ptr(struct bkey_s k, + struct bch_extent_ptr *ptr) { + struct bkey_ptrs ptrs = bch2_bkey_ptrs(k); union bch_extent_entry *dst, *src, *prev; bool drop_crc = true; - EBUG_ON(ptr < &e.v->start->ptr || - ptr >= &extent_entry_last(e)->ptr); + EBUG_ON(ptr < &ptrs.start->ptr || + ptr >= &ptrs.end->ptr); EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr); src = extent_entry_next(to_entry(ptr)); - if (src != extent_entry_last(e) && + if (src != ptrs.end && !extent_entry_is_crc(src)) drop_crc = false; dst = to_entry(ptr); - while ((prev = extent_entry_prev(e, dst))) { + while ((prev = extent_entry_prev(ptrs, dst))) { if (extent_entry_is_ptr(prev)) break; @@ -232,8 +374,8 @@ union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent e, } memmove_u64s_down(dst, src, - (u64 *) extent_entry_last(e) - (u64 *) src); - e.k->u64s -= (u64 *) src - (u64 *) dst; + (u64 *) ptrs.end - (u64 *) src); + k.k->u64s -= (u64 *) src - (u64 *) dst; return dst; } @@ -300,7 +442,7 @@ found: restart_narrow_pointers: extent_for_each_ptr_decode(extent_i_to_s(e), p, i) if (can_narrow_crc(p.crc, n)) { - bch2_extent_drop_ptr(extent_i_to_s(e), &i->ptr); + bch2_bkey_drop_ptr(extent_i_to_s(e).s, &i->ptr); p.ptr.offset += p.crc.offset; p.crc = n; bch2_extent_ptr_decoded_append(e, &p); @@ -325,302 +467,165 @@ static inline bool bch2_crc_unpacked_cmp(struct bch_extent_crc_unpacked l, bch2_crc_cmp(l.csum, r.csum)); } -static void bch2_extent_drop_stale(struct bch_fs *c, struct bkey_s_extent e) -{ - struct bch_extent_ptr *ptr; - - bch2_extent_drop_ptrs(e, ptr, - ptr->cached && - ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); -} - -bool bch2_ptr_normalize(struct bch_fs *c, struct btree *b, struct bkey_s k) -{ - return bch2_extent_normalize(c, k); -} - void bch2_ptr_swab(const struct bkey_format *f, struct bkey_packed *k) { - switch (k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - union bch_extent_entry *entry; - u64 *d = (u64 *) bkeyp_val(f, k); - unsigned i; - - for (i = 0; i < bkeyp_val_u64s(f, k); i++) - d[i] = swab64(d[i]); - - for (entry = (union bch_extent_entry *) d; - entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k)); - entry = extent_entry_next(entry)) { - switch (extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - break; - case BCH_EXTENT_ENTRY_crc32: - entry->crc32.csum = swab32(entry->crc32.csum); - break; - case BCH_EXTENT_ENTRY_crc64: - entry->crc64.csum_hi = swab16(entry->crc64.csum_hi); - entry->crc64.csum_lo = swab64(entry->crc64.csum_lo); - break; - case BCH_EXTENT_ENTRY_crc128: - entry->crc128.csum.hi = (__force __le64) - swab64((__force u64) entry->crc128.csum.hi); - entry->crc128.csum.lo = (__force __le64) - swab64((__force u64) entry->crc128.csum.lo); - break; - case BCH_EXTENT_ENTRY_stripe_ptr: - break; - } - } - break; - } - } -} - -static const char *extent_ptr_invalid(const struct bch_fs *c, - struct bkey_s_c_extent e, - const struct bch_extent_ptr *ptr, - unsigned size_ondisk, - bool metadata) -{ - const struct bch_extent_ptr *ptr2; - struct bch_dev *ca; - - if (ptr->dev >= c->sb.nr_devices || - !c->devs[ptr->dev]) - return "pointer to invalid device"; - - ca = bch_dev_bkey_exists(c, ptr->dev); - if (!ca) - return "pointer to invalid device"; - - extent_for_each_ptr(e, ptr2) - if (ptr != ptr2 && ptr->dev == ptr2->dev) - return "multiple pointers to same device"; - - if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets)) - return "offset past end of device"; - - if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) - return "offset 
before first bucket"; - - if (bucket_remainder(ca, ptr->offset) + - size_ondisk > ca->mi.bucket_size) - return "spans multiple buckets"; - - return NULL; -} - -static void extent_print_ptrs(struct printbuf *out, struct bch_fs *c, - struct bkey_s_c_extent e) -{ - const union bch_extent_entry *entry; - struct bch_extent_crc_unpacked crc; - const struct bch_extent_ptr *ptr; - const struct bch_extent_stripe_ptr *ec; - struct bch_dev *ca; - bool first = true; + union bch_extent_entry *entry; + u64 *d = (u64 *) bkeyp_val(f, k); + unsigned i; - extent_for_each_entry(e, entry) { - if (!first) - pr_buf(out, " "); + for (i = 0; i < bkeyp_val_u64s(f, k); i++) + d[i] = swab64(d[i]); - switch (__extent_entry_type(entry)) { + for (entry = (union bch_extent_entry *) d; + entry < (union bch_extent_entry *) (d + bkeyp_val_u64s(f, k)); + entry = extent_entry_next(entry)) { + switch (extent_entry_type(entry)) { case BCH_EXTENT_ENTRY_ptr: - ptr = entry_to_ptr(entry); - ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] - ? bch_dev_bkey_exists(c, ptr->dev) - : NULL; - - pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev, - (u64) ptr->offset, ptr->gen, - ptr->cached ? " cached" : "", - ca && ptr_stale(ca, ptr) - ? " stale" : ""); break; case BCH_EXTENT_ENTRY_crc32: + entry->crc32.csum = swab32(entry->crc32.csum); + break; case BCH_EXTENT_ENTRY_crc64: + entry->crc64.csum_hi = swab16(entry->crc64.csum_hi); + entry->crc64.csum_lo = swab64(entry->crc64.csum_lo); + break; case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); - - pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u", - crc.compressed_size, - crc.uncompressed_size, - crc.offset, crc.nonce, - crc.csum_type, - crc.compression_type); + entry->crc128.csum.hi = (__force __le64) + swab64((__force u64) entry->crc128.csum.hi); + entry->crc128.csum.lo = (__force __le64) + swab64((__force u64) entry->crc128.csum.lo); break; case BCH_EXTENT_ENTRY_stripe_ptr: - ec = &entry->stripe_ptr; - - pr_buf(out, "ec: idx %llu block %u", - (u64) ec->idx, ec->block); break; - default: - pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); - goto out; } - - first = false; } -out: - if (bkey_extent_is_cached(e.k)) - pr_buf(out, " cached"); } -static struct bch_dev_io_failures *dev_io_failures(struct bch_io_failures *f, - unsigned dev) -{ - struct bch_dev_io_failures *i; - - for (i = f->devs; i < f->devs + f->nr; i++) - if (i->dev == dev) - return i; - - return NULL; -} - -void bch2_mark_io_failure(struct bch_io_failures *failed, - struct extent_ptr_decoded *p) -{ - struct bch_dev_io_failures *f = dev_io_failures(failed, p->ptr.dev); - - if (!f) { - BUG_ON(failed->nr >= ARRAY_SIZE(failed->devs)); - - f = &failed->devs[failed->nr++]; - f->dev = p->ptr.dev; - f->idx = p->idx; - f->nr_failed = 1; - f->nr_retries = 0; - } else if (p->idx != f->idx) { - f->idx = p->idx; - f->nr_failed = 1; - f->nr_retries = 0; - } else { - f->nr_failed++; - } -} - -/* - * returns true if p1 is better than p2: - */ -static inline bool ptr_better(struct bch_fs *c, - const struct extent_ptr_decoded p1, - const struct extent_ptr_decoded p2) +static const char *extent_ptr_invalid(const struct bch_fs *c, + struct bkey_s_c k, + const struct bch_extent_ptr *ptr, + unsigned size_ondisk, + bool metadata) { - if (likely(!p1.idx && !p2.idx)) { - struct bch_dev *dev1 = bch_dev_bkey_exists(c, p1.ptr.dev); - struct bch_dev *dev2 = bch_dev_bkey_exists(c, p2.ptr.dev); + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr 
*ptr2; + struct bch_dev *ca; - u64 l1 = atomic64_read(&dev1->cur_latency[READ]); - u64 l2 = atomic64_read(&dev2->cur_latency[READ]); + if (ptr->dev >= c->sb.nr_devices || + !c->devs[ptr->dev]) + return "pointer to invalid device"; - /* Pick at random, biased in favor of the faster device: */ + ca = bch_dev_bkey_exists(c, ptr->dev); + if (!ca) + return "pointer to invalid device"; - return bch2_rand_range(l1 + l2) > l1; - } + bkey_for_each_ptr(ptrs, ptr2) + if (ptr != ptr2 && ptr->dev == ptr2->dev) + return "multiple pointers to same device"; - if (force_reconstruct_read(c)) - return p1.idx > p2.idx; + if (ptr->offset + size_ondisk > bucket_to_sector(ca, ca->mi.nbuckets)) + return "offset past end of device"; - return p1.idx < p2.idx; + if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) + return "offset before first bucket"; + + if (bucket_remainder(ca, ptr->offset) + + size_ondisk > ca->mi.bucket_size) + return "spans multiple buckets"; + + return NULL; } -static int extent_pick_read_device(struct bch_fs *c, - struct bkey_s_c_extent e, - struct bch_io_failures *failed, - struct extent_ptr_decoded *pick) +static void bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) { + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const union bch_extent_entry *entry; - struct extent_ptr_decoded p; - struct bch_dev_io_failures *f; + struct bch_extent_crc_unpacked crc; + const struct bch_extent_ptr *ptr; + const struct bch_extent_stripe_ptr *ec; struct bch_dev *ca; - int ret = 0; - - extent_for_each_ptr_decode(e, p, entry) { - ca = bch_dev_bkey_exists(c, p.ptr.dev); - - if (p.ptr.cached && ptr_stale(ca, &p.ptr)) - continue; + bool first = true; - f = failed ? dev_io_failures(failed, p.ptr.dev) : NULL; - if (f) - p.idx = f->nr_failed < f->nr_retries - ? f->idx - : f->idx + 1; + bkey_extent_entry_for_each(ptrs, entry) { + if (!first) + pr_buf(out, " "); - if (!p.idx && - !bch2_dev_is_readable(ca)) - p.idx++; + switch (__extent_entry_type(entry)) { + case BCH_EXTENT_ENTRY_ptr: + ptr = entry_to_ptr(entry); + ca = ptr->dev < c->sb.nr_devices && c->devs[ptr->dev] + ? bch_dev_bkey_exists(c, ptr->dev) + : NULL; - if (force_reconstruct_read(c) && - !p.idx && p.ec_nr) - p.idx++; + pr_buf(out, "ptr: %u:%llu gen %u%s%s", ptr->dev, + (u64) ptr->offset, ptr->gen, + ptr->cached ? " cached" : "", + ca && ptr_stale(ca, ptr) + ? 
" stale" : ""); + break; + case BCH_EXTENT_ENTRY_crc32: + case BCH_EXTENT_ENTRY_crc64: + case BCH_EXTENT_ENTRY_crc128: + crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry)); - if (p.idx >= p.ec_nr + 1) - continue; + pr_buf(out, "crc: c_size %u size %u offset %u nonce %u csum %u compress %u", + crc.compressed_size, + crc.uncompressed_size, + crc.offset, crc.nonce, + crc.csum_type, + crc.compression_type); + break; + case BCH_EXTENT_ENTRY_stripe_ptr: + ec = &entry->stripe_ptr; - if (ret && !ptr_better(c, p, *pick)) - continue; + pr_buf(out, "ec: idx %llu block %u", + (u64) ec->idx, ec->block); + break; + default: + pr_buf(out, "(invalid extent entry %.16llx)", *((u64 *) entry)); + return; + } - *pick = p; - ret = 1; + first = false; } - - return ret; } /* Btree ptrs */ const char *bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k) { - if (bkey_extent_is_cached(k.k)) - return "cached"; - - if (k.k->size) - return "nonzero key size"; + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + const union bch_extent_entry *entry; + const struct bch_extent_ptr *ptr; + const char *reason; if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) return "value too big"; - switch (k.k->type) { - case BCH_EXTENT: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - const struct bch_extent_ptr *ptr; - const char *reason; - - extent_for_each_entry(e, entry) { - if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) - return "invalid extent entry type"; - - if (!extent_entry_is_ptr(entry)) - return "has non ptr field"; - } - - extent_for_each_ptr(e, ptr) { - reason = extent_ptr_invalid(c, e, ptr, - c->opts.btree_node_size, - true); - if (reason) - return reason; - } + bkey_extent_entry_for_each(ptrs, entry) { + if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) + return "invalid extent entry type"; - return NULL; + if (!extent_entry_is_ptr(entry)) + return "has non ptr field"; } - default: - return "invalid value type"; + bkey_for_each_ptr(ptrs, ptr) { + reason = extent_ptr_invalid(c, k, ptr, + c->opts.btree_node_size, + true); + if (reason) + return reason; } + + return NULL; } void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; unsigned seq; const char *err; @@ -630,7 +635,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, unsigned replicas = 0; bool bad; - extent_for_each_ptr(e, ptr) { + bkey_for_each_ptr(ptrs, ptr) { ca = bch_dev_bkey_exists(c, ptr->dev); replicas++; @@ -656,9 +661,8 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, } if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && - !bch2_bkey_replicas_marked(c, btree_node_type(b), - e.s_c, false)) { - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k); + !bch2_bkey_replicas_marked(c, k, false)) { + bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_fs_bug(c, "btree key bad (replicas not marked in superblock):\n%s", buf); @@ -667,7 +671,7 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b, return; err: - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k); + bch2_bkey_val_to_text(&PBUF(buf), c, k); bch2_fs_bug(c, "%s btree pointer %s: bucket %zi gen %i mark %08x", err, buf, PTR_BUCKET_NR(ca, ptr), mark.gen, (unsigned) mark.v.counter); @@ -678,22 +682,13 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c, { const char *invalid; - if 
(bkey_extent_is_data(k.k)) - extent_print_ptrs(out, c, bkey_s_c_to_extent(k)); + bkey_ptrs_to_text(out, c, k); invalid = bch2_btree_ptr_invalid(c, k); if (invalid) pr_buf(out, " invalid: %s", invalid); } -int bch2_btree_pick_ptr(struct bch_fs *c, const struct btree *b, - struct bch_io_failures *failed, - struct extent_ptr_decoded *pick) -{ - return extent_pick_read_device(c, bkey_i_to_s_c_extent(&b->key), - failed, pick); -} - /* Extents */ bool __bch2_cut_front(struct bpos where, struct bkey_s k) @@ -714,7 +709,7 @@ bool __bch2_cut_front(struct bpos where, struct bkey_s k) * cause offset to point to the next bucket: */ if (!len) - k.k->type = KEY_TYPE_DELETED; + k.k->type = KEY_TYPE_deleted; else if (bkey_extent_is_data(k.k)) { struct bkey_s_extent e = bkey_s_to_extent(k); union bch_extent_entry *entry; @@ -766,7 +761,7 @@ bool bch2_cut_back(struct bpos where, struct bkey *k) k->size = len; if (!len) - k->type = KEY_TYPE_DELETED; + k->type = KEY_TYPE_deleted; return true; } @@ -830,13 +825,13 @@ static void verify_extent_nonoverlapping(struct btree *b, struct bkey uk; iter = *_iter; - k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_prev_filter(&iter, b, KEY_TYPE_discard); BUG_ON(k && (uk = bkey_unpack_key(b, k), bkey_cmp(uk.p, bkey_start_pos(&insert->k)) > 0)); iter = *_iter; - k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_peek_filter(&iter, b, KEY_TYPE_discard); #if 0 BUG_ON(k && (uk = bkey_unpack_key(b, k), @@ -882,13 +877,13 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter, verify_extent_nonoverlapping(l->b, &l->iter, insert); node_iter = l->iter; - k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_discard); if (k && !bkey_written(l->b, k) && bch2_extent_merge_inline(c, iter, k, bkey_to_packed(insert), true)) return; node_iter = l->iter; - k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_DISCARD); + k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_discard); if (k && !bkey_written(l->b, k) && bch2_extent_merge_inline(c, iter, bkey_to_packed(insert), k, false)) return; @@ -912,7 +907,7 @@ static void extent_insert_committed(struct extent_insert_state *s) bkey_copy(&split.k, insert); if (s->deleting) - split.k.k.type = KEY_TYPE_DISCARD; + split.k.k.type = KEY_TYPE_discard; bch2_cut_back(s->committed, &split.k.k); @@ -934,7 +929,7 @@ static void extent_insert_committed(struct extent_insert_state *s) if (s->update_journal) { bkey_copy(&split.k, !s->deleting ? 
insert : &s->whiteout); if (s->deleting) - split.k.k.type = KEY_TYPE_DISCARD; + split.k.k.type = KEY_TYPE_discard; bch2_cut_back(s->committed, &split.k.k); @@ -985,7 +980,7 @@ bch2_extent_can_insert(struct btree_insert *trans, *u64s += BKEY_U64s; _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, - KEY_TYPE_DISCARD); + KEY_TYPE_discard); if (!_k) return BTREE_INSERT_OK; @@ -1062,7 +1057,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert, btree_account_key_drop(l->b, _k); k.k->size = 0; - k.k->type = KEY_TYPE_DELETED; + k.k->type = KEY_TYPE_deleted; if (_k >= btree_bset_last(l->b)->start) { unsigned u64s = _k->u64s; @@ -1123,7 +1118,7 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s) while (bkey_cmp(s->committed, insert->k.p) < 0 && (_k = bch2_btree_node_iter_peek_filter(&l->iter, l->b, - KEY_TYPE_DISCARD))) { + KEY_TYPE_discard))) { struct bkey_s k = __bkey_disassemble(l->b, _k, &unpacked); enum bch_extent_overlap overlap = bch2_extent_overlap(&insert->k, k.k); @@ -1155,7 +1150,7 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s) !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) { if (!bkey_whiteout(k.k)) { btree_account_key_drop(l->b, _k); - _k->type = KEY_TYPE_DISCARD; + _k->type = KEY_TYPE_discard; reserve_whiteout(l->b, _k); } break; @@ -1286,88 +1281,66 @@ bch2_insert_fixup_extent(struct btree_insert *trans, const char *bch2_extent_invalid(const struct bch_fs *c, struct bkey_s_c k) { - if (bkey_val_u64s(k.k) > BKEY_EXTENT_VAL_U64s_MAX) - return "value too big"; - - if (!k.k->size) - return "zero key size"; + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const union bch_extent_entry *entry; + struct bch_extent_crc_unpacked crc; + const struct bch_extent_ptr *ptr; + unsigned size_ondisk = e.k->size; + const char *reason; + unsigned nonce = UINT_MAX; - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: { - struct bkey_s_c_extent e = bkey_s_c_to_extent(k); - const union bch_extent_entry *entry; - struct bch_extent_crc_unpacked crc; - const struct bch_extent_ptr *ptr; - unsigned size_ondisk = e.k->size; - const char *reason; - unsigned nonce = UINT_MAX; + if (bkey_val_u64s(e.k) > BKEY_EXTENT_VAL_U64s_MAX) + return "value too big"; - extent_for_each_entry(e, entry) { - if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) - return "invalid extent entry type"; + extent_for_each_entry(e, entry) { + if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) + return "invalid extent entry type"; - switch (extent_entry_type(entry)) { - case BCH_EXTENT_ENTRY_ptr: - ptr = entry_to_ptr(entry); + switch (extent_entry_type(entry)) { + case BCH_EXTENT_ENTRY_ptr: + ptr = entry_to_ptr(entry); - reason = extent_ptr_invalid(c, e, &entry->ptr, - size_ondisk, false); - if (reason) - return reason; - break; - case BCH_EXTENT_ENTRY_crc32: - case BCH_EXTENT_ENTRY_crc64: - case BCH_EXTENT_ENTRY_crc128: - crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); + reason = extent_ptr_invalid(c, e.s_c, &entry->ptr, + size_ondisk, false); + if (reason) + return reason; + break; + case BCH_EXTENT_ENTRY_crc32: + case BCH_EXTENT_ENTRY_crc64: + case BCH_EXTENT_ENTRY_crc128: + crc = bch2_extent_crc_unpack(e.k, entry_to_crc(entry)); - if (crc.offset + e.k->size > - crc.uncompressed_size) - return "checksum offset + key size > uncompressed size"; + if (crc.offset + e.k->size > + crc.uncompressed_size) + return "checksum offset + key size > uncompressed size"; - size_ondisk = crc.compressed_size; + size_ondisk = 
crc.compressed_size; - if (!bch2_checksum_type_valid(c, crc.csum_type)) - return "invalid checksum type"; + if (!bch2_checksum_type_valid(c, crc.csum_type)) + return "invalid checksum type"; - if (crc.compression_type >= BCH_COMPRESSION_NR) - return "invalid compression type"; + if (crc.compression_type >= BCH_COMPRESSION_NR) + return "invalid compression type"; - if (bch2_csum_type_is_encryption(crc.csum_type)) { - if (nonce == UINT_MAX) - nonce = crc.offset + crc.nonce; - else if (nonce != crc.offset + crc.nonce) - return "incorrect nonce"; - } - break; - case BCH_EXTENT_ENTRY_stripe_ptr: - break; + if (bch2_csum_type_is_encryption(crc.csum_type)) { + if (nonce == UINT_MAX) + nonce = crc.offset + crc.nonce; + else if (nonce != crc.offset + crc.nonce) + return "incorrect nonce"; } + break; + case BCH_EXTENT_ENTRY_stripe_ptr: + break; } - - return NULL; - } - - case BCH_RESERVATION: { - struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); - - if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) - return "incorrect value size"; - - if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) - return "invalid nr_replicas"; - - return NULL; } - default: - return "invalid value type"; - } + return NULL; } -static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, - struct bkey_s_c_extent e) +void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, + struct bkey_s_c k) { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); const struct bch_extent_ptr *ptr; struct bch_dev *ca; struct bucket_mark mark; @@ -1429,8 +1402,7 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, } if (replicas > BCH_REPLICAS_MAX) { - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), - e.s_c); + bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c); bch2_fs_bug(c, "extent key bad (too many replicas: %u): %s", replicas, buf); @@ -1438,10 +1410,8 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, } if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) && - !bch2_bkey_replicas_marked(c, btree_node_type(b), - e.s_c, false)) { - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), - e.s_c); + !bch2_bkey_replicas_marked(c, e.s_c, false)) { + bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c); bch2_fs_bug(c, "extent key bad (replicas not marked in superblock):\n%s", buf); @@ -1451,34 +1421,18 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b, return; bad_ptr: - bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), - e.s_c); + bch2_bkey_val_to_text(&PBUF(buf), c, e.s_c); bch2_fs_bug(c, "extent pointer bad gc mark: %s:\nbucket %zu " "gen %i type %u", buf, PTR_BUCKET_NR(ca, ptr), mark.gen, mark.data_type); } -void bch2_extent_debugcheck(struct bch_fs *c, struct btree *b, struct bkey_s_c k) -{ - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - bch2_extent_debugcheck_extent(c, b, bkey_s_c_to_extent(k)); - break; - case BCH_RESERVATION: - break; - default: - BUG(); - } -} - void bch2_extent_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { const char *invalid; - if (bkey_extent_is_data(k.k)) - extent_print_ptrs(out, c, bkey_s_c_to_extent(k)); + bkey_ptrs_to_text(out, c, k); invalid = bch2_extent_invalid(c, k); if (invalid) @@ -1593,41 +1547,17 @@ found: */ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k) { - struct bkey_s_extent e; - - switch (k.k->type) { - case KEY_TYPE_ERROR: - return false; - - case KEY_TYPE_DELETED: - return true; - case KEY_TYPE_DISCARD: - 
return bversion_zero(k.k->version); - case KEY_TYPE_COOKIE: - return false; - - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - e = bkey_s_to_extent(k); + struct bch_extent_ptr *ptr; - bch2_extent_drop_stale(c, e); + bch2_bkey_drop_ptrs(k, ptr, + ptr->cached && + ptr_stale(bch_dev_bkey_exists(c, ptr->dev), ptr)); - if (!bkey_val_u64s(e.k)) { - if (bkey_extent_is_cached(e.k)) { - k.k->type = KEY_TYPE_DISCARD; - if (bversion_zero(k.k->version)) - return true; - } else { - k.k->type = KEY_TYPE_ERROR; - } - } + /* will only happen if all pointers were cached: */ + if (!bkey_val_u64s(k.k)) + k.k->type = KEY_TYPE_deleted; - return false; - case BCH_RESERVATION: - return false; - default: - BUG(); - } + return false; } void bch2_extent_mark_replicas_cached(struct bch_fs *c, @@ -1637,7 +1567,7 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c, { union bch_extent_entry *entry; struct extent_ptr_decoded p; - int extra = bch2_extent_durability(c, e.c) - nr_desired_replicas; + int extra = bch2_bkey_durability(c, e.s_c) - nr_desired_replicas; if (target && extra > 0) extent_for_each_ptr_decode(e, p, entry) { @@ -1661,106 +1591,40 @@ void bch2_extent_mark_replicas_cached(struct bch_fs *c, } } -/* - * This picks a non-stale pointer, preferably from a device other than @avoid. - * Avoid can be NULL, meaning pick any. If there are no non-stale pointers to - * other devices, it will still pick a pointer from avoid. - */ -int bch2_extent_pick_ptr(struct bch_fs *c, struct bkey_s_c k, - struct bch_io_failures *failed, - struct extent_ptr_decoded *pick) -{ - int ret; - - switch (k.k->type) { - case KEY_TYPE_ERROR: - return -EIO; - - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - ret = extent_pick_read_device(c, bkey_s_c_to_extent(k), - failed, pick); - - if (!ret && !bkey_extent_is_cached(k.k)) - ret = -EIO; - - return ret; - - default: - return 0; - } -} - -enum merge_result bch2_extent_merge(struct bch_fs *c, struct btree *b, +enum merge_result bch2_extent_merge(struct bch_fs *c, struct bkey_i *l, struct bkey_i *r) { - struct bkey_s_extent el, er; + struct bkey_s_extent el = bkey_i_to_s_extent(l); + struct bkey_s_extent er = bkey_i_to_s_extent(r); union bch_extent_entry *en_l, *en_r; - if (key_merging_disabled(c)) - return BCH_MERGE_NOMERGE; - - /* - * Generic header checks - * Assumes left and right are in order - * Left and right must be exactly aligned - */ - - if (l->k.u64s != r->k.u64s || - l->k.type != r->k.type || - bversion_cmp(l->k.version, r->k.version) || - bkey_cmp(l->k.p, bkey_start_pos(&r->k))) + if (bkey_val_u64s(&l->k) != bkey_val_u64s(&r->k)) return BCH_MERGE_NOMERGE; - switch (l->k.type) { - case KEY_TYPE_DISCARD: - case KEY_TYPE_ERROR: - /* These types are mergeable, and no val to check */ - break; - - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - el = bkey_i_to_s_extent(l); - er = bkey_i_to_s_extent(r); - - extent_for_each_entry(el, en_l) { - struct bch_extent_ptr *lp, *rp; - struct bch_dev *ca; - - en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); - - if ((extent_entry_type(en_l) != - extent_entry_type(en_r)) || - !extent_entry_is_ptr(en_l)) - return BCH_MERGE_NOMERGE; + extent_for_each_entry(el, en_l) { + struct bch_extent_ptr *lp, *rp; + struct bch_dev *ca; - lp = &en_l->ptr; - rp = &en_r->ptr; + en_r = vstruct_idx(er.v, (u64 *) en_l - el.v->_data); - if (lp->offset + el.k->size != rp->offset || - lp->dev != rp->dev || - lp->gen != rp->gen) - return BCH_MERGE_NOMERGE; + if ((extent_entry_type(en_l) != + extent_entry_type(en_r)) || + !extent_entry_is_ptr(en_l)) + return 
BCH_MERGE_NOMERGE; - /* We don't allow extents to straddle buckets: */ - ca = bch_dev_bkey_exists(c, lp->dev); + lp = &en_l->ptr; + rp = &en_r->ptr; - if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) - return BCH_MERGE_NOMERGE; - } + if (lp->offset + el.k->size != rp->offset || + lp->dev != rp->dev || + lp->gen != rp->gen) + return BCH_MERGE_NOMERGE; - break; - case BCH_RESERVATION: { - struct bkey_i_reservation *li = bkey_i_to_reservation(l); - struct bkey_i_reservation *ri = bkey_i_to_reservation(r); + /* We don't allow extents to straddle buckets: */ + ca = bch_dev_bkey_exists(c, lp->dev); - if (li->v.generation != ri->v.generation || - li->v.nr_replicas != ri->v.nr_replicas) + if (PTR_BUCKET_NR(ca, lp) != PTR_BUCKET_NR(ca, rp)) return BCH_MERGE_NOMERGE; - break; - } - default: - return BCH_MERGE_NOMERGE; } l->k.needs_whiteout |= r->k.needs_whiteout; @@ -1810,7 +1674,7 @@ static bool bch2_extent_merge_inline(struct bch_fs *c, bch2_bkey_unpack(b, &li.k, l); bch2_bkey_unpack(b, &ri.k, r); - ret = bch2_extent_merge(c, b, &li.k, &ri.k); + ret = bch2_bkey_merge(c, &li.k, &ri.k); if (ret == BCH_MERGE_NOMERGE) return false; @@ -1878,3 +1742,54 @@ int bch2_check_range_allocated(struct bch_fs *c, struct bpos pos, u64 size) return ret; } + +/* KEY_TYPE_reservation: */ + +const char *bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); + + if (bkey_val_bytes(k.k) != sizeof(struct bch_reservation)) + return "incorrect value size"; + + if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) + return "invalid nr_replicas"; + + return NULL; +} + +void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ + struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k); + + pr_buf(out, "generation %u replicas %u", + le32_to_cpu(r.v->generation), + r.v->nr_replicas); +} + +enum merge_result bch2_reservation_merge(struct bch_fs *c, + struct bkey_i *l, struct bkey_i *r) +{ + struct bkey_i_reservation *li = bkey_i_to_reservation(l); + struct bkey_i_reservation *ri = bkey_i_to_reservation(r); + + if (li->v.generation != ri->v.generation || + li->v.nr_replicas != ri->v.nr_replicas) + return BCH_MERGE_NOMERGE; + + l->k.needs_whiteout |= r->k.needs_whiteout; + + /* Keys with no pointers aren't restricted to one bucket and could + * overflow KEY_SIZE + */ + if ((u64) l->k.size + r->k.size > KEY_SIZE_MAX) { + bch2_key_resize(&l->k, KEY_SIZE_MAX); + bch2_cut_front(l->k.p, r); + return BCH_MERGE_PARTIAL; + } + + bch2_key_resize(&l->k, l->k.size + r->k.size); + + return BCH_MERGE_MERGE; +} diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index 389604f25630..57eb35699545 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -10,125 +10,34 @@ struct bch_fs; struct btree_insert; struct btree_insert_entry; -const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); -void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *, - struct bkey_s_c); -void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, - struct bkey_s_c); -void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *); - -#define bch2_bkey_btree_ops (struct bkey_ops) { \ - .key_invalid = bch2_btree_ptr_invalid, \ - .key_debugcheck = bch2_btree_ptr_debugcheck, \ - .val_to_text = bch2_btree_ptr_to_text, \ - .swab = bch2_ptr_swab, \ -} - -const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); -void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct 
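/*
 * Sketch of the size-overflow handling shared by the merge paths above:
 * extent sizes live in a fixed-width field, so a merge that would exceed
 * KEY_SIZE_MAX can only be partial - the left key grows to the cap and
 * the right key keeps the remainder. Keys are simplified to (end, size);
 * the KEY_SIZE_MAX value below is illustrative, not the on-disk constant.
 */
#include <stdint.h>

#define KEY_SIZE_MAX ((uint32_t)~0U)

enum merge_result { MERGE_NOMERGE, MERGE_PARTIAL, MERGE_MERGE };

struct sk_key { uint64_t end; uint32_t size; };	/* covers [end - size, end) */

static enum merge_result merge_sizes(struct sk_key *l, struct sk_key *r)
{
	if ((uint64_t)l->size + r->size > KEY_SIZE_MAX) {
		uint32_t grow = KEY_SIZE_MAX - l->size;

		l->size = KEY_SIZE_MAX;
		l->end += grow;		/* bch2_key_resize() stand-in */
		r->size -= grow;	/* bch2_cut_front() stand-in */
		return MERGE_PARTIAL;
	}
	l->size += r->size;
	l->end = r->end;
	return MERGE_MERGE;
}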
bkey_s_c); -void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -bool bch2_ptr_normalize(struct bch_fs *, struct btree *, struct bkey_s); -enum merge_result bch2_extent_merge(struct bch_fs *, struct btree *, - struct bkey_i *, struct bkey_i *); - -#define bch2_bkey_extent_ops (struct bkey_ops) { \ - .key_invalid = bch2_extent_invalid, \ - .key_debugcheck = bch2_extent_debugcheck, \ - .val_to_text = bch2_extent_to_text, \ - .swab = bch2_ptr_swab, \ - .key_normalize = bch2_ptr_normalize, \ - .key_merge = bch2_extent_merge, \ - .is_extents = true, \ -} - -void bch2_mark_io_failure(struct bch_io_failures *, - struct extent_ptr_decoded *); -int bch2_btree_pick_ptr(struct bch_fs *, const struct btree *, - struct bch_io_failures *, - struct extent_ptr_decoded *); -int bch2_extent_pick_ptr(struct bch_fs *, struct bkey_s_c, - struct bch_io_failures *, - struct extent_ptr_decoded *); - -void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); - -static inline bool bch2_extent_is_atomic(struct bkey *k, - struct btree_iter *iter) -{ - struct btree *b = iter->l[0].b; - - return bkey_cmp(k->p, b->key.k.p) <= 0 && - bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0; -} - -enum btree_insert_ret -bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *, - unsigned *); -enum btree_insert_ret -bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *); - -bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); -void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent, - unsigned, unsigned); - -const struct bch_extent_ptr * -bch2_extent_has_device(struct bkey_s_c_extent, unsigned); -void bch2_extent_drop_device(struct bkey_s_extent, unsigned); -const struct bch_extent_ptr * -bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned); -const struct bch_extent_ptr * -bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned); - -unsigned bch2_extent_nr_ptrs(struct bkey_s_c_extent); -unsigned bch2_extent_nr_dirty_ptrs(struct bkey_s_c); -unsigned bch2_extent_is_compressed(struct bkey_s_c); - -unsigned bch2_extent_durability(struct bch_fs *, struct bkey_s_c_extent); - -bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent, - struct bch_extent_ptr, u64); - -static inline bool bkey_extent_is_data(const struct bkey *k) -{ - switch (k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return true; - default: - return false; - } -} - -static inline bool bkey_extent_is_allocation(const struct bkey *k) -{ - switch (k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - case BCH_RESERVATION: - return true; - default: - return false; - } -} - -static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k) -{ - return bkey_extent_is_allocation(k.k) && - !bch2_extent_is_compressed(k); -} +/* extent entries: */ -static inline bool bkey_extent_is_cached(const struct bkey *k) -{ - return k->type == BCH_EXTENT_CACHED; -} +#define extent_entry_last(_e) bkey_val_end(_e) -static inline void bkey_extent_set_cached(struct bkey *k, bool cached) -{ - EBUG_ON(k->type != BCH_EXTENT && - k->type != BCH_EXTENT_CACHED); +#define entry_to_ptr(_entry) \ +({ \ + EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \ + \ + __builtin_choose_expr( \ + type_is_exact(_entry, const union bch_extent_entry *), \ + (const struct bch_extent_ptr *) (_entry), \ + (struct bch_extent_ptr *) (_entry)); \ +}) - k->type = cached ? 
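/*
 * Standalone demo of the const-preserving cast trick used by to_entry()
 * and entry_to_ptr() above: __builtin_choose_expr() selects a const or
 * non-const cast at compile time based on the argument's exact type, so
 * one macro serves both bkey_s_c (const) and bkey_s (mutable) callers.
 * GCC/Clang extension; all names below are local to this sketch.
 */
struct sk_entry { int x; };
struct sk_eptr  { int x; };

#define sk_type_is_exact(_e, _type) \
	__builtin_types_compatible_p(__typeof__(_e), _type)

#define sk_entry_to_ptr(_entry)						\
	__builtin_choose_expr(						\
		sk_type_is_exact(_entry, const struct sk_entry *),	\
		(const struct sk_eptr *)(_entry),			\
		(struct sk_eptr *)(_entry))

static int sk_demo(struct sk_entry *e, const struct sk_entry *ce)
{
	struct sk_eptr *p = sk_entry_to_ptr(e);		/* stays mutable */
	const struct sk_eptr *cp = sk_entry_to_ptr(ce);	/* stays const  */

	return p->x + cp->x;
}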
BCH_EXTENT_CACHED : BCH_EXTENT; -} +/* downcast, preserves const */ +#define to_entry(_entry) \ +({ \ + BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \ + !type_is(_entry, struct bch_extent_ptr *) && \ + !type_is(_entry, struct bch_extent_stripe_ptr *)); \ + \ + __builtin_choose_expr( \ + (type_is_exact(_entry, const union bch_extent_crc *) || \ + type_is_exact(_entry, const struct bch_extent_ptr *) ||\ + type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\ + (const union bch_extent_entry *) (_entry), \ + (union bch_extent_entry *) (_entry)); \ +}) static inline unsigned __extent_entry_type(const union bch_extent_entry *e) @@ -193,21 +102,6 @@ union bch_extent_crc { struct bch_extent_crc128 crc128; }; -/* downcast, preserves const */ -#define to_entry(_entry) \ -({ \ - BUILD_BUG_ON(!type_is(_entry, union bch_extent_crc *) && \ - !type_is(_entry, struct bch_extent_ptr *) && \ - !type_is(_entry, struct bch_extent_stripe_ptr *)); \ - \ - __builtin_choose_expr( \ - (type_is_exact(_entry, const union bch_extent_crc *) || \ - type_is_exact(_entry, const struct bch_extent_ptr *) ||\ - type_is_exact(_entry, const struct bch_extent_stripe_ptr *)),\ - (const union bch_extent_entry *) (_entry), \ - (union bch_extent_entry *) (_entry)); \ -}) - #define __entry_to_crc(_entry) \ __builtin_choose_expr( \ type_is_exact(_entry, const union bch_extent_entry *), \ @@ -221,18 +115,6 @@ union bch_extent_crc { __entry_to_crc(_entry); \ }) -#define entry_to_ptr(_entry) \ -({ \ - EBUG_ON((_entry) && !extent_entry_is_ptr(_entry)); \ - \ - __builtin_choose_expr( \ - type_is_exact(_entry, const union bch_extent_entry *), \ - (const struct bch_extent_ptr *) (_entry), \ - (struct bch_extent_ptr *) (_entry)); \ -}) - -/* checksum entries: */ - static inline struct bch_extent_crc_unpacked bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) { @@ -290,71 +172,64 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) #undef common_fields } -/* Extent entry iteration: */ - -#define extent_entry_next(_entry) \ - ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry))) +/* bkey_ptrs: generically over any key type that has ptrs */ -#define extent_entry_last(_e) \ - vstruct_idx((_e).v, bkey_val_u64s((_e).k)) +struct bkey_ptrs_c { + const union bch_extent_entry *start; + const union bch_extent_entry *end; +}; -/* Iterate over all entries: */ +struct bkey_ptrs { + union bch_extent_entry *start; + union bch_extent_entry *end; +}; -#define extent_for_each_entry_from(_e, _entry, _start) \ - for ((_entry) = _start; \ - (_entry) < extent_entry_last(_e); \ - (_entry) = extent_entry_next(_entry)) +/* iterate over bkey ptrs */ -#define extent_for_each_entry(_e, _entry) \ - extent_for_each_entry_from(_e, _entry, (_e).v->start) +#define extent_entry_next(_entry) \ + ((typeof(_entry)) ((void *) (_entry) + extent_entry_bytes(_entry))) -/* Iterate over pointers only: */ +#define __bkey_extent_entry_for_each_from(_start, _end, _entry) \ + for ((_entry) = (_start); \ + (_entry) < (_end); \ + (_entry) = extent_entry_next(_entry)) -#define extent_ptr_next(_e, _ptr) \ +#define __bkey_ptr_next(_ptr, _end) \ ({ \ - typeof(&(_e).v->start[0]) _entry; \ + typeof(_end) _entry; \ \ - extent_for_each_entry_from(_e, _entry, to_entry(_ptr)) \ + __bkey_extent_entry_for_each_from(to_entry(_ptr), _end, _entry) \ if (extent_entry_is_ptr(_entry)) \ break; \ \ - _entry < extent_entry_last(_e) ? entry_to_ptr(_entry) : NULL; \ + _entry < (_end) ? 
entry_to_ptr(_entry) : NULL; \ }) -#define extent_for_each_ptr(_e, _ptr) \ - for ((_ptr) = &(_e).v->start->ptr; \ - ((_ptr) = extent_ptr_next(_e, _ptr)); \ - (_ptr)++) +#define bkey_extent_entry_for_each_from(_p, _entry, _start) \ + __bkey_extent_entry_for_each_from(_start, (_p).end, _entry) -/* Iterate over crcs only: */ +#define bkey_extent_entry_for_each(_p, _entry) \ + bkey_extent_entry_for_each_from(_p, _entry, _p.start) -#define extent_crc_next(_e, _crc, _iter) \ -({ \ - extent_for_each_entry_from(_e, _iter, _iter) \ - if (extent_entry_is_crc(_iter)) { \ - (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_iter));\ - break; \ - } \ - \ - (_iter) < extent_entry_last(_e); \ -}) +#define __bkey_for_each_ptr(_start, _end, _ptr) \ + for ((_ptr) = (_start); \ + ((_ptr) = __bkey_ptr_next(_ptr, _end)); \ + (_ptr)++) -#define extent_for_each_crc(_e, _crc, _iter) \ - for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \ - (_iter) = (_e).v->start; \ - extent_crc_next(_e, _crc, _iter); \ - (_iter) = extent_entry_next(_iter)) +#define bkey_ptr_next(_p, _ptr) \ + __bkey_ptr_next(_ptr, (_p).end) -/* Iterate over pointers, with crcs: */ +#define bkey_for_each_ptr(_p, _ptr) \ + __bkey_for_each_ptr(&(_p).start->ptr, (_p).end, _ptr) -#define __extent_ptr_next_decode(_e, _ptr, _entry) \ +#define __bkey_ptr_next_decode(_k, _end, _ptr, _entry) \ ({ \ __label__ out; \ \ (_ptr).idx = 0; \ (_ptr).ec_nr = 0; \ \ - extent_for_each_entry_from(_e, _entry, _entry) \ + __bkey_extent_entry_for_each_from(_entry, _end, _entry) \ switch (extent_entry_type(_entry)) { \ case BCH_EXTENT_ENTRY_ptr: \ (_ptr).ptr = _entry->ptr; \ @@ -362,7 +237,7 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) case BCH_EXTENT_ENTRY_crc32: \ case BCH_EXTENT_ENTRY_crc64: \ case BCH_EXTENT_ENTRY_crc128: \ - (_ptr).crc = bch2_extent_crc_unpack((_e).k, \ + (_ptr).crc = bch2_extent_crc_unpack(_k, \ entry_to_crc(_entry)); \ break; \ case BCH_EXTENT_ENTRY_stripe_ptr: \ @@ -370,122 +245,298 @@ bch2_extent_crc_unpack(const struct bkey *k, const union bch_extent_crc *crc) break; \ } \ out: \ - _entry < extent_entry_last(_e); \ + _entry < (_end); \ }) -#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ - for ((_ptr).crc = bch2_extent_crc_unpack((_e).k, NULL), \ - (_entry) = (_e).v->start; \ - __extent_ptr_next_decode(_e, _ptr, _entry); \ +#define __bkey_for_each_ptr_decode(_k, _start, _end, _ptr, _entry) \ + for ((_ptr).crc = bch2_extent_crc_unpack(_k, NULL), \ + (_entry) = _start; \ + __bkey_ptr_next_decode(_k, _end, _ptr, _entry); \ (_entry) = extent_entry_next(_entry)) -/* Iterate over pointers backwards: */ +#define bkey_for_each_ptr_decode(_k, _p, _ptr, _entry) \ + __bkey_for_each_ptr_decode(_k, (_p).start, (_p).end, \ + _ptr, _entry) -void bch2_extent_crc_append(struct bkey_i_extent *, - struct bch_extent_crc_unpacked); -void bch2_extent_ptr_decoded_append(struct bkey_i_extent *, - struct extent_ptr_decoded *); +/* utility code common to all keys with pointers: */ -static inline void __extent_entry_push(struct bkey_i_extent *e) +static inline struct bkey_ptrs_c bch2_bkey_ptrs_c(struct bkey_s_c k) { - union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e)); - - EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) > - BKEY_EXTENT_VAL_U64s_MAX); - - e->k.u64s += extent_entry_u64s(entry); + switch (k.k->type) { + case KEY_TYPE_btree_ptr: { + struct bkey_s_c_btree_ptr e = bkey_s_c_to_btree_ptr(k); + return (struct bkey_ptrs_c) { + to_entry(&e.v->start[0]), + to_entry(bkey_val_end(e)) + }; + } + 
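/*
 * Userspace sketch of the bkey_ptrs start/end range introduced above:
 * iteration is defined once over a half-open [start, end) range, and
 * each key type only has to say where its entries live. Entries are
 * fixed-size here; the kernel steps over variable-sized ones with
 * extent_entry_next().
 */
#include <stddef.h>

struct sk_range_entry { unsigned dev; };

struct sk_eptrs { struct sk_range_entry *start, *end; };

#define sk_for_each_entry(_p, _e) \
	for ((_e) = (_p).start; (_e) < (_p).end; (_e)++)

static unsigned sk_count_devs(struct sk_eptrs p, unsigned dev)
{
	struct sk_range_entry *e;
	unsigned nr = 0;

	sk_for_each_entry(p, e)
		nr += e->dev == dev;
	return nr;
}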
case KEY_TYPE_extent: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + return (struct bkey_ptrs_c) { + e.v->start, + extent_entry_last(e) + }; + } + case KEY_TYPE_stripe: { + struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); + return (struct bkey_ptrs_c) { + to_entry(&s.v->ptrs[0]), + to_entry(&s.v->ptrs[s.v->nr_blocks]), + }; + } + default: + return (struct bkey_ptrs_c) { NULL, NULL }; + } } -static inline void extent_ptr_append(struct bkey_i_extent *e, - struct bch_extent_ptr ptr) +static inline struct bkey_ptrs bch2_bkey_ptrs(struct bkey_s k) { - ptr.type = 1 << BCH_EXTENT_ENTRY_ptr; - extent_entry_last(extent_i_to_s(e))->ptr = ptr; - __extent_entry_push(e); + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k.s_c); + + return (struct bkey_ptrs) { + (void *) p.start, + (void *) p.end + }; } -static inline struct bch_devs_list bch2_extent_devs(struct bkey_s_c_extent e) +static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) ret.devs[ret.nr++] = ptr->dev; return ret; } -static inline struct bch_devs_list bch2_extent_dirty_devs(struct bkey_s_c_extent e) +static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) if (!ptr->cached) ret.devs[ret.nr++] = ptr->dev; return ret; } -static inline struct bch_devs_list bch2_extent_cached_devs(struct bkey_s_c_extent e) +static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) { struct bch_devs_list ret = (struct bch_devs_list) { 0 }; + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); const struct bch_extent_ptr *ptr; - extent_for_each_ptr(e, ptr) + bkey_for_each_ptr(p, ptr) if (ptr->cached) ret.devs[ret.nr++] = ptr->dev; return ret; } -static inline struct bch_devs_list bch2_bkey_devs(struct bkey_s_c k) +static inline bool bch2_bkey_has_device(struct bkey_s_c k, unsigned dev) { - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return bch2_extent_devs(bkey_s_c_to_extent(k)); - default: - return (struct bch_devs_list) { .nr = 0 }; - } + struct bkey_ptrs_c p = bch2_bkey_ptrs_c(k); + const struct bch_extent_ptr *ptr; + + bkey_for_each_ptr(p, ptr) + if (ptr->dev == dev) + return ptr; + + return NULL; } -static inline struct bch_devs_list bch2_bkey_dirty_devs(struct bkey_s_c k) +unsigned bch2_bkey_nr_ptrs(struct bkey_s_c); +unsigned bch2_bkey_nr_dirty_ptrs(struct bkey_s_c); +unsigned bch2_bkey_durability(struct bch_fs *, struct bkey_s_c); + +void bch2_mark_io_failure(struct bch_io_failures *, + struct extent_ptr_decoded *); +int bch2_bkey_pick_read_device(struct bch_fs *, struct bkey_s_c, + struct bch_io_failures *, + struct extent_ptr_decoded *); + +/* bch_btree_ptr: */ + +const char *bch2_btree_ptr_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_btree_ptr_debugcheck(struct bch_fs *, struct btree *, + struct bkey_s_c); +void bch2_btree_ptr_to_text(struct printbuf *, struct bch_fs *, + struct bkey_s_c); +void bch2_ptr_swab(const struct bkey_format *, struct bkey_packed *); + +#define bch2_bkey_ops_btree_ptr (struct bkey_ops) { \ + .key_invalid = bch2_btree_ptr_invalid, \ + .key_debugcheck = bch2_btree_ptr_debugcheck, \ + .val_to_text = bch2_btree_ptr_to_text, \ + .swab = bch2_ptr_swab, \ +} + +/* 
bch_extent: */ + +const char *bch2_extent_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_extent_debugcheck(struct bch_fs *, struct btree *, struct bkey_s_c); +void bch2_extent_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +bool bch2_extent_normalize(struct bch_fs *, struct bkey_s); +enum merge_result bch2_extent_merge(struct bch_fs *, + struct bkey_i *, struct bkey_i *); + +#define bch2_bkey_ops_extent (struct bkey_ops) { \ + .key_invalid = bch2_extent_invalid, \ + .key_debugcheck = bch2_extent_debugcheck, \ + .val_to_text = bch2_extent_to_text, \ + .swab = bch2_ptr_swab, \ + .key_normalize = bch2_extent_normalize, \ + .key_merge = bch2_extent_merge, \ +} + +/* bch_reservation: */ + +const char *bch2_reservation_invalid(const struct bch_fs *, struct bkey_s_c); +void bch2_reservation_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); +enum merge_result bch2_reservation_merge(struct bch_fs *, + struct bkey_i *, struct bkey_i *); + +#define bch2_bkey_ops_reservation (struct bkey_ops) { \ + .key_invalid = bch2_reservation_invalid, \ + .val_to_text = bch2_reservation_to_text, \ + .key_merge = bch2_reservation_merge, \ +} + +void bch2_extent_trim_atomic(struct bkey_i *, struct btree_iter *); + +static inline bool bch2_extent_is_atomic(struct bkey *k, + struct btree_iter *iter) { - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return bch2_extent_dirty_devs(bkey_s_c_to_extent(k)); + struct btree *b = iter->l[0].b; + + return bkey_cmp(k->p, b->key.k.p) <= 0 && + bkey_cmp(bkey_start_pos(k), b->data->min_key) >= 0; +} + +enum btree_insert_ret +bch2_extent_can_insert(struct btree_insert *, struct btree_insert_entry *, + unsigned *); +enum btree_insert_ret +bch2_insert_fixup_extent(struct btree_insert *, struct btree_insert_entry *); + +void bch2_extent_mark_replicas_cached(struct bch_fs *, struct bkey_s_extent, + unsigned, unsigned); + +const struct bch_extent_ptr * +bch2_extent_has_device(struct bkey_s_c_extent, unsigned); +const struct bch_extent_ptr * +bch2_extent_has_group(struct bch_fs *, struct bkey_s_c_extent, unsigned); +const struct bch_extent_ptr * +bch2_extent_has_target(struct bch_fs *, struct bkey_s_c_extent, unsigned); + +unsigned bch2_extent_is_compressed(struct bkey_s_c); + +bool bch2_extent_matches_ptr(struct bch_fs *, struct bkey_s_c_extent, + struct bch_extent_ptr, u64); + +static inline bool bkey_extent_is_data(const struct bkey *k) +{ + switch (k->type) { + case KEY_TYPE_btree_ptr: + case KEY_TYPE_extent: + return true; default: - return (struct bch_devs_list) { .nr = 0 }; + return false; } } -static inline struct bch_devs_list bch2_bkey_cached_devs(struct bkey_s_c k) +static inline bool bkey_extent_is_allocation(const struct bkey *k) { - switch (k.k->type) { - case BCH_EXTENT: - case BCH_EXTENT_CACHED: - return bch2_extent_cached_devs(bkey_s_c_to_extent(k)); + switch (k->type) { + case KEY_TYPE_extent: + case KEY_TYPE_reservation: + return true; default: - return (struct bch_devs_list) { .nr = 0 }; + return false; } } +static inline bool bch2_extent_is_fully_allocated(struct bkey_s_c k) +{ + return bkey_extent_is_allocation(k.k) && + !bch2_extent_is_compressed(k); +} + +void bch2_bkey_append_ptr(struct bkey_i *, struct bch_extent_ptr); +void bch2_bkey_drop_device(struct bkey_s, unsigned); + +/* Extent entry iteration: */ + +#define extent_for_each_entry_from(_e, _entry, _start) \ + __bkey_extent_entry_for_each_from(_start, \ + extent_entry_last(_e),_entry) + +#define extent_for_each_entry(_e, _entry) \ + 
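/*
 * Minimal model of why the ops macros above were renamed to
 * bch2_bkey_ops_<name>: once key types are globally unique, a single
 * table indexed by type can replace the old per-btree switch statements.
 * The x-macro/table below sketches the presumed bkey_methods.c pattern;
 * every name here is local to the sketch, not the kernel's.
 */
struct sk_ops {
	const char *(*key_invalid)(const void *k);
};

#define SK_BKEY_TYPES()		\
	x(extent,	 8)	\
	x(reservation,	 9)

enum sk_type {
#define x(name, nr) SK_TYPE_##name = (nr),
	SK_BKEY_TYPES()
#undef x
	SK_TYPE_MAX
};

static const char *sk_extent_invalid(const void *k)      { (void)k; return NULL; }
static const char *sk_reservation_invalid(const void *k) { (void)k; return NULL; }

#define sk_ops_extent	   { .key_invalid = sk_extent_invalid }
#define sk_ops_reservation { .key_invalid = sk_reservation_invalid }

static const struct sk_ops sk_ops_table[SK_TYPE_MAX] = {
#define x(name, nr) [SK_TYPE_##name] = sk_ops_##name,
	SK_BKEY_TYPES()
#undef x
};

static const char *sk_key_invalid(enum sk_type t, const void *k)
{
	return sk_ops_table[t].key_invalid
		? sk_ops_table[t].key_invalid(k) : NULL;
}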
extent_for_each_entry_from(_e, _entry, (_e).v->start) + +#define extent_ptr_next(_e, _ptr) \ + __bkey_ptr_next(_ptr, extent_entry_last(_e)) + +#define extent_for_each_ptr(_e, _ptr) \ + __bkey_for_each_ptr(&(_e).v->start->ptr, extent_entry_last(_e), _ptr) + +#define extent_crc_next(_e, _crc, _iter) \ +({ \ + extent_for_each_entry_from(_e, _iter, _iter) \ + if (extent_entry_is_crc(_iter)) { \ + (_crc) = bch2_extent_crc_unpack((_e).k, entry_to_crc(_iter));\ + break; \ + } \ + \ + (_iter) < extent_entry_last(_e); \ +}) + +#define extent_for_each_crc(_e, _crc, _iter) \ + for ((_crc) = bch2_extent_crc_unpack((_e).k, NULL), \ + (_iter) = (_e).v->start; \ + extent_crc_next(_e, _crc, _iter); \ + (_iter) = extent_entry_next(_iter)) + +#define extent_for_each_ptr_decode(_e, _ptr, _entry) \ + __bkey_for_each_ptr_decode((_e).k, (_e).v->start, \ + extent_entry_last(_e), _ptr, _entry) + +void bch2_extent_crc_append(struct bkey_i_extent *, + struct bch_extent_crc_unpacked); +void bch2_extent_ptr_decoded_append(struct bkey_i_extent *, + struct extent_ptr_decoded *); + +static inline void __extent_entry_push(struct bkey_i_extent *e) +{ + union bch_extent_entry *entry = extent_entry_last(extent_i_to_s(e)); + + EBUG_ON(bkey_val_u64s(&e->k) + extent_entry_u64s(entry) > + BKEY_EXTENT_VAL_U64s_MAX); + + e->k.u64s += extent_entry_u64s(entry); +} + bool bch2_can_narrow_extent_crcs(struct bkey_s_c_extent, struct bch_extent_crc_unpacked); bool bch2_extent_narrow_crcs(struct bkey_i_extent *, struct bch_extent_crc_unpacked); -union bch_extent_entry *bch2_extent_drop_ptr(struct bkey_s_extent , - struct bch_extent_ptr *); +union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s, + struct bch_extent_ptr *); -#define bch2_extent_drop_ptrs(_e, _ptr, _cond) \ +#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \ do { \ - _ptr = &(_e).v->start->ptr; \ + struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \ + \ + _ptr = &_ptrs.start->ptr; \ \ - while ((_ptr = extent_ptr_next(e, _ptr))) { \ + while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \ if (_cond) { \ - _ptr = (void *) bch2_extent_drop_ptr(_e, _ptr); \ + _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \ + _ptrs = bch2_bkey_ptrs(_k); \ continue; \ } \ \ diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index e7d7c5fe6db7..ad06db069fcf 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -121,7 +121,7 @@ static void bch2_quota_reservation_put(struct bch_fs *c, BUG_ON(res->sectors > inode->ei_quota_reserved); bch2_quota_acct(c, inode->ei_qid, Q_SPC, - -((s64) res->sectors), BCH_QUOTA_PREALLOC); + -((s64) res->sectors), KEY_TYPE_QUOTA_PREALLOC); inode->ei_quota_reserved -= res->sectors; mutex_unlock(&inode->ei_quota_lock); @@ -138,7 +138,7 @@ static int bch2_quota_reservation_add(struct bch_fs *c, mutex_lock(&inode->ei_quota_lock); ret = bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, - check_enospc ? BCH_QUOTA_PREALLOC : BCH_QUOTA_NOCHECK); + check_enospc ? 
KEY_TYPE_QUOTA_PREALLOC : KEY_TYPE_QUOTA_NOCHECK); if (likely(!ret)) { inode->ei_quota_reserved += sectors; res->sectors += sectors; @@ -220,7 +220,7 @@ static void i_sectors_acct(struct bch_fs *c, struct bch_inode_info *inode, quota_res->sectors -= sectors; inode->ei_quota_reserved -= sectors; } else { - bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, BCH_QUOTA_WARN); + bch2_quota_acct(c, inode->ei_qid, Q_SPC, sectors, KEY_TYPE_QUOTA_WARN); } #endif inode->v.i_blocks += sectors; @@ -813,7 +813,7 @@ static void bch2_add_page_sectors(struct bio *bio, struct bkey_s_c k) struct bvec_iter iter; struct bio_vec bv; unsigned nr_ptrs = !bch2_extent_is_compressed(k) - ? bch2_extent_nr_dirty_ptrs(k) + ? bch2_bkey_nr_dirty_ptrs(k) : 0; bio_for_each_segment(bv, bio, iter) { @@ -2397,7 +2397,7 @@ static long bch2_fcollapse(struct bch_inode_info *inode, BUG_ON(bkey_cmp(dst->pos, bkey_start_pos(©.k.k))); ret = bch2_disk_reservation_get(c, &disk_res, copy.k.k.size, - bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(©.k)), + bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(©.k)), BCH_DISK_RESERVATION_NOFAIL); BUG_ON(ret); @@ -2504,7 +2504,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, goto btree_iter_err; /* already reserved */ - if (k.k->type == BCH_RESERVATION && + if (k.k->type == KEY_TYPE_reservation && bkey_s_c_to_reservation(k).v->nr_replicas >= replicas) { bch2_btree_iter_next_slot(iter); continue; @@ -2517,7 +2517,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, } bkey_reservation_init(&reservation.k_i); - reservation.k.type = BCH_RESERVATION; + reservation.k.type = KEY_TYPE_reservation; reservation.k.p = k.k->p; reservation.k.size = k.k->size; @@ -2525,7 +2525,7 @@ static long bch2_fallocate(struct bch_inode_info *inode, int mode, bch2_cut_back(end_pos, &reservation.k); sectors = reservation.k.size; - reservation.v.nr_replicas = bch2_extent_nr_dirty_ptrs(k); + reservation.v.nr_replicas = bch2_bkey_nr_dirty_ptrs(k); if (!bkey_extent_is_allocation(k.k)) { ret = bch2_quota_reservation_add(c, inode, diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index b6fe2059fe5f..93e1f3aaacd4 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -340,7 +340,7 @@ __bch2_create(struct mnt_idmap *idmap, if (tmpfile) inode_u.bi_flags |= BCH_INODE_UNLINKED; - ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, BCH_QUOTA_PREALLOC); + ret = bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, 1, KEY_TYPE_QUOTA_PREALLOC); if (ret) return ERR_PTR(ret); @@ -457,7 +457,7 @@ err_trans: make_bad_inode(&inode->v); iput(&inode->v); err: - bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, BCH_QUOTA_WARN); + bch2_quota_acct(c, bch_qid(&inode_u), Q_INO, -1, KEY_TYPE_QUOTA_WARN); inode = ERR_PTR(ret); goto out; } @@ -1079,7 +1079,7 @@ static int bch2_fill_extent(struct fiemap_extent_info *info, } return 0; - } else if (k->k.type == BCH_RESERVATION) { + } else if (k->k.type == KEY_TYPE_reservation) { return fiemap_fill_next_extent(info, bkey_start_offset(&k->k) << 9, 0, k->k.size << 9, @@ -1112,7 +1112,7 @@ static int bch2_fiemap(struct inode *vinode, struct fiemap_extent_info *info, for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS(ei->v.i_ino, start >> 9), 0, k) if (bkey_extent_is_data(k.k) || - k.k->type == BCH_RESERVATION) { + k.k->type == KEY_TYPE_reservation) { if (bkey_cmp(bkey_start_pos(k.k), POS(ei->v.i_ino, (start + len) >> 9)) >= 0) break; @@ -1414,9 +1414,9 @@ static void bch2_evict_inode(struct inode *vinode) if (!inode->v.i_nlink && !is_bad_inode(&inode->v)) { bch2_quota_acct(c, inode->ei_qid, 
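/*
 * Sketch of how the three renamed quota modes above behave in
 * bch2_quota_check_limit(): NOCHECK bypasses enforcement entirely,
 * PREALLOC fails with -EDQUOT before going over the limit, and WARN
 * accounts anyway but queues a warning. Fields are simplified; this is
 * not the kernel function.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>

enum sk_qmode { SK_QUOTA_PREALLOC, SK_QUOTA_WARN, SK_QUOTA_NOCHECK };

struct sk_qlimit { uint64_t hardlimit; bool warned; };

static int sk_quota_check(struct sk_qlimit *q, uint64_t new_usage,
			  enum sk_qmode mode)
{
	if (mode == SK_QUOTA_NOCHECK)
		return 0;
	if (q->hardlimit && new_usage > q->hardlimit) {
		if (mode == SK_QUOTA_PREALLOC)
			return -EDQUOT;	/* fail the allocation up front */
		q->warned = true;	/* WARN: proceed, but tell the user */
	}
	return 0;
}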
Q_SPC, -((s64) inode->v.i_blocks), - BCH_QUOTA_WARN); + KEY_TYPE_QUOTA_WARN); bch2_quota_acct(c, inode->ei_qid, Q_INO, -1, - BCH_QUOTA_WARN); + KEY_TYPE_QUOTA_WARN); bch2_inode_rm(c, inode->v.i_ino); WARN_ONCE(atomic_long_dec_return(&c->nr_inodes) < 0, diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index 74b83201c213..57ab8f088415 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -235,7 +235,6 @@ static int hash_check_duplicates(const struct bch_hash_desc desc, !desc.cmp_bkey(k, k2), c, "duplicate hash table keys:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, desc.btree_id), k), buf))) { ret = fsck_hash_delete_at(desc, &h->info, k_iter); if (ret) @@ -255,7 +254,7 @@ static bool key_has_correct_hash(const struct bch_hash_desc desc, { u64 hash; - if (k.k->type != desc.whiteout_type && + if (k.k->type != KEY_TYPE_whiteout && k.k->type != desc.key_type) return true; @@ -280,7 +279,7 @@ static int hash_check_key(const struct bch_hash_desc desc, u64 hashed; int ret = 0; - if (k.k->type != desc.whiteout_type && + if (k.k->type != KEY_TYPE_whiteout && k.k->type != desc.key_type) return 0; @@ -300,7 +299,6 @@ static int hash_check_key(const struct bch_hash_desc desc, desc.btree_id, k.k->p.offset, hashed, h->chain->pos.offset, (bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, desc.btree_id), k), buf))) { ret = hash_redo_key(desc, h, c, k_iter, k, hashed); if (ret) { @@ -370,7 +368,7 @@ static int check_dirent_hash(struct hash_check *h, struct bch_fs *c, *k = bch2_btree_iter_peek(iter); - BUG_ON(k->k->type != BCH_DIRENT); + BUG_ON(k->k->type != KEY_TYPE_dirent); } err: fsck_err: @@ -385,7 +383,6 @@ err_redo: buf, strlen(buf), BTREE_ID_DIRENTS, k->k->p.offset, hash, h->chain->pos.offset, (bch2_bkey_val_to_text(&PBUF(buf), c, - bkey_type(0, BTREE_ID_DIRENTS), *k), buf))) { ret = hash_redo_key(bch2_dirent_hash_desc, h, c, iter, *k, hash); @@ -471,7 +468,7 @@ static int check_extents(struct bch_fs *c) if (fsck_err_on(w.have_inode && !(w.inode.bi_flags & BCH_INODE_I_SIZE_DIRTY) && - k.k->type != BCH_RESERVATION && + k.k->type != KEY_TYPE_reservation && k.k->p.offset > round_up(w.inode.bi_size, PAGE_SIZE) >> 9, c, "extent type %u offset %llu past end of inode %llu, i_size %llu", k.k->type, k.k->p.offset, k.k->p.inode, w.inode.bi_size)) { @@ -529,13 +526,11 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(!w.have_inode, c, "dirent in nonexisting directory:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - (enum bkey_type) BTREE_ID_DIRENTS, k), buf)) || fsck_err_on(!S_ISDIR(w.inode.bi_mode), c, "dirent in non directory inode type %u:\n%s", mode_to_type(w.inode.bi_mode), (bch2_bkey_val_to_text(&PBUF(buf), c, - (enum bkey_type) BTREE_ID_DIRENTS, k), buf))) { ret = bch2_btree_delete_at(iter, 0); if (ret) @@ -557,7 +552,7 @@ static int check_dirents(struct bch_fs *c) if (ret) goto fsck_err; - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; d = bkey_s_c_to_dirent(k); @@ -586,7 +581,6 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(d_inum == d.k->p.inode, c, "dirent points to own directory:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - (enum bkey_type) BTREE_ID_DIRENTS, k), buf))) { ret = remove_dirent(c, iter, d); if (ret) @@ -604,7 +598,6 @@ static int check_dirents(struct bch_fs *c) if (fsck_err_on(!have_target, c, "dirent points to missing inode:\n%s", (bch2_bkey_val_to_text(&PBUF(buf), c, - (enum bkey_type) BTREE_ID_DIRENTS, k), buf))) { ret = remove_dirent(c, iter, d); if (ret) @@ -618,7 +611,6 @@ static int 
check_dirents(struct bch_fs *c) "incorrect d_type: should be %u:\n%s", mode_to_type(target.bi_mode), (bch2_bkey_val_to_text(&PBUF(buf), c, - (enum bkey_type) BTREE_ID_DIRENTS, k), buf))) { struct bkey_i_dirent *n; @@ -899,7 +891,7 @@ next: e->offset = k.k->p.offset; - if (k.k->type != BCH_DIRENT) + if (k.k->type != KEY_TYPE_dirent) continue; dirent = bkey_s_c_to_dirent(k); @@ -942,7 +934,7 @@ up: } for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { - if (k.k->type != BCH_INODE_FS) + if (k.k->type != KEY_TYPE_inode) continue; if (!S_ISDIR(le16_to_cpu(bkey_s_c_to_inode(k).v->bi_mode))) @@ -1030,7 +1022,7 @@ static int bch2_gc_walk_dirents(struct bch_fs *c, nlink_table *links, for_each_btree_key(&iter, c, BTREE_ID_DIRENTS, POS_MIN, 0, k) { switch (k.k->type) { - case BCH_DIRENT: + case KEY_TYPE_dirent: d = bkey_s_c_to_dirent(k); d_inum = le64_to_cpu(d.v->d_inum); @@ -1310,7 +1302,7 @@ peek_nlinks: link = genradix_iter_peek(&nlinks_iter, links); if (iter.pos.inode < nlinks_pos || !link) link = &zero_links; - if (k.k && k.k->type == BCH_INODE_FS) { + if (k.k && k.k->type == KEY_TYPE_inode) { /* * Avoid potential deadlocks with iter for * truncate/rm/etc.: @@ -1392,7 +1384,7 @@ static int check_inodes_fast(struct bch_fs *c) int ret = 0; for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN, 0, k) { - if (k.k->type != BCH_INODE_FS) + if (k.k->type != KEY_TYPE_inode) continue; inode = bkey_s_c_to_inode(k); diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c index 0a350c6d0932..30f93fbe280d 100644 --- a/fs/bcachefs/inode.c +++ b/fs/bcachefs/inode.c @@ -178,76 +178,69 @@ int bch2_inode_unpack(struct bkey_s_c_inode inode, const char *bch2_inode_invalid(const struct bch_fs *c, struct bkey_s_c k) { - if (k.k->p.offset) - return "nonzero offset"; - - switch (k.k->type) { - case BCH_INODE_FS: { struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); struct bch_inode_unpacked unpacked; - if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) - return "incorrect value size"; - - if (k.k->p.inode < BLOCKDEV_INODE_MAX) - return "fs inode in blockdev range"; + if (k.k->p.offset) + return "nonzero offset"; - if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) - return "invalid str hash type"; + if (bkey_val_bytes(k.k) < sizeof(struct bch_inode)) + return "incorrect value size"; - if (bch2_inode_unpack(inode, &unpacked)) - return "invalid variable length fields"; + if (k.k->p.inode < BLOCKDEV_INODE_MAX) + return "fs inode in blockdev range"; - if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) - return "invalid data checksum type"; + if (INODE_STR_HASH(inode.v) >= BCH_STR_HASH_NR) + return "invalid str hash type"; - if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) - return "invalid data checksum type"; + if (bch2_inode_unpack(inode, &unpacked)) + return "invalid variable length fields"; - if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && - unpacked.bi_nlink != 0) - return "flagged as unlinked but bi_nlink != 0"; + if (unpacked.bi_data_checksum >= BCH_CSUM_OPT_NR + 1) + return "invalid data checksum type"; - return NULL; - } - case BCH_INODE_BLOCKDEV: - if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_blockdev)) - return "incorrect value size"; + if (unpacked.bi_compression >= BCH_COMPRESSION_OPT_NR + 1) + return "invalid data checksum type"; - if (k.k->p.inode >= BLOCKDEV_INODE_MAX) - return "blockdev inode in fs range"; + if ((unpacked.bi_flags & BCH_INODE_UNLINKED) && + unpacked.bi_nlink != 0) + return "flagged as unlinked but bi_nlink != 0"; - return NULL; - case BCH_INODE_GENERATION: - if 
(bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) - return "incorrect value size"; - - return NULL; - default: - return "invalid type"; - } + return NULL; } void bch2_inode_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_inode inode; + struct bkey_s_c_inode inode = bkey_s_c_to_inode(k); struct bch_inode_unpacked unpacked; - switch (k.k->type) { - case BCH_INODE_FS: - inode = bkey_s_c_to_inode(k); - if (bch2_inode_unpack(inode, &unpacked)) { - pr_buf(out, "(unpack error)"); - break; - } + if (bch2_inode_unpack(inode, &unpacked)) { + pr_buf(out, "(unpack error)"); + return; + } #define BCH_INODE_FIELD(_name, _bits) \ - pr_buf(out, #_name ": %llu ", (u64) unpacked._name); - BCH_INODE_FIELDS() + pr_buf(out, #_name ": %llu ", (u64) unpacked._name); + BCH_INODE_FIELDS() #undef BCH_INODE_FIELD - break; - } +} + +const char *bch2_inode_generation_invalid(const struct bch_fs *c, + struct bkey_s_c k) +{ + if (k.k->p.offset) + return "nonzero offset"; + + if (bkey_val_bytes(k.k) != sizeof(struct bch_inode_generation)) + return "incorrect value size"; + + return NULL; +} + +void bch2_inode_generation_to_text(struct printbuf *out, struct bch_fs *c, + struct bkey_s_c k) +{ } void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, @@ -281,10 +274,9 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u, static inline u32 bkey_generation(struct bkey_s_c k) { switch (k.k->type) { - case BCH_INODE_BLOCKDEV: - case BCH_INODE_FS: + case KEY_TYPE_inode: BUG(); - case BCH_INODE_GENERATION: + case KEY_TYPE_inode_generation: return le32_to_cpu(bkey_s_c_to_inode_generation(k).v->bi_generation); default: return 0; @@ -330,8 +322,7 @@ again: return ret; switch (k.k->type) { - case BCH_INODE_BLOCKDEV: - case BCH_INODE_FS: + case KEY_TYPE_inode: /* slot used */ if (iter->pos.inode >= max) goto out; @@ -405,19 +396,19 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr) return ret; } - bch2_fs_inconsistent_on(k.k->type != BCH_INODE_FS, c, + bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c, "inode %llu not found when deleting", inode_nr); switch (k.k->type) { - case BCH_INODE_FS: { + case KEY_TYPE_inode: { struct bch_inode_unpacked inode_u; if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u)) bi_generation = inode_u.bi_generation + 1; break; } - case BCH_INODE_GENERATION: { + case KEY_TYPE_inode_generation: { struct bkey_s_c_inode_generation g = bkey_s_c_to_inode_generation(k); bi_generation = le32_to_cpu(g.v->bi_generation); @@ -455,7 +446,7 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, POS(inode_nr, 0), BTREE_ITER_SLOTS, k) { switch (k.k->type) { - case BCH_INODE_FS: + case KEY_TYPE_inode: ret = bch2_inode_unpack(bkey_s_c_to_inode(k), inode); break; default: @@ -464,7 +455,6 @@ int bch2_inode_find_by_inum(struct bch_fs *c, u64 inode_nr, } break; - } return bch2_btree_iter_unlock(&iter) ?: ret; diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h index 897ff65d01cb..0bc852e69355 100644 --- a/fs/bcachefs/inode.h +++ b/fs/bcachefs/inode.h @@ -9,11 +9,21 @@ const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c); void bch2_inode_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c); -#define bch2_bkey_inode_ops (struct bkey_ops) { \ +#define bch2_bkey_ops_inode (struct bkey_ops) { \ .key_invalid = bch2_inode_invalid, \ .val_to_text = bch2_inode_to_text, \ } +const char *bch2_inode_generation_invalid(const struct bch_fs *, + struct bkey_s_c); +void bch2_inode_generation_to_text(struct 
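/*
 * Sketch of the generation handling visible in bch2_inode_rm() above:
 * when an inode is deleted, the slot remembers a generation number (the
 * KEY_TYPE_inode_generation key) so that a later inode reusing the
 * number starts from a fresh generation. Types are simplified, and the
 * write-back of the generation key is implied by surrounding code
 * rather than shown in this hunk.
 */
#include <stdint.h>

enum sk_ktype { SK_T_deleted, SK_T_inode, SK_T_inode_generation };

struct sk_slot { enum sk_ktype type; uint32_t bi_generation; };

static uint32_t sk_generation_after_rm(const struct sk_slot *s)
{
	switch (s->type) {
	case SK_T_inode:
		return s->bi_generation + 1;	/* bump past the dying inode */
	case SK_T_inode_generation:
		return s->bi_generation;	/* slot already a tombstone */
	default:
		return 0;
	}
}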
printbuf *, struct bch_fs *, + struct bkey_s_c); + +#define bch2_bkey_ops_inode_generation (struct bkey_ops) { \ + .key_invalid = bch2_inode_generation_invalid, \ + .val_to_text = bch2_inode_generation_to_text, \ +} + struct bch_inode_unpacked { u64 bi_inum; __le64 bi_hash_seed; diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 2fee2f2efd38..3e990709fedb 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -213,20 +213,20 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c, enum bch_data_type type, const struct bkey_i *k) { - struct bkey_s_c_extent e = bkey_i_to_s_c_extent(k); + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(k)); const struct bch_extent_ptr *ptr; struct bch_write_bio *n; struct bch_dev *ca; BUG_ON(c->opts.nochanges); - extent_for_each_ptr(e, ptr) { + bkey_for_each_ptr(ptrs, ptr) { BUG_ON(ptr->dev >= BCH_SB_MEMBERS_MAX || !c->devs[ptr->dev]); ca = bch_dev_bkey_exists(c, ptr->dev); - if (ptr + 1 < &extent_entry_last(e)->ptr) { + if (to_entry(ptr + 1) < ptrs.end) { n = to_wbio(bio_alloc_clone(NULL, &wbio->bio, GFP_NOIO, &ca->replica_set)); @@ -317,7 +317,6 @@ static void __bch2_write_index(struct bch_write_op *op) { struct bch_fs *c = op->c; struct keylist *keys = &op->insert_keys; - struct bkey_s_extent e; struct bch_extent_ptr *ptr; struct bkey_i *src, *dst = keys->keys, *n, *k; unsigned dev; @@ -327,12 +326,10 @@ static void __bch2_write_index(struct bch_write_op *op) n = bkey_next(src); bkey_copy(dst, src); - e = bkey_i_to_s_extent(dst); - - bch2_extent_drop_ptrs(e, ptr, + bch2_bkey_drop_ptrs(bkey_i_to_s(dst), ptr, test_bit(ptr->dev, op->failed.d)); - if (!bch2_extent_nr_ptrs(e.c)) { + if (!bch2_bkey_nr_ptrs(bkey_i_to_s_c(dst))) { ret = -EIO; goto err; } @@ -433,14 +430,13 @@ static void init_append_extent(struct bch_write_op *op, e->k.p = op->pos; e->k.size = crc.uncompressed_size; e->k.version = version; - bkey_extent_set_cached(&e->k, op->flags & BCH_WRITE_CACHED); if (crc.csum_type || crc.compression_type || crc.nonce) bch2_extent_crc_append(e, crc); - bch2_alloc_sectors_append_ptrs(op->c, wp, e, crc.compressed_size); + bch2_alloc_sectors_append_ptrs(op->c, wp, &e->k_i, crc.compressed_size); bch2_keylist_push(&op->insert_keys); } @@ -1608,7 +1604,7 @@ int __bch2_read_extent(struct bch_fs *c, struct bch_read_bio *orig, struct bpos pos = bkey_start_pos(k.k); int pick_ret; - pick_ret = bch2_extent_pick_ptr(c, k, failed, &pick); + pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); /* hole or reservation - just zero fill: */ if (!pick_ret) diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index a74566764630..f3bb28f32c6e 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -141,11 +141,12 @@ static void journal_entry_null_range(void *start, void *end) static int journal_validate_key(struct bch_fs *c, struct jset *jset, struct jset_entry *entry, - struct bkey_i *k, enum bkey_type key_type, + struct bkey_i *k, enum btree_node_type key_type, const char *type, int write) { void *next = vstruct_next(entry); const char *invalid; + unsigned version = le32_to_cpu(jset->version); int ret = 0; if (journal_entry_err_on(!k->k.u64s, c, @@ -174,14 +175,17 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, } if (JSET_BIG_ENDIAN(jset) != CPU_BIG_ENDIAN) - bch2_bkey_swab(key_type, NULL, bkey_to_packed(k)); + bch2_bkey_swab(NULL, bkey_to_packed(k)); - invalid = bch2_bkey_invalid(c, key_type, bkey_i_to_s_c(k)); + if (!write && + version < bcachefs_metadata_version_bkey_renumber) + 
bch2_bkey_renumber(key_type, bkey_to_packed(k), write); + + invalid = bch2_bkey_invalid(c, bkey_i_to_s_c(k), key_type); if (invalid) { char buf[160]; - bch2_bkey_val_to_text(&PBUF(buf), c, key_type, - bkey_i_to_s_c(k)); + bch2_bkey_val_to_text(&PBUF(buf), c, bkey_i_to_s_c(k)); mustfix_fsck_err(c, "invalid %s in journal: %s\n%s", type, invalid, buf); @@ -190,6 +194,10 @@ static int journal_validate_key(struct bch_fs *c, struct jset *jset, journal_entry_null_range(vstruct_next(entry), next); return 0; } + + if (write && + version < bcachefs_metadata_version_bkey_renumber) + bch2_bkey_renumber(key_type, bkey_to_packed(k), write); fsck_err: return ret; } @@ -203,8 +211,8 @@ static int journal_entry_validate_btree_keys(struct bch_fs *c, vstruct_for_each(entry, k) { int ret = journal_validate_key(c, jset, entry, k, - bkey_type(entry->level, - entry->btree_id), + __btree_node_type(entry->level, + entry->btree_id), "key", write); if (ret) return ret; @@ -351,14 +359,17 @@ static int jset_validate(struct bch_fs *c, { size_t bytes = vstruct_bytes(jset); struct bch_csum csum; + unsigned version; int ret = 0; if (le64_to_cpu(jset->magic) != jset_magic(c)) return JOURNAL_ENTRY_NONE; - if (le32_to_cpu(jset->version) != BCACHE_JSET_VERSION) { - bch_err(c, "unknown journal entry version %u", - le32_to_cpu(jset->version)); + version = le32_to_cpu(jset->version); + if ((version != BCH_JSET_VERSION_OLD && + version < bcachefs_metadata_version_min) || + version >= bcachefs_metadata_version_max) { + bch_err(c, "unknown journal entry version %u", jset->version); return BCH_FSCK_UNKNOWN_VERSION; } @@ -929,7 +940,6 @@ static void __journal_write_alloc(struct journal *j, unsigned replicas_want) { struct bch_fs *c = container_of(j, struct bch_fs, journal); - struct bkey_i_extent *e = bkey_i_to_extent(&w->key); struct journal_device *ja; struct bch_dev *ca; unsigned i; @@ -951,13 +961,14 @@ static void __journal_write_alloc(struct journal *j, if (!ca->mi.durability || ca->mi.state != BCH_MEMBER_STATE_RW || !ja->nr || - bch2_extent_has_device(extent_i_to_s_c(e), ca->dev_idx) || + bch2_bkey_has_device(bkey_i_to_s_c(&w->key), + ca->dev_idx) || sectors > ja->sectors_free) continue; bch2_dev_stripe_increment(c, ca, &j->wp.stripe); - extent_ptr_append(e, + bch2_bkey_append_ptr(&w->key, (struct bch_extent_ptr) { .offset = bucket_to_sector(ca, ja->buckets[ja->cur_idx]) + @@ -1096,7 +1107,7 @@ static void journal_write_done(struct closure *cl) struct bch_fs *c = container_of(j, struct bch_fs, journal); struct journal_buf *w = journal_prev_buf(j); struct bch_devs_list devs = - bch2_extent_devs(bkey_i_to_s_c_extent(&w->key)); + bch2_bkey_devs(bkey_i_to_s_c(&w->key)); u64 seq = le64_to_cpu(w->data->seq); u64 last_seq = le64_to_cpu(w->data->last_seq); @@ -1158,7 +1169,7 @@ static void journal_write_endio(struct bio *bio) unsigned long flags; spin_lock_irqsave(&j->err_lock, flags); - bch2_extent_drop_device(bkey_i_to_s_extent(&w->key), ca->dev_idx); + bch2_bkey_drop_device(bkey_i_to_s(&w->key), ca->dev_idx); spin_unlock_irqrestore(&j->err_lock, flags); } @@ -1175,6 +1186,7 @@ void bch2_journal_write(struct closure *cl) struct jset *jset; struct bio *bio; struct bch_extent_ptr *ptr; + bool validate_before_checksum = false; unsigned i, sectors, bytes; journal_buf_realloc(j, w); @@ -1196,12 +1208,22 @@ void bch2_journal_write(struct closure *cl) jset->read_clock = cpu_to_le16(c->bucket_clock[READ].hand); jset->write_clock = cpu_to_le16(c->bucket_clock[WRITE].hand); jset->magic = cpu_to_le64(jset_magic(c)); - jset->version = 
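/*
 * Sketch of the two version-driven decisions in the journal read/write
 * paths above: (1) accept either the legacy jset version or one inside
 * [metadata_version_min, metadata_version_max); (2) validate entries
 * before checksumming when they will be encrypted or still use old-style
 * key numbering, otherwise after. The constants below are stand-ins, not
 * the real version numbers.
 */
#include <stdbool.h>

enum {
	SK_JSET_VERSION_OLD	= 2,
	SK_METADATA_VERSION_MIN	= 9,
	SK_VERSION_BKEY_RENUMBER = 10,
	SK_METADATA_VERSION_MAX	= 11,
};

static bool sk_jset_version_ok(unsigned v)
{
	return v == SK_JSET_VERSION_OLD ||
		(v >= SK_METADATA_VERSION_MIN && v < SK_METADATA_VERSION_MAX);
}

static bool sk_validate_before_checksum(unsigned version, bool encrypting)
{
	return encrypting || version < SK_VERSION_BKEY_RENUMBER;
}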
cpu_to_le32(BCACHE_JSET_VERSION); + + jset->version = c->sb.version < bcachefs_metadata_version_new_versioning + ? cpu_to_le32(BCH_JSET_VERSION_OLD) + : cpu_to_le32(c->sb.version); SET_JSET_BIG_ENDIAN(jset, CPU_BIG_ENDIAN); SET_JSET_CSUM_TYPE(jset, bch2_meta_checksum_type(c)); - if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) && + if (bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset))) + validate_before_checksum = true; + + if (le32_to_cpu(jset->version) < + bcachefs_metadata_version_bkey_renumber) + validate_before_checksum = true; + + if (validate_before_checksum && jset_validate_entries(c, jset, WRITE)) goto err; @@ -1212,7 +1234,7 @@ void bch2_journal_write(struct closure *cl) jset->csum = csum_vstruct(c, JSET_CSUM_TYPE(jset), journal_nonce(jset), jset); - if (!bch2_csum_type_is_encryption(JSET_CSUM_TYPE(jset)) && + if (!validate_before_checksum && jset_validate_entries(c, jset, WRITE)) goto err; diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c index 46878590327d..63fe8cbb0564 100644 --- a/fs/bcachefs/migrate.c +++ b/fs/bcachefs/migrate.c @@ -15,7 +15,7 @@ #include "replicas.h" #include "super-io.h" -static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e, +static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k, unsigned dev_idx, int flags, bool metadata) { unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas; @@ -23,9 +23,9 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e, unsigned degraded = metadata ? BCH_FORCE_IF_METADATA_DEGRADED : BCH_FORCE_IF_DATA_DEGRADED; unsigned nr_good; - bch2_extent_drop_device(e, dev_idx); + bch2_bkey_drop_device(k, dev_idx); - nr_good = bch2_extent_durability(c, e.c); + nr_good = bch2_bkey_durability(c, k.s_c); if ((!nr_good && !(flags & lost)) || (nr_good < replicas && !(flags & degraded))) return -EINVAL; @@ -36,7 +36,6 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s_extent e, static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) { struct bkey_s_c k; - struct bkey_s_extent e; BKEY_PADDED(key) tmp; struct btree_iter iter; int ret = 0; @@ -51,7 +50,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) !(ret = btree_iter_err(k))) { if (!bkey_extent_is_data(k.k) || !bch2_extent_has_device(bkey_s_c_to_extent(k), dev_idx)) { - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k); + ret = bch2_mark_bkey_replicas(c, k); if (ret) break; bch2_btree_iter_next(&iter); @@ -59,18 +58,18 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags) } bkey_reassemble(&tmp.key, k); - e = bkey_i_to_s_extent(&tmp.key); - ret = drop_dev_ptrs(c, e, dev_idx, flags, false); + ret = drop_dev_ptrs(c, bkey_i_to_s(&tmp.key), + dev_idx, flags, false); if (ret) break; /* * If the new extent no longer has any pointers, bch2_extent_normalize() * will do the appropriate thing with it (turning it into a - * KEY_TYPE_ERROR key, or just a discard if it was a cached extent) + * KEY_TYPE_error key, or just a discard if it was a cached extent) */ - bch2_extent_normalize(c, e.s); + bch2_extent_normalize(c, bkey_i_to_s(&tmp.key)); iter.pos = bkey_start_pos(&tmp.key.k); @@ -118,10 +117,10 @@ static int bch2_dev_metadata_drop(struct bch_fs *c, unsigned dev_idx, int flags) for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { __BKEY_PADDED(k, BKEY_BTREE_PTR_VAL_U64s_MAX) tmp; - struct bkey_i_extent *new_key; + struct bkey_i_btree_ptr *new_key; retry: - if 
(!bch2_extent_has_device(bkey_i_to_s_c_extent(&b->key), - dev_idx)) { + if (!bch2_bkey_has_device(bkey_i_to_s_c(&b->key), + dev_idx)) { /* * we might have found a btree node key we * needed to update, and then tried to update it @@ -130,15 +129,14 @@ retry: */ bch2_btree_iter_downgrade(&iter); - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); if (ret) goto err; } else { bkey_copy(&tmp.k, &b->key); - new_key = bkey_i_to_extent(&tmp.k); + new_key = bkey_i_to_btree_ptr(&tmp.k); - ret = drop_dev_ptrs(c, extent_i_to_s(new_key), + ret = drop_dev_ptrs(c, bkey_i_to_s(&new_key->k_i), dev_idx, flags, true); if (ret) goto err; diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 7de3c6c475be..aff611c908ef 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -100,8 +100,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op) bch2_cut_back(insert->k.p, &new->k); if (m->data_cmd == DATA_REWRITE) - bch2_extent_drop_device(extent_i_to_s(insert), - m->data_opts.rewrite_dev); + bch2_bkey_drop_device(extent_i_to_s(insert).s, + m->data_opts.rewrite_dev); extent_for_each_ptr_decode(extent_i_to_s(new), p, entry) { if (bch2_extent_has_device(extent_i_to_s_c(insert), p.ptr.dev)) { @@ -132,8 +132,8 @@ static int bch2_migrate_index_update(struct bch_write_op *op) * has fewer replicas than when we last looked at it - meaning * we need to get a disk reservation here: */ - nr = bch2_extent_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) - - (bch2_extent_nr_dirty_ptrs(k) + m->nr_ptrs_reserved); + nr = bch2_bkey_nr_dirty_ptrs(bkey_i_to_s_c(&insert->k_i)) - + (bch2_bkey_nr_dirty_ptrs(k) + m->nr_ptrs_reserved); if (nr > 0) { /* * can't call bch2_disk_reservation_add() with btree @@ -243,7 +243,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m, switch (data_cmd) { case DATA_ADD_REPLICAS: { int nr = (int) io_opts.data_replicas - - bch2_extent_nr_dirty_ptrs(k); + bch2_bkey_nr_dirty_ptrs(k); if (nr > 0) { m->op.nr_replicas = m->nr_ptrs_reserved = nr; @@ -477,7 +477,6 @@ int bch2_move_data(struct bch_fs *c, struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts); BKEY_PADDED(k) tmp; struct bkey_s_c k; - struct bkey_s_c_extent e; struct data_opts data_opts; enum data_cmd data_cmd; u64 delay, cur_inum = U64_MAX; @@ -530,8 +529,6 @@ peek: if (!bkey_extent_is_data(k.k)) goto next_nondata; - e = bkey_s_c_to_extent(k); - if (cur_inum != k.k->p.inode) { struct bch_inode_unpacked inode; @@ -545,8 +542,7 @@ peek: goto peek; } - switch ((data_cmd = pred(c, arg, BKEY_TYPE_EXTENTS, e, - &io_opts, &data_opts))) { + switch ((data_cmd = pred(c, arg, k, &io_opts, &data_opts))) { case DATA_SKIP: goto next; case DATA_SCRUB: @@ -581,7 +577,7 @@ peek: if (rate) bch2_ratelimit_increment(rate, k.k->size); next: - atomic64_add(k.k->size * bch2_extent_nr_dirty_ptrs(k), + atomic64_add(k.k->size * bch2_bkey_nr_dirty_ptrs(k), &stats->sectors_seen); next_nondata: bch2_btree_iter_next(&stats->iter); @@ -613,7 +609,7 @@ static int bch2_gc_data_replicas(struct bch_fs *c) for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, BTREE_ITER_PREFETCH, k) { - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS, k); + ret = bch2_mark_bkey_replicas(c, k); if (ret) break; } @@ -637,8 +633,7 @@ static int bch2_gc_btree_replicas(struct bch_fs *c) for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { - ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_BTREE, - bkey_i_to_s_c(&b->key)); + 
ret = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(&b->key)); bch2_btree_iter_cond_resched(&iter); } @@ -668,10 +663,9 @@ static int bch2_move_btree(struct bch_fs *c, for (id = 0; id < BTREE_ID_NR; id++) { for_each_btree_node(&stats->iter, c, id, POS_MIN, BTREE_ITER_PREFETCH, b) { - switch ((cmd = pred(c, arg, BKEY_TYPE_BTREE, - bkey_i_to_s_c_extent(&b->key), - &io_opts, - &data_opts))) { + switch ((cmd = pred(c, arg, + bkey_i_to_s_c(&b->key), + &io_opts, &data_opts))) { case DATA_SKIP: goto next; case DATA_SCRUB: @@ -697,8 +691,7 @@ next: #if 0 static enum data_cmd scrub_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { @@ -707,33 +700,38 @@ static enum data_cmd scrub_pred(struct bch_fs *c, void *arg, #endif static enum data_cmd rereplicate_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { - unsigned nr_good = bch2_extent_durability(c, e); - unsigned replicas = type == BKEY_TYPE_BTREE - ? c->opts.metadata_replicas - : io_opts->data_replicas; + unsigned nr_good = bch2_bkey_durability(c, k); + unsigned replicas = 0; + + switch (k.k->type) { + case KEY_TYPE_btree_ptr: + replicas = c->opts.metadata_replicas; + break; + case KEY_TYPE_extent: + replicas = io_opts->data_replicas; + break; + } if (!nr_good || nr_good >= replicas) return DATA_SKIP; data_opts->target = 0; - data_opts->btree_insert_flags = 0; + data_opts->btree_insert_flags = 0; return DATA_ADD_REPLICAS; } static enum data_cmd migrate_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { struct bch_ioctl_data *op = arg; - if (!bch2_extent_has_device(e, op->migrate.dev)) + if (!bch2_bkey_has_device(k, op->migrate.dev)) return DATA_SKIP; data_opts->target = 0; diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h index 3f7e31cc8f6e..71b3d2b2ddb6 100644 --- a/fs/bcachefs/move.h +++ b/fs/bcachefs/move.h @@ -47,7 +47,7 @@ int bch2_migrate_write_init(struct bch_fs *, struct migrate_write *, struct bkey_s_c); typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *, - enum bkey_type, struct bkey_s_c_extent, + struct bkey_s_c, struct bch_io_opts *, struct data_opts *); int bch2_move_data(struct bch_fs *, struct bch_ratelimit *, diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c index 80577661e008..4bf4cc33dbb1 100644 --- a/fs/bcachefs/movinggc.c +++ b/fs/bcachefs/movinggc.c @@ -66,36 +66,42 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size) } static bool __copygc_pred(struct bch_dev *ca, - struct bkey_s_c_extent e) + struct bkey_s_c k) { copygc_heap *h = &ca->copygc_heap; - const struct bch_extent_ptr *ptr = - bch2_extent_has_device(e, ca->dev_idx); - if (ptr) { - struct copygc_heap_entry search = { .offset = ptr->offset }; + switch (k.k->type) { + case KEY_TYPE_extent: { + struct bkey_s_c_extent e = bkey_s_c_to_extent(k); + const struct bch_extent_ptr *ptr = + bch2_extent_has_device(e, ca->dev_idx); - ssize_t i = eytzinger0_find_le(h->data, h->used, - sizeof(h->data[0]), - bucket_offset_cmp, &search); + if (ptr) { + struct copygc_heap_entry search = { .offset = ptr->offset }; - return (i >= 0 && - ptr->offset < h->data[i].offset + ca->mi.bucket_size && - ptr->gen == h->data[i].gen); + ssize_t i = eytzinger0_find_le(h->data, h->used, + sizeof(h->data[0]), + 
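/*
 * Sketch of the __copygc_pred() test above: find the candidate bucket
 * whose start offset is the greatest one <= the pointer's offset, then
 * require the pointer to land inside that bucket with a matching
 * generation. A plain binary search over a sorted array stands in for
 * the kernel's eytzinger0_find_le(), which does the same lookup over a
 * cache-friendlier eytzinger-ordered array.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct sk_cand { uint64_t offset; uint8_t gen; };

static long sk_find_le(const struct sk_cand *v, size_t nr, uint64_t off)
{
	long l = 0, r = (long)nr - 1, best = -1;

	while (l <= r) {
		long m = l + (r - l) / 2;

		if (v[m].offset <= off) {
			best = m;	/* candidate; look for a later one */
			l = m + 1;
		} else {
			r = m - 1;
		}
	}
	return best;
}

static bool sk_ptr_in_copygc_bucket(const struct sk_cand *v, size_t nr,
				    uint64_t ptr_offset, uint8_t ptr_gen,
				    uint64_t bucket_size)
{
	long i = sk_find_le(v, nr, ptr_offset);

	return i >= 0 &&
		ptr_offset < v[i].offset + bucket_size &&
		ptr_gen == v[i].gen;
}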
bucket_offset_cmp, &search); + + return (i >= 0 && + ptr->offset < h->data[i].offset + ca->mi.bucket_size && + ptr->gen == h->data[i].gen); + } + break; + } } return false; } static enum data_cmd copygc_pred(struct bch_fs *c, void *arg, - enum bkey_type type, - struct bkey_s_c_extent e, + struct bkey_s_c k, struct bch_io_opts *io_opts, struct data_opts *data_opts) { struct bch_dev *ca = arg; - if (!__copygc_pred(ca, e)) + if (!__copygc_pred(ca, k)) return DATA_SKIP; data_opts->target = dev_to_target(ca->dev_idx); diff --git a/fs/bcachefs/opts.h b/fs/bcachefs/opts.h index 80869e34e3b6..acdc952c48be 100644 --- a/fs/bcachefs/opts.h +++ b/fs/bcachefs/opts.h @@ -184,6 +184,9 @@ enum opt_type { OPT_BOOL(), \ NO_SB_OPT, false) \ BCH_OPT(no_data_io, u8, OPT_MOUNT, \ + OPT_BOOL(), \ + NO_SB_OPT, false) \ + BCH_OPT(version_upgrade, u8, OPT_MOUNT, \ OPT_BOOL(), \ NO_SB_OPT, false) diff --git a/fs/bcachefs/quota.c b/fs/bcachefs/quota.c index cc20742d542b..7c38daac1cac 100644 --- a/fs/bcachefs/quota.c +++ b/fs/bcachefs/quota.c @@ -22,23 +22,13 @@ const struct bch_sb_field_ops bch_sb_field_ops_quota = { const char *bch2_quota_invalid(const struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_quota dq; - if (k.k->p.inode >= QTYP_NR) return "invalid quota type"; - switch (k.k->type) { - case BCH_QUOTA: { - dq = bkey_s_c_to_quota(k); + if (bkey_val_bytes(k.k) != sizeof(struct bch_quota)) + return "incorrect value size"; - if (bkey_val_bytes(k.k) != sizeof(struct bch_quota)) - return "incorrect value size"; - - return NULL; - } - default: - return "invalid type"; - } + return NULL; } static const char * const bch2_quota_counters[] = { @@ -49,20 +39,14 @@ static const char * const bch2_quota_counters[] = { void bch2_quota_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - struct bkey_s_c_quota dq; + struct bkey_s_c_quota dq = bkey_s_c_to_quota(k); unsigned i; - switch (k.k->type) { - case BCH_QUOTA: - dq = bkey_s_c_to_quota(k); - - for (i = 0; i < Q_COUNTERS; i++) - pr_buf(out, "%s hardlimit %llu softlimit %llu", - bch2_quota_counters[i], - le64_to_cpu(dq.v->c[i].hardlimit), - le64_to_cpu(dq.v->c[i].softlimit)); - break; - } + for (i = 0; i < Q_COUNTERS; i++) + pr_buf(out, "%s hardlimit %llu softlimit %llu", + bch2_quota_counters[i], + le64_to_cpu(dq.v->c[i].hardlimit), + le64_to_cpu(dq.v->c[i].softlimit)); } #ifdef CONFIG_BCACHEFS_QUOTA @@ -178,7 +162,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, BUG_ON((s64) n < 0); - if (mode == BCH_QUOTA_NOCHECK) + if (mode == KEY_TYPE_QUOTA_NOCHECK) return 0; if (v <= 0) { @@ -201,7 +185,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, if (qc->hardlimit && qc->hardlimit < n && !ignore_hardlimit(q)) { - if (mode == BCH_QUOTA_PREALLOC) + if (mode == KEY_TYPE_QUOTA_PREALLOC) return -EDQUOT; prepare_warning(qc, qtype, counter, msgs, HARDWARN); @@ -212,7 +196,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, qc->timer && ktime_get_real_seconds() >= qc->timer && !ignore_hardlimit(q)) { - if (mode == BCH_QUOTA_PREALLOC) + if (mode == KEY_TYPE_QUOTA_PREALLOC) return -EDQUOT; prepare_warning(qc, qtype, counter, msgs, SOFTLONGWARN); @@ -221,7 +205,7 @@ static int bch2_quota_check_limit(struct bch_fs *c, if (qc->softlimit && qc->softlimit < n && qc->timer == 0) { - if (mode == BCH_QUOTA_PREALLOC) + if (mode == KEY_TYPE_QUOTA_PREALLOC) return -EDQUOT; prepare_warning(qc, qtype, counter, msgs, SOFTWARN); @@ -312,13 +296,13 @@ int bch2_quota_transfer(struct bch_fs *c, unsigned qtypes, ret = bch2_quota_check_limit(c, i, dst_q[i], 
 					     &msgs, Q_SPC, dst_q[i]->c[Q_SPC].v + space,
-					     BCH_QUOTA_PREALLOC);
+					     KEY_TYPE_QUOTA_PREALLOC);
 		if (ret)
 			goto err;
 
 		ret = bch2_quota_check_limit(c, i, dst_q[i],
 					     &msgs, Q_INO, dst_q[i]->c[Q_INO].v + 1,
-					     BCH_QUOTA_PREALLOC);
+					     KEY_TYPE_QUOTA_PREALLOC);
 		if (ret)
 			goto err;
 	}
@@ -347,7 +331,7 @@ static int __bch2_quota_set(struct bch_fs *c, struct bkey_s_c k)
 	BUG_ON(k.k->p.inode >= QTYP_NR);
 
 	switch (k.k->type) {
-	case BCH_QUOTA:
+	case KEY_TYPE_quota:
 		dq = bkey_s_c_to_quota(k);
 		q = &c->quotas[k.k->p.inode];
 
@@ -447,15 +431,15 @@ int bch2_fs_quota_read(struct bch_fs *c)
 	for_each_btree_key(&iter, c, BTREE_ID_INODES, POS_MIN,
 			   BTREE_ITER_PREFETCH, k) {
 		switch (k.k->type) {
-		case BCH_INODE_FS:
+		case KEY_TYPE_inode:
 			ret = bch2_inode_unpack(bkey_s_c_to_inode(k), &u);
 			if (ret)
 				return ret;
 
 			bch2_quota_acct(c, bch_qid(&u), Q_SPC, u.bi_sectors,
-					BCH_QUOTA_NOCHECK);
+					KEY_TYPE_QUOTA_NOCHECK);
 			bch2_quota_acct(c, bch_qid(&u), Q_INO, 1,
-					BCH_QUOTA_NOCHECK);
+					KEY_TYPE_QUOTA_NOCHECK);
 		}
 	}
 	return bch2_btree_iter_unlock(&iter) ?: ret;
@@ -743,7 +727,7 @@ static int bch2_set_quota(struct super_block *sb, struct kqid qid,
 		return ret;
 
 	switch (k.k->type) {
-	case BCH_QUOTA:
+	case KEY_TYPE_quota:
 		new_quota.v = *bkey_s_c_to_quota(k).v;
 		break;
 	}
diff --git a/fs/bcachefs/quota.h b/fs/bcachefs/quota.h
index 9c06eb07bccb..294a04db84bf 100644
--- a/fs/bcachefs/quota.h
+++ b/fs/bcachefs/quota.h
@@ -10,15 +10,15 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_quota;
 const char *bch2_quota_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_quota_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
-#define bch2_bkey_quota_ops (struct bkey_ops) {		\
+#define bch2_bkey_ops_quota (struct bkey_ops) {		\
 	.key_invalid	= bch2_quota_invalid,		\
 	.val_to_text	= bch2_quota_to_text,		\
 }
 
 enum quota_acct_mode {
-	BCH_QUOTA_PREALLOC,
-	BCH_QUOTA_WARN,
-	BCH_QUOTA_NOCHECK,
+	KEY_TYPE_QUOTA_PREALLOC,
+	KEY_TYPE_QUOTA_WARN,
+	KEY_TYPE_QUOTA_NOCHECK,
 };
 
 static inline struct bch_qid bch_qid(struct bch_inode_unpacked *u)
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 5d246c5b8186..eec74d4a5712 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -70,28 +70,34 @@ void bch2_rebalance_add_work(struct bch_fs *c, u64 sectors)
 }
 
 static enum data_cmd rebalance_pred(struct bch_fs *c, void *arg,
-				    enum bkey_type type,
-				    struct bkey_s_c_extent e,
+				    struct bkey_s_c k,
 				    struct bch_io_opts *io_opts,
 				    struct data_opts *data_opts)
 {
-	const union bch_extent_entry *entry;
-	struct extent_ptr_decoded p;
+	switch (k.k->type) {
+	case KEY_TYPE_extent: {
+		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+		const union bch_extent_entry *entry;
+		struct extent_ptr_decoded p;
 
-	/* Make sure we have room to add a new pointer: */
-	if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
-	    BKEY_EXTENT_VAL_U64s_MAX)
-		return DATA_SKIP;
+		/* Make sure we have room to add a new pointer: */
+		if (bkey_val_u64s(e.k) + BKEY_EXTENT_PTR_U64s_MAX >
+		    BKEY_EXTENT_VAL_U64s_MAX)
+			return DATA_SKIP;
 
-	extent_for_each_ptr_decode(e, p, entry)
-		if (rebalance_ptr_pred(c, p, io_opts))
-			goto found;
+		extent_for_each_ptr_decode(e, p, entry)
+			if (rebalance_ptr_pred(c, p, io_opts))
+				goto found;
 
-	return DATA_SKIP;
+		return DATA_SKIP;
 found:
-	data_opts->target		= io_opts->background_target;
-	data_opts->btree_insert_flags	= 0;
-	return DATA_ADD_REPLICAS;
+		data_opts->target		= io_opts->background_target;
+		data_opts->btree_insert_flags	= 0;
+		return DATA_ADD_REPLICAS;
+	}
+	default:
+		return DATA_SKIP;
+	}
 }
 
 struct rebalance_work {
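
A note on the interface change running through move.c, movinggc.c, and rebalance.c above: move predicates no longer receive a pre-narrowed struct bkey_s_c_extent plus an enum bkey_type; they take a bare struct bkey_s_c and dispatch on the now globally unique k.k->type themselves. A minimal sketch of the new move_pred_fn shape follows; the function name example_pred and its replicate-all-data policy are illustrative, not part of this patch, and it assumes the usual bcachefs internal headers:

static enum data_cmd example_pred(struct bch_fs *c, void *arg,
				  struct bkey_s_c k,
				  struct bch_io_opts *io_opts,
				  struct data_opts *data_opts)
{
	/* One predicate now serves btree and extent keys alike: */
	switch (k.k->type) {
	case KEY_TYPE_btree_ptr:
	case KEY_TYPE_extent:
		data_opts->target		= 0;
		data_opts->btree_insert_flags	= 0;
		return DATA_ADD_REPLICAS;
	default:
		/* Key types that carry no pointers are simply skipped. */
		return DATA_SKIP;
	}
}
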
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 1cb0c9940ec1..172770606294 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -147,6 +147,10 @@ int bch2_fs_recovery(struct bch_fs *c)
 			mutex_unlock(&c->sb_lock);
 			goto err;
 		}
+
+		if (le16_to_cpu(c->disk_sb.sb->version) <
+		    bcachefs_metadata_version_bkey_renumber)
+			bch2_sb_clean_renumber(clean, READ);
 	}
 	mutex_unlock(&c->sb_lock);
 
@@ -265,12 +269,18 @@ int bch2_fs_recovery(struct bch_fs *c)
 	if (ret)
 		goto err;
 
-	if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags)) {
-		mutex_lock(&c->sb_lock);
-		c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
-		mutex_unlock(&c->sb_lock);
+	mutex_lock(&c->sb_lock);
+	if (c->opts.version_upgrade) {
+		if (c->sb.version < bcachefs_metadata_version_new_versioning)
+			c->disk_sb.sb->version_min =
+				cpu_to_le16(bcachefs_metadata_version_min);
+		c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
 	}
 
+	if (!test_bit(BCH_FS_FSCK_UNFIXED_ERRORS, &c->flags))
+		c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
+	mutex_unlock(&c->sb_lock);
+
 	if (enabled_qtypes(c)) {
 		bch_verbose(c, "reading quotas:");
 		ret = bch2_fs_quota_read(c);
@@ -379,9 +389,12 @@ int bch2_fs_initialize(struct bch_fs *c)
 		goto err;
 
 	mutex_lock(&c->sb_lock);
+	c->disk_sb.sb->version = c->disk_sb.sb->version_min =
+		cpu_to_le16(bcachefs_metadata_version_current);
+	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
+
 	SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
 	SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-	c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
 
 	bch2_write_super(c);
 	mutex_unlock(&c->sb_lock);
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 0296931b6b8c..77d175f34b2b 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -73,64 +73,57 @@ void bch2_cpu_replicas_to_text(struct printbuf *out,
 static void extent_to_replicas(struct bkey_s_c k,
 			       struct bch_replicas_entry *r)
 {
-	if (bkey_extent_is_data(k.k)) {
-		struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
-		const union bch_extent_entry *entry;
-		struct extent_ptr_decoded p;
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const union bch_extent_entry *entry;
+	struct extent_ptr_decoded p;
 
-		r->nr_required = 1;
+	r->nr_required = 1;
 
-		extent_for_each_ptr_decode(e, p, entry) {
-			if (p.ptr.cached)
-				continue;
+	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+		if (p.ptr.cached)
+			continue;
 
-			if (p.ec_nr) {
-				r->nr_devs = 0;
-				break;
-			}
-
-			r->devs[r->nr_devs++] = p.ptr.dev;
+		if (p.ec_nr) {
+			r->nr_devs = 0;
+			break;
 		}
+
+		r->devs[r->nr_devs++] = p.ptr.dev;
 	}
 }
 
 static void stripe_to_replicas(struct bkey_s_c k,
 			       struct bch_replicas_entry *r)
 {
-	if (k.k->type == BCH_STRIPE) {
-		struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
-		const struct bch_extent_ptr *ptr;
+	struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
+	const struct bch_extent_ptr *ptr;
 
-		r->nr_required = s.v->nr_blocks - s.v->nr_redundant;
+	r->nr_required = s.v->nr_blocks - s.v->nr_redundant;
 
-		for (ptr = s.v->ptrs;
-		     ptr < s.v->ptrs + s.v->nr_blocks;
-		     ptr++)
-			r->devs[r->nr_devs++] = ptr->dev;
-	}
+	for (ptr = s.v->ptrs;
+	     ptr < s.v->ptrs + s.v->nr_blocks;
+	     ptr++)
+		r->devs[r->nr_devs++] = ptr->dev;
 }
 
-static void bkey_to_replicas(enum bkey_type type,
-			     struct bkey_s_c k,
+static void bkey_to_replicas(struct bkey_s_c k,
 			     struct bch_replicas_entry *e)
 {
 	e->nr_devs = 0;
 
-	switch (type) {
-	case BKEY_TYPE_BTREE:
+	switch (k.k->type) {
+	case KEY_TYPE_btree_ptr:
 		e->data_type = BCH_DATA_BTREE;
 		extent_to_replicas(k, e);
 		break;
-	case BKEY_TYPE_EXTENTS:
+	case KEY_TYPE_extent:
 		e->data_type = BCH_DATA_USER;
 		extent_to_replicas(k, e);
 		break;
-	case BKEY_TYPE_EC:
+	case KEY_TYPE_stripe:
 		e->data_type = BCH_DATA_USER;
 		stripe_to_replicas(k, e);
 		break;
-	default:
-		break;
 	}
 
 	replicas_entry_sort(e);
@@ -296,26 +289,21 @@ int bch2_mark_replicas(struct bch_fs *c,
 	return __bch2_mark_replicas(c, &search.e);
 }
 
-int bch2_mark_bkey_replicas(struct bch_fs *c,
-			    enum bkey_type type,
-			    struct bkey_s_c k)
+int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
 {
 	struct bch_replicas_entry_padded search;
+	struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+	unsigned i;
 	int ret;
 
 	memset(&search, 0, sizeof(search));
 
-	if (type == BKEY_TYPE_EXTENTS) {
-		struct bch_devs_list cached = bch2_bkey_cached_devs(k);
-		unsigned i;
+	for (i = 0; i < cached.nr; i++)
+		if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
+				bch2_dev_list_single(cached.devs[i]))))
+			return ret;
 
-		for (i = 0; i < cached.nr; i++)
-			if ((ret = bch2_mark_replicas(c, BCH_DATA_CACHED,
-					bch2_dev_list_single(cached.devs[i]))))
-				return ret;
-	}
-
-	bkey_to_replicas(type, k, &search.e);
+	bkey_to_replicas(k, &search.e);
 
 	return search.e.nr_devs
 		? __bch2_mark_replicas(c, &search.e)
@@ -719,26 +707,22 @@ bool bch2_replicas_marked(struct bch_fs *c,
 }
 
 bool bch2_bkey_replicas_marked(struct bch_fs *c,
-			       enum bkey_type type,
 			       struct bkey_s_c k,
 			       bool check_gc_replicas)
 {
 	struct bch_replicas_entry_padded search;
+	struct bch_devs_list cached = bch2_bkey_cached_devs(k);
+	unsigned i;
 
 	memset(&search, 0, sizeof(search));
 
-	if (type == BKEY_TYPE_EXTENTS) {
-		struct bch_devs_list cached = bch2_bkey_cached_devs(k);
-		unsigned i;
-
-		for (i = 0; i < cached.nr; i++)
-			if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
-						  bch2_dev_list_single(cached.devs[i]),
-						  check_gc_replicas))
-				return false;
-	}
+	for (i = 0; i < cached.nr; i++)
+		if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
+					  bch2_dev_list_single(cached.devs[i]),
+					  check_gc_replicas))
+			return false;
 
-	bkey_to_replicas(type, k, &search.e);
+	bkey_to_replicas(k, &search.e);
 
 	return search.e.nr_devs
 		? replicas_has_entry(c, &search.e, check_gc_replicas)
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index e22d2d7cd08a..03aaafdc7c17 100644
--- a/fs/bcachefs/replicas.h
+++ b/fs/bcachefs/replicas.h
@@ -6,12 +6,11 @@
 bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
 			  struct bch_devs_list, bool);
-bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type,
+bool bch2_bkey_replicas_marked(struct bch_fs *,
 			       struct bkey_s_c, bool);
 int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
 		       struct bch_devs_list);
-int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type,
-			    struct bkey_s_c);
+int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
 
 void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index fbd6c3372677..6f30fbe44eb8 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -118,7 +118,6 @@ static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
 struct bch_hash_desc {
 	enum btree_id	btree_id;
 	u8		key_type;
-	u8		whiteout_type;
 
 	u64		(*hash_key)(const struct bch_hash_info *, const void *);
 	u64		(*hash_bkey)(const struct bch_hash_info *, struct bkey_s_c);
@@ -149,7 +148,7 @@ bch2_hash_lookup(struct btree_trans *trans,
 		if (k.k->type == desc.key_type) {
 			if (!desc.cmp_key(k, key))
 				return iter;
-		} else if (k.k->type == desc.whiteout_type) {
+		} else if (k.k->type == KEY_TYPE_whiteout) {
 			;
 		} else {
 			/* hole, not found */
@@ -202,7 +201,7 @@ static inline int bch2_hash_needs_whiteout(struct btree_trans *trans,
 
 	for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k) {
 		if (k.k->type != desc.key_type &&
-		    k.k->type != desc.whiteout_type)
+		    k.k->type != KEY_TYPE_whiteout)
 			return false;
 
 		if (k.k->type == desc.key_type &&
@@ -245,7 +244,7 @@ static inline int __bch2_hash_set(struct btree_trans *trans,
 				return PTR_ERR(slot);
 		}
 
-		if (k.k->type != desc.whiteout_type)
+		if (k.k->type != KEY_TYPE_whiteout)
 			goto not_found;
 	}
 
@@ -295,7 +294,7 @@ static inline int bch2_hash_delete_at(struct btree_trans *trans,
 
 	bkey_init(&delete->k);
 	delete->k.p = iter->pos;
-	delete->k.type = ret ? desc.whiteout_type : KEY_TYPE_DELETED;
+	delete->k.type = ret ? KEY_TYPE_whiteout : KEY_TYPE_deleted;
 
 	bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, delete));
 	return 0;
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 3dbcb6d7d261..dafdc45b442c 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -240,21 +240,25 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
 	struct bch_sb_field *f;
 	struct bch_sb_field_members *mi;
 	const char *err;
+	u32 version, version_min;
 	u16 block_size;
 
-	if (le16_to_cpu(sb->version) < BCH_SB_VERSION_MIN ||
-	    le16_to_cpu(sb->version) > BCH_SB_VERSION_MAX)
+	version = le16_to_cpu(sb->version);
+	version_min = version >= bcachefs_metadata_version_new_versioning
+		? le16_to_cpu(sb->version_min)
+		: version;
+
+	if (version >= bcachefs_metadata_version_max ||
+	    version_min < bcachefs_metadata_version_min)
 		return "Unsupported superblock version";
 
+	if (version_min > version)
+		return "Bad minimum version";
+
 	if (sb->features[1] ||
 	    (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR)))
 		return "Filesystem has incompatible features";
 
-	if (le16_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) {
-		SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, 7);
-		SET_BCH_SB_POSIX_ACL(sb, 1);
-	}
-
 	block_size = le16_to_cpu(sb->block_size);
 
 	if (!is_power_of_2(block_size) ||
@@ -341,13 +345,6 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
 			return err;
 	}
 
-	if (le16_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_NONCE_V1 &&
-	    bch2_sb_get_crypt(sb) &&
-	    BCH_SB_INITIALIZED(sb))
-		return "Incompatible extent nonces";
-
-	sb->version = cpu_to_le16(BCH_SB_VERSION_MAX);
-
 	return NULL;
 }
 
@@ -364,6 +361,7 @@ static void bch2_sb_update(struct bch_fs *c)
 
 	c->sb.uuid		= src->uuid;
 	c->sb.user_uuid		= src->user_uuid;
+	c->sb.version		= le16_to_cpu(src->version);
 	c->sb.nr_devices	= src->nr_devices;
 	c->sb.clean		= BCH_SB_CLEAN(src);
 	c->sb.encryption_type	= BCH_SB_ENCRYPTION_TYPE(src);
@@ -385,6 +383,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
 	unsigned i;
 
 	dst->version		= src->version;
+	dst->version_min	= src->version_min;
 	dst->seq		= src->seq;
 	dst->uuid		= src->uuid;
 	dst->user_uuid		= src->user_uuid;
@@ -483,8 +482,8 @@ reread:
 	    !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC))
 		return "Not a bcachefs superblock";
 
-	if (le16_to_cpu(sb->sb->version) < BCH_SB_VERSION_MIN ||
-	    le16_to_cpu(sb->sb->version) > BCH_SB_VERSION_MAX)
+	if (le16_to_cpu(sb->sb->version) < bcachefs_metadata_version_min ||
+	    le16_to_cpu(sb->sb->version) >= bcachefs_metadata_version_max)
 		return "Unsupported superblock version";
 
 	bytes = vstruct_bytes(sb->sb);
@@ -846,12 +845,6 @@ static const char *bch2_sb_validate_members(struct bch_sb *sb,
 			return "bucket size smaller than btree node size";
 	}
 
-	if (le16_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX)
-		for (m = mi->members;
-		     m < mi->members + sb->nr_devices;
-		     m++)
-			SET_BCH_MEMBER_DATA_ALLOWED(m, ~0);
-
 	return NULL;
 }
 
@@ -881,6 +874,16 @@ static const struct bch_sb_field_ops bch_sb_field_ops_crypt = {
 
 /* BCH_SB_FIELD_clean: */
 
+void bch2_sb_clean_renumber(struct bch_sb_field_clean *clean, int write)
+{
+	struct jset_entry *entry;
+
+	for (entry = clean->start;
+	     entry < (struct jset_entry *) vstruct_end(&clean->field);
+	     entry = vstruct_next(entry))
+		bch2_bkey_renumber(BKEY_TYPE_BTREE, bkey_to_packed(entry->start), write);
+}
+
 void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
 {
 	struct bch_sb_field_clean *sb_clean;
@@ -935,6 +938,10 @@ void bch2_fs_mark_clean(struct bch_fs *c, bool clean)
 
 	BUG_ON(entry != vstruct_end(&sb_clean->field));
 
+	if (le16_to_cpu(c->disk_sb.sb->version) <
+	    bcachefs_metadata_version_bkey_renumber)
+		bch2_sb_clean_renumber(sb_clean, WRITE);
+
 	mutex_unlock(&c->btree_root_lock);
 write_super:
 	bch2_write_super(c);
diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
index aa618fe9cd22..ac3b704f0540 100644
--- a/fs/bcachefs/super-io.h
+++ b/fs/bcachefs/super-io.h
@@ -135,6 +135,8 @@ static inline struct bch_member_cpu bch2_mi_to_cpu(struct bch_member *mi)
 
 /* BCH_SB_FIELD_clean: */
 
+void bch2_sb_clean_renumber(struct bch_sb_field_clean *, int);
+
 void bch2_fs_mark_clean(struct bch_fs *, bool);
 
 void bch2_sb_field_to_text(struct printbuf *, struct bch_sb *,
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index cadbc5481bcb..7405b5cdd1bf 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1799,7 +1799,7 @@ err:
 BCH_DEBUG_PARAMS()
 #undef BCH_DEBUG_PARAM
 
-unsigned bch2_metadata_version = BCH_SB_VERSION_MAX;
+unsigned bch2_metadata_version = bcachefs_metadata_version_current;
 module_param_named(version, bch2_metadata_version, uint, 0400);
 
 module_exit(bcachefs_exit);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 8eacc0d2550b..7e46b254da38 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -277,7 +277,7 @@ static ssize_t bch2_compression_stats(struct bch_fs *c, char *buf)
 		return -EPERM;
 
 	for_each_btree_key(&iter, c, BTREE_ID_EXTENTS, POS_MIN, 0, k)
-		if (k.k->type == BCH_EXTENT) {
+		if (k.k->type == KEY_TYPE_extent) {
 			struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
 			const union bch_extent_entry *entry;
 			struct extent_ptr_decoded p;
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 9730540f7375..1aa6ac05d50e 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -165,7 +165,7 @@ TRACE_EVENT(btree_write,
 	TP_ARGS(b, bytes, sectors),
 
 	TP_STRUCT__entry(
-		__field(enum bkey_type,	type)
+		__field(enum btree_node_type,	type)
 		__field(unsigned,	bytes			)
 		__field(unsigned,	sectors			)
 	),
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index ab358c434753..ff2d59ee1658 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -62,8 +62,7 @@ static bool xattr_cmp_bkey(struct bkey_s_c _l, struct bkey_s_c _r)
 
 const struct bch_hash_desc bch2_xattr_hash_desc = {
 	.btree_id	= BTREE_ID_XATTRS,
-	.key_type	= BCH_XATTR,
-	.whiteout_type	= BCH_XATTR_WHITEOUT,
+	.key_type	= KEY_TYPE_xattr,
 	.hash_key	= xattr_hash_key,
 	.hash_bkey	= xattr_hash_bkey,
 	.cmp_key	= xattr_cmp_key,
@@ -73,71 +72,50 @@ const struct bch_hash_desc bch2_xattr_hash_desc = {
 const char *bch2_xattr_invalid(const struct bch_fs *c, struct bkey_s_c k)
 {
 	const struct xattr_handler *handler;
-	struct bkey_s_c_xattr xattr;
-
-	switch (k.k->type) {
-	case BCH_XATTR:
-		if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
-			return "value too small";
-
-		xattr = bkey_s_c_to_xattr(k);
+	struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
 
-		if (bkey_val_u64s(k.k) <
-		    xattr_val_u64s(xattr.v->x_name_len,
-				   le16_to_cpu(xattr.v->x_val_len)))
-			return "value too small";
+	if (bkey_val_bytes(k.k) < sizeof(struct bch_xattr))
+		return "value too small";
 
-		if (bkey_val_u64s(k.k) >
-		    xattr_val_u64s(xattr.v->x_name_len,
-				   le16_to_cpu(xattr.v->x_val_len) + 4))
-			return "value too big";
+	if (bkey_val_u64s(k.k) <
+	    xattr_val_u64s(xattr.v->x_name_len,
+			   le16_to_cpu(xattr.v->x_val_len)))
+		return "value too small";
 
-		handler = bch2_xattr_type_to_handler(xattr.v->x_type);
-		if (!handler)
-			return "invalid type";
+	if (bkey_val_u64s(k.k) >
+	    xattr_val_u64s(xattr.v->x_name_len,
+			   le16_to_cpu(xattr.v->x_val_len) + 4))
+		return "value too big";
 
-		if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len))
-			return "xattr name has invalid characters";
+	handler = bch2_xattr_type_to_handler(xattr.v->x_type);
+	if (!handler)
+		return "invalid type";
 
-		return NULL;
-	case BCH_XATTR_WHITEOUT:
-		return bkey_val_bytes(k.k) != 0
-			? "value size should be zero"
-			: NULL;
+	if (memchr(xattr.v->x_name, '\0', xattr.v->x_name_len))
+		return "xattr name has invalid characters";
 
-	default:
-		return "invalid type";
-	}
+	return NULL;
 }
 
 void bch2_xattr_to_text(struct printbuf *out, struct bch_fs *c,
 			struct bkey_s_c k)
 {
 	const struct xattr_handler *handler;
-	struct bkey_s_c_xattr xattr;
+	struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
 
-	switch (k.k->type) {
-	case BCH_XATTR:
-		xattr = bkey_s_c_to_xattr(k);
+	handler = bch2_xattr_type_to_handler(xattr.v->x_type);
+	if (handler && handler->prefix)
+		pr_buf(out, "%s", handler->prefix);
+	else if (handler)
+		pr_buf(out, "(type %u)", xattr.v->x_type);
+	else
+		pr_buf(out, "(unknown type %u)", xattr.v->x_type);
 
-		handler = bch2_xattr_type_to_handler(xattr.v->x_type);
-		if (handler && handler->prefix)
-			pr_buf(out, "%s", handler->prefix);
-		else if (handler)
-			pr_buf(out, "(type %u)", xattr.v->x_type);
-		else
-			pr_buf(out, "(unknown type %u)", xattr.v->x_type);
-
-		bch_scnmemcpy(out, xattr.v->x_name,
-			      xattr.v->x_name_len);
-		pr_buf(out, ":");
-		bch_scnmemcpy(out, xattr_val(xattr.v),
-			      le16_to_cpu(xattr.v->x_val_len));
-		break;
-	case BCH_XATTR_WHITEOUT:
-		pr_buf(out, "whiteout");
-		break;
-	}
+	bch_scnmemcpy(out, xattr.v->x_name,
+		      xattr.v->x_name_len);
+	pr_buf(out, ":");
+	bch_scnmemcpy(out, xattr_val(xattr.v),
+		      le16_to_cpu(xattr.v->x_val_len));
 }
 
 int bch2_xattr_get(struct bch_fs *c, struct bch_inode_info *inode,
@@ -261,7 +239,7 @@ ssize_t bch2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
 		if (k.k->p.inode > inum)
 			break;
 
-		if (k.k->type != BCH_XATTR)
+		if (k.k->type != KEY_TYPE_xattr)
 			continue;
 
 		xattr = bkey_s_c_to_xattr(k).v;
@@ -315,7 +293,7 @@ static const struct xattr_handler bch_xattr_user_handler = {
 	.prefix	= XATTR_USER_PREFIX,
 	.get	= bch2_xattr_get_handler,
 	.set	= bch2_xattr_set_handler,
-	.flags	= BCH_XATTR_INDEX_USER,
+	.flags	= KEY_TYPE_XATTR_INDEX_USER,
 };
 
 static bool bch2_xattr_trusted_list(struct dentry *dentry)
@@ -328,14 +306,14 @@ static const struct xattr_handler bch_xattr_trusted_handler = {
 	.list	= bch2_xattr_trusted_list,
 	.get	= bch2_xattr_get_handler,
 	.set	= bch2_xattr_set_handler,
-	.flags	= BCH_XATTR_INDEX_TRUSTED,
+	.flags	= KEY_TYPE_XATTR_INDEX_TRUSTED,
 };
 
 static const struct xattr_handler bch_xattr_security_handler = {
 	.prefix	= XATTR_SECURITY_PREFIX,
 	.get	= bch2_xattr_get_handler,
 	.set	= bch2_xattr_set_handler,
-	.flags	= BCH_XATTR_INDEX_SECURITY,
+	.flags	= KEY_TYPE_XATTR_INDEX_SECURITY,
 };
 
 #ifndef NO_BCACHEFS_FS
@@ -474,13 +452,13 @@ const struct xattr_handler *bch2_xattr_handlers[] = {
 };
 
 static const struct xattr_handler *bch_xattr_handler_map[] = {
-	[BCH_XATTR_INDEX_USER]			= &bch_xattr_user_handler,
-	[BCH_XATTR_INDEX_POSIX_ACL_ACCESS]	=
+	[KEY_TYPE_XATTR_INDEX_USER]		= &bch_xattr_user_handler,
+	[KEY_TYPE_XATTR_INDEX_POSIX_ACL_ACCESS]	=
 		&nop_posix_acl_access,
-	[BCH_XATTR_INDEX_POSIX_ACL_DEFAULT]	=
+	[KEY_TYPE_XATTR_INDEX_POSIX_ACL_DEFAULT] =
 		&nop_posix_acl_default,
-	[BCH_XATTR_INDEX_TRUSTED]		= &bch_xattr_trusted_handler,
-	[BCH_XATTR_INDEX_SECURITY]		= &bch_xattr_security_handler,
+	[KEY_TYPE_XATTR_INDEX_TRUSTED]		= &bch_xattr_trusted_handler,
+	[KEY_TYPE_XATTR_INDEX_SECURITY]		= &bch_xattr_security_handler,
 };
 
 static const struct xattr_handler *bch2_xattr_type_to_handler(unsigned type)
diff --git a/fs/bcachefs/xattr.h b/fs/bcachefs/xattr.h
index 63be44b02a2b..4151065ab853 100644
--- a/fs/bcachefs/xattr.h
+++ b/fs/bcachefs/xattr.h
@@ -9,7 +9,7 @@ extern const struct bch_hash_desc bch2_xattr_hash_desc;
 const char *bch2_xattr_invalid(const struct bch_fs *, struct bkey_s_c);
 void bch2_xattr_to_text(struct printbuf *, struct bch_fs *, struct bkey_s_c);
 
-#define bch2_bkey_xattr_ops (struct bkey_ops) {	\
+#define bch2_bkey_ops_xattr (struct bkey_ops) {	\
 	.key_invalid	= bch2_xattr_invalid,	\
 	.val_to_text	= bch2_xattr_to_text,	\
 }
-- 
cgit v1.2.3-70-g09d2
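
Postscript on the bch2_bkey_*_ops to bch2_bkey_ops_* renames in quota.h and xattr.h above (and alloc_background.h earlier in the patch): aligning the ops-macro names with the key type names lets a single table, indexed by the now globally unique k.k->type, replace the per-btree switch statements the commit message mentions. A hedged sketch of that dispatch pattern, assuming an x-macro list of key types; the three-entry BCH_KEY_TYPES() list shown here is purely illustrative, and the real list and table live in bcachefs_format.h and bkey_methods.c and may differ in detail:

/* Illustrative only: a stand-in for the real key type list. */
#define BCH_KEY_TYPES()		\
	x(alloc)		\
	x(quota)		\
	x(xattr)

static const struct bkey_ops bch2_bkey_ops[] = {
#define x(name)	[KEY_TYPE_##name] = bch2_bkey_ops_##name,
	BCH_KEY_TYPES()
#undef x
};

/* Dispatch is then one array lookup, with no btree-type switch.
 * (In real code the key_invalid hook would be checked for NULL
 * and k.k->type bounds-checked first.) */
static const char *bkey_invalid_example(const struct bch_fs *c, struct bkey_s_c k)
{
	return bch2_bkey_ops[k.k->type].key_invalid(c, k);
}
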