summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/bcachefs/bcachefs_format.h3
-rw-r--r--fs/bcachefs/btree_io.c11
-rw-r--r--fs/bcachefs/btree_io.h9
-rw-r--r--fs/bcachefs/btree_types.h3
-rw-r--r--fs/bcachefs/btree_update.h1
-rw-r--r--fs/bcachefs/btree_update_interior.c352
-rw-r--r--fs/bcachefs/btree_update_interior.h16
-rw-r--r--fs/bcachefs/btree_update_leaf.c23
-rw-r--r--fs/bcachefs/super-io.c2
9 files changed, 146 insertions, 274 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index 1ad5ff449a5b..6f74fda1f21d 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -1316,7 +1316,8 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16);
x(new_extent_overwrite, 9) \
x(incompressible, 10) \
x(btree_ptr_v2, 11) \
- x(extents_above_btree_updates, 12)
+ x(extents_above_btree_updates, 12) \
+ x(btree_updates_journalled, 13)
#define BCH_SB_FEATURES_ALL \
((1ULL << BCH_FEATURE_new_siphash)| \
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index d0b761417903..e43d1b2ce5c7 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -1260,7 +1260,6 @@ void bch2_btree_complete_write(struct bch_fs *c, struct btree *b,
closure_put(&((struct btree_update *) new)->cl);
bch2_journal_pin_drop(&c->journal, &w->journal);
- closure_wake_up(&w->wait);
}
static void btree_node_write_done(struct bch_fs *c, struct btree *b)
@@ -1618,9 +1617,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
wbio->wbio.bio.bi_end_io = btree_node_write_endio;
wbio->wbio.bio.bi_private = b;
- if (b->c.level || !b->written)
- wbio->wbio.bio.bi_opf |= REQ_FUA;
-
bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9);
/*
@@ -1794,12 +1790,11 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf)
rcu_read_lock();
for_each_cached_btree(b, c, tbl, i, pos) {
unsigned long flags = READ_ONCE(b->flags);
- unsigned idx = (flags & (1 << BTREE_NODE_write_idx)) != 0;
if (!(flags & (1 << BTREE_NODE_dirty)))
continue;
- pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu c %u p %u\n",
+ pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu\n",
b,
(flags & (1 << BTREE_NODE_dirty)) != 0,
(flags & (1 << BTREE_NODE_need_write)) != 0,
@@ -1807,9 +1802,7 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf)
b->written,
!list_empty_careful(&b->write_blocked),
b->will_make_reachable != 0,
- b->will_make_reachable & 1,
- b->writes[ idx].wait.list.first != NULL,
- b->writes[!idx].wait.list.first != NULL);
+ b->will_make_reachable & 1);
}
rcu_read_unlock();
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index 43fa8a6dbee5..a02e261c2eb2 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -102,19 +102,20 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
void bch2_btree_node_write(struct bch_fs *, struct btree *,
enum six_lock_type);
-static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b)
+static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b,
+ enum six_lock_type lock_held)
{
while (b->written &&
btree_node_need_write(b) &&
btree_node_may_write(b)) {
if (!btree_node_write_in_flight(b)) {
- bch2_btree_node_write(c, b, SIX_LOCK_read);
+ bch2_btree_node_write(c, b, lock_held);
break;
}
six_unlock_read(&b->c.lock);
btree_node_wait_on_io(b);
- btree_node_lock_type(c, b, SIX_LOCK_read);
+ btree_node_lock_type(c, b, lock_held);
}
}
@@ -131,7 +132,7 @@ do { \
new |= (1 << BTREE_NODE_need_write); \
} while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \
\
- btree_node_write_if_need(_c, _b); \
+ btree_node_write_if_need(_c, _b, SIX_LOCK_read); \
} while (0)
void bch2_btree_flush_all_reads(struct bch_fs *);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 885cc9500f36..a794f9fe4fce 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -53,7 +53,6 @@ struct bset_tree {
struct btree_write {
struct journal_entry_pin journal;
- struct closure_waitlist wait;
};
struct btree_alloc {
@@ -547,8 +546,6 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type)
struct btree_root {
struct btree *b;
- struct btree_update *as;
-
/* On disk root - see async splits: */
__BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX);
u8 level;
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 9f58d47ef5d6..11f7d02de622 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -12,6 +12,7 @@ void bch2_btree_node_lock_for_insert(struct bch_fs *, struct btree *,
struct btree_iter *);
bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *,
struct btree_node_iter *, struct bkey_i *);
+void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64);
enum btree_insert_flags {
__BTREE_INSERT_NOUNLOCK,
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index fa9c7f5e0bb9..68deb4eb31a6 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -24,7 +24,6 @@
static void btree_node_will_make_reachable(struct btree_update *,
struct btree *);
static void btree_update_drop_new_node(struct bch_fs *, struct btree *);
-static void bch2_btree_set_root_ondisk(struct bch_fs *, struct btree *, int);
/* Debug code: */
@@ -260,16 +259,17 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b,
}
static void bch2_btree_node_free_ondisk(struct bch_fs *c,
- struct pending_btree_node_free *pending)
+ struct pending_btree_node_free *pending,
+ u64 journal_seq)
{
BUG_ON(!pending->index_update_done);
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
- 0, 0, NULL, 0, BTREE_TRIGGER_OVERWRITE);
+ 0, 0, NULL, journal_seq, BTREE_TRIGGER_OVERWRITE);
if (gc_visited(c, gc_phase(GC_PHASE_PENDING_DELETE)))
bch2_mark_key(c, bkey_i_to_s_c(&pending->key),
- 0, 0, NULL, 0,
+ 0, 0, NULL, journal_seq,
BTREE_TRIGGER_OVERWRITE|
BTREE_TRIGGER_GC);
}
@@ -585,10 +585,13 @@ static void bch2_btree_update_free(struct btree_update *as)
{
struct bch_fs *c = as->c;
+ bch2_journal_preres_put(&c->journal, &as->journal_preres);
+
+ bch2_journal_pin_drop(&c->journal, &as->journal);
bch2_journal_pin_flush(&c->journal, &as->journal);
- BUG_ON(as->nr_new_nodes);
- BUG_ON(as->nr_pending);
+ BUG_ON((as->nr_new_nodes || as->nr_pending) &&
+ !bch2_journal_error(&c->journal));
if (as->reserve)
bch2_btree_reserve_put(c, as->reserve);
@@ -603,13 +606,10 @@ static void bch2_btree_update_free(struct btree_update *as)
mutex_unlock(&c->btree_interior_update_lock);
}
-static void btree_update_nodes_reachable(struct closure *cl)
+static void btree_update_nodes_reachable(struct btree_update *as, u64 seq)
{
- struct btree_update *as = container_of(cl, struct btree_update, cl);
struct bch_fs *c = as->c;
- bch2_journal_pin_drop(&c->journal, &as->journal);
-
mutex_lock(&c->btree_interior_update_lock);
while (as->nr_new_nodes) {
@@ -630,39 +630,22 @@ static void btree_update_nodes_reachable(struct closure *cl)
}
while (as->nr_pending)
- bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending]);
+ bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending],
+ seq);
mutex_unlock(&c->btree_interior_update_lock);
-
- closure_wake_up(&as->wait);
-
- bch2_btree_update_free(as);
-}
-
-static void btree_update_wait_on_journal(struct closure *cl)
-{
- struct btree_update *as = container_of(cl, struct btree_update, cl);
- struct bch_fs *c = as->c;
- int ret;
-
- ret = bch2_journal_open_seq_async(&c->journal, as->journal_seq, cl);
- if (ret == -EAGAIN) {
- continue_at(cl, btree_update_wait_on_journal, system_wq);
- return;
- }
- if (ret < 0)
- goto err;
-
- bch2_journal_flush_seq_async(&c->journal, as->journal_seq, cl);
-err:
- continue_at(cl, btree_update_nodes_reachable, system_wq);
}
static void btree_update_nodes_written(struct closure *cl)
{
struct btree_update *as = container_of(cl, struct btree_update, cl);
+ struct journal_res res = { 0 };
struct bch_fs *c = as->c;
struct btree *b;
+ struct bset *i;
+ struct bkey_i *k;
+ unsigned journal_u64s = 0;
+ int ret;
/*
* We did an update to a parent node where the pointers we added pointed
@@ -671,7 +654,7 @@ static void btree_update_nodes_written(struct closure *cl)
*/
mutex_lock(&c->btree_interior_update_lock);
as->nodes_written = true;
-retry:
+again:
as = list_first_entry_or_null(&c->btree_interior_updates_unwritten,
struct btree_update, unwritten_list);
if (!as || !as->nodes_written) {
@@ -679,31 +662,53 @@ retry:
return;
}
+ b = as->b;
+ if (b && !six_trylock_intent(&b->c.lock)) {
+ mutex_unlock(&c->btree_interior_update_lock);
+ btree_node_lock_type(c, b, SIX_LOCK_intent);
+ six_unlock_intent(&b->c.lock);
+ goto out;
+ }
+
+ journal_u64s = 0;
+
+ if (as->mode != BTREE_INTERIOR_UPDATING_ROOT)
+ for_each_keylist_key(&as->parent_keys, k)
+ journal_u64s += jset_u64s(k->k.u64s);
+
+ ret = bch2_journal_res_get(&c->journal, &res, journal_u64s,
+ JOURNAL_RES_GET_RESERVED);
+ if (ret) {
+ BUG_ON(!bch2_journal_error(&c->journal));
+ /* can't unblock btree writes */
+ goto free_update;
+ }
+
+ if (as->mode != BTREE_INTERIOR_UPDATING_ROOT)
+ for_each_keylist_key(&as->parent_keys, k)
+ bch2_journal_add_entry(&c->journal, &res,
+ BCH_JSET_ENTRY_btree_keys,
+ as->btree_id,
+ as->level,
+ k, k->k.u64s);
+
switch (as->mode) {
case BTREE_INTERIOR_NO_UPDATE:
BUG();
case BTREE_INTERIOR_UPDATING_NODE:
- /* The usual case: */
- b = READ_ONCE(as->b);
-
- if (!six_trylock_read(&b->c.lock)) {
- mutex_unlock(&c->btree_interior_update_lock);
- btree_node_lock_type(c, b, SIX_LOCK_read);
- six_unlock_read(&b->c.lock);
- mutex_lock(&c->btree_interior_update_lock);
- goto retry;
- }
-
- BUG_ON(!btree_node_dirty(b));
- closure_wait(&btree_current_write(b)->wait, &as->cl);
+ /* @b is the node we did the final insert into: */
+ BUG_ON(!res.ref);
+ six_lock_write(&b->c.lock, NULL, NULL);
list_del(&as->write_blocked_list);
- /*
- * for flush_held_btree_writes() waiting on updates to flush or
- * nodes to be writeable:
- */
- closure_wake_up(&c->btree_interior_update_wait);
+ i = btree_bset_last(b);
+ i->journal_seq = cpu_to_le64(
+ max(res.seq,
+ le64_to_cpu(i->journal_seq)));
+
+ bch2_btree_add_journal_pin(c, b, res.seq);
+ six_unlock_write(&b->c.lock);
list_del(&as->unwritten_list);
mutex_unlock(&c->btree_interior_update_lock);
@@ -712,82 +717,51 @@ retry:
* b->write_blocked prevented it from being written, so
* write it now if it needs to be written:
*/
- bch2_btree_node_write_cond(c, b, true);
- six_unlock_read(&b->c.lock);
- continue_at(&as->cl, btree_update_nodes_reachable, system_wq);
+ btree_node_write_if_need(c, b, SIX_LOCK_intent);
+ six_unlock_intent(&b->c.lock);
break;
case BTREE_INTERIOR_UPDATING_AS:
- /*
- * The btree node we originally updated has been freed and is
- * being rewritten - so we need to write anything here, we just
- * need to signal to that btree_update that it's ok to make the
- * new replacement node visible:
- */
- closure_put(&as->parent_as->cl);
-
- /*
- * and then we have to wait on that btree_update to finish:
- */
- closure_wait(&as->parent_as->wait, &as->cl);
+ BUG_ON(b);
list_del(&as->unwritten_list);
mutex_unlock(&c->btree_interior_update_lock);
-
- continue_at(&as->cl, btree_update_nodes_reachable, system_wq);
break;
- case BTREE_INTERIOR_UPDATING_ROOT:
- /* b is the new btree root: */
- b = READ_ONCE(as->b);
-
- if (!six_trylock_read(&b->c.lock)) {
- mutex_unlock(&c->btree_interior_update_lock);
- btree_node_lock_type(c, b, SIX_LOCK_read);
- six_unlock_read(&b->c.lock);
- mutex_lock(&c->btree_interior_update_lock);
- goto retry;
- }
-
- BUG_ON(c->btree_roots[b->c.btree_id].as != as);
- c->btree_roots[b->c.btree_id].as = NULL;
+ case BTREE_INTERIOR_UPDATING_ROOT: {
+ struct btree_root *r = &c->btree_roots[as->btree_id];
- bch2_btree_set_root_ondisk(c, b, WRITE);
+ BUG_ON(b);
- /*
- * We don't have to wait anything anything here (before
- * btree_update_nodes_reachable frees the old nodes
- * ondisk) - we've ensured that the very next journal write will
- * have the pointer to the new root, and before the allocator
- * can reuse the old nodes it'll have to do a journal commit:
- */
- six_unlock_read(&b->c.lock);
+ mutex_lock(&c->btree_root_lock);
+ bkey_copy(&r->key, as->parent_keys.keys);
+ r->level = as->level;
+ r->alive = true;
+ c->btree_roots_dirty = true;
+ mutex_unlock(&c->btree_root_lock);
list_del(&as->unwritten_list);
mutex_unlock(&c->btree_interior_update_lock);
-
- /*
- * Bit of funny circularity going on here we have to break:
- *
- * We have to drop our journal pin before writing the journal
- * entry that points to the new btree root: else, we could
- * deadlock if the journal currently happens to be full.
- *
- * This mean we're dropping the journal pin _before_ the new
- * nodes are technically reachable - but this is safe, because
- * after the bch2_btree_set_root_ondisk() call above they will
- * be reachable as of the very next journal write:
- */
- bch2_journal_pin_drop(&c->journal, &as->journal);
-
- as->journal_seq = bch2_journal_last_unwritten_seq(&c->journal);
-
- btree_update_wait_on_journal(&as->cl);
break;
}
+ }
+ bch2_journal_pin_drop(&c->journal, &as->journal);
+
+ bch2_journal_res_put(&c->journal, &res);
+ bch2_journal_preres_put(&c->journal, &as->journal_preres);
+
+ btree_update_nodes_reachable(as, res.seq);
+free_update:
+ bch2_btree_update_free(as);
+ /*
+ * for flush_held_btree_writes() waiting on updates to flush or
+ * nodes to be writeable:
+ */
+ closure_wake_up(&c->btree_interior_update_wait);
+out:
mutex_lock(&c->btree_interior_update_lock);
- goto retry;
+ goto again;
}
/*
@@ -804,48 +778,12 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b)
BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
BUG_ON(!btree_node_dirty(b));
- as->mode = BTREE_INTERIOR_UPDATING_NODE;
- as->b = b;
+ as->mode = BTREE_INTERIOR_UPDATING_NODE;
+ as->b = b;
+ as->level = b->c.level;
list_add(&as->write_blocked_list, &b->write_blocked);
mutex_unlock(&c->btree_interior_update_lock);
-
- /*
- * In general, when you're staging things in a journal that will later
- * be written elsewhere, and you also want to guarantee ordering: that
- * is, if you have updates a, b, c, after a crash you should never see c
- * and not a or b - there's a problem:
- *
- * If the final destination of the update(s) (i.e. btree node) can be
- * written/flushed _before_ the relevant journal entry - oops, that
- * breaks ordering, since the various leaf nodes can be written in any
- * order.
- *
- * Normally we use bset->journal_seq to deal with this - if during
- * recovery we find a btree node write that's newer than the newest
- * journal entry, we just ignore it - we don't need it, anything we're
- * supposed to have (that we reported as completed via fsync()) will
- * still be in the journal, and as far as the state of the journal is
- * concerned that btree node write never happened.
- *
- * That breaks when we're rewriting/splitting/merging nodes, since we're
- * mixing btree node writes that haven't happened yet with previously
- * written data that has been reported as completed to the journal.
- *
- * Thus, before making the new nodes reachable, we have to wait the
- * newest journal sequence number we have data for to be written (if it
- * hasn't been yet).
- */
- bch2_journal_wait_on_seq(&c->journal, as->journal_seq, &as->cl);
-}
-
-static void interior_update_flush(struct journal *j,
- struct journal_entry_pin *pin, u64 seq)
-{
- struct btree_update *as =
- container_of(pin, struct btree_update, journal);
-
- bch2_journal_flush_seq_async(j, as->journal_seq, NULL);
}
static void btree_update_reparent(struct btree_update *as,
@@ -853,10 +791,10 @@ static void btree_update_reparent(struct btree_update *as,
{
struct bch_fs *c = as->c;
+ lockdep_assert_held(&c->btree_interior_update_lock);
+
child->b = NULL;
child->mode = BTREE_INTERIOR_UPDATING_AS;
- child->parent_as = as;
- closure_get(&as->cl);
/*
* When we write a new btree root, we have to drop our journal pin
@@ -867,46 +805,24 @@ static void btree_update_reparent(struct btree_update *as,
* just transfer the journal pin to the new interior update so
* btree_update_nodes_written() can drop it.
*/
- bch2_journal_pin_copy(&c->journal, &as->journal,
- &child->journal, interior_update_flush);
+ bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL);
bch2_journal_pin_drop(&c->journal, &child->journal);
-
- as->journal_seq = max(as->journal_seq, child->journal_seq);
}
-static void btree_update_updated_root(struct btree_update *as)
+static void btree_update_updated_root(struct btree_update *as, struct btree *b)
{
struct bch_fs *c = as->c;
- struct btree_root *r = &c->btree_roots[as->btree_id];
-
- mutex_lock(&c->btree_interior_update_lock);
- list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE);
+ BUG_ON(!bch2_keylist_empty(&as->parent_keys));
- /*
- * Old root might not be persistent yet - if so, redirect its
- * btree_update operation to point to us:
- */
- if (r->as)
- btree_update_reparent(as, r->as);
-
- as->mode = BTREE_INTERIOR_UPDATING_ROOT;
- as->b = r->b;
- r->as = as;
+ mutex_lock(&c->btree_interior_update_lock);
+ list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten);
+ as->mode = BTREE_INTERIOR_UPDATING_ROOT;
+ as->level = b->c.level;
+ bch2_keylist_add(&as->parent_keys, &b->key);
mutex_unlock(&c->btree_interior_update_lock);
-
- /*
- * When we're rewriting nodes and updating interior nodes, there's an
- * issue with updates that haven't been written in the journal getting
- * mixed together with older data - see btree_update_updated_node()
- * for the explanation.
- *
- * However, this doesn't affect us when we're writing a new btree root -
- * because to make that new root reachable we have to write out a new
- * journal entry, which must necessarily be newer than as->journal_seq.
- */
}
static void btree_node_will_make_reachable(struct btree_update *as,
@@ -983,10 +899,8 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
struct btree *b)
{
struct bch_fs *c = as->c;
- struct closure *cl, *cl_n;
struct btree_update *p, *n;
struct btree_write *w;
- struct bset_tree *t;
set_btree_node_dying(b);
@@ -995,18 +909,6 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
btree_interior_update_add_node_reference(as, b);
- /*
- * Does this node have data that hasn't been written in the journal?
- *
- * If so, we have to wait for the corresponding journal entry to be
- * written before making the new nodes reachable - we can't just carry
- * over the bset->journal_seq tracking, since we'll be mixing those keys
- * in with keys that aren't in the journal anymore:
- */
- for_each_bset(b, t)
- as->journal_seq = max(as->journal_seq,
- le64_to_cpu(bset(b, t)->journal_seq));
-
mutex_lock(&c->btree_interior_update_lock);
/*
@@ -1030,16 +932,6 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
clear_btree_node_dirty(b);
clear_btree_node_need_write(b);
- w = btree_current_write(b);
-
- /*
- * Does this node have any btree_update operations waiting on this node
- * to be written?
- *
- * If so, wake them up when this btree_update operation is reachable:
- */
- llist_for_each_entry_safe(cl, cl_n, llist_del_all(&w->wait.list), list)
- llist_add(&cl->list, &as->wait.list);
/*
* Does this node have unwritten data that has a pin on the journal?
@@ -1049,13 +941,12 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as,
* oldest pin of any of the nodes we're freeing. We'll release the pin
* when the new nodes are persistent and reachable on disk:
*/
- bch2_journal_pin_copy(&c->journal, &as->journal,
- &w->journal, interior_update_flush);
+ w = btree_current_write(b);
+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL);
bch2_journal_pin_drop(&c->journal, &w->journal);
w = btree_prev_write(b);
- bch2_journal_pin_copy(&c->journal, &as->journal,
- &w->journal, interior_update_flush);
+ bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL);
bch2_journal_pin_drop(&c->journal, &w->journal);
mutex_unlock(&c->btree_interior_update_lock);
@@ -1078,6 +969,7 @@ bch2_btree_update_start(struct bch_fs *c, enum btree_id id,
{
struct btree_reserve *reserve;
struct btree_update *as;
+ int ret;
reserve = bch2_btree_reserve_get(c, nr_nodes, flags, cl);
if (IS_ERR(reserve))
@@ -1094,6 +986,15 @@ bch2_btree_update_start(struct bch_fs *c, enum btree_id id,
bch2_keylist_init(&as->parent_keys, as->inline_keys);
+ ret = bch2_journal_preres_get(&c->journal, &as->journal_preres,
+ jset_u64s(BKEY_BTREE_PTR_U64s_MAX) * 3, 0);
+ if (ret) {
+ bch2_btree_reserve_put(c, reserve);
+ closure_debug_destroy(&as->cl);
+ mempool_free(as, &c->btree_interior_update_pool);
+ return ERR_PTR(ret);
+ }
+
mutex_lock(&c->btree_interior_update_lock);
list_add_tail(&as->list, &c->btree_interior_update_list);
mutex_unlock(&c->btree_interior_update_lock);
@@ -1153,22 +1054,6 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
mutex_unlock(&c->btree_interior_update_lock);
}
-static void bch2_btree_set_root_ondisk(struct bch_fs *c, struct btree *b, int rw)
-{
- struct btree_root *r = &c->btree_roots[b->c.btree_id];
-
- mutex_lock(&c->btree_root_lock);
-
- BUG_ON(b != r->b);
- bkey_copy(&r->key, &b->key);
- r->level = b->c.level;
- r->alive = true;
- if (rw == WRITE)
- c->btree_roots_dirty = true;
-
- mutex_unlock(&c->btree_root_lock);
-}
-
/**
* bch_btree_set_root - update the root in memory and on disk
*
@@ -1201,7 +1086,7 @@ static void bch2_btree_set_root(struct btree_update *as, struct btree *b,
bch2_btree_set_root_inmem(as, b);
- btree_update_updated_root(as);
+ btree_update_updated_root(as, b);
/*
* Unlock old root after new root is visible:
@@ -1471,7 +1356,8 @@ static void btree_split(struct btree_update *as, struct btree *b,
bch2_btree_build_aux_trees(n1);
six_unlock_write(&n1->c.lock);
- bch2_keylist_add(&as->parent_keys, &n1->key);
+ if (parent)
+ bch2_keylist_add(&as->parent_keys, &n1->key);
}
bch2_btree_node_write(c, n1, SIX_LOCK_intent);
@@ -1545,12 +1431,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b,
(bkey_cmp_packed(b, k, &insert->k) >= 0))
;
- while (!bch2_keylist_empty(keys)) {
- insert = bch2_keylist_front(keys);
-
+ for_each_keylist_key(keys, insert)
bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter);
- bch2_keylist_pop_front(keys);
- }
btree_update_updated_node(as, b);
@@ -2107,7 +1989,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
bkey_copy(&b->key, new_key);
}
- btree_update_updated_root(as);
+ btree_update_updated_root(as, b);
bch2_btree_node_unlock_write(b, iter);
}
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index f6aceed89427..4a2ea69f6a2c 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -69,8 +69,10 @@ struct btree_update {
unsigned nodes_written:1;
enum btree_id btree_id;
+ u8 level;
struct btree_reserve *reserve;
+ struct journal_preres journal_preres;
/*
* BTREE_INTERIOR_UPDATING_NODE:
@@ -84,18 +86,6 @@ struct btree_update {
struct list_head write_blocked_list;
/*
- * BTREE_INTERIOR_UPDATING_AS: btree node we updated was freed, so now
- * we're now blocking another btree_update
- * @parent_as - btree_update that's waiting on our nodes to finish
- * writing, before it can make new nodes visible on disk
- * @wait - list of child btree_updates that are waiting on this
- * btree_update to make all the new nodes visible before they can free
- * their old btree nodes
- */
- struct btree_update *parent_as;
- struct closure_waitlist wait;
-
- /*
* We may be freeing nodes that were dirty, and thus had journal entries
* pinned: we need to transfer the oldest of those pins to the
* btree_update operation, and release it when the new node(s)
@@ -103,8 +93,6 @@ struct btree_update {
*/
struct journal_entry_pin journal;
- u64 journal_seq;
-
/*
* Nodes being freed:
* Protected by c->btree_node_pending_free_lock
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index a8487f8275b6..06e735fc69ec 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -145,6 +145,17 @@ static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin,
return __btree_node_flush(j, pin, 1, seq);
}
+inline void bch2_btree_add_journal_pin(struct bch_fs *c,
+ struct btree *b, u64 seq)
+{
+ struct btree_write *w = btree_current_write(b);
+
+ bch2_journal_pin_add(&c->journal, seq, &w->journal,
+ btree_node_write_idx(b) == 0
+ ? btree_node_flush0
+ : btree_node_flush1);
+}
+
static inline void __btree_journal_key(struct btree_trans *trans,
enum btree_id btree_id,
struct bkey_i *insert)
@@ -173,10 +184,6 @@ static void bch2_btree_journal_key(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct journal *j = &c->journal;
struct btree *b = iter_l(iter)->b;
- struct btree_write *w = btree_current_write(b);
- u64 seq = likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
- ? trans->journal_res.seq
- : j->replay_journal_seq;
EBUG_ON(trans->journal_res.ref !=
!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY));
@@ -187,10 +194,10 @@ static void bch2_btree_journal_key(struct btree_trans *trans,
cpu_to_le64(trans->journal_res.seq);
}
- bch2_journal_pin_add(j, seq, &w->journal,
- btree_node_write_idx(b) == 0
- ? btree_node_flush0
- : btree_node_flush1);
+ bch2_btree_add_journal_pin(c, b,
+ likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+ ? trans->journal_res.seq
+ : j->replay_journal_seq);
if (unlikely(!btree_node_dirty(b)))
set_btree_node_dirty(b);
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index b50f85d1b057..c9d2a01fec29 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -958,6 +958,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA);
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_extents_above_btree_updates;
+ c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled;
ret = bch2_write_super(c);
mutex_unlock(&c->sb_lock);
@@ -1090,6 +1091,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA;
c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_extents_above_btree_updates);
+ c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_btree_updates_journalled);
u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;