diff options
-rw-r--r-- | fs/bcachefs/bcachefs_format.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 11 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.h | 9 | ||||
-rw-r--r-- | fs/bcachefs/btree_types.h | 3 | ||||
-rw-r--r-- | fs/bcachefs/btree_update.h | 1 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 352 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.h | 16 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_leaf.c | 23 | ||||
-rw-r--r-- | fs/bcachefs/super-io.c | 2 |
9 files changed, 146 insertions, 274 deletions
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h index 1ad5ff449a5b..6f74fda1f21d 100644 --- a/fs/bcachefs/bcachefs_format.h +++ b/fs/bcachefs/bcachefs_format.h @@ -1316,7 +1316,8 @@ LE64_BITMASK(BCH_SB_ERASURE_CODE, struct bch_sb, flags[3], 0, 16); x(new_extent_overwrite, 9) \ x(incompressible, 10) \ x(btree_ptr_v2, 11) \ - x(extents_above_btree_updates, 12) + x(extents_above_btree_updates, 12) \ + x(btree_updates_journalled, 13) #define BCH_SB_FEATURES_ALL \ ((1ULL << BCH_FEATURE_new_siphash)| \ diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index d0b761417903..e43d1b2ce5c7 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -1260,7 +1260,6 @@ void bch2_btree_complete_write(struct bch_fs *c, struct btree *b, closure_put(&((struct btree_update *) new)->cl); bch2_journal_pin_drop(&c->journal, &w->journal); - closure_wake_up(&w->wait); } static void btree_node_write_done(struct bch_fs *c, struct btree *b) @@ -1618,9 +1617,6 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, wbio->wbio.bio.bi_end_io = btree_node_write_endio; wbio->wbio.bio.bi_private = b; - if (b->c.level || !b->written) - wbio->wbio.bio.bi_opf |= REQ_FUA; - bch2_bio_map(&wbio->wbio.bio, data, sectors_to_write << 9); /* @@ -1794,12 +1790,11 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf) rcu_read_lock(); for_each_cached_btree(b, c, tbl, i, pos) { unsigned long flags = READ_ONCE(b->flags); - unsigned idx = (flags & (1 << BTREE_NODE_write_idx)) != 0; if (!(flags & (1 << BTREE_NODE_dirty))) continue; - pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu c %u p %u\n", + pr_buf(&out, "%p d %u n %u l %u w %u b %u r %u:%lu\n", b, (flags & (1 << BTREE_NODE_dirty)) != 0, (flags & (1 << BTREE_NODE_need_write)) != 0, @@ -1807,9 +1802,7 @@ ssize_t bch2_dirty_btree_nodes_print(struct bch_fs *c, char *buf) b->written, !list_empty_careful(&b->write_blocked), b->will_make_reachable != 0, - b->will_make_reachable & 1, - b->writes[ idx].wait.list.first != NULL, - b->writes[!idx].wait.list.first != NULL); + b->will_make_reachable & 1); } rcu_read_unlock(); diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h index 43fa8a6dbee5..a02e261c2eb2 100644 --- a/fs/bcachefs/btree_io.h +++ b/fs/bcachefs/btree_io.h @@ -102,19 +102,20 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *); void bch2_btree_node_write(struct bch_fs *, struct btree *, enum six_lock_type); -static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b) +static inline void btree_node_write_if_need(struct bch_fs *c, struct btree *b, + enum six_lock_type lock_held) { while (b->written && btree_node_need_write(b) && btree_node_may_write(b)) { if (!btree_node_write_in_flight(b)) { - bch2_btree_node_write(c, b, SIX_LOCK_read); + bch2_btree_node_write(c, b, lock_held); break; } six_unlock_read(&b->c.lock); btree_node_wait_on_io(b); - btree_node_lock_type(c, b, SIX_LOCK_read); + btree_node_lock_type(c, b, lock_held); } } @@ -131,7 +132,7 @@ do { \ new |= (1 << BTREE_NODE_need_write); \ } while ((v = cmpxchg(&(_b)->flags, old, new)) != old); \ \ - btree_node_write_if_need(_c, _b); \ + btree_node_write_if_need(_c, _b, SIX_LOCK_read); \ } while (0) void bch2_btree_flush_all_reads(struct bch_fs *); diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index 885cc9500f36..a794f9fe4fce 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -53,7 +53,6 @@ struct bset_tree { struct btree_write { struct journal_entry_pin journal; - struct closure_waitlist wait; }; struct btree_alloc { @@ -547,8 +546,6 @@ static inline bool btree_node_type_needs_gc(enum btree_node_type type) struct btree_root { struct btree *b; - struct btree_update *as; - /* On disk root - see async splits: */ __BKEY_PADDED(key, BKEY_BTREE_PTR_VAL_U64s_MAX); u8 level; diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h index 9f58d47ef5d6..11f7d02de622 100644 --- a/fs/bcachefs/btree_update.h +++ b/fs/bcachefs/btree_update.h @@ -12,6 +12,7 @@ void bch2_btree_node_lock_for_insert(struct bch_fs *, struct btree *, struct btree_iter *); bool bch2_btree_bset_insert_key(struct btree_iter *, struct btree *, struct btree_node_iter *, struct bkey_i *); +void bch2_btree_add_journal_pin(struct bch_fs *, struct btree *, u64); enum btree_insert_flags { __BTREE_INSERT_NOUNLOCK, diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index fa9c7f5e0bb9..68deb4eb31a6 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -24,7 +24,6 @@ static void btree_node_will_make_reachable(struct btree_update *, struct btree *); static void btree_update_drop_new_node(struct bch_fs *, struct btree *); -static void bch2_btree_set_root_ondisk(struct bch_fs *, struct btree *, int); /* Debug code: */ @@ -260,16 +259,17 @@ void bch2_btree_node_free_inmem(struct bch_fs *c, struct btree *b, } static void bch2_btree_node_free_ondisk(struct bch_fs *c, - struct pending_btree_node_free *pending) + struct pending_btree_node_free *pending, + u64 journal_seq) { BUG_ON(!pending->index_update_done); bch2_mark_key(c, bkey_i_to_s_c(&pending->key), - 0, 0, NULL, 0, BTREE_TRIGGER_OVERWRITE); + 0, 0, NULL, journal_seq, BTREE_TRIGGER_OVERWRITE); if (gc_visited(c, gc_phase(GC_PHASE_PENDING_DELETE))) bch2_mark_key(c, bkey_i_to_s_c(&pending->key), - 0, 0, NULL, 0, + 0, 0, NULL, journal_seq, BTREE_TRIGGER_OVERWRITE| BTREE_TRIGGER_GC); } @@ -585,10 +585,13 @@ static void bch2_btree_update_free(struct btree_update *as) { struct bch_fs *c = as->c; + bch2_journal_preres_put(&c->journal, &as->journal_preres); + + bch2_journal_pin_drop(&c->journal, &as->journal); bch2_journal_pin_flush(&c->journal, &as->journal); - BUG_ON(as->nr_new_nodes); - BUG_ON(as->nr_pending); + BUG_ON((as->nr_new_nodes || as->nr_pending) && + !bch2_journal_error(&c->journal));; if (as->reserve) bch2_btree_reserve_put(c, as->reserve); @@ -603,13 +606,10 @@ static void bch2_btree_update_free(struct btree_update *as) mutex_unlock(&c->btree_interior_update_lock); } -static void btree_update_nodes_reachable(struct closure *cl) +static void btree_update_nodes_reachable(struct btree_update *as, u64 seq) { - struct btree_update *as = container_of(cl, struct btree_update, cl); struct bch_fs *c = as->c; - bch2_journal_pin_drop(&c->journal, &as->journal); - mutex_lock(&c->btree_interior_update_lock); while (as->nr_new_nodes) { @@ -630,39 +630,22 @@ static void btree_update_nodes_reachable(struct closure *cl) } while (as->nr_pending) - bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending]); + bch2_btree_node_free_ondisk(c, &as->pending[--as->nr_pending], + seq); mutex_unlock(&c->btree_interior_update_lock); - - closure_wake_up(&as->wait); - - bch2_btree_update_free(as); -} - -static void btree_update_wait_on_journal(struct closure *cl) -{ - struct btree_update *as = container_of(cl, struct btree_update, cl); - struct bch_fs *c = as->c; - int ret; - - ret = bch2_journal_open_seq_async(&c->journal, as->journal_seq, cl); - if (ret == -EAGAIN) { - continue_at(cl, btree_update_wait_on_journal, system_wq); - return; - } - if (ret < 0) - goto err; - - bch2_journal_flush_seq_async(&c->journal, as->journal_seq, cl); -err: - continue_at(cl, btree_update_nodes_reachable, system_wq); } static void btree_update_nodes_written(struct closure *cl) { struct btree_update *as = container_of(cl, struct btree_update, cl); + struct journal_res res = { 0 }; struct bch_fs *c = as->c; struct btree *b; + struct bset *i; + struct bkey_i *k; + unsigned journal_u64s = 0; + int ret; /* * We did an update to a parent node where the pointers we added pointed @@ -671,7 +654,7 @@ static void btree_update_nodes_written(struct closure *cl) */ mutex_lock(&c->btree_interior_update_lock); as->nodes_written = true; -retry: +again: as = list_first_entry_or_null(&c->btree_interior_updates_unwritten, struct btree_update, unwritten_list); if (!as || !as->nodes_written) { @@ -679,31 +662,53 @@ retry: return; } + b = as->b; + if (b && !six_trylock_intent(&b->c.lock)) { + mutex_unlock(&c->btree_interior_update_lock); + btree_node_lock_type(c, b, SIX_LOCK_intent); + six_unlock_intent(&b->c.lock); + goto out; + } + + journal_u64s = 0; + + if (as->mode != BTREE_INTERIOR_UPDATING_ROOT) + for_each_keylist_key(&as->parent_keys, k) + journal_u64s += jset_u64s(k->k.u64s); + + ret = bch2_journal_res_get(&c->journal, &res, journal_u64s, + JOURNAL_RES_GET_RESERVED); + if (ret) { + BUG_ON(!bch2_journal_error(&c->journal)); + /* can't unblock btree writes */ + goto free_update; + } + + if (as->mode != BTREE_INTERIOR_UPDATING_ROOT) + for_each_keylist_key(&as->parent_keys, k) + bch2_journal_add_entry(&c->journal, &res, + BCH_JSET_ENTRY_btree_keys, + as->btree_id, + as->level, + k, k->k.u64s); + switch (as->mode) { case BTREE_INTERIOR_NO_UPDATE: BUG(); case BTREE_INTERIOR_UPDATING_NODE: - /* The usual case: */ - b = READ_ONCE(as->b); - - if (!six_trylock_read(&b->c.lock)) { - mutex_unlock(&c->btree_interior_update_lock); - btree_node_lock_type(c, b, SIX_LOCK_read); - six_unlock_read(&b->c.lock); - mutex_lock(&c->btree_interior_update_lock); - goto retry; - } - - BUG_ON(!btree_node_dirty(b)); - closure_wait(&btree_current_write(b)->wait, &as->cl); + /* @b is the node we did the final insert into: */ + BUG_ON(!res.ref); + six_lock_write(&b->c.lock, NULL, NULL); list_del(&as->write_blocked_list); - /* - * for flush_held_btree_writes() waiting on updates to flush or - * nodes to be writeable: - */ - closure_wake_up(&c->btree_interior_update_wait); + i = btree_bset_last(b); + i->journal_seq = cpu_to_le64( + max(res.seq, + le64_to_cpu(i->journal_seq))); + + bch2_btree_add_journal_pin(c, b, res.seq); + six_unlock_write(&b->c.lock); list_del(&as->unwritten_list); mutex_unlock(&c->btree_interior_update_lock); @@ -712,82 +717,51 @@ retry: * b->write_blocked prevented it from being written, so * write it now if it needs to be written: */ - bch2_btree_node_write_cond(c, b, true); - six_unlock_read(&b->c.lock); - continue_at(&as->cl, btree_update_nodes_reachable, system_wq); + btree_node_write_if_need(c, b, SIX_LOCK_intent); + six_unlock_intent(&b->c.lock); break; case BTREE_INTERIOR_UPDATING_AS: - /* - * The btree node we originally updated has been freed and is - * being rewritten - so we need to write anything here, we just - * need to signal to that btree_update that it's ok to make the - * new replacement node visible: - */ - closure_put(&as->parent_as->cl); - - /* - * and then we have to wait on that btree_update to finish: - */ - closure_wait(&as->parent_as->wait, &as->cl); + BUG_ON(b); list_del(&as->unwritten_list); mutex_unlock(&c->btree_interior_update_lock); - - continue_at(&as->cl, btree_update_nodes_reachable, system_wq); break; - case BTREE_INTERIOR_UPDATING_ROOT: - /* b is the new btree root: */ - b = READ_ONCE(as->b); - - if (!six_trylock_read(&b->c.lock)) { - mutex_unlock(&c->btree_interior_update_lock); - btree_node_lock_type(c, b, SIX_LOCK_read); - six_unlock_read(&b->c.lock); - mutex_lock(&c->btree_interior_update_lock); - goto retry; - } - - BUG_ON(c->btree_roots[b->c.btree_id].as != as); - c->btree_roots[b->c.btree_id].as = NULL; + case BTREE_INTERIOR_UPDATING_ROOT: { + struct btree_root *r = &c->btree_roots[as->btree_id]; - bch2_btree_set_root_ondisk(c, b, WRITE); + BUG_ON(b); - /* - * We don't have to wait anything anything here (before - * btree_update_nodes_reachable frees the old nodes - * ondisk) - we've ensured that the very next journal write will - * have the pointer to the new root, and before the allocator - * can reuse the old nodes it'll have to do a journal commit: - */ - six_unlock_read(&b->c.lock); + mutex_lock(&c->btree_root_lock); + bkey_copy(&r->key, as->parent_keys.keys); + r->level = as->level; + r->alive = true; + c->btree_roots_dirty = true; + mutex_unlock(&c->btree_root_lock); list_del(&as->unwritten_list); mutex_unlock(&c->btree_interior_update_lock); - - /* - * Bit of funny circularity going on here we have to break: - * - * We have to drop our journal pin before writing the journal - * entry that points to the new btree root: else, we could - * deadlock if the journal currently happens to be full. - * - * This mean we're dropping the journal pin _before_ the new - * nodes are technically reachable - but this is safe, because - * after the bch2_btree_set_root_ondisk() call above they will - * be reachable as of the very next journal write: - */ - bch2_journal_pin_drop(&c->journal, &as->journal); - - as->journal_seq = bch2_journal_last_unwritten_seq(&c->journal); - - btree_update_wait_on_journal(&as->cl); break; } + } + bch2_journal_pin_drop(&c->journal, &as->journal); + + bch2_journal_res_put(&c->journal, &res); + bch2_journal_preres_put(&c->journal, &as->journal_preres); + + btree_update_nodes_reachable(as, res.seq); +free_update: + bch2_btree_update_free(as); + /* + * for flush_held_btree_writes() waiting on updates to flush or + * nodes to be writeable: + */ + closure_wake_up(&c->btree_interior_update_wait); +out: mutex_lock(&c->btree_interior_update_lock); - goto retry; + goto again; } /* @@ -804,48 +778,12 @@ static void btree_update_updated_node(struct btree_update *as, struct btree *b) BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); BUG_ON(!btree_node_dirty(b)); - as->mode = BTREE_INTERIOR_UPDATING_NODE; - as->b = b; + as->mode = BTREE_INTERIOR_UPDATING_NODE; + as->b = b; + as->level = b->c.level; list_add(&as->write_blocked_list, &b->write_blocked); mutex_unlock(&c->btree_interior_update_lock); - - /* - * In general, when you're staging things in a journal that will later - * be written elsewhere, and you also want to guarantee ordering: that - * is, if you have updates a, b, c, after a crash you should never see c - * and not a or b - there's a problem: - * - * If the final destination of the update(s) (i.e. btree node) can be - * written/flushed _before_ the relevant journal entry - oops, that - * breaks ordering, since the various leaf nodes can be written in any - * order. - * - * Normally we use bset->journal_seq to deal with this - if during - * recovery we find a btree node write that's newer than the newest - * journal entry, we just ignore it - we don't need it, anything we're - * supposed to have (that we reported as completed via fsync()) will - * still be in the journal, and as far as the state of the journal is - * concerned that btree node write never happened. - * - * That breaks when we're rewriting/splitting/merging nodes, since we're - * mixing btree node writes that haven't happened yet with previously - * written data that has been reported as completed to the journal. - * - * Thus, before making the new nodes reachable, we have to wait the - * newest journal sequence number we have data for to be written (if it - * hasn't been yet). - */ - bch2_journal_wait_on_seq(&c->journal, as->journal_seq, &as->cl); -} - -static void interior_update_flush(struct journal *j, - struct journal_entry_pin *pin, u64 seq) -{ - struct btree_update *as = - container_of(pin, struct btree_update, journal); - - bch2_journal_flush_seq_async(j, as->journal_seq, NULL); } static void btree_update_reparent(struct btree_update *as, @@ -853,10 +791,10 @@ static void btree_update_reparent(struct btree_update *as, { struct bch_fs *c = as->c; + lockdep_assert_held(&c->btree_interior_update_lock); + child->b = NULL; child->mode = BTREE_INTERIOR_UPDATING_AS; - child->parent_as = as; - closure_get(&as->cl); /* * When we write a new btree root, we have to drop our journal pin @@ -867,46 +805,24 @@ static void btree_update_reparent(struct btree_update *as, * just transfer the journal pin to the new interior update so * btree_update_nodes_written() can drop it. */ - bch2_journal_pin_copy(&c->journal, &as->journal, - &child->journal, interior_update_flush); + bch2_journal_pin_copy(&c->journal, &as->journal, &child->journal, NULL); bch2_journal_pin_drop(&c->journal, &child->journal); - - as->journal_seq = max(as->journal_seq, child->journal_seq); } -static void btree_update_updated_root(struct btree_update *as) +static void btree_update_updated_root(struct btree_update *as, struct btree *b) { struct bch_fs *c = as->c; - struct btree_root *r = &c->btree_roots[as->btree_id]; - - mutex_lock(&c->btree_interior_update_lock); - list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); BUG_ON(as->mode != BTREE_INTERIOR_NO_UPDATE); + BUG_ON(!bch2_keylist_empty(&as->parent_keys)); - /* - * Old root might not be persistent yet - if so, redirect its - * btree_update operation to point to us: - */ - if (r->as) - btree_update_reparent(as, r->as); - - as->mode = BTREE_INTERIOR_UPDATING_ROOT; - as->b = r->b; - r->as = as; + mutex_lock(&c->btree_interior_update_lock); + list_add_tail(&as->unwritten_list, &c->btree_interior_updates_unwritten); + as->mode = BTREE_INTERIOR_UPDATING_ROOT; + as->level = b->c.level; + bch2_keylist_add(&as->parent_keys, &b->key); mutex_unlock(&c->btree_interior_update_lock); - - /* - * When we're rewriting nodes and updating interior nodes, there's an - * issue with updates that haven't been written in the journal getting - * mixed together with older data - see btree_update_updated_node() - * for the explanation. - * - * However, this doesn't affect us when we're writing a new btree root - - * because to make that new root reachable we have to write out a new - * journal entry, which must necessarily be newer than as->journal_seq. - */ } static void btree_node_will_make_reachable(struct btree_update *as, @@ -983,10 +899,8 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as, struct btree *b) { struct bch_fs *c = as->c; - struct closure *cl, *cl_n; struct btree_update *p, *n; struct btree_write *w; - struct bset_tree *t; set_btree_node_dying(b); @@ -995,18 +909,6 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as, btree_interior_update_add_node_reference(as, b); - /* - * Does this node have data that hasn't been written in the journal? - * - * If so, we have to wait for the corresponding journal entry to be - * written before making the new nodes reachable - we can't just carry - * over the bset->journal_seq tracking, since we'll be mixing those keys - * in with keys that aren't in the journal anymore: - */ - for_each_bset(b, t) - as->journal_seq = max(as->journal_seq, - le64_to_cpu(bset(b, t)->journal_seq)); - mutex_lock(&c->btree_interior_update_lock); /* @@ -1030,16 +932,6 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as, clear_btree_node_dirty(b); clear_btree_node_need_write(b); - w = btree_current_write(b); - - /* - * Does this node have any btree_update operations waiting on this node - * to be written? - * - * If so, wake them up when this btree_update operation is reachable: - */ - llist_for_each_entry_safe(cl, cl_n, llist_del_all(&w->wait.list), list) - llist_add(&cl->list, &as->wait.list); /* * Does this node have unwritten data that has a pin on the journal? @@ -1049,13 +941,12 @@ void bch2_btree_interior_update_will_free_node(struct btree_update *as, * oldest pin of any of the nodes we're freeing. We'll release the pin * when the new nodes are persistent and reachable on disk: */ - bch2_journal_pin_copy(&c->journal, &as->journal, - &w->journal, interior_update_flush); + w = btree_current_write(b); + bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); bch2_journal_pin_drop(&c->journal, &w->journal); w = btree_prev_write(b); - bch2_journal_pin_copy(&c->journal, &as->journal, - &w->journal, interior_update_flush); + bch2_journal_pin_copy(&c->journal, &as->journal, &w->journal, NULL); bch2_journal_pin_drop(&c->journal, &w->journal); mutex_unlock(&c->btree_interior_update_lock); @@ -1078,6 +969,7 @@ bch2_btree_update_start(struct bch_fs *c, enum btree_id id, { struct btree_reserve *reserve; struct btree_update *as; + int ret; reserve = bch2_btree_reserve_get(c, nr_nodes, flags, cl); if (IS_ERR(reserve)) @@ -1094,6 +986,15 @@ bch2_btree_update_start(struct bch_fs *c, enum btree_id id, bch2_keylist_init(&as->parent_keys, as->inline_keys); + ret = bch2_journal_preres_get(&c->journal, &as->journal_preres, + jset_u64s(BKEY_BTREE_PTR_U64s_MAX) * 3, 0); + if (ret) { + bch2_btree_reserve_put(c, reserve); + closure_debug_destroy(&as->cl); + mempool_free(as, &c->btree_interior_update_pool); + return ERR_PTR(ret); + } + mutex_lock(&c->btree_interior_update_lock); list_add_tail(&as->list, &c->btree_interior_update_list); mutex_unlock(&c->btree_interior_update_lock); @@ -1153,22 +1054,6 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b) mutex_unlock(&c->btree_interior_update_lock); } -static void bch2_btree_set_root_ondisk(struct bch_fs *c, struct btree *b, int rw) -{ - struct btree_root *r = &c->btree_roots[b->c.btree_id]; - - mutex_lock(&c->btree_root_lock); - - BUG_ON(b != r->b); - bkey_copy(&r->key, &b->key); - r->level = b->c.level; - r->alive = true; - if (rw == WRITE) - c->btree_roots_dirty = true; - - mutex_unlock(&c->btree_root_lock); -} - /** * bch_btree_set_root - update the root in memory and on disk * @@ -1201,7 +1086,7 @@ static void bch2_btree_set_root(struct btree_update *as, struct btree *b, bch2_btree_set_root_inmem(as, b); - btree_update_updated_root(as); + btree_update_updated_root(as, b); /* * Unlock old root after new root is visible: @@ -1471,7 +1356,8 @@ static void btree_split(struct btree_update *as, struct btree *b, bch2_btree_build_aux_trees(n1); six_unlock_write(&n1->c.lock); - bch2_keylist_add(&as->parent_keys, &n1->key); + if (parent) + bch2_keylist_add(&as->parent_keys, &n1->key); } bch2_btree_node_write(c, n1, SIX_LOCK_intent); @@ -1545,12 +1431,8 @@ bch2_btree_insert_keys_interior(struct btree_update *as, struct btree *b, (bkey_cmp_packed(b, k, &insert->k) >= 0)) ; - while (!bch2_keylist_empty(keys)) { - insert = bch2_keylist_front(keys); - + for_each_keylist_key(keys, insert) bch2_insert_fixup_btree_ptr(as, b, iter, insert, &node_iter); - bch2_keylist_pop_front(keys); - } btree_update_updated_node(as, b); @@ -2107,7 +1989,7 @@ static void __bch2_btree_node_update_key(struct bch_fs *c, bkey_copy(&b->key, new_key); } - btree_update_updated_root(as); + btree_update_updated_root(as, b); bch2_btree_node_unlock_write(b, iter); } diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h index f6aceed89427..4a2ea69f6a2c 100644 --- a/fs/bcachefs/btree_update_interior.h +++ b/fs/bcachefs/btree_update_interior.h @@ -69,8 +69,10 @@ struct btree_update { unsigned nodes_written:1; enum btree_id btree_id; + u8 level; struct btree_reserve *reserve; + struct journal_preres journal_preres; /* * BTREE_INTERIOR_UPDATING_NODE: @@ -84,18 +86,6 @@ struct btree_update { struct list_head write_blocked_list; /* - * BTREE_INTERIOR_UPDATING_AS: btree node we updated was freed, so now - * we're now blocking another btree_update - * @parent_as - btree_update that's waiting on our nodes to finish - * writing, before it can make new nodes visible on disk - * @wait - list of child btree_updates that are waiting on this - * btree_update to make all the new nodes visible before they can free - * their old btree nodes - */ - struct btree_update *parent_as; - struct closure_waitlist wait; - - /* * We may be freeing nodes that were dirty, and thus had journal entries * pinned: we need to transfer the oldest of those pins to the * btree_update operation, and release it when the new node(s) @@ -103,8 +93,6 @@ struct btree_update { */ struct journal_entry_pin journal; - u64 journal_seq; - /* * Nodes being freed: * Protected by c->btree_node_pending_free_lock diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c index a8487f8275b6..06e735fc69ec 100644 --- a/fs/bcachefs/btree_update_leaf.c +++ b/fs/bcachefs/btree_update_leaf.c @@ -145,6 +145,17 @@ static void btree_node_flush1(struct journal *j, struct journal_entry_pin *pin, return __btree_node_flush(j, pin, 1, seq); } +inline void bch2_btree_add_journal_pin(struct bch_fs *c, + struct btree *b, u64 seq) +{ + struct btree_write *w = btree_current_write(b); + + bch2_journal_pin_add(&c->journal, seq, &w->journal, + btree_node_write_idx(b) == 0 + ? btree_node_flush0 + : btree_node_flush1); +} + static inline void __btree_journal_key(struct btree_trans *trans, enum btree_id btree_id, struct bkey_i *insert) @@ -173,10 +184,6 @@ static void bch2_btree_journal_key(struct btree_trans *trans, struct bch_fs *c = trans->c; struct journal *j = &c->journal; struct btree *b = iter_l(iter)->b; - struct btree_write *w = btree_current_write(b); - u64 seq = likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) - ? trans->journal_res.seq - : j->replay_journal_seq; EBUG_ON(trans->journal_res.ref != !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)); @@ -187,10 +194,10 @@ static void bch2_btree_journal_key(struct btree_trans *trans, cpu_to_le64(trans->journal_res.seq); } - bch2_journal_pin_add(j, seq, &w->journal, - btree_node_write_idx(b) == 0 - ? btree_node_flush0 - : btree_node_flush1); + bch2_btree_add_journal_pin(c, b, + likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) + ? trans->journal_res.seq + : j->replay_journal_seq); if (unlikely(!btree_node_dirty(b))) set_btree_node_dirty(b); diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c index b50f85d1b057..c9d2a01fec29 100644 --- a/fs/bcachefs/super-io.c +++ b/fs/bcachefs/super-io.c @@ -958,6 +958,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c) c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA); c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite; c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_extents_above_btree_updates; + c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled; ret = bch2_write_super(c); mutex_unlock(&c->sb_lock); @@ -1090,6 +1091,7 @@ void bch2_fs_mark_clean(struct bch_fs *c) c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO; c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA; c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_extents_above_btree_updates); + c->disk_sb.sb->features[0] &= ~(1ULL << BCH_FEATURE_btree_updates_journalled); u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved; |