author    | Kent Overstreet <kent.overstreet@gmail.com> | 2020-07-11 16:28:54 -0400
committer | Kent Overstreet <kent.overstreet@linux.dev> | 2023-10-22 17:08:42 -0400
commit    | e6d1161530bcd632ad10b6aa0ad511abb146dbcc (patch)
tree      | df05a7dd1dc962373bedc7aa4bd47ddd403ad7ac /fs/bcachefs/movinggc.c
parent    | f793bc15491c04481b3f12a10ff22a53cd126842 (diff)
bcachefs: Make copygc thread global
Per-device copygc threads don't move data to different devices, and they
make fragmentation worse - they don't make much sense anymore.
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
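With copygc global, a single heap now holds candidate buckets from every read-write device, so each heap entry records which device a bucket lives on, and bucket_offset_cmp() in the patch below orders entries by (dev, offset) before __copygc_pred() searches them. The following is a standalone sketch of that comparator idiom, not bcachefs code: cmp_int() here mirrors the kernel helper's (l > r) - (l < r) form, and struct heap_entry, entry_cmp(), and the sample values are invented for illustration.

```c
/* Standalone sketch: keying one shared bucket array by (dev, offset). */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

/* Mirrors the kernel's cmp_int() helper: returns -1, 0 or 1. */
#define cmp_int(l, r)	(((l) > (r)) - ((l) < (r)))

struct heap_entry {
	unsigned	dev;	/* device index - the field added by the patch */
	uint64_t	offset;	/* bucket offset in sectors on that device */
};

/* Sort by device first, then by offset within the device; the a ?: b
 * chaining is the GNU C extension used by bucket_offset_cmp(). */
static int entry_cmp(const void *_l, const void *_r)
{
	const struct heap_entry *l = _l, *r = _r;

	return cmp_int(l->dev, r->dev) ?:
	       cmp_int(l->offset, r->offset);
}

int main(void)
{
	struct heap_entry e[] = {
		{ .dev = 1, .offset = 4096 },
		{ .dev = 0, .offset = 8192 },
		{ .dev = 1, .offset = 1024 },
		{ .dev = 0, .offset =  512 },
	};

	qsort(e, sizeof(e) / sizeof(e[0]), sizeof(e[0]), entry_cmp);

	for (size_t i = 0; i < sizeof(e) / sizeof(e[0]); i++)
		printf("dev %u offset %llu\n", e[i].dev,
		       (unsigned long long) e[i].offset);
	return 0;
}
```

Ordering by device first keeps each device's buckets contiguous in the shared array, which is what lets the eytzinger0_find_le() lookup in __copygc_pred() match an extent pointer against the right device's buckets.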
Diffstat (limited to 'fs/bcachefs/movinggc.c')
-rw-r--r-- | fs/bcachefs/movinggc.c | 213
1 file changed, 117 insertions, 96 deletions
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 135bbc102b53..c33b58dc5c50 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -43,13 +43,6 @@
 #define COPYGC_BUCKETS_PER_ITER(ca)					\
 	((ca)->free[RESERVE_MOVINGGC].size / 2)
 
-/*
- * Max sectors to move per iteration: Have to take into account internal
- * fragmentation from the multiple write points for each generation:
- */
-#define COPYGC_SECTORS_PER_ITER(ca)					\
-	((ca)->mi.bucket_size * COPYGC_BUCKETS_PER_ITER(ca))
-
 static inline int sectors_used_cmp(copygc_heap *heap,
 				   struct copygc_heap_entry l,
 				   struct copygc_heap_entry r)
@@ -62,18 +55,22 @@ static int bucket_offset_cmp(const void *_l, const void *_r, size_t size)
 	const struct copygc_heap_entry *l = _l;
 	const struct copygc_heap_entry *r = _r;
 
-	return cmp_int(l->offset, r->offset);
+	return  cmp_int(l->dev,    r->dev) ?:
+		cmp_int(l->offset, r->offset);
 }
 
-static bool __copygc_pred(struct bch_dev *ca,
-			  struct bkey_s_c k)
+static int __copygc_pred(struct bch_fs *c, struct bkey_s_c k)
 {
-	copygc_heap *h = &ca->copygc_heap;
-	const struct bch_extent_ptr *ptr =
-		bch2_bkey_has_device(k, ca->dev_idx);
-
-	if (ptr) {
-		struct copygc_heap_entry search = { .offset = ptr->offset };
+	copygc_heap *h = &c->copygc_heap;
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const struct bch_extent_ptr *ptr;
+
+	bkey_for_each_ptr(ptrs, ptr) {
+		struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+		struct copygc_heap_entry search = {
+			.dev = ptr->dev,
+			.offset = ptr->offset
+		};
 
 		ssize_t i = eytzinger0_find_le(h->data, h->used,
 					       sizeof(h->data[0]),
@@ -89,12 +86,13 @@ static bool __copygc_pred(struct bch_dev *ca,
 		BUG_ON(i != j);
 #endif
 
-		return (i >= 0 &&
-			ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
-			ptr->gen == h->data[i].gen);
+		if (i >= 0 &&
+		    ptr->offset < h->data[i].offset + ca->mi.bucket_size &&
+		    ptr->gen == h->data[i].gen)
+			return ptr->dev;
 	}
 
-	return false;
+	return -1;
 }
 
 static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
@@ -102,14 +100,14 @@ static enum data_cmd copygc_pred(struct bch_fs *c, void *arg,
 				 struct bch_io_opts *io_opts,
 				 struct data_opts *data_opts)
 {
-	struct bch_dev *ca = arg;
-
-	if (!__copygc_pred(ca, k))
+	int dev_idx = __copygc_pred(c, k);
+	if (dev_idx < 0)
 		return DATA_SKIP;
 
-	data_opts->target		= dev_to_target(ca->dev_idx);
+	/* XXX: use io_opts for this inode */
+	data_opts->target		= dev_to_target(dev_idx);
 	data_opts->btree_insert_flags	= BTREE_INSERT_USE_RESERVE;
-	data_opts->rewrite_dev		= ca->dev_idx;
+	data_opts->rewrite_dev		= dev_idx;
 	return DATA_REWRITE;
 }
 
@@ -125,20 +123,21 @@ static bool have_copygc_reserve(struct bch_dev *ca)
 	return ret;
 }
 
-static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
+static void bch2_copygc(struct bch_fs *c)
 {
-	copygc_heap *h = &ca->copygc_heap;
+	copygc_heap *h = &c->copygc_heap;
 	struct copygc_heap_entry e, *i;
 	struct bucket_array *buckets;
 	struct bch_move_stats move_stats;
 	u64 sectors_to_move = 0, sectors_not_moved = 0;
+	u64 sectors_reserved = 0;
 	u64 buckets_to_move, buckets_not_moved = 0;
-	size_t b;
+	struct bch_dev *ca;
+	unsigned dev_idx;
+	size_t b, heap_size = 0;
 	int ret;
 
 	memset(&move_stats, 0, sizeof(move_stats));
-	closure_wait_event(&c->freelist_wait, have_copygc_reserve(ca));
-
 	/*
 	 * Find buckets with lowest sector counts, skipping completely
	 * empty buckets, by building a maxheap sorted by sector count,
@@ -147,38 +146,51 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
 	 */
 	h->used = 0;
 
-	/*
-	 * We need bucket marks to be up to date - gc can't be recalculating
-	 * them:
-	 */
-	down_read(&c->gc_lock);
-	down_read(&ca->bucket_lock);
-	buckets = bucket_array(ca);
-
-	for (b = buckets->first_bucket; b < buckets->nbuckets; b++) {
-		struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
-		struct copygc_heap_entry e;
-
-		if (m.owned_by_allocator ||
-		    m.data_type != BCH_DATA_user ||
-		    !bucket_sectors_used(m) ||
-		    bucket_sectors_used(m) >= ca->mi.bucket_size)
-			continue;
+	for_each_rw_member(ca, c, dev_idx)
+		heap_size += ca->mi.nbuckets >> 7;
 
-		e = (struct copygc_heap_entry) {
-			.gen = m.gen,
-			.sectors = bucket_sectors_used(m),
-			.offset = bucket_to_sector(ca, b),
-		};
-		heap_add_or_replace(h, e, -sectors_used_cmp, NULL);
+	if (h->size < heap_size) {
+		free_heap(&c->copygc_heap);
+		if (!init_heap(&c->copygc_heap, heap_size, GFP_KERNEL)) {
+			bch_err(c, "error allocating copygc heap");
+			return;
+		}
+	}
+
+	for_each_rw_member(ca, c, dev_idx) {
+		closure_wait_event(&c->freelist_wait, have_copygc_reserve(ca));
+
+		spin_lock(&ca->fs->freelist_lock);
+		sectors_reserved += fifo_used(&ca->free[RESERVE_MOVINGGC]) * ca->mi.bucket_size;
+		spin_unlock(&ca->fs->freelist_lock);
+
+		down_read(&ca->bucket_lock);
+		buckets = bucket_array(ca);
+
+		for (b = buckets->first_bucket; b < buckets->nbuckets; b++) {
+			struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
+			struct copygc_heap_entry e;
+
+			if (m.owned_by_allocator ||
+			    m.data_type != BCH_DATA_user ||
+			    !bucket_sectors_used(m) ||
+			    bucket_sectors_used(m) >= ca->mi.bucket_size)
+				continue;
+
+			e = (struct copygc_heap_entry) {
+				.gen = m.gen,
+				.sectors = bucket_sectors_used(m),
+				.offset = bucket_to_sector(ca, b),
+			};
+			heap_add_or_replace(h, e, -sectors_used_cmp, NULL);
+		}
+		up_read(&ca->bucket_lock);
 	}
-	up_read(&ca->bucket_lock);
-	up_read(&c->gc_lock);
 
 	for (i = h->data; i < h->data + h->used; i++)
 		sectors_to_move += i->sectors;
 
-	while (sectors_to_move > COPYGC_SECTORS_PER_ITER(ca)) {
+	while (sectors_to_move > sectors_reserved) {
 		BUG_ON(!heap_pop(h, e, -sectors_used_cmp, NULL));
 		sectors_to_move -= e.sectors;
 	}
@@ -192,24 +204,26 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
 			sizeof(h->data[0]),
 			bucket_offset_cmp, NULL);
 
-	ret = bch2_move_data(c, &ca->copygc_pd.rate,
-			     writepoint_ptr(&ca->copygc_write_point),
+	ret = bch2_move_data(c, &c->copygc_pd.rate,
+			     writepoint_ptr(&c->copygc_write_point),
 			     POS_MIN, POS_MAX,
-			     copygc_pred, ca,
+			     copygc_pred, NULL,
 			     &move_stats);
 
-	down_read(&ca->bucket_lock);
-	buckets = bucket_array(ca);
-	for (i = h->data; i < h->data + h->used; i++) {
-		size_t b = sector_to_bucket(ca, i->offset);
-		struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
-
-		if (i->gen == m.gen && bucket_sectors_used(m)) {
-			sectors_not_moved += bucket_sectors_used(m);
-			buckets_not_moved++;
+	for_each_rw_member(ca, c, dev_idx) {
+		down_read(&ca->bucket_lock);
+		buckets = bucket_array(ca);
+		for (i = h->data; i < h->data + h->used; i++) {
+			size_t b = sector_to_bucket(ca, i->offset);
+			struct bucket_mark m = READ_ONCE(buckets->b[b].mark);
+
+			if (i->gen == m.gen && bucket_sectors_used(m)) {
+				sectors_not_moved += bucket_sectors_used(m);
+				buckets_not_moved++;
+			}
 		}
+		up_read(&ca->bucket_lock);
 	}
-	up_read(&ca->bucket_lock);
 
 	if (sectors_not_moved && !ret)
 		bch_warn_ratelimited(c,
@@ -220,7 +234,7 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
 			 atomic64_read(&move_stats.keys_raced),
 			 atomic64_read(&move_stats.sectors_raced));
 
-	trace_copygc(ca,
+	trace_copygc(c,
 		     atomic64_read(&move_stats.sectors_moved), sectors_not_moved,
 		     buckets_to_move, buckets_not_moved);
 }
@@ -239,20 +253,27 @@ static void bch2_copygc(struct bch_fs *c, struct bch_dev *ca)
  * often and continually reduce the amount of fragmented space as the device
  * fills up. So, we increase the threshold by half the current free space.
  */
-unsigned long bch2_copygc_wait_amount(struct bch_dev *ca)
+unsigned long bch2_copygc_wait_amount(struct bch_fs *c)
 {
-	struct bch_fs *c = ca->fs;
-	struct bch_dev_usage usage = bch2_dev_usage_read(c, ca);
-	u64 fragmented_allowed = ca->copygc_threshold +
-		((__dev_buckets_available(ca, usage) * ca->mi.bucket_size) >> 1);
+	struct bch_dev *ca;
+	unsigned dev_idx;
+	u64 fragmented_allowed = c->copygc_threshold;
+	u64 fragmented = 0;
+
+	for_each_rw_member(ca, c, dev_idx) {
+		struct bch_dev_usage usage = bch2_dev_usage_read(c, ca);
+
+		fragmented_allowed += ((__dev_buckets_available(ca, usage) *
+					ca->mi.bucket_size) >> 1);
+		fragmented += usage.sectors_fragmented;
+	}
 
-	return max_t(s64, 0, fragmented_allowed - usage.sectors_fragmented);
+	return max_t(s64, 0, fragmented_allowed - fragmented);
 }
 
 static int bch2_copygc_thread(void *arg)
 {
-	struct bch_dev *ca = arg;
-	struct bch_fs *c = ca->fs;
+	struct bch_fs *c = arg;
 	struct io_clock *clock = &c->io_clock[WRITE];
 	unsigned long last, wait;
 
@@ -263,7 +284,7 @@ static int bch2_copygc_thread(void *arg)
 			break;
 
 		last = atomic_long_read(&clock->now);
-		wait = bch2_copygc_wait_amount(ca);
+		wait = bch2_copygc_wait_amount(c);
 
 		if (wait > clock->max_slop) {
 			bch2_kthread_io_clock_wait(clock, last + wait,
@@ -271,29 +292,29 @@ static int bch2_copygc_thread(void *arg)
 			continue;
 		}
 
-		bch2_copygc(c, ca);
+		bch2_copygc(c);
 	}
 
 	return 0;
 }
 
-void bch2_copygc_stop(struct bch_dev *ca)
+void bch2_copygc_stop(struct bch_fs *c)
 {
-	ca->copygc_pd.rate.rate = UINT_MAX;
-	bch2_ratelimit_reset(&ca->copygc_pd.rate);
+	c->copygc_pd.rate.rate = UINT_MAX;
+	bch2_ratelimit_reset(&c->copygc_pd.rate);
 
-	if (ca->copygc_thread) {
-		kthread_stop(ca->copygc_thread);
-		put_task_struct(ca->copygc_thread);
+	if (c->copygc_thread) {
+		kthread_stop(c->copygc_thread);
+		put_task_struct(c->copygc_thread);
 	}
-	ca->copygc_thread = NULL;
+	c->copygc_thread = NULL;
 }
 
-int bch2_copygc_start(struct bch_fs *c, struct bch_dev *ca)
+int bch2_copygc_start(struct bch_fs *c)
 {
 	struct task_struct *t;
 
-	if (ca->copygc_thread)
+	if (c->copygc_thread)
 		return 0;
 
 	if (c->opts.nochanges)
@@ -302,21 +323,21 @@ static int bch2_copygc_thread(void *arg)
 	if (bch2_fs_init_fault("copygc_start"))
 		return -ENOMEM;
 
-	t = kthread_create(bch2_copygc_thread, ca,
-			   "bch_copygc[%s]", ca->name);
+	t = kthread_create(bch2_copygc_thread, c,
+			   "bch_copygc[%s]", c->name);
 	if (IS_ERR(t))
 		return PTR_ERR(t);
 
 	get_task_struct(t);
 
-	ca->copygc_thread = t;
-	wake_up_process(ca->copygc_thread);
+	c->copygc_thread = t;
+	wake_up_process(c->copygc_thread);
 
 	return 0;
 }
 
-void bch2_dev_copygc_init(struct bch_dev *ca)
+void bch2_fs_copygc_init(struct bch_fs *c)
 {
-	bch2_pd_controller_init(&ca->copygc_pd);
-	ca->copygc_pd.d_term = 0;
+	bch2_pd_controller_init(&c->copygc_pd);
+	c->copygc_pd.d_term = 0;
}
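The other effect of going global is visible in bch2_copygc_wait_amount() above: the fragmentation threshold and the free-space allowance are now summed over every read-write device before being compared against the total fragmented space. The sketch below is a standalone, hedged restatement of that arithmetic only; struct dev_usage_sketch and copygc_wait_amount_sketch() are invented names, and the real code reads these numbers via bch2_dev_usage_read() rather than taking them as parameters.

```c
#include <stdint.h>

/* Invented stand-ins for the per-device numbers the patch reads with
 * bch2_dev_usage_read(); only the formula mirrors the diff above. */
struct dev_usage_sketch {
	uint64_t buckets_available;	/* __dev_buckets_available(ca, usage) */
	uint64_t bucket_size;		/* ca->mi.bucket_size, in sectors */
	uint64_t sectors_fragmented;	/* usage.sectors_fragmented */
};

/*
 * Filesystem-wide wait amount: start from the copygc threshold, add half of
 * each read-write device's free space, then subtract the total fragmented
 * space.  A positive result is how long (in sectors written, on the write
 * I/O clock) the copygc thread may sleep before running again.
 */
static uint64_t copygc_wait_amount_sketch(uint64_t copygc_threshold,
					  const struct dev_usage_sketch *devs,
					  unsigned nr_devs)
{
	uint64_t fragmented_allowed = copygc_threshold;
	uint64_t fragmented = 0;

	for (unsigned i = 0; i < nr_devs; i++) {
		fragmented_allowed += (devs[i].buckets_available *
				       devs[i].bucket_size) >> 1;
		fragmented += devs[i].sectors_fragmented;
	}

	/* Clamp at zero, like the max_t(s64, 0, ...) in the patch. */
	return fragmented_allowed > fragmented
		? fragmented_allowed - fragmented : 0;
}
```

Summing across devices means a heavily fragmented device can no longer be starved of copygc just because another device still has plenty of free space: one wait value now governs the single thread for the whole filesystem.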