diff options
author | Ingo Molnar <mingo@kernel.org> | 2020-02-05 08:44:22 +0100 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2020-02-05 08:44:22 +0100 |
commit | fdff7c21ea00787e3f70a1a00b40b88eb998c6ad (patch) | |
tree | 03016a8375e849e2c39dec8a15d660055bb16a8c /fs/btrfs/free-space-cache.c | |
parent | f1ec3a517b4352e78dbef6b1e591f43202ecb3fe (diff) | |
parent | b3a6082223369203d7e7db7e81253ac761377644 (diff) |
Merge branch 'linus' into perf/urgent, to synchronize with upstream
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r-- | fs/btrfs/free-space-cache.c | 619 |
1 files changed, 540 insertions, 79 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 3283da419200..0598fd3c6e3f 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -21,9 +21,11 @@ #include "space-info.h" #include "delalloc-space.h" #include "block-group.h" +#include "discard.h" #define BITS_PER_BITMAP (PAGE_SIZE * 8UL) -#define MAX_CACHE_BYTES_PER_GIG SZ_32K +#define MAX_CACHE_BYTES_PER_GIG SZ_64K +#define FORCE_EXTENT_THRESHOLD SZ_1M struct btrfs_trim_range { u64 start; @@ -31,6 +33,8 @@ struct btrfs_trim_range { struct list_head list; }; +static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *bitmap_info); static int link_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info); static void unlink_free_space(struct btrfs_free_space_ctl *ctl, @@ -752,6 +756,16 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, goto free_cache; } + /* + * Sync discard ensures that the free space cache is always + * trimmed. So when reading this in, the state should reflect + * that. We also do this for async as a stop gap for lack of + * persistence. + */ + if (btrfs_test_opt(fs_info, DISCARD_SYNC) || + btrfs_test_opt(fs_info, DISCARD_ASYNC)) + e->trim_state = BTRFS_TRIM_STATE_TRIMMED; + if (!e->bytes) { kmem_cache_free(btrfs_free_space_cachep, e); goto free_cache; @@ -805,12 +819,19 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, ret = io_ctl_read_bitmap(&io_ctl, e); if (ret) goto free_cache; + e->bitmap_extents = count_bitmap_extents(ctl, e); + if (!btrfs_free_space_trimmed(e)) { + ctl->discardable_extents[BTRFS_STAT_CURR] += + e->bitmap_extents; + ctl->discardable_bytes[BTRFS_STAT_CURR] += e->bytes; + } } io_ctl_drop_pages(&io_ctl); merge_space_tree(ctl); ret = 1; out: + btrfs_discard_update_discardable(ctl->private, ctl); io_ctl_free(&io_ctl); return ret; free_cache: @@ -1624,6 +1645,11 @@ __unlink_free_space(struct btrfs_free_space_ctl *ctl, { rb_erase(&info->offset_index, &ctl->free_space_offset); ctl->free_extents--; + + if (!info->bitmap && !btrfs_free_space_trimmed(info)) { + ctl->discardable_extents[BTRFS_STAT_CURR]--; + ctl->discardable_bytes[BTRFS_STAT_CURR] -= info->bytes; + } } static void unlink_free_space(struct btrfs_free_space_ctl *ctl, @@ -1644,6 +1670,11 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl, if (ret) return ret; + if (!info->bitmap && !btrfs_free_space_trimmed(info)) { + ctl->discardable_extents[BTRFS_STAT_CURR]++; + ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; + } + ctl->free_space += info->bytes; ctl->free_extents++; return ret; @@ -1664,26 +1695,17 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl) ASSERT(ctl->total_bitmaps <= max_bitmaps); /* - * The goal is to keep the total amount of memory used per 1gb of space - * at or below 32k, so we need to adjust how much memory we allow to be - * used by extent based free space tracking + * We are trying to keep the total amount of memory used per 1GiB of + * space to be MAX_CACHE_BYTES_PER_GIG. However, with a reclamation + * mechanism of pulling extents >= FORCE_EXTENT_THRESHOLD out of + * bitmaps, we may end up using more memory than this. */ if (size < SZ_1G) max_bytes = MAX_CACHE_BYTES_PER_GIG; else max_bytes = MAX_CACHE_BYTES_PER_GIG * div_u64(size, SZ_1G); - /* - * we want to account for 1 more bitmap than what we have so we can make - * sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as - * we add more bitmaps. - */ - bitmap_bytes = (ctl->total_bitmaps + 1) * ctl->unit; - - if (bitmap_bytes >= max_bytes) { - ctl->extents_thresh = 0; - return; - } + bitmap_bytes = ctl->total_bitmaps * ctl->unit; /* * we want the extent entry threshold to always be at most 1/2 the max @@ -1700,17 +1722,31 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, u64 offset, u64 bytes) { - unsigned long start, count; + unsigned long start, count, end; + int extent_delta = -1; start = offset_to_bit(info->offset, ctl->unit, offset); count = bytes_to_bits(bytes, ctl->unit); - ASSERT(start + count <= BITS_PER_BITMAP); + end = start + count; + ASSERT(end <= BITS_PER_BITMAP); bitmap_clear(info->bitmap, start, count); info->bytes -= bytes; if (info->max_extent_size > ctl->unit) info->max_extent_size = 0; + + if (start && test_bit(start - 1, info->bitmap)) + extent_delta++; + + if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap)) + extent_delta++; + + info->bitmap_extents += extent_delta; + if (!btrfs_free_space_trimmed(info)) { + ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta; + ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; + } } static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl, @@ -1725,16 +1761,30 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, u64 offset, u64 bytes) { - unsigned long start, count; + unsigned long start, count, end; + int extent_delta = 1; start = offset_to_bit(info->offset, ctl->unit, offset); count = bytes_to_bits(bytes, ctl->unit); - ASSERT(start + count <= BITS_PER_BITMAP); + end = start + count; + ASSERT(end <= BITS_PER_BITMAP); bitmap_set(info->bitmap, start, count); info->bytes += bytes; ctl->free_space += bytes; + + if (start && test_bit(start - 1, info->bitmap)) + extent_delta--; + + if (end < BITS_PER_BITMAP && test_bit(end, info->bitmap)) + extent_delta--; + + info->bitmap_extents += extent_delta; + if (!btrfs_free_space_trimmed(info)) { + ctl->discardable_extents[BTRFS_STAT_CURR] += extent_delta; + ctl->discardable_bytes[BTRFS_STAT_CURR] += bytes; + } } /* @@ -1870,11 +1920,35 @@ out: return NULL; } +static int count_bitmap_extents(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *bitmap_info) +{ + struct btrfs_block_group *block_group = ctl->private; + u64 bytes = bitmap_info->bytes; + unsigned int rs, re; + int count = 0; + + if (!block_group || !bytes) + return count; + + bitmap_for_each_set_region(bitmap_info->bitmap, rs, re, 0, + BITS_PER_BITMAP) { + bytes -= (rs - re) * ctl->unit; + count++; + + if (!bytes) + break; + } + + return count; +} + static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, u64 offset) { info->offset = offset_to_bitmap(ctl, offset); info->bytes = 0; + info->bitmap_extents = 0; INIT_LIST_HEAD(&info->list); link_free_space(ctl, info); ctl->total_bitmaps++; @@ -1885,6 +1959,18 @@ static void add_new_bitmap(struct btrfs_free_space_ctl *ctl, static void free_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *bitmap_info) { + /* + * Normally when this is called, the bitmap is completely empty. However, + * if we are blowing up the free space cache for one reason or another + * via __btrfs_remove_free_space_cache(), then it may not be freed and + * we may leave stats on the table. + */ + if (bitmap_info->bytes && !btrfs_free_space_trimmed(bitmap_info)) { + ctl->discardable_extents[BTRFS_STAT_CURR] -= + bitmap_info->bitmap_extents; + ctl->discardable_bytes[BTRFS_STAT_CURR] -= bitmap_info->bytes; + + } unlink_free_space(ctl, bitmap_info); kmem_cache_free(btrfs_free_space_bitmap_cachep, bitmap_info->bitmap); kmem_cache_free(btrfs_free_space_cachep, bitmap_info); @@ -1971,11 +2057,24 @@ again: static u64 add_bytes_to_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, u64 offset, - u64 bytes) + u64 bytes, enum btrfs_trim_state trim_state) { u64 bytes_to_set = 0; u64 end; + /* + * This is a tradeoff to make bitmap trim state minimal. We mark the + * whole bitmap untrimmed if at any point we add untrimmed regions. + */ + if (trim_state == BTRFS_TRIM_STATE_UNTRIMMED) { + if (btrfs_free_space_trimmed(info)) { + ctl->discardable_extents[BTRFS_STAT_CURR] += + info->bitmap_extents; + ctl->discardable_bytes[BTRFS_STAT_CURR] += info->bytes; + } + info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; + } + end = info->offset + (u64)(BITS_PER_BITMAP * ctl->unit); bytes_to_set = min(end - offset, bytes); @@ -2004,6 +2103,10 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, forced = true; #endif + /* This is a way to reclaim large regions from the bitmaps. */ + if (!forced && info->bytes >= FORCE_EXTENT_THRESHOLD) + return false; + /* * If we are below the extents threshold then we can add this as an * extent, and don't have to deal with the bitmap @@ -2016,8 +2119,8 @@ static bool use_bitmap(struct btrfs_free_space_ctl *ctl, * of cache left then go ahead an dadd them, no sense in adding * the overhead of a bitmap if we don't have to. */ - if (info->bytes <= fs_info->sectorsize * 4) { - if (ctl->free_extents * 2 <= ctl->extents_thresh) + if (info->bytes <= fs_info->sectorsize * 8) { + if (ctl->free_extents * 3 <= ctl->extents_thresh) return false; } else { return false; @@ -2050,10 +2153,12 @@ static int insert_into_bitmap(struct btrfs_free_space_ctl *ctl, struct btrfs_block_group *block_group = NULL; int added = 0; u64 bytes, offset, bytes_added; + enum btrfs_trim_state trim_state; int ret; bytes = info->bytes; offset = info->offset; + trim_state = info->trim_state; if (!ctl->op->use_bitmap(ctl, info)) return 0; @@ -2088,8 +2193,8 @@ again: } if (entry->offset == offset_to_bitmap(ctl, offset)) { - bytes_added = add_bytes_to_bitmap(ctl, entry, - offset, bytes); + bytes_added = add_bytes_to_bitmap(ctl, entry, offset, + bytes, trim_state); bytes -= bytes_added; offset += bytes_added; } @@ -2108,7 +2213,8 @@ no_cluster_bitmap: goto new_bitmap; } - bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, + trim_state); bytes -= bytes_added; offset += bytes_added; added = 0; @@ -2142,6 +2248,7 @@ new_bitmap: /* allocate the bitmap */ info->bitmap = kmem_cache_zalloc(btrfs_free_space_bitmap_cachep, GFP_NOFS); + info->trim_state = BTRFS_TRIM_STATE_TRIMMED; spin_lock(&ctl->tree_lock); if (!info->bitmap) { ret = -ENOMEM; @@ -2161,6 +2268,22 @@ out: return ret; } +/* + * Free space merging rules: + * 1) Merge trimmed areas together + * 2) Let untrimmed areas coalesce with trimmed areas + * 3) Always pull neighboring regions from bitmaps + * + * The above rules are for when we merge free space based on btrfs_trim_state. + * Rules 2 and 3 are subtle because they are suboptimal, but are done for the + * same reason: to promote larger extent regions which makes life easier for + * find_free_extent(). Rule 2 enables coalescing based on the common path + * being returning free space from btrfs_finish_extent_commit(). So when free + * space is trimmed, it will prevent aggregating trimmed new region and + * untrimmed regions in the rb_tree. Rule 3 is purely to obtain larger extents + * and provide find_free_extent() with the largest extents possible hoping for + * the reuse path. + */ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, struct btrfs_free_space *info, bool update_stat) { @@ -2169,6 +2292,7 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, bool merged = false; u64 offset = info->offset; u64 bytes = info->bytes; + const bool is_trimmed = btrfs_free_space_trimmed(info); /* * first we want to see if there is free space adjacent to the range we @@ -2182,7 +2306,9 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, else left_info = tree_search_offset(ctl, offset - 1, 0, 0); - if (right_info && !right_info->bitmap) { + /* See try_merge_free_space() comment. */ + if (right_info && !right_info->bitmap && + (!is_trimmed || btrfs_free_space_trimmed(right_info))) { if (update_stat) unlink_free_space(ctl, right_info); else @@ -2192,8 +2318,10 @@ static bool try_merge_free_space(struct btrfs_free_space_ctl *ctl, merged = true; } + /* See try_merge_free_space() comment. */ if (left_info && !left_info->bitmap && - left_info->offset + left_info->bytes == offset) { + left_info->offset + left_info->bytes == offset && + (!is_trimmed || btrfs_free_space_trimmed(left_info))) { if (update_stat) unlink_free_space(ctl, left_info); else @@ -2229,6 +2357,10 @@ static bool steal_from_bitmap_to_end(struct btrfs_free_space_ctl *ctl, bytes = (j - i) * ctl->unit; info->bytes += bytes; + /* See try_merge_free_space() comment. */ + if (!btrfs_free_space_trimmed(bitmap)) + info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; + if (update_stat) bitmap_clear_bits(ctl, bitmap, end, bytes); else @@ -2282,6 +2414,10 @@ static bool steal_from_bitmap_to_front(struct btrfs_free_space_ctl *ctl, info->offset -= bytes; info->bytes += bytes; + /* See try_merge_free_space() comment. */ + if (!btrfs_free_space_trimmed(bitmap)) + info->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; + if (update_stat) bitmap_clear_bits(ctl, bitmap, info->offset, bytes); else @@ -2331,10 +2467,13 @@ static void steal_from_bitmap(struct btrfs_free_space_ctl *ctl, int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, struct btrfs_free_space_ctl *ctl, - u64 offset, u64 bytes) + u64 offset, u64 bytes, + enum btrfs_trim_state trim_state) { + struct btrfs_block_group *block_group = ctl->private; struct btrfs_free_space *info; int ret = 0; + u64 filter_bytes = bytes; info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS); if (!info) @@ -2342,6 +2481,7 @@ int __btrfs_add_free_space(struct btrfs_fs_info *fs_info, info->offset = offset; info->bytes = bytes; + info->trim_state = trim_state; RB_CLEAR_NODE(&info->offset_index); spin_lock(&ctl->tree_lock); @@ -2370,10 +2510,13 @@ link: */ steal_from_bitmap(ctl, info, true); + filter_bytes = max(filter_bytes, info->bytes); + ret = link_free_space(ctl, info); if (ret) kmem_cache_free(btrfs_free_space_cachep, info); out: + btrfs_discard_update_discardable(block_group, ctl); spin_unlock(&ctl->tree_lock); if (ret) { @@ -2381,15 +2524,44 @@ out: ASSERT(ret != -EEXIST); } + if (trim_state != BTRFS_TRIM_STATE_TRIMMED) { + btrfs_discard_check_filter(block_group, filter_bytes); + btrfs_discard_queue_work(&fs_info->discard_ctl, block_group); + } + return ret; } int btrfs_add_free_space(struct btrfs_block_group *block_group, u64 bytenr, u64 size) { + enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; + + if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC)) + trim_state = BTRFS_TRIM_STATE_TRIMMED; + + return __btrfs_add_free_space(block_group->fs_info, + block_group->free_space_ctl, + bytenr, size, trim_state); +} + +/* + * This is a subtle distinction because when adding free space back in general, + * we want it to be added as untrimmed for async. But in the case where we add + * it on loading of a block group, we want to consider it trimmed. + */ +int btrfs_add_free_space_async_trimmed(struct btrfs_block_group *block_group, + u64 bytenr, u64 size) +{ + enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; + + if (btrfs_test_opt(block_group->fs_info, DISCARD_SYNC) || + btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC)) + trim_state = BTRFS_TRIM_STATE_TRIMMED; + return __btrfs_add_free_space(block_group->fs_info, block_group->free_space_ctl, - bytenr, size); + bytenr, size, trim_state); } int btrfs_remove_free_space(struct btrfs_block_group *block_group, @@ -2464,8 +2636,10 @@ again: } spin_unlock(&ctl->tree_lock); - ret = btrfs_add_free_space(block_group, offset + bytes, - old_end - (offset + bytes)); + ret = __btrfs_add_free_space(block_group->fs_info, ctl, + offset + bytes, + old_end - (offset + bytes), + info->trim_state); WARN_ON(ret); goto out; } @@ -2477,6 +2651,7 @@ again: goto again; } out_lock: + btrfs_discard_update_discardable(block_group, ctl); spin_unlock(&ctl->tree_lock); out: return ret; @@ -2562,8 +2737,22 @@ __btrfs_return_cluster_to_free_space( bitmap = (entry->bitmap != NULL); if (!bitmap) { + /* Merging treats extents as if they were new */ + if (!btrfs_free_space_trimmed(entry)) { + ctl->discardable_extents[BTRFS_STAT_CURR]--; + ctl->discardable_bytes[BTRFS_STAT_CURR] -= + entry->bytes; + } + try_merge_free_space(ctl, entry, false); steal_from_bitmap(ctl, entry, false); + + /* As we insert directly, update these statistics */ + if (!btrfs_free_space_trimmed(entry)) { + ctl->discardable_extents[BTRFS_STAT_CURR]++; + ctl->discardable_bytes[BTRFS_STAT_CURR] += + entry->bytes; + } } tree_insert_offset(&ctl->free_space_offset, entry->offset, &entry->offset_index, bitmap); @@ -2599,6 +2788,8 @@ void __btrfs_remove_free_space_cache(struct btrfs_free_space_ctl *ctl) { spin_lock(&ctl->tree_lock); __btrfs_remove_free_space_cache_locked(ctl); + if (ctl->private) + btrfs_discard_update_discardable(ctl->private, ctl); spin_unlock(&ctl->tree_lock); } @@ -2620,20 +2811,55 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group *block_group) cond_resched_lock(&ctl->tree_lock); } __btrfs_remove_free_space_cache_locked(ctl); + btrfs_discard_update_discardable(block_group, ctl); spin_unlock(&ctl->tree_lock); } +/** + * btrfs_is_free_space_trimmed - see if everything is trimmed + * @block_group: block_group of interest + * + * Walk @block_group's free space rb_tree to determine if everything is trimmed. + */ +bool btrfs_is_free_space_trimmed(struct btrfs_block_group *block_group) +{ + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_free_space *info; + struct rb_node *node; + bool ret = true; + + spin_lock(&ctl->tree_lock); + node = rb_first(&ctl->free_space_offset); + + while (node) { + info = rb_entry(node, struct btrfs_free_space, offset_index); + + if (!btrfs_free_space_trimmed(info)) { + ret = false; + break; + } + + node = rb_next(node); + } + + spin_unlock(&ctl->tree_lock); + return ret; +} + u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, u64 offset, u64 bytes, u64 empty_size, u64 *max_extent_size) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_discard_ctl *discard_ctl = + &block_group->fs_info->discard_ctl; struct btrfs_free_space *entry = NULL; u64 bytes_search = bytes + empty_size; u64 ret = 0; u64 align_gap = 0; u64 align_gap_len = 0; + enum btrfs_trim_state align_gap_trim_state = BTRFS_TRIM_STATE_UNTRIMMED; spin_lock(&ctl->tree_lock); entry = find_free_space(ctl, &offset, &bytes_search, @@ -2644,12 +2870,20 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, ret = offset; if (entry->bitmap) { bitmap_clear_bits(ctl, entry, offset, bytes); + + if (!btrfs_free_space_trimmed(entry)) + atomic64_add(bytes, &discard_ctl->discard_bytes_saved); + if (!entry->bytes) free_bitmap(ctl, entry); } else { unlink_free_space(ctl, entry); align_gap_len = offset - entry->offset; align_gap = entry->offset; + align_gap_trim_state = entry->trim_state; + + if (!btrfs_free_space_trimmed(entry)) + atomic64_add(bytes, &discard_ctl->discard_bytes_saved); entry->offset = offset + bytes; WARN_ON(entry->bytes < bytes + align_gap_len); @@ -2661,11 +2895,13 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group *block_group, link_free_space(ctl, entry); } out: + btrfs_discard_update_discardable(block_group, ctl); spin_unlock(&ctl->tree_lock); if (align_gap_len) __btrfs_add_free_space(block_group->fs_info, ctl, - align_gap, align_gap_len); + align_gap, align_gap_len, + align_gap_trim_state); return ret; } @@ -2707,6 +2943,8 @@ int btrfs_return_cluster_to_free_space( ret = __btrfs_return_cluster_to_free_space(block_group, cluster); spin_unlock(&ctl->tree_lock); + btrfs_discard_queue_work(&block_group->fs_info->discard_ctl, block_group); + /* finally drop our ref */ btrfs_put_block_group(block_group); return ret; @@ -2750,6 +2988,8 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group, u64 min_start, u64 *max_extent_size) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; + struct btrfs_discard_ctl *discard_ctl = + &block_group->fs_info->discard_ctl; struct btrfs_free_space *entry = NULL; struct rb_node *node; u64 ret = 0; @@ -2814,7 +3054,12 @@ out: spin_lock(&ctl->tree_lock); + if (!btrfs_free_space_trimmed(entry)) + atomic64_add(bytes, &discard_ctl->discard_bytes_saved); + ctl->free_space -= bytes; + if (!entry->bitmap && !btrfs_free_space_trimmed(entry)) + ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes; if (entry->bytes == 0) { ctl->free_extents--; if (entry->bitmap) { @@ -2822,6 +3067,8 @@ out: entry->bitmap); ctl->total_bitmaps--; ctl->op->recalc_thresholds(ctl); + } else if (!btrfs_free_space_trimmed(entry)) { + ctl->discardable_extents[BTRFS_STAT_CURR]--; } kmem_cache_free(btrfs_free_space_cachep, entry); } @@ -3148,6 +3395,7 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) static int do_trimming(struct btrfs_block_group *block_group, u64 *total_trimmed, u64 start, u64 bytes, u64 reserved_start, u64 reserved_bytes, + enum btrfs_trim_state reserved_trim_state, struct btrfs_trim_range *trim_entry) { struct btrfs_space_info *space_info = block_group->space_info; @@ -3155,6 +3403,9 @@ static int do_trimming(struct btrfs_block_group *block_group, struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; int ret; int update = 0; + const u64 end = start + bytes; + const u64 reserved_end = reserved_start + reserved_bytes; + enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_UNTRIMMED; u64 trimmed = 0; spin_lock(&space_info->lock); @@ -3168,11 +3419,20 @@ static int do_trimming(struct btrfs_block_group *block_group, spin_unlock(&space_info->lock); ret = btrfs_discard_extent(fs_info, start, bytes, &trimmed); - if (!ret) + if (!ret) { *total_trimmed += trimmed; + trim_state = BTRFS_TRIM_STATE_TRIMMED; + } mutex_lock(&ctl->cache_writeout_mutex); - btrfs_add_free_space(block_group, reserved_start, reserved_bytes); + if (reserved_start < start) + __btrfs_add_free_space(fs_info, ctl, reserved_start, + start - reserved_start, + reserved_trim_state); + if (start + bytes < reserved_start + reserved_bytes) + __btrfs_add_free_space(fs_info, ctl, end, reserved_end - end, + reserved_trim_state); + __btrfs_add_free_space(fs_info, ctl, start, bytes, trim_state); list_del(&trim_entry->list); mutex_unlock(&ctl->cache_writeout_mutex); @@ -3190,16 +3450,24 @@ static int do_trimming(struct btrfs_block_group *block_group, return ret; } +/* + * If @async is set, then we will trim 1 region and return. + */ static int trim_no_bitmap(struct btrfs_block_group *block_group, - u64 *total_trimmed, u64 start, u64 end, u64 minlen) + u64 *total_trimmed, u64 start, u64 end, u64 minlen, + bool async) { + struct btrfs_discard_ctl *discard_ctl = + &block_group->fs_info->discard_ctl; struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *entry; struct rb_node *node; int ret = 0; u64 extent_start; u64 extent_bytes; + enum btrfs_trim_state extent_trim_state; u64 bytes; + const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); while (start < end) { struct btrfs_trim_range trim_entry; @@ -3207,49 +3475,66 @@ static int trim_no_bitmap(struct btrfs_block_group *block_group, mutex_lock(&ctl->cache_writeout_mutex); spin_lock(&ctl->tree_lock); - if (ctl->free_space < minlen) { - spin_unlock(&ctl->tree_lock); - mutex_unlock(&ctl->cache_writeout_mutex); - break; - } + if (ctl->free_space < minlen) + goto out_unlock; entry = tree_search_offset(ctl, start, 0, 1); - if (!entry) { - spin_unlock(&ctl->tree_lock); - mutex_unlock(&ctl->cache_writeout_mutex); - break; - } + if (!entry) + goto out_unlock; - /* skip bitmaps */ - while (entry->bitmap) { + /* Skip bitmaps and if async, already trimmed entries */ + while (entry->bitmap || + (async && btrfs_free_space_trimmed(entry))) { node = rb_next(&entry->offset_index); - if (!node) { - spin_unlock(&ctl->tree_lock); - mutex_unlock(&ctl->cache_writeout_mutex); - goto out; - } + if (!node) + goto out_unlock; entry = rb_entry(node, struct btrfs_free_space, offset_index); } - if (entry->offset >= end) { - spin_unlock(&ctl->tree_lock); - mutex_unlock(&ctl->cache_writeout_mutex); - break; - } + if (entry->offset >= end) + goto out_unlock; extent_start = entry->offset; extent_bytes = entry->bytes; - start = max(start, extent_start); - bytes = min(extent_start + extent_bytes, end) - start; - if (bytes < minlen) { - spin_unlock(&ctl->tree_lock); - mutex_unlock(&ctl->cache_writeout_mutex); - goto next; - } + extent_trim_state = entry->trim_state; + if (async) { + start = entry->offset; + bytes = entry->bytes; + if (bytes < minlen) { + spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); + goto next; + } + unlink_free_space(ctl, entry); + /* + * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. + * If X < BTRFS_ASYNC_DISCARD_MIN_FILTER, we won't trim + * X when we come back around. So trim it now. + */ + if (max_discard_size && + bytes >= (max_discard_size + + BTRFS_ASYNC_DISCARD_MIN_FILTER)) { + bytes = max_discard_size; + extent_bytes = max_discard_size; + entry->offset += max_discard_size; + entry->bytes -= max_discard_size; + link_free_space(ctl, entry); + } else { + kmem_cache_free(btrfs_free_space_cachep, entry); + } + } else { + start = max(start, extent_start); + bytes = min(extent_start + extent_bytes, end) - start; + if (bytes < minlen) { + spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); + goto next; + } - unlink_free_space(ctl, entry); - kmem_cache_free(btrfs_free_space_cachep, entry); + unlink_free_space(ctl, entry); + kmem_cache_free(btrfs_free_space_cachep, entry); + } spin_unlock(&ctl->tree_lock); trim_entry.start = extent_start; @@ -3258,11 +3543,17 @@ static int trim_no_bitmap(struct btrfs_block_group *block_group, mutex_unlock(&ctl->cache_writeout_mutex); ret = do_trimming(block_group, total_trimmed, start, bytes, - extent_start, extent_bytes, &trim_entry); - if (ret) + extent_start, extent_bytes, extent_trim_state, + &trim_entry); + if (ret) { + block_group->discard_cursor = start + bytes; break; + } next: start += bytes; + block_group->discard_cursor = start; + if (async && *total_trimmed) + break; if (fatal_signal_pending(current)) { ret = -ERESTARTSYS; @@ -3271,19 +3562,76 @@ next: cond_resched(); } -out: + + return ret; + +out_unlock: + block_group->discard_cursor = btrfs_block_group_end(block_group); + spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); + return ret; } +/* + * If we break out of trimming a bitmap prematurely, we should reset the + * trimming bit. In a rather contrieved case, it's possible to race here so + * reset the state to BTRFS_TRIM_STATE_UNTRIMMED. + * + * start = start of bitmap + * end = near end of bitmap + * + * Thread 1: Thread 2: + * trim_bitmaps(start) + * trim_bitmaps(end) + * end_trimming_bitmap() + * reset_trimming_bitmap() + */ +static void reset_trimming_bitmap(struct btrfs_free_space_ctl *ctl, u64 offset) +{ + struct btrfs_free_space *entry; + + spin_lock(&ctl->tree_lock); + entry = tree_search_offset(ctl, offset, 1, 0); + if (entry) { + if (btrfs_free_space_trimmed(entry)) { + ctl->discardable_extents[BTRFS_STAT_CURR] += + entry->bitmap_extents; + ctl->discardable_bytes[BTRFS_STAT_CURR] += entry->bytes; + } + entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; + } + + spin_unlock(&ctl->tree_lock); +} + +static void end_trimming_bitmap(struct btrfs_free_space_ctl *ctl, + struct btrfs_free_space *entry) +{ + if (btrfs_free_space_trimming_bitmap(entry)) { + entry->trim_state = BTRFS_TRIM_STATE_TRIMMED; + ctl->discardable_extents[BTRFS_STAT_CURR] -= + entry->bitmap_extents; + ctl->discardable_bytes[BTRFS_STAT_CURR] -= entry->bytes; + } +} + +/* + * If @async is set, then we will trim 1 region and return. + */ static int trim_bitmaps(struct btrfs_block_group *block_group, - u64 *total_trimmed, u64 start, u64 end, u64 minlen) + u64 *total_trimmed, u64 start, u64 end, u64 minlen, + u64 maxlen, bool async) { + struct btrfs_discard_ctl *discard_ctl = + &block_group->fs_info->discard_ctl; struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; struct btrfs_free_space *entry; int ret = 0; int ret2; u64 bytes; u64 offset = offset_to_bitmap(ctl, start); + const u64 max_discard_size = READ_ONCE(discard_ctl->max_discard_size); while (offset < end) { bool next_bitmap = false; @@ -3293,35 +3641,84 @@ static int trim_bitmaps(struct btrfs_block_group *block_group, spin_lock(&ctl->tree_lock); if (ctl->free_space < minlen) { + block_group->discard_cursor = + btrfs_block_group_end(block_group); spin_unlock(&ctl->tree_lock); mutex_unlock(&ctl->cache_writeout_mutex); break; } entry = tree_search_offset(ctl, offset, 1, 0); - if (!entry) { + /* + * Bitmaps are marked trimmed lossily now to prevent constant + * discarding of the same bitmap (the reason why we are bound + * by the filters). So, retrim the block group bitmaps when we + * are preparing to punt to the unused_bgs list. This uses + * @minlen to determine if we are in BTRFS_DISCARD_INDEX_UNUSED + * which is the only discard index which sets minlen to 0. + */ + if (!entry || (async && minlen && start == offset && + btrfs_free_space_trimmed(entry))) { spin_unlock(&ctl->tree_lock); mutex_unlock(&ctl->cache_writeout_mutex); next_bitmap = true; goto next; } + /* + * Async discard bitmap trimming begins at by setting the start + * to be key.objectid and the offset_to_bitmap() aligns to the + * start of the bitmap. This lets us know we are fully + * scanning the bitmap rather than only some portion of it. + */ + if (start == offset) + entry->trim_state = BTRFS_TRIM_STATE_TRIMMING; + bytes = minlen; ret2 = search_bitmap(ctl, entry, &start, &bytes, false); if (ret2 || start >= end) { + /* + * We lossily consider a bitmap trimmed if we only skip + * over regions <= BTRFS_ASYNC_DISCARD_MIN_FILTER. + */ + if (ret2 && minlen <= BTRFS_ASYNC_DISCARD_MIN_FILTER) + end_trimming_bitmap(ctl, entry); + else + entry->trim_state = BTRFS_TRIM_STATE_UNTRIMMED; spin_unlock(&ctl->tree_lock); mutex_unlock(&ctl->cache_writeout_mutex); next_bitmap = true; goto next; } + /* + * We already trimmed a region, but are using the locking above + * to reset the trim_state. + */ + if (async && *total_trimmed) { + spin_unlock(&ctl->tree_lock); + mutex_unlock(&ctl->cache_writeout_mutex); + goto out; + } + bytes = min(bytes, end - start); - if (bytes < minlen) { + if (bytes < minlen || (async && maxlen && bytes > maxlen)) { spin_unlock(&ctl->tree_lock); mutex_unlock(&ctl->cache_writeout_mutex); goto next; } + /* + * Let bytes = BTRFS_MAX_DISCARD_SIZE + X. + * If X < @minlen, we won't trim X when we come back around. + * So trim it now. We differ here from trimming extents as we + * don't keep individual state per bit. + */ + if (async && + max_discard_size && + bytes > (max_discard_size + minlen)) + bytes = max_discard_size; + bitmap_clear_bits(ctl, entry, start, bytes); if (entry->bytes == 0) free_bitmap(ctl, entry); @@ -3333,19 +3730,25 @@ static int trim_bitmaps(struct btrfs_block_group *block_group, mutex_unlock(&ctl->cache_writeout_mutex); ret = do_trimming(block_group, total_trimmed, start, bytes, - start, bytes, &trim_entry); - if (ret) + start, bytes, 0, &trim_entry); + if (ret) { + reset_trimming_bitmap(ctl, offset); + block_group->discard_cursor = + btrfs_block_group_end(block_group); break; + } next: if (next_bitmap) { offset += BITS_PER_BITMAP * ctl->unit; + start = offset; } else { start += bytes; - if (start >= offset + BITS_PER_BITMAP * ctl->unit) - offset += BITS_PER_BITMAP * ctl->unit; } + block_group->discard_cursor = start; if (fatal_signal_pending(current)) { + if (start != offset) + reset_trimming_bitmap(ctl, offset); ret = -ERESTARTSYS; break; } @@ -3353,6 +3756,10 @@ next: cond_resched(); } + if (offset >= end) + block_group->discard_cursor = end; + +out: return ret; } @@ -3399,7 +3806,9 @@ void btrfs_put_block_group_trimming(struct btrfs_block_group *block_group) int btrfs_trim_block_group(struct btrfs_block_group *block_group, u64 *trimmed, u64 start, u64 end, u64 minlen) { + struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; int ret; + u64 rem = 0; *trimmed = 0; @@ -3411,16 +3820,66 @@ int btrfs_trim_block_group(struct btrfs_block_group *block_group, btrfs_get_block_group_trimming(block_group); spin_unlock(&block_group->lock); - ret = trim_no_bitmap(block_group, trimmed, start, end, minlen); + ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, false); if (ret) goto out; - ret = trim_bitmaps(block_group, trimmed, start, end, minlen); + ret = trim_bitmaps(block_group, trimmed, start, end, minlen, 0, false); + div64_u64_rem(end, BITS_PER_BITMAP * ctl->unit, &rem); + /* If we ended in the middle of a bitmap, reset the trimming flag */ + if (rem) + reset_trimming_bitmap(ctl, offset_to_bitmap(ctl, end)); out: btrfs_put_block_group_trimming(block_group); return ret; } +int btrfs_trim_block_group_extents(struct btrfs_block_group *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen, + bool async) +{ + int ret; + + *trimmed = 0; + + spin_lock(&block_group->lock); + if (block_group->removed) { + spin_unlock(&block_group->lock); + return 0; + } + btrfs_get_block_group_trimming(block_group); + spin_unlock(&block_group->lock); + + ret = trim_no_bitmap(block_group, trimmed, start, end, minlen, async); + btrfs_put_block_group_trimming(block_group); + + return ret; +} + +int btrfs_trim_block_group_bitmaps(struct btrfs_block_group *block_group, + u64 *trimmed, u64 start, u64 end, u64 minlen, + u64 maxlen, bool async) +{ + int ret; + + *trimmed = 0; + + spin_lock(&block_group->lock); + if (block_group->removed) { + spin_unlock(&block_group->lock); + return 0; + } + btrfs_get_block_group_trimming(block_group); + spin_unlock(&block_group->lock); + + ret = trim_bitmaps(block_group, trimmed, start, end, minlen, maxlen, + async); + + btrfs_put_block_group_trimming(block_group); + + return ret; +} + /* * Find the left-most item in the cache tree, and then return the * smallest inode number in the item. @@ -3600,6 +4059,7 @@ int test_add_free_space_entry(struct btrfs_block_group *cache, struct btrfs_free_space_ctl *ctl = cache->free_space_ctl; struct btrfs_free_space *info = NULL, *bitmap_info; void *map = NULL; + enum btrfs_trim_state trim_state = BTRFS_TRIM_STATE_TRIMMED; u64 bytes_added; int ret; @@ -3641,7 +4101,8 @@ again: info = NULL; } - bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes); + bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes, + trim_state); bytes -= bytes_added; offset += bytes_added; |