From f1591a8bb3e02713f4ee2efe20df0d84ed80da48 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Tue, 17 Jan 2023 04:50:55 +0800 Subject: sbitmap: remove unnecessary calculation of alloc_hint in __sbitmap_get_shallow Updates to alloc_hint in the loop in __sbitmap_get_shallow() are mostly pointless and equivalent to setting alloc_hint to zero (because SB_NR_TO_BIT() considers only low sb->shift bits from alloc_hint). So simplify the logic. Reviewed-by: Jan Kara Signed-off-by: Kemeng Shi Link: https://lore.kernel.org/r/20230116205059.3821738-2-shikemeng@huaweicloud.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 1fcede228fa2..462873a4337c 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -243,6 +243,7 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, int nr = -1; index = SB_NR_TO_INDEX(sb, alloc_hint); + alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); for (i = 0; i < sb->map_nr; i++) { again: @@ -250,7 +251,7 @@ again: min_t(unsigned int, __map_depth(sb, index), shallow_depth), - SB_NR_TO_BIT(sb, alloc_hint), true); + alloc_hint, true); if (nr != -1) { nr += index << sb->shift; break; @@ -260,13 +261,9 @@ again: goto again; /* Jump to next index. */ - index++; - alloc_hint = index << sb->shift; - - if (index >= sb->map_nr) { + alloc_hint = 0; + if (++index >= sb->map_nr) index = 0; - alloc_hint = 0; - } } return nr; -- cgit v1.2.3-70-g09d2 From 903e86f3a64d9573352bbab2f211fdbbaa5772b7 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Tue, 17 Jan 2023 04:50:56 +0800 Subject: sbitmap: remove redundant check in __sbitmap_queue_get_batch Commit fbb564a557809 ("lib/sbitmap: Fix invalid loop in __sbitmap_queue_get_batch()") mentioned that "Checking free bits when setting the target bits. Otherwise, it may reuse the busying bits." This commit add check to make sure all masked bits in word before cmpxchg is zero. 
Then the existing check after cmpxchg to check any zero bit is existing in masked bits in word is redundant. Actually, old value of word before cmpxchg is stored in val and we will filter out busy bits in val by "(get_mask & ~val)" after cmpxchg. So we will not reuse busy bits mentioned in commit fbb564a557809 ("lib/sbitmap: Fix invalid loop in __sbitmap_queue_get_batch()"). Revert the newly added check to remove the redundant check. Fixes: fbb564a55780 ("lib/sbitmap: Fix invalid loop in __sbitmap_queue_get_batch()") Reviewed-by: Jan Kara Signed-off-by: Kemeng Shi Link: https://lore.kernel.org/r/20230116205059.3821738-3-shikemeng@huaweicloud.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 462873a4337c..eb45a2d7916e 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -518,11 +518,9 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, get_mask = ((1UL << nr_tags) - 1) << nr; val = READ_ONCE(map->word); - do { - if ((val & ~get_mask) != val) - goto next; - } while (!atomic_long_try_cmpxchg(ptr, &val, - get_mask | val)); + while (!atomic_long_try_cmpxchg(ptr, &val, + get_mask | val)) + ; get_mask = (get_mask & ~val) >> nr; if (get_mask) { *offset = nr + (index << sb->shift); -- cgit v1.2.3-70-g09d2 From 08470a98a7d7e32c787b23b87353f13b03c23195 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Tue, 17 Jan 2023 04:50:57 +0800 Subject: sbitmap: rewrite sbitmap_find_bit_in_index to reduce repeat code Rewrite sbitmap_find_bit_in_index as following: 1. Rename sbitmap_find_bit_in_index to sbitmap_find_bit_in_word 2. Accept "struct sbitmap_word *" directly instead of accepting "struct sbitmap *" and "int index" to get "struct sbitmap_word *". 3. Accept depth/shallow_depth and wrap for __sbitmap_get_word from caller to support need of both __sbitmap_get_shallow and __sbitmap_get. 
With the helper function sbitmap_find_bit_in_word, we can remove the repeated code in __sbitmap_get_shallow that finds a bit while considering deferred clear. Reviewed-by: Jan Kara Signed-off-by: Kemeng Shi Link: https://lore.kernel.org/r/20230116205059.3821738-4-shikemeng@huaweicloud.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index eb45a2d7916e..27cfcc35956e 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -167,15 +167,16 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, return nr; } -static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, - unsigned int alloc_hint) +static int sbitmap_find_bit_in_word(struct sbitmap_word *map, + unsigned int depth, + unsigned int alloc_hint, + bool wrap) { - struct sbitmap_word *map = &sb->map[index]; int nr; do { - nr = __sbitmap_get_word(&map->word, __map_depth(sb, index), - alloc_hint, !sb->round_robin); + nr = __sbitmap_get_word(&map->word, depth, + alloc_hint, wrap); if (nr != -1) break; if (!sbitmap_deferred_clear(map)) @@ -203,7 +204,9 @@ static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) alloc_hint = 0; for (i = 0; i < sb->map_nr; i++) { - nr = sbitmap_find_bit_in_index(sb, index, alloc_hint); + nr = sbitmap_find_bit_in_word(&sb->map[index], + __map_depth(sb, index), + alloc_hint, !sb->round_robin); if (nr != -1) { nr += index << sb->shift; break; @@ -246,20 +249,17 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); for (i = 0; i < sb->map_nr; i++) { -again: - nr = __sbitmap_get_word(&sb->map[index].word, - min_t(unsigned int, - __map_depth(sb, index), - shallow_depth), - alloc_hint, true); + nr = sbitmap_find_bit_in_word(&sb->map[index], + min_t(unsigned int, + __map_depth(sb, index), + shallow_depth), + alloc_hint, true); + if (nr != -1) { nr += index << sb->shift; break; } - if 
(sbitmap_deferred_clear(&sb->map[index])) - goto again; - /* Jump to next index. */ alloc_hint = 0; if (++index >= sb->map_nr) -- cgit v1.2.3-70-g09d2 From 678418c6128f112fc5584beb5cdd21fbc225badf Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Tue, 17 Jan 2023 04:50:58 +0800 Subject: sbitmap: add sbitmap_find_bit to remove repeat code in __sbitmap_get/__sbitmap_get_shallow There are three differences between __sbitmap_get and __sbitmap_get_shallow when searching free bit: 1. __sbitmap_get_shallow limit number of bit to search per word. __sbitmap_get has no such limit. 2. __sbitmap_get_shallow always searches with wrap set. __sbitmap_get set wrap according to round_robin. 3. __sbitmap_get_shallow always searches from first bit in first word. __sbitmap_get searches from first bit when round_robin is not set otherwise searches from SB_NR_TO_BIT(sb, alloc_hint). Add helper function sbitmap_find_bit function to do common search while accept "limit depth per word", "wrap flag" and "first bit to search" from caller to support the need of both __sbitmap_get and __sbitmap_get_shallow. 
Reviewed-by: Jan Kara Signed-off-by: Kemeng Shi Link: https://lore.kernel.org/r/20230116205059.3821738-5-shikemeng@huaweicloud.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 70 ++++++++++++++++++++++++++++------------------------------- 1 file changed, 33 insertions(+), 37 deletions(-) (limited to 'lib') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 27cfcc35956e..2514e7a3f6ca 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -186,27 +186,22 @@ static int sbitmap_find_bit_in_word(struct sbitmap_word *map, return nr; } -static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) +static int sbitmap_find_bit(struct sbitmap *sb, + unsigned int depth, + unsigned int index, + unsigned int alloc_hint, + bool wrap) { - unsigned int i, index; + unsigned int i; int nr = -1; - index = SB_NR_TO_INDEX(sb, alloc_hint); - - /* - * Unless we're doing round robin tag allocation, just use the - * alloc_hint to find the right word index. No point in looping - * twice in find_next_zero_bit() for that case. - */ - if (sb->round_robin) - alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); - else - alloc_hint = 0; - for (i = 0; i < sb->map_nr; i++) { nr = sbitmap_find_bit_in_word(&sb->map[index], - __map_depth(sb, index), - alloc_hint, !sb->round_robin); + min_t(unsigned int, + __map_depth(sb, index), + depth), + alloc_hint, wrap); + if (nr != -1) { nr += index << sb->shift; break; @@ -221,6 +216,26 @@ static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) return nr; } +static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) +{ + unsigned int index; + + index = SB_NR_TO_INDEX(sb, alloc_hint); + + /* + * Unless we're doing round robin tag allocation, just use the + * alloc_hint to find the right word index. No point in looping + * twice in find_next_zero_bit() for that case. 
+ */ + if (sb->round_robin) + alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); + else + alloc_hint = 0; + + return sbitmap_find_bit(sb, UINT_MAX, index, alloc_hint, + !sb->round_robin); +} + int sbitmap_get(struct sbitmap *sb) { int nr; @@ -242,31 +257,12 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, unsigned long shallow_depth) { - unsigned int i, index; - int nr = -1; + unsigned int index; index = SB_NR_TO_INDEX(sb, alloc_hint); alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); - for (i = 0; i < sb->map_nr; i++) { - nr = sbitmap_find_bit_in_word(&sb->map[index], - min_t(unsigned int, - __map_depth(sb, index), - shallow_depth), - alloc_hint, true); - - if (nr != -1) { - nr += index << sb->shift; - break; - } - - /* Jump to next index. */ - alloc_hint = 0; - if (++index >= sb->map_nr) - index = 0; - } - - return nr; + return sbitmap_find_bit(sb, shallow_depth, index, alloc_hint, true); } int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) -- cgit v1.2.3-70-g09d2 From b5fcf7871acb7f9a3a8ed341a68bd86aba3e254a Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Tue, 17 Jan 2023 04:50:59 +0800 Subject: sbitmap: correct wake_batch recalculation to avoid potential IO hung Commit 180dccb0dba4f ("blk-mq: fix tag_get wait task can't be awakened") mentioned that in case of shared tags, there could be just one real active hctx(queue) because of lazy detection of tag idle. Then driver tag allocation may wait forever on this real active hctx(queue) if wake_batch is > hctx_max_depth where hctx_max_depth is the available tag depth for the active hctx(queue). However, the condition wake_batch > hctx_max_depth is not strong enough to avoid an IO hang, as sbitmap_queue_wake_up will only wake up one wait queue for each wake_batch even though there is only one waiter in the woken wait queue. After this, there is only one tag to free and wake_batch may not be reached anymore. 
Commit 180dccb0dba4f ("blk-mq: fix tag_get wait task can't be awakened") mentioned that driver tag allocation may wait forever. Actually, the inactive hctx(queue) will be truly idle after at most 30 seconds and will call blk_mq_tag_wakeup_all to wake one waiter per wait queue to break the hang. But an IO hang of 30 seconds is also not acceptable. Set the batch size small enough that the depth of the shared hctx(queue) is enough to wake up all of the queues, like sbq_calc_wake_batch does, to fix this potential IO hang. Although hctx_max_depth will be clamped to at least 4 while wake_batch recalculation does not do the clamp, the wake_batch will be always recalculated to 1 when hctx_max_depth <= 4. Fixes: 180dccb0dba4 ("blk-mq: fix tag_get wait task can't be awakened") Reviewed-by: Jan Kara Signed-off-by: Kemeng Shi Link: https://lore.kernel.org/r/20230116205059.3821738-6-shikemeng@huaweicloud.com Signed-off-by: Jens Axboe --- lib/sbitmap.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 2514e7a3f6ca..eff4e42c425a 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -457,13 +457,10 @@ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users) { unsigned int wake_batch; - unsigned int min_batch; unsigned int depth = (sbq->sb.depth + users - 1) / users; - min_batch = sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES) ? 4 : 1; - wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, - min_batch, SBQ_WAKE_BATCH); + 1, SBQ_WAKE_BATCH); WRITE_ONCE(sbq->wake_batch, wake_batch); } -- cgit v1.2.3-70-g09d2