From 704b914f15fb7daaf517e3acc4bed472b50ca19e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 3 Dec 2021 21:15:32 +0800 Subject: blk-mq: move srcu from blk_mq_hw_ctx to request_queue In case of BLK_MQ_F_BLOCKING, per-hctx srcu is used to protect dispatch critical area. However, this srcu instance stays at the end of hctx, and it often takes standalone cacheline, often cold. Inside srcu_read_lock() and srcu_read_unlock(), WRITE is always done on the indirect percpu variable which is allocated from heap instead of being embedded, srcu->srcu_idx is read only in srcu_read_lock(). It doesn't matter if srcu structure stays in hctx or request queue. So switch to per-request-queue srcu for protecting dispatch, and this way simplifies quiesce a lot, not mention quiesce is always done on the request queue wide. Signed-off-by: Ming Lei Link: https://lore.kernel.org/r/20211203131534.3668411-3-ming.lei@redhat.com Signed-off-by: Jens Axboe --- block/blk-core.c | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index b0660c9df852..10619fd83c1b 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -66,6 +66,7 @@ DEFINE_IDA(blk_queue_ida); * For queue allocation */ struct kmem_cache *blk_requestq_cachep; +struct kmem_cache *blk_requestq_srcu_cachep; /* * Controlling structure to kblockd @@ -437,21 +438,27 @@ static void blk_timeout_work(struct work_struct *work) { } -struct request_queue *blk_alloc_queue(int node_id) +struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu) { struct request_queue *q; int ret; - q = kmem_cache_alloc_node(blk_requestq_cachep, - GFP_KERNEL | __GFP_ZERO, node_id); + q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu), + GFP_KERNEL | __GFP_ZERO, node_id); if (!q) return NULL; + if (alloc_srcu) { + blk_queue_flag_set(QUEUE_FLAG_HAS_SRCU, q); + if (init_srcu_struct(q->srcu) != 0) + goto fail_q; + } + q->last_merge = NULL; q->id = ida_simple_get(&blk_queue_ida, 0, 0, GFP_KERNEL); if (q->id < 0) - goto fail_q; + goto fail_srcu; ret = bioset_init(&q->bio_split, BIO_POOL_SIZE, 0, 0); if (ret) @@ -508,8 +515,11 @@ fail_split: bioset_exit(&q->bio_split); fail_id: ida_simple_remove(&blk_queue_ida, q->id); +fail_srcu: + if (alloc_srcu) + cleanup_srcu_struct(q->srcu); fail_q: - kmem_cache_free(blk_requestq_cachep, q); + kmem_cache_free(blk_get_queue_kmem_cache(alloc_srcu), q); return NULL; } @@ -1301,6 +1311,9 @@ int __init blk_dev_init(void) sizeof_field(struct request, cmd_flags)); BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 * sizeof_field(struct bio, bi_opf)); + BUILD_BUG_ON(ALIGN(offsetof(struct request_queue, srcu), + __alignof__(struct request_queue)) != + sizeof(struct request_queue)); /* used for unplugging and affects IO latency/throughput - HIGHPRI */ kblockd_workqueue = alloc_workqueue("kblockd", @@ -1311,6 +1324,10 @@ int __init blk_dev_init(void) blk_requestq_cachep = kmem_cache_create("request_queue", sizeof(struct request_queue), 0, SLAB_PANIC, NULL); + blk_requestq_srcu_cachep = kmem_cache_create("request_queue_srcu", + sizeof(struct request_queue) + + sizeof(struct srcu_struct), 0, SLAB_PANIC, NULL); + blk_debugfs_root = debugfs_create_dir("block", NULL); return 0; -- cgit v1.2.3-70-g09d2