From 86ff7c2a80cd357f6156a53b354f6a0b357dc0c9 Mon Sep 17 00:00:00 2001
From: Ming Lei
Date: Tue, 30 Jan 2018 22:04:57 -0500
Subject: blk-mq: introduce BLK_STS_DEV_RESOURCE

This status is returned from a driver to the block layer if a
device-related resource is unavailable, but the driver can guarantee
that IO dispatch will be triggered again once the resource becomes
available.

Convert some drivers to return BLK_STS_DEV_RESOURCE.  Also, if a driver
returns BLK_STS_RESOURCE and SCHED_RESTART is set, rerun the queue after
a delay (BLK_MQ_DELAY_QUEUE) to avoid IO stalls.  BLK_MQ_DELAY_QUEUE is
3 ms because both scsi-mq and nvmefc already use that magic value.

If a driver can make sure there is in-flight IO, it is safe to return
BLK_STS_DEV_RESOURCE because:

1) If all in-flight IOs complete before SCHED_RESTART is examined in
blk_mq_dispatch_rq_list(), SCHED_RESTART must be cleared, so the queue
is run immediately in this case by blk_mq_dispatch_rq_list();

2) if there is any in-flight IO after/when SCHED_RESTART is examined in
blk_mq_dispatch_rq_list():
- if SCHED_RESTART isn't set, the queue is run immediately, as handled
  in 1)
- otherwise, this request will be dispatched after some in-flight IO
  completes, via blk_mq_sched_restart()

3) if SCHED_RESTART is set concurrently in this context because of
BLK_STS_RESOURCE, blk_mq_delay_run_hw_queue() covers the above two
cases and makes sure an IO hang is avoided.

One invariant is that the queue will be rerun if SCHED_RESTART is set.

Suggested-by: Jens Axboe
Tested-by: Laurence Oberman
Signed-off-by: Ming Lei
Signed-off-by: Mike Snitzer
Signed-off-by: Jens Axboe
---
 drivers/block/null_blk.c     |  2 +-
 drivers/block/virtio_blk.c   |  2 +-
 drivers/block/xen-blkfront.c |  2 +-
 drivers/md/dm-rq.c           |  5 ++---
 drivers/nvme/host/fc.c       | 12 ++----------
 drivers/scsi/scsi_lib.c      |  6 +++---
 6 files changed, 10 insertions(+), 19 deletions(-)

(limited to 'drivers')
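The rule the converted drivers follow can be summarized with a minimal
sketch.  It is illustrative only and not part of this patch: struct
my_dev, my_dev_get_free_slot() and my_dev_issue() are hypothetical;
only the blk_status_t policy comes from the changelog above.

/* Illustrative sketch only -- not part of this patch. */
#include <linux/blk-mq.h>
#include <linux/atomic.h>

struct my_dev {
	atomic_t inflight;	/* commands currently on the hardware */
};

/* Hypothetical low-level helpers, assumed to be defined elsewhere. */
bool my_dev_get_free_slot(struct my_dev *dev, struct request *rq);
int my_dev_issue(struct my_dev *dev, struct request *rq);

static blk_status_t my_queue_rq(struct blk_mq_hw_ctx *hctx,
				const struct blk_mq_queue_data *bd)
{
	struct my_dev *dev = hctx->queue->queuedata;
	struct request *rq = bd->rq;

	blk_mq_start_request(rq);

	if (!my_dev_get_free_slot(dev, rq)) {
		/*
		 * Out of device slots.  If commands are still in flight,
		 * their completion is guaranteed to restart this queue,
		 * so BLK_STS_DEV_RESOURCE is safe.
		 */
		if (atomic_read(&dev->inflight))
			return BLK_STS_DEV_RESOURCE;
		/*
		 * Nothing in flight, so nothing will rerun the queue for
		 * us; return BLK_STS_RESOURCE and let the block layer
		 * apply its delayed rerun.
		 */
		return BLK_STS_RESOURCE;
	}

	atomic_inc(&dev->inflight);
	return my_dev_issue(dev, rq) ? BLK_STS_IOERR : BLK_STS_OK;
}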
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 6655893a3a7a..287a09611c0f 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -1230,7 +1230,7 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
 			return BLK_STS_OK;
 		} else
 			/* requeue request */
-			return BLK_STS_RESOURCE;
+			return BLK_STS_DEV_RESOURCE;
 	}
 }

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 68846897d213..79908e6ddbf2 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -276,7 +276,7 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx,
 		/* Out of mem doesn't actually happen, since we fall back
 		 * to direct descriptors */
 		if (err == -ENOMEM || err == -ENOSPC)
-			return BLK_STS_RESOURCE;
+			return BLK_STS_DEV_RESOURCE;
 		return BLK_STS_IOERR;
 	}

diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 891265acb10e..e126e4cac2ca 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -911,7 +911,7 @@ out_err:
 out_busy:
 	blk_mq_stop_hw_queue(hctx);
 	spin_unlock_irqrestore(&rinfo->ring_lock, flags);
-	return BLK_STS_RESOURCE;
+	return BLK_STS_DEV_RESOURCE;
 }

 static void blkif_complete_rq(struct request *rq)

diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b7d175e94a02..348a0cb6963a 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -404,7 +404,7 @@ static blk_status_t dm_dispatch_clone_request(struct request *clone, struct requ
 	clone->start_time = jiffies;
 	r = blk_insert_cloned_request(clone->q, clone);
-	if (r != BLK_STS_OK && r != BLK_STS_RESOURCE)
+	if (r != BLK_STS_OK && r != BLK_STS_RESOURCE && r != BLK_STS_DEV_RESOURCE)
 		/* must complete clone in terms of original request */
 		dm_complete_request(rq, r);
 	return r;
@@ -496,7 +496,7 @@ check_again:
 	trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
 			     blk_rq_pos(rq));
 	ret = dm_dispatch_clone_request(clone, rq);
-	if (ret == BLK_STS_RESOURCE) {
+	if (ret == BLK_STS_RESOURCE || ret == BLK_STS_DEV_RESOURCE) {
 		blk_rq_unprep_clone(clone);
 		tio->ti->type->release_clone_rq(clone);
 		tio->clone = NULL;
@@ -769,7 +769,6 @@ static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
 		/* Undo dm_start_request() before requeuing */
 		rq_end_stats(md, rq);
 		rq_completed(md, rq_data_dir(rq), false);
-		blk_mq_delay_run_hw_queue(hctx, 100/*ms*/);
 		return BLK_STS_RESOURCE;
 	}

diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 99bf51c7e513..b856d7c919d2 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -35,8 +35,6 @@ enum nvme_fc_queue_flags {
 	NVME_FC_Q_LIVE,
 };

-#define NVMEFC_QUEUE_DELAY	3	/* ms units */
-
 #define NVME_FC_DEFAULT_DEV_LOSS_TMO	60	/* seconds */

 struct nvme_fc_queue {
@@ -2231,7 +2229,7 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 	 * the target device is present
 	 */
 	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
-		goto busy;
+		return BLK_STS_RESOURCE;

 	if (!nvme_fc_ctrl_get(ctrl))
 		return BLK_STS_IOERR;
@@ -2311,16 +2309,10 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
 		    ret != -EBUSY)
 			return BLK_STS_IOERR;

-		goto busy;
+		return BLK_STS_RESOURCE;
 	}

 	return BLK_STS_OK;
-
-busy:
-	if (!(op->flags & FCOP_FLAGS_AEN) && queue->hctx)
-		blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY);
-
-	return BLK_STS_RESOURCE;
 }

 static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue,

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index d9ca1dfab154..55be2550c555 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2030,9 +2030,9 @@ out_put_budget:
 	case BLK_STS_OK:
 		break;
 	case BLK_STS_RESOURCE:
-		if (atomic_read(&sdev->device_busy) == 0 &&
-		    !scsi_device_blocked(sdev))
-			blk_mq_delay_run_hw_queue(hctx, SCSI_QUEUE_DELAY);
+		if (atomic_read(&sdev->device_busy) ||
+		    scsi_device_blocked(sdev))
+			ret = BLK_STS_DEV_RESOURCE;
 		break;
 	default:
 		/*
--
cgit v1.2.3-70-g09d2
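The per-driver blk_mq_delay_run_hw_queue() calls removed above are
replaced by a single fallback in the block layer, which is not visible
here because the diff is limited to drivers/.  The sketch below is a
simplified illustration of the invariant stated in the changelog, not
the actual block/blk-mq.c hunk; it assumes the block-layer-internal
blk_mq_sched_needs_restart() helper, and BLK_MQ_DELAY_QUEUE stands for
the 3 ms delay mentioned in the changelog.

/*
 * Simplified sketch, NOT the real block/blk-mq.c change: it only
 * illustrates that a queue with SCHED_RESTART set is always rerun,
 * either by in-flight IO completion or by a delayed run.
 */
#include <linux/blk-mq.h>
#include "blk-mq-sched.h"	/* blk_mq_sched_needs_restart() */

#define BLK_MQ_DELAY_QUEUE	3	/* ms, per the changelog */

static void sketch_handle_busy_dispatch(struct blk_mq_hw_ctx *hctx,
					blk_status_t ret)
{
	if (ret != BLK_STS_RESOURCE && ret != BLK_STS_DEV_RESOURCE)
		return;

	if (!blk_mq_sched_needs_restart(hctx)) {
		/* SCHED_RESTART already cleared: run the queue now */
		blk_mq_run_hw_queue(hctx, true);
	} else if (ret == BLK_STS_RESOURCE) {
		/*
		 * SCHED_RESTART is set but the driver gave no restart
		 * guarantee: rerun after a short delay to avoid stalls.
		 */
		blk_mq_delay_run_hw_queue(hctx, BLK_MQ_DELAY_QUEUE);
	}
	/*
	 * BLK_STS_DEV_RESOURCE with SCHED_RESTART set: completion of
	 * in-flight IO reruns the queue via blk_mq_sched_restart().
	 */
}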
From 1d51877578799bfe0fcfe189d8233c9fccf05931 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann
Date: Fri, 2 Feb 2018 16:03:04 +0100
Subject: block: skd: fix incorrect linux/slab_def.h inclusion

skd includes slab_def.h to get access to the slab cache object size.
However, including this header breaks when we use SLUB or SLOB instead
of the SLAB allocator, since the structure layout is completely
different, as shown by these warnings when the driver is built in one
of the invalid configurations with link-time optimization enabled:

include/linux/slab.h:715:0: error: type of 'kmem_cache_size' does not match original declaration [-Werror=lto-type-mismatch]
 unsigned int kmem_cache_size(struct kmem_cache *s);
mm/slab_common.c:77:14: note: 'kmem_cache_size' was previously declared here
 unsigned int kmem_cache_size(struct kmem_cache *s)
              ^
mm/slab_common.c:77:14: note: code may be misoptimized unless -fno-strict-aliasing is used
include/linux/slab.h:147:0: error: type of 'kmem_cache_destroy' does not match original declaration [-Werror=lto-type-mismatch]
 void kmem_cache_destroy(struct kmem_cache *);
mm/slab_common.c:858:6: note: 'kmem_cache_destroy' was previously declared here
 void kmem_cache_destroy(struct kmem_cache *s)
      ^
mm/slab_common.c:858:6: note: code may be misoptimized unless -fno-strict-aliasing is used
include/linux/slab.h:140:0: error: type of 'kmem_cache_create' does not match original declaration [-Werror=lto-type-mismatch]
 struct kmem_cache *kmem_cache_create(const char *name, size_t size,
mm/slab_common.c:534:1: note: 'kmem_cache_create' was previously declared here
 kmem_cache_create(const char *name, size_t size, size_t align,
 ^

This removes the header inclusion and instead uses the kmem_cache_size()
interface to get the size in a reliable way.

Signed-off-by: Arnd Bergmann
Signed-off-by: Jens Axboe
---
 drivers/block/skd_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'drivers')

diff --git a/drivers/block/skd_main.c b/drivers/block/skd_main.c
index de0d08133c7e..e41935ab41ef 100644
--- a/drivers/block/skd_main.c
+++ b/drivers/block/skd_main.c
@@ -32,7 +32,6 @@
 #include
 #include
 #include
-#include <linux/slab_def.h>
 #include
 #include
 #include
@@ -2603,7 +2602,8 @@ static void *skd_alloc_dma(struct skd_device *skdev, struct kmem_cache *s,
 	buf = kmem_cache_alloc(s, gfp);
 	if (!buf)
 		return NULL;
-	*dma_handle = dma_map_single(dev, buf, s->size, dir);
+	*dma_handle = dma_map_single(dev, buf,
+				     kmem_cache_size(s), dir);
 	if (dma_mapping_error(dev, *dma_handle)) {
 		kmem_cache_free(s, buf);
 		buf = NULL;
@@ -2618,7 +2618,8 @@ static void skd_free_dma(struct skd_device *skdev, struct kmem_cache *s,
 	if (!vaddr)
 		return;

-	dma_unmap_single(&skdev->pdev->dev, dma_handle, s->size, dir);
+	dma_unmap_single(&skdev->pdev->dev, dma_handle,
+			 kmem_cache_size(s), dir);
 	kmem_cache_free(s, vaddr);
 }
--
cgit v1.2.3-70-g09d2
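For reference, the allocator-agnostic pattern the fix switches to can
be shown as a small, self-contained sketch: ask the slab allocator for
the object size via kmem_cache_size() instead of peeking at struct
kmem_cache internals.  The example_* names below are made up; only the
kmem_cache_size()/dma_map_single() pairing mirrors the patch.

/* Illustrative sketch of the pattern used by the fix above. */
#include <linux/slab.h>
#include <linux/dma-mapping.h>

static void *example_alloc_dma(struct device *dev, struct kmem_cache *s,
			       dma_addr_t *dma_handle, gfp_t gfp,
			       enum dma_data_direction dir)
{
	void *buf = kmem_cache_alloc(s, gfp);

	if (!buf)
		return NULL;

	/* kmem_cache_size() works with SLAB, SLUB and SLOB alike. */
	*dma_handle = dma_map_single(dev, buf, kmem_cache_size(s), dir);
	if (dma_mapping_error(dev, *dma_handle)) {
		kmem_cache_free(s, buf);
		return NULL;
	}
	return buf;
}

static void example_free_dma(struct device *dev, struct kmem_cache *s,
			     void *vaddr, dma_addr_t dma_handle,
			     enum dma_data_direction dir)
{
	if (!vaddr)
		return;

	dma_unmap_single(dev, dma_handle, kmem_cache_size(s), dir);
	kmem_cache_free(s, vaddr);
}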