From aeec77629a4ac6f8c248f3a82e80d4170a881f22 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 14 Oct 2017 17:22:31 +0800 Subject: scsi: allow passing in null rq to scsi_prep_state_check() In the following patch, we will implement scsi_get_budget() which need to call scsi_prep_state_check() when rq isn't dequeued yet. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9cf6a80fe297..d159bb085714 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1301,7 +1301,7 @@ scsi_prep_state_check(struct scsi_device *sdev, struct request *req) /* * If the devices is blocked we defer normal commands. */ - if (!(req->rq_flags & RQF_PREEMPT)) + if (req && !(req->rq_flags & RQF_PREEMPT)) ret = BLKPREP_DEFER; break; default: @@ -1310,7 +1310,7 @@ scsi_prep_state_check(struct scsi_device *sdev, struct request *req) * special commands. In particular any user initiated * command is not allowed. */ - if (!(req->rq_flags & RQF_PREEMPT)) + if (req && !(req->rq_flags & RQF_PREEMPT)) ret = BLKPREP_KILL; break; } -- cgit v1.2.3-70-g09d2 From 0df21c86bdbfd17dec9ab898312af9bfb74d5d86 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 14 Oct 2017 17:22:32 +0800 Subject: scsi: implement .get_budget and .put_budget for blk-mq We need to tell blk-mq to reserve resources before queuing one request, so implement these two callbacks. Then blk-mq can avoid to dequeue request too early, and IO merging can be improved a lot. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 75 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 52 insertions(+), 23 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index d159bb085714..6f10afaca25b 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1946,25 +1946,32 @@ static void scsi_mq_done(struct scsi_cmnd *cmd) blk_mq_complete_request(cmd->request); } -static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, - const struct blk_mq_queue_data *bd) +static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx) { - struct request *req = bd->rq; - struct request_queue *q = req->q; + struct request_queue *q = hctx->queue; + struct scsi_device *sdev = q->queuedata; + struct Scsi_Host *shost = sdev->host; + + atomic_dec(&shost->host_busy); + if (scsi_target(sdev)->can_queue > 0) + atomic_dec(&scsi_target(sdev)->target_busy); + atomic_dec(&sdev->device_busy); + put_device(&sdev->sdev_gendev); +} + +static blk_status_t scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx) +{ + struct request_queue *q = hctx->queue; struct scsi_device *sdev = q->queuedata; struct Scsi_Host *shost = sdev->host; - struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); blk_status_t ret; - int reason; - ret = prep_to_mq(scsi_prep_state_check(sdev, req)); - if (ret != BLK_STS_OK) - goto out; + ret = prep_to_mq(scsi_prep_state_check(sdev, NULL)); + if (ret == BLK_STS_RESOURCE || ret != BLK_STS_OK) + return ret; - ret = BLK_STS_RESOURCE; if (!get_device(&sdev->sdev_gendev)) goto out; - if (!scsi_dev_queue_ready(q, sdev)) goto out_put_device; if (!scsi_target_queue_ready(shost, sdev)) @@ -1972,10 +1979,38 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (!scsi_host_queue_ready(q, shost, sdev)) goto out_dec_target_busy; + return BLK_STS_OK; + +out_dec_target_busy: + if (scsi_target(sdev)->can_queue > 0) + atomic_dec(&scsi_target(sdev)->target_busy); +out_dec_device_busy: + atomic_dec(&sdev->device_busy); +out_put_device: + put_device(&sdev->sdev_gendev); +out: + return BLK_STS_RESOURCE; +} + +static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, + const struct blk_mq_queue_data *bd) +{ + struct request *req = bd->rq; + struct request_queue *q = req->q; + struct scsi_device *sdev = q->queuedata; + struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); + blk_status_t ret; + int reason; + + ret = prep_to_mq(scsi_prep_state_check(sdev, req)); + if (ret != BLK_STS_OK) + goto out_put_budget; + + ret = BLK_STS_RESOURCE; if (!(req->rq_flags & RQF_DONTPREP)) { ret = prep_to_mq(scsi_mq_prep_fn(req)); if (ret != BLK_STS_OK) - goto out_dec_host_busy; + goto out_put_budget; req->rq_flags |= RQF_DONTPREP; } else { blk_mq_start_request(req); @@ -1993,21 +2028,13 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (reason) { scsi_set_blocked(cmd, reason); ret = BLK_STS_RESOURCE; - goto out_dec_host_busy; + goto out_put_budget; } return BLK_STS_OK; -out_dec_host_busy: - atomic_dec(&shost->host_busy); -out_dec_target_busy: - if (scsi_target(sdev)->can_queue > 0) - atomic_dec(&scsi_target(sdev)->target_busy); -out_dec_device_busy: - atomic_dec(&sdev->device_busy); -out_put_device: - put_device(&sdev->sdev_gendev); -out: +out_put_budget: + scsi_mq_put_budget(hctx); switch (ret) { case BLK_STS_OK: break; @@ -2211,6 +2238,8 @@ struct request_queue *scsi_old_alloc_queue(struct scsi_device *sdev) } static const struct blk_mq_ops scsi_mq_ops = { + .get_budget = scsi_mq_get_budget, + .put_budget = scsi_mq_put_budget, .queue_rq = scsi_queue_rq, .complete = scsi_softirq_done, .timeout = scsi_timeout, -- cgit v1.2.3-70-g09d2 From 2a750166a5bee7fda186c12269e37ba52b26c017 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 30 Oct 2017 09:02:19 -0700 Subject: block: Rework drivers/cdrom/Makefile Instead of referring from inside drivers/cdrom/Makefile to all the drivers that use this driver, let these drivers select the cdrom driver. This change makes the cdrom build code follow the approach that is used for most other drivers, namely refer from the higher layers to the lower layer instead of from the lower layer to the higher layers. Reviewed-by: Hannes Reinecke Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/block/Kconfig | 12 ++++++++++++ drivers/block/paride/Kconfig | 1 + drivers/cdrom/Makefile | 15 ++------------- drivers/ide/Kconfig | 1 + drivers/scsi/Kconfig | 1 + 5 files changed, 17 insertions(+), 13 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index 4a438b8abe27..b998b3c9ea86 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -67,9 +67,20 @@ config AMIGA_Z2RAM To compile this driver as a module, choose M here: the module will be called z2ram. +config CDROM + tristate "CD-ROM driver" + help + A CD-ROM is a pre-pressed optical compact disc which contains + data. The name is an acronym which stands for "Compact Disc + Read-Only Memory". This driver implements functionality that is + shared by all CD-ROM drivers, e.g. the IDE CD-ROM driver and the + SCSI CD-ROM driver. For a list of the ioctls implemented by this + driver, see also Documentation/ioctl/cdrom.txt. + config GDROM tristate "SEGA Dreamcast GD-ROM drive" depends on SH_DREAMCAST + select CDROM select BLK_SCSI_REQUEST # only for the generic cdrom code help A standard SEGA Dreamcast comes with a modified CD ROM drive called a @@ -347,6 +358,7 @@ config BLK_DEV_RAM_DAX config CDROM_PKTCDVD tristate "Packet writing on CD/DVD media (DEPRECATED)" depends on !UML + select CDROM select BLK_SCSI_REQUEST help Note: This driver is deprecated and will be removed from the diff --git a/drivers/block/paride/Kconfig b/drivers/block/paride/Kconfig index 3a15247942e4..1d5057f5080b 100644 --- a/drivers/block/paride/Kconfig +++ b/drivers/block/paride/Kconfig @@ -25,6 +25,7 @@ config PARIDE_PD config PARIDE_PCD tristate "Parallel port ATAPI CD-ROMs" depends on PARIDE + select CDROM select BLK_SCSI_REQUEST # only for the generic cdrom code ---help--- This option enables the high-level driver for ATAPI CD-ROM devices diff --git a/drivers/cdrom/Makefile b/drivers/cdrom/Makefile index 8ffde4f8ab9a..7f3f43cc2257 100644 --- a/drivers/cdrom/Makefile +++ b/drivers/cdrom/Makefile @@ -1,13 +1,2 @@ -# Makefile for the kernel cdrom device drivers. -# -# 30 Jan 1998, Michael Elizabeth Chastain, -# Rewritten to use lists instead of if-statements. - -# Each configuration option enables a list of files. - -obj-$(CONFIG_BLK_DEV_IDECD) += cdrom.o -obj-$(CONFIG_BLK_DEV_SR) += cdrom.o -obj-$(CONFIG_PARIDE_PCD) += cdrom.o -obj-$(CONFIG_CDROM_PKTCDVD) += cdrom.o - -obj-$(CONFIG_GDROM) += gdrom.o cdrom.o +obj-$(CONFIG_CDROM) += cdrom.o +obj-$(CONFIG_GDROM) += gdrom.o diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index c99a25c075bc..7b92c591e4c1 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -118,6 +118,7 @@ config BLK_DEV_DELKIN config BLK_DEV_IDECD tristate "Include IDE/ATAPI CDROM support" select IDE_ATAPI + select CDROM ---help--- If you have a CD-ROM drive using the ATAPI protocol, say Y. ATAPI is a newer protocol used by IDE CD-ROM and TAPE drives, similar to the diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index 41366339b950..cca0b03c2ead 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -131,6 +131,7 @@ config CHR_DEV_OSST config BLK_DEV_SR tristate "SCSI CDROM support" depends on SCSI + select CDROM ---help--- If you want to use a CD or DVD drive attached to your computer by SCSI, FireWire, USB or ATAPI, say Y and read the SCSI-HOWTO -- cgit v1.2.3-70-g09d2 From c091fbe9a26ee116d99e2c0ed010afb957a10365 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 2 Nov 2017 12:19:32 +0100 Subject: block: fix CDROM dependency on BLK_DEV After the cdrom cleanup, I get randconfig warnings for some configurations: warning: (BLK_DEV_IDECD && BLK_DEV_SR) selects CDROM which has unmet direct dependencies (BLK_DEV) This adds an explicit BLK_DEV dependency for both drivers. The other drivers that select 'CDROM' already have this and don't need a change. Fixes: 2a750166a5be ("block: Rework drivers/cdrom/Makefile") Signed-off-by: Arnd Bergmann Signed-off-by: Jens Axboe --- drivers/ide/Kconfig | 1 + drivers/scsi/Kconfig | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/scsi') diff --git a/drivers/ide/Kconfig b/drivers/ide/Kconfig index 7b92c591e4c1..cf1fb3fb5d26 100644 --- a/drivers/ide/Kconfig +++ b/drivers/ide/Kconfig @@ -117,6 +117,7 @@ config BLK_DEV_DELKIN config BLK_DEV_IDECD tristate "Include IDE/ATAPI CDROM support" + depends on BLK_DEV select IDE_ATAPI select CDROM ---help--- diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig index cca0b03c2ead..766955318005 100644 --- a/drivers/scsi/Kconfig +++ b/drivers/scsi/Kconfig @@ -130,7 +130,7 @@ config CHR_DEV_OSST config BLK_DEV_SR tristate "SCSI CDROM support" - depends on SCSI + depends on SCSI && BLK_DEV select CDROM ---help--- If you want to use a CD or DVD drive attached to your computer -- cgit v1.2.3-70-g09d2 From 826a70a08b1210bbfdbda812ab43eb986e25b5c2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 4 Nov 2017 09:55:34 +0800 Subject: SCSI: don't get target/host busy_count in scsi_mq_get_budget() It is very expensive to atomic_inc/atomic_dec the host wide counter of host->busy_count, and it should have been avoided via blk-mq's mechanism of getting driver tag, which uses the more efficient way of sbitmap queue. Also we don't check atomic_read(&sdev->device_busy) in scsi_mq_get_budget() and don't run queue if the counter becomes zero, so IO hang may be caused if all requests are completed just before the current SCSI device is added to shost->starved_list. Fixes: 0df21c86bdbf(scsi: implement .get_budget and .put_budget for blk-mq) Reported-by: Bart Van Assche Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- drivers/scsi/scsi_lib.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 6f10afaca25b..22a7e4c47207 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1950,11 +1950,7 @@ static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct scsi_device *sdev = q->queuedata; - struct Scsi_Host *shost = sdev->host; - atomic_dec(&shost->host_busy); - if (scsi_target(sdev)->can_queue > 0) - atomic_dec(&scsi_target(sdev)->target_busy); atomic_dec(&sdev->device_busy); put_device(&sdev->sdev_gendev); } @@ -1963,7 +1959,6 @@ static blk_status_t scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct scsi_device *sdev = q->queuedata; - struct Scsi_Host *shost = sdev->host; blk_status_t ret; ret = prep_to_mq(scsi_prep_state_check(sdev, NULL)); @@ -1974,18 +1969,9 @@ static blk_status_t scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx) goto out; if (!scsi_dev_queue_ready(q, sdev)) goto out_put_device; - if (!scsi_target_queue_ready(shost, sdev)) - goto out_dec_device_busy; - if (!scsi_host_queue_ready(q, shost, sdev)) - goto out_dec_target_busy; return BLK_STS_OK; -out_dec_target_busy: - if (scsi_target(sdev)->can_queue > 0) - atomic_dec(&scsi_target(sdev)->target_busy); -out_dec_device_busy: - atomic_dec(&sdev->device_busy); out_put_device: put_device(&sdev->sdev_gendev); out: @@ -1998,6 +1984,7 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req = bd->rq; struct request_queue *q = req->q; struct scsi_device *sdev = q->queuedata; + struct Scsi_Host *shost = sdev->host; struct scsi_cmnd *cmd = blk_mq_rq_to_pdu(req); blk_status_t ret; int reason; @@ -2007,10 +1994,15 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, goto out_put_budget; ret = BLK_STS_RESOURCE; + if (!scsi_target_queue_ready(shost, sdev)) + goto out_put_budget; + if (!scsi_host_queue_ready(q, shost, sdev)) + goto out_dec_target_busy; + if (!(req->rq_flags & RQF_DONTPREP)) { ret = prep_to_mq(scsi_mq_prep_fn(req)); if (ret != BLK_STS_OK) - goto out_put_budget; + goto out_dec_host_busy; req->rq_flags |= RQF_DONTPREP; } else { blk_mq_start_request(req); @@ -2028,11 +2020,16 @@ static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, if (reason) { scsi_set_blocked(cmd, reason); ret = BLK_STS_RESOURCE; - goto out_put_budget; + goto out_dec_host_busy; } return BLK_STS_OK; +out_dec_host_busy: + atomic_dec(&shost->host_busy); +out_dec_target_busy: + if (scsi_target(sdev)->can_queue > 0) + atomic_dec(&scsi_target(sdev)->target_busy); out_put_budget: scsi_mq_put_budget(hctx); switch (ret) { -- cgit v1.2.3-70-g09d2 From 88022d7201e96b43f1754b0358fc6bcd8dbdcde1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 5 Nov 2017 02:21:12 +0800 Subject: blk-mq: don't handle failure in .get_budget It is enough to just check if we can get the budget via .get_budget(). And we don't need to deal with device state change in .get_budget(). For SCSI, one issue to be fixed is that we have to call scsi_mq_uninit_cmd() to free allocated ressources if SCSI device fails to handle the request. And it isn't enough to simply call blk_mq_end_request() to do that if this request is marked as RQF_DONTPREP. Fixes: 0df21c86bdbf(scsi: implement .get_budget and .put_budget for blk-mq) Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- block/blk-mq-sched.c | 14 ++------------ block/blk-mq.c | 17 ++++------------- block/blk-mq.h | 5 ++--- drivers/scsi/scsi_lib.c | 11 +++-------- include/linux/blk-mq.h | 2 +- 5 files changed, 12 insertions(+), 37 deletions(-) (limited to 'drivers/scsi') diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 7775f6b12fa9..13a27d4d1671 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -94,23 +94,18 @@ static void blk_mq_do_dispatch_sched(struct blk_mq_hw_ctx *hctx) do { struct request *rq; - blk_status_t ret; if (e->type->ops.mq.has_work && !e->type->ops.mq.has_work(hctx)) break; - ret = blk_mq_get_dispatch_budget(hctx); - if (ret == BLK_STS_RESOURCE) + if (!blk_mq_get_dispatch_budget(hctx)) break; rq = e->type->ops.mq.dispatch_request(hctx); if (!rq) { blk_mq_put_dispatch_budget(hctx); break; - } else if (ret != BLK_STS_OK) { - blk_mq_end_request(rq, ret); - continue; } /* @@ -146,22 +141,17 @@ static void blk_mq_do_dispatch_ctx(struct blk_mq_hw_ctx *hctx) do { struct request *rq; - blk_status_t ret; if (!sbitmap_any_bit_set(&hctx->ctx_map)) break; - ret = blk_mq_get_dispatch_budget(hctx); - if (ret == BLK_STS_RESOURCE) + if (!blk_mq_get_dispatch_budget(hctx)) break; rq = blk_mq_dequeue_from_ctx(hctx, ctx); if (!rq) { blk_mq_put_dispatch_budget(hctx); break; - } else if (ret != BLK_STS_OK) { - blk_mq_end_request(rq, ret); - continue; } /* diff --git a/block/blk-mq.c b/block/blk-mq.c index 13cdccef543c..c9fa4b294664 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1137,13 +1137,8 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, } } - if (!got_budget) { - ret = blk_mq_get_dispatch_budget(hctx); - if (ret == BLK_STS_RESOURCE) - break; - if (ret != BLK_STS_OK) - goto fail_rq; - } + if (!got_budget && !blk_mq_get_dispatch_budget(hctx)) + break; list_del_init(&rq->queuelist); @@ -1170,7 +1165,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, break; } - fail_rq: if (unlikely(ret != BLK_STS_OK)) { errors++; blk_mq_end_request(rq, BLK_STS_IOERR); @@ -1642,12 +1636,10 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, if (!blk_mq_get_driver_tag(rq, NULL, false)) goto insert; - ret = blk_mq_get_dispatch_budget(hctx); - if (ret == BLK_STS_RESOURCE) { + if (!blk_mq_get_dispatch_budget(hctx)) { blk_mq_put_driver_tag(rq); goto insert; - } else if (ret != BLK_STS_OK) - goto fail_rq; + } new_cookie = request_to_qc_t(hctx, rq); @@ -1665,7 +1657,6 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx, __blk_mq_requeue_request(rq); goto insert; default: - fail_rq: *cookie = BLK_QC_T_NONE; blk_mq_end_request(rq, ret); return; diff --git a/block/blk-mq.h b/block/blk-mq.h index 522b420dedc0..f97aceff76e9 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -147,14 +147,13 @@ static inline void blk_mq_put_dispatch_budget(struct blk_mq_hw_ctx *hctx) q->mq_ops->put_budget(hctx); } -static inline blk_status_t blk_mq_get_dispatch_budget( - struct blk_mq_hw_ctx *hctx) +static inline bool blk_mq_get_dispatch_budget(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; if (q->mq_ops->get_budget) return q->mq_ops->get_budget(hctx); - return BLK_STS_OK; + return true; } #endif diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 22a7e4c47207..286ea983c9e3 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1955,27 +1955,22 @@ static void scsi_mq_put_budget(struct blk_mq_hw_ctx *hctx) put_device(&sdev->sdev_gendev); } -static blk_status_t scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx) +static bool scsi_mq_get_budget(struct blk_mq_hw_ctx *hctx) { struct request_queue *q = hctx->queue; struct scsi_device *sdev = q->queuedata; - blk_status_t ret; - - ret = prep_to_mq(scsi_prep_state_check(sdev, NULL)); - if (ret == BLK_STS_RESOURCE || ret != BLK_STS_OK) - return ret; if (!get_device(&sdev->sdev_gendev)) goto out; if (!scsi_dev_queue_ready(q, sdev)) goto out_put_device; - return BLK_STS_OK; + return true; out_put_device: put_device(&sdev->sdev_gendev); out: - return BLK_STS_RESOURCE; + return false; } static blk_status_t scsi_queue_rq(struct blk_mq_hw_ctx *hctx, diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f2e3079eecdd..674641527da7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -92,7 +92,7 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); -typedef blk_status_t (get_budget_fn)(struct blk_mq_hw_ctx *); +typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); -- cgit v1.2.3-70-g09d2 From 6ddcf0a30adc5080504ca66f474101e7ad247dd7 Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 3 Nov 2017 09:33:30 -0700 Subject: lpfc: tie in to new dev_loss_tmo interface in nvme transport This patch calls the new nvme transport routine for dev_loss_tmo whenever the SCSI fc transport calls the lldd to make a dynamic change to a remote ports dev_loss_tmo. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/scsi/lpfc/lpfc_attr.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/scsi') diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index c17677f494af..3e02bc3a7c3f 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3246,6 +3246,11 @@ lpfc_update_rport_devloss_tmo(struct lpfc_vport *vport) continue; if (ndlp->rport) ndlp->rport->dev_loss_tmo = vport->cfg_devloss_tmo; +#if (IS_ENABLED(CONFIG_NVME_FC)) + if (ndlp->nrport) + nvme_fc_set_remoteport_devloss(ndlp->nrport->remoteport, + vport->cfg_devloss_tmo); +#endif } spin_unlock_irq(shost->host_lock); } -- cgit v1.2.3-70-g09d2 From f00c4d80ffdac9e3a64947ebd57489f3232d5a74 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Nov 2017 10:36:31 +0300 Subject: block: pass full fmode_t to blk_verify_command Use the obvious calling convention. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/bsg.c | 18 ++++++++---------- block/scsi_ioctl.c | 8 ++++---- drivers/scsi/sg.c | 2 +- include/linux/blkdev.h | 2 +- 4 files changed, 14 insertions(+), 16 deletions(-) (limited to 'drivers/scsi') diff --git a/block/bsg.c b/block/bsg.c index ee1335c68de7..452f94f1c5d4 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -137,7 +137,7 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index) static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, struct sg_io_v4 *hdr, struct bsg_device *bd, - fmode_t has_write_perm) + fmode_t mode) { struct scsi_request *req = scsi_req(rq); @@ -152,7 +152,7 @@ static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq, return -EFAULT; if (hdr->subprotocol == BSG_SUB_PROTOCOL_SCSI_CMD) { - if (blk_verify_command(req->cmd, has_write_perm)) + if (blk_verify_command(req->cmd, mode)) return -EPERM; } else if (!capable(CAP_SYS_RAWIO)) return -EPERM; @@ -206,7 +206,7 @@ bsg_validate_sgv4_hdr(struct sg_io_v4 *hdr, int *op) * map sg_io_v4 to a request. */ static struct request * -bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm) +bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t mode) { struct request_queue *q = bd->queue; struct request *rq, *next_rq = NULL; @@ -237,7 +237,7 @@ bsg_map_hdr(struct bsg_device *bd, struct sg_io_v4 *hdr, fmode_t has_write_perm) if (IS_ERR(rq)) return rq; - ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, has_write_perm); + ret = blk_fill_sgv4_hdr_rq(q, rq, hdr, bd, mode); if (ret) goto out; @@ -587,8 +587,7 @@ bsg_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) } static int __bsg_write(struct bsg_device *bd, const char __user *buf, - size_t count, ssize_t *bytes_written, - fmode_t has_write_perm) + size_t count, ssize_t *bytes_written, fmode_t mode) { struct bsg_command *bc; struct request *rq; @@ -619,7 +618,7 @@ static int __bsg_write(struct bsg_device *bd, const char __user *buf, /* * get a request, fill in the blanks, and add to request queue */ - rq = bsg_map_hdr(bd, &bc->hdr, has_write_perm); + rq = bsg_map_hdr(bd, &bc->hdr, mode); if (IS_ERR(rq)) { ret = PTR_ERR(rq); rq = NULL; @@ -655,8 +654,7 @@ bsg_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) bsg_set_block(bd, file); bytes_written = 0; - ret = __bsg_write(bd, buf, count, &bytes_written, - file->f_mode & FMODE_WRITE); + ret = __bsg_write(bd, buf, count, &bytes_written, file->f_mode); *ppos = bytes_written; @@ -915,7 +913,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) if (copy_from_user(&hdr, uarg, sizeof(hdr))) return -EFAULT; - rq = bsg_map_hdr(bd, &hdr, file->f_mode & FMODE_WRITE); + rq = bsg_map_hdr(bd, &hdr, file->f_mode); if (IS_ERR(rq)) return PTR_ERR(rq); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 7440de44dd85..edcfff974527 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -207,7 +207,7 @@ static void blk_set_cmd_filter_defaults(struct blk_cmd_filter *filter) __set_bit(GPCMD_SET_READ_AHEAD, filter->write_ok); } -int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm) +int blk_verify_command(unsigned char *cmd, fmode_t mode) { struct blk_cmd_filter *filter = &blk_default_cmd_filter; @@ -220,7 +220,7 @@ int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm) return 0; /* Write-safe commands require a writable open */ - if (test_bit(cmd[0], filter->write_ok) && has_write_perm) + if (test_bit(cmd[0], filter->write_ok) && (mode & FMODE_WRITE)) return 0; return -EPERM; @@ -234,7 +234,7 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, if (copy_from_user(req->cmd, hdr->cmdp, hdr->cmd_len)) return -EFAULT; - if (blk_verify_command(req->cmd, mode & FMODE_WRITE)) + if (blk_verify_command(req->cmd, mode)) return -EPERM; /* @@ -469,7 +469,7 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, if (in_len && copy_from_user(buffer, sic->data + cmdlen, in_len)) goto error; - err = blk_verify_command(req->cmd, mode & FMODE_WRITE); + err = blk_verify_command(req->cmd, mode); if (err) goto error; diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c index 0419c2298eab..92fd870e1315 100644 --- a/drivers/scsi/sg.c +++ b/drivers/scsi/sg.c @@ -217,7 +217,7 @@ static int sg_allow_access(struct file *filp, unsigned char *cmd) if (sfp->parentdp->device->type == TYPE_SCANNER) return 0; - return blk_verify_command(cmd, filp->f_mode & FMODE_WRITE); + return blk_verify_command(cmd, filp->f_mode); } static int diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 225617dd0a3f..1437ef4d8037 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1360,7 +1360,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, gfp_mask, 0); } -extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); +extern int blk_verify_command(unsigned char *cmd, fmode_t mode); enum blk_default_limits { BLK_MAX_SEGMENTS = 128, -- cgit v1.2.3-70-g09d2 From 039c635f4e666b647df2100038de276a83fb3fca Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Nov 2017 10:49:56 -0800 Subject: ide, scsi: Tell the block layer at request allocation time about preempt requests Convert blk_get_request(q, op, __GFP_RECLAIM) into blk_get_request_flags(q, op, BLK_MQ_PREEMPT). This patch does not change any functionality. Signed-off-by: Bart Van Assche Tested-by: Martin Steigerwald Acked-by: David S. Miller [ for IDE ] Acked-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Tested-by: Oleksandr Natalenko Cc: Ming Lei Cc: Christoph Hellwig Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- drivers/ide/ide-pm.c | 4 ++-- drivers/scsi/scsi_lib.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/scsi') diff --git a/drivers/ide/ide-pm.c b/drivers/ide/ide-pm.c index 544f02d673ca..f56d742908df 100644 --- a/drivers/ide/ide-pm.c +++ b/drivers/ide/ide-pm.c @@ -89,9 +89,9 @@ int generic_ide_resume(struct device *dev) } memset(&rqpm, 0, sizeof(rqpm)); - rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, __GFP_RECLAIM); + rq = blk_get_request_flags(drive->queue, REQ_OP_DRV_IN, + BLK_MQ_REQ_PREEMPT); ide_req(rq)->type = ATA_PRIV_PM_RESUME; - rq->rq_flags |= RQF_PREEMPT; rq->special = &rqpm; rqpm.pm_step = IDE_PM_START_RESUME; rqpm.pm_state = PM_EVENT_ON; diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 286ea983c9e3..eb129dfc2ebe 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -252,9 +252,9 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, struct scsi_request *rq; int ret = DRIVER_ERROR << 24; - req = blk_get_request(sdev->request_queue, + req = blk_get_request_flags(sdev->request_queue, data_direction == DMA_TO_DEVICE ? - REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM); + REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT); if (IS_ERR(req)) return ret; rq = scsi_req(req); @@ -268,7 +268,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, rq->retries = retries; req->timeout = timeout; req->cmd_flags |= flags; - req->rq_flags |= rq_flags | RQF_QUIET | RQF_PREEMPT; + req->rq_flags |= rq_flags | RQF_QUIET; /* * head injection *required* here otherwise quiesce won't work -- cgit v1.2.3-70-g09d2 From 3a0a529971ec4e2d933e9c7798db101dfb6b1aec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Nov 2017 10:49:58 -0800 Subject: block, scsi: Make SCSI quiesce and resume work reliably The contexts from which a SCSI device can be quiesced or resumed are: * Writing into /sys/class/scsi_device/*/device/state. * SCSI parallel (SPI) domain validation. * The SCSI device power management methods. See also scsi_bus_pm_ops. It is essential during suspend and resume that neither the filesystem state nor the filesystem metadata in RAM changes. This is why while the hibernation image is being written or restored that SCSI devices are quiesced. The SCSI core quiesces devices through scsi_device_quiesce() and scsi_device_resume(). In the SDEV_QUIESCE state execution of non-preempt requests is deferred. This is realized by returning BLKPREP_DEFER from inside scsi_prep_state_check() for quiesced SCSI devices. Avoid that a full queue prevents power management requests to be submitted by deferring allocation of non-preempt requests for devices in the quiesced state. This patch has been tested by running the following commands and by verifying that after each resume the fio job was still running: for ((i=0; i<10; i++)); do ( cd /sys/block/md0/md && while true; do [ "$( sync_action sleep 1 done ) & pids=($!) for d in /sys/class/block/sd*[a-z]; do bdev=${d#/sys/class/block/} hcil=$(readlink "$d/device") hcil=${hcil#../../../} echo 4 > "$d/queue/nr_requests" echo 1 > "/sys/class/scsi_device/$hcil/device/queue_depth" fio --name="$bdev" --filename="/dev/$bdev" --buffered=0 --bs=512 \ --rw=randread --ioengine=libaio --numjobs=4 --iodepth=16 \ --iodepth_batch=1 --thread --loops=$((2**31)) & pids+=($!) done sleep 1 echo "$(date) Hibernating ..." >>hibernate-test-log.txt systemctl hibernate sleep 10 kill "${pids[@]}" echo idle > /sys/block/md0/md/sync_action wait echo "$(date) Done." >>hibernate-test-log.txt done Reported-by: Oleksandr Natalenko References: "I/O hangs after resuming from suspend-to-ram" (https://marc.info/?l=linux-block&m=150340235201348). Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Tested-by: Martin Steigerwald Tested-by: Oleksandr Natalenko Cc: Martin K. Petersen Cc: Ming Lei Cc: Christoph Hellwig Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- block/blk-core.c | 42 ++++++++++++++++++++++++++++++++++-------- block/blk-mq.c | 4 ++-- drivers/scsi/scsi_lib.c | 42 ++++++++++++++++++++++++++++++------------ fs/block_dev.c | 4 ++-- include/linux/blkdev.h | 2 +- include/scsi/scsi_device.h | 1 + 6 files changed, 70 insertions(+), 25 deletions(-) (limited to 'drivers/scsi') diff --git a/block/blk-core.c b/block/blk-core.c index edc276899116..29b08428ae45 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -374,6 +374,7 @@ void blk_clear_preempt_only(struct request_queue *q) spin_lock_irqsave(q->queue_lock, flags); queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q); + wake_up_all(&q->mq_freeze_wq); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL_GPL(blk_clear_preempt_only); @@ -795,15 +796,38 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask) } EXPORT_SYMBOL(blk_alloc_queue); -int blk_queue_enter(struct request_queue *q, bool nowait) +/** + * blk_queue_enter() - try to increase q->q_usage_counter + * @q: request queue pointer + * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT + */ +int blk_queue_enter(struct request_queue *q, unsigned int flags) { + const bool preempt = flags & BLK_MQ_REQ_PREEMPT; + while (true) { + bool success = false; int ret; - if (percpu_ref_tryget_live(&q->q_usage_counter)) + rcu_read_lock_sched(); + if (percpu_ref_tryget_live(&q->q_usage_counter)) { + /* + * The code that sets the PREEMPT_ONLY flag is + * responsible for ensuring that that flag is globally + * visible before the queue is unfrozen. + */ + if (preempt || !blk_queue_preempt_only(q)) { + success = true; + } else { + percpu_ref_put(&q->q_usage_counter); + } + } + rcu_read_unlock_sched(); + + if (success) return 0; - if (nowait) + if (flags & BLK_MQ_REQ_NOWAIT) return -EBUSY; /* @@ -816,7 +840,8 @@ int blk_queue_enter(struct request_queue *q, bool nowait) smp_rmb(); ret = wait_event_interruptible(q->mq_freeze_wq, - !atomic_read(&q->mq_freeze_depth) || + (atomic_read(&q->mq_freeze_depth) == 0 && + (preempt || !blk_queue_preempt_only(q))) || blk_queue_dying(q)); if (blk_queue_dying(q)) return -ENODEV; @@ -1445,8 +1470,7 @@ static struct request *blk_old_get_request(struct request_queue *q, /* create ioc upfront */ create_io_context(gfp_mask, q->node); - ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM) || - (op & REQ_NOWAIT)); + ret = blk_queue_enter(q, flags); if (ret) return ERR_PTR(ret); spin_lock_irq(q->queue_lock); @@ -2267,8 +2291,10 @@ blk_qc_t generic_make_request(struct bio *bio) current->bio_list = bio_list_on_stack; do { struct request_queue *q = bio->bi_disk->queue; + unsigned int flags = bio->bi_opf & REQ_NOWAIT ? + BLK_MQ_REQ_NOWAIT : 0; - if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) { + if (likely(blk_queue_enter(q, flags) == 0)) { struct bio_list lower, same; /* Create a fresh bio_list for all subordinate requests */ @@ -2327,7 +2353,7 @@ blk_qc_t direct_make_request(struct bio *bio) if (!generic_make_request_checks(bio)) return BLK_QC_T_NONE; - if (unlikely(blk_queue_enter(q, nowait))) { + if (unlikely(blk_queue_enter(q, nowait ? BLK_MQ_REQ_NOWAIT : 0))) { if (nowait && !blk_queue_dying(q)) bio->bi_status = BLK_STS_AGAIN; else diff --git a/block/blk-mq.c b/block/blk-mq.c index e21876778cec..211bc8a3e2cc 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -389,7 +389,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, struct request *rq; int ret; - ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT); + ret = blk_queue_enter(q, flags); if (ret) return ERR_PTR(ret); @@ -428,7 +428,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, if (hctx_idx >= q->nr_hw_queues) return ERR_PTR(-EIO); - ret = blk_queue_enter(q, true); + ret = blk_queue_enter(q, flags); if (ret) return ERR_PTR(ret); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index eb129dfc2ebe..f907e2f8c1dd 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -2947,21 +2947,37 @@ static void scsi_wait_for_queuecommand(struct scsi_device *sdev) int scsi_device_quiesce(struct scsi_device *sdev) { + struct request_queue *q = sdev->request_queue; int err; + /* + * It is allowed to call scsi_device_quiesce() multiple times from + * the same context but concurrent scsi_device_quiesce() calls are + * not allowed. + */ + WARN_ON_ONCE(sdev->quiesced_by && sdev->quiesced_by != current); + + blk_set_preempt_only(q); + + blk_mq_freeze_queue(q); + /* + * Ensure that the effect of blk_set_preempt_only() will be visible + * for percpu_ref_tryget() callers that occur after the queue + * unfreeze even if the queue was already frozen before this function + * was called. See also https://lwn.net/Articles/573497/. + */ + synchronize_rcu(); + blk_mq_unfreeze_queue(q); + mutex_lock(&sdev->state_mutex); err = scsi_device_set_state(sdev, SDEV_QUIESCE); + if (err == 0) + sdev->quiesced_by = current; + else + blk_clear_preempt_only(q); mutex_unlock(&sdev->state_mutex); - if (err) - return err; - - scsi_run_queue(sdev->request_queue); - while (atomic_read(&sdev->device_busy)) { - msleep_interruptible(200); - scsi_run_queue(sdev->request_queue); - } - return 0; + return err; } EXPORT_SYMBOL(scsi_device_quiesce); @@ -2981,9 +2997,11 @@ void scsi_device_resume(struct scsi_device *sdev) * device deleted during suspend) */ mutex_lock(&sdev->state_mutex); - if (sdev->sdev_state == SDEV_QUIESCE && - scsi_device_set_state(sdev, SDEV_RUNNING) == 0) - scsi_run_queue(sdev->request_queue); + WARN_ON_ONCE(!sdev->quiesced_by); + sdev->quiesced_by = NULL; + blk_clear_preempt_only(sdev->request_queue); + if (sdev->sdev_state == SDEV_QUIESCE) + scsi_device_set_state(sdev, SDEV_RUNNING); mutex_unlock(&sdev->state_mutex); } EXPORT_SYMBOL(scsi_device_resume); diff --git a/fs/block_dev.c b/fs/block_dev.c index 4afa4d5ff969..04973f484422 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -662,7 +662,7 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, if (!ops->rw_page || bdev_get_integrity(bdev)) return result; - result = blk_queue_enter(bdev->bd_queue, false); + result = blk_queue_enter(bdev->bd_queue, 0); if (result) return result; result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false); @@ -698,7 +698,7 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; - result = blk_queue_enter(bdev->bd_queue, false); + result = blk_queue_enter(bdev->bd_queue, 0); if (result) return result; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2147e2381a22..402c9d536ae1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -959,7 +959,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern int blk_queue_enter(struct request_queue *q, bool nowait); +extern int blk_queue_enter(struct request_queue *q, unsigned int flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_start_queue_async(struct request_queue *q); diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 82e93ee94708..6f0f1e242e23 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -219,6 +219,7 @@ struct scsi_device { unsigned char access_state; struct mutex state_mutex; enum scsi_device_state sdev_state; + struct task_struct *quiesced_by; unsigned long sdev_data[0]; } __attribute__((aligned(sizeof(unsigned long)))); -- cgit v1.2.3-70-g09d2