summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJames Smart <jsmart2021@gmail.com>2019-03-13 18:55:02 +0100
committerJens Axboe <axboe@kernel.dk>2019-03-13 12:05:40 -0600
commit834d3710a093aa18c8aa88e6e1892180abadebaf (patch)
tree9e9d3960065ae368d5226dedd07d7dc09426b763
parent06f3d71ea071b70e62bcc146cd9ff7ed1f9d4e43 (diff)
nvme-fc: reject reconnect if io queue count is reduced to zero
If: - A successful connect has occurred with an io queue count greater than zero and namespaces detected and running. - An error or something occurs which causes a termination of the prior association and then starts a reconnect, - The reconnect then creates a new controller, but for whatever reason, nvme_set_queue_count() results in io queue count set to zero. This will skip io queue and tag set changes. - But... the controller will transition to live, calling nvme_start_ctrl, which calls nvme_start_queues(), which then releases I/Os into the transport which then sends them to the driver. As there are no queues, things eventually hit the driver looking for a handle, which was cleared when the original controller was reset, and it can't proceed. Worst case, things progress, but everything fails. In the failing scenario, the nvme_set_features(NVME_FEAT_NUM_QUEUES) command actually failed with a NVME_SC_INTERNAL error. For some reason, although nvme_set_queue_count() saw the error and set io queue count to zero, it doesn't return a failure status to the transport, which allows the transport to continue using the controller. Fix the problem by simply rejecting the new association if at least 1 I/O queue can't be created. The association reject will fail the reconnect attempt and fall into the reconnect retry policy. Signed-off-by: James Smart <jsmart2021@gmail.com> Reviewed-by: Sagi Grimberg <sagi@grimberg.me> Signed-off-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--drivers/nvme/host/fc.c12
1 files changed, 12 insertions, 0 deletions
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 23f6bad19274..f3b9d91ba0df 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2475,6 +2475,7 @@ static int
nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
{
struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
+ u32 prior_ioq_cnt = ctrl->ctrl.queue_count - 1;
unsigned int nr_io_queues;
int ret;
@@ -2487,6 +2488,13 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
return ret;
}
+ if (!nr_io_queues && prior_ioq_cnt) {
+ dev_info(ctrl->ctrl.device,
+ "Fail Reconnect: At least 1 io queue "
+ "required (was %d)\n", prior_ioq_cnt);
+ return -ENOSPC;
+ }
+
ctrl->ctrl.queue_count = nr_io_queues + 1;
/* check for io queues existing */
if (ctrl->ctrl.queue_count == 1)
@@ -2500,6 +2508,10 @@ nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
if (ret)
goto out_delete_hw_queues;
+ if (prior_ioq_cnt != nr_io_queues)
+ dev_info(ctrl->ctrl.device,
+ "reconnect: revising io queue count from %d to %d\n",
+ prior_ioq_cnt, nr_io_queues);
blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues);
return 0;