diff options
author | Jason Gunthorpe <jgg@nvidia.com> | 2020-11-17 15:20:26 -0400 |
---|---|---|
committer | Jason Gunthorpe <jgg@nvidia.com> | 2020-11-17 15:20:26 -0400 |
commit | bf3b7b7ba9e3db55d164ec7bf05e6947f79637cd (patch) | |
tree | 7de81ebc8339c2a985b2af0e360368e281564c93 /drivers/nvme/host | |
parent | 8a7904a672a1d33c848e5129f886ee69e0773a2e (diff) | |
parent | dabbd6abcdbeb1358a53ec28a244429320eb0e3a (diff) |
Merge branch 'for-rc' into rdma.git
From https://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma.git
The rc RDMA branch is needed due to dependencies on the next patches.
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- | drivers/nvme/host/core.c | 10 | ||||
-rw-r--r-- | drivers/nvme/host/fc.c | 270 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 1 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 23 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 26 | ||||
-rw-r--r-- | drivers/nvme/host/tcp.c | 16 |
6 files changed, 153 insertions, 193 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 95ef4943d8bd..40ca71b29bb9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2125,7 +2125,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (blk_queue_is_zoned(ns->queue)) { ret = nvme_revalidate_zones(ns); - if (ret) + if (ret && !nvme_first_scan(ns->disk)) return ret; } @@ -4582,8 +4582,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_start_queues); - -void nvme_sync_queues(struct nvme_ctrl *ctrl) +void nvme_sync_io_queues(struct nvme_ctrl *ctrl) { struct nvme_ns *ns; @@ -4591,7 +4590,12 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl) list_for_each_entry(ns, &ctrl->namespaces, list) blk_sync_queue(ns->queue); up_read(&ctrl->namespaces_rwsem); +} +EXPORT_SYMBOL_GPL(nvme_sync_io_queues); +void nvme_sync_queues(struct nvme_ctrl *ctrl) +{ + nvme_sync_io_queues(ctrl); if (ctrl->admin_q) blk_sync_queue(ctrl->admin_q); } diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 3c002bdcace3..f4c246462658 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -146,7 +146,8 @@ struct nvme_fc_rport { /* fc_ctrl flags values - specified as bit positions */ #define ASSOC_ACTIVE 0 -#define FCCTRL_TERMIO 1 +#define ASSOC_FAILED 1 +#define FCCTRL_TERMIO 2 struct nvme_fc_ctrl { spinlock_t lock; @@ -157,7 +158,6 @@ struct nvme_fc_ctrl { u32 cnum; bool ioq_live; - atomic_t err_work_active; u64 association_id; struct nvmefc_ls_rcv_op *rcv_disconn; @@ -167,7 +167,6 @@ struct nvme_fc_ctrl { struct blk_mq_tag_set tag_set; struct delayed_work connect_work; - struct work_struct err_work; struct kref ref; unsigned long flags; @@ -2414,24 +2413,97 @@ nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) nvme_fc_ctrl_put(ctrl); } +/* + * This routine is used by the transport when it needs to find active + * io on a queue that is to be terminated. The transport uses + * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke + * this routine to kill them on a 1 by 1 basis. + * + * As FC allocates FC exchange for each io, the transport must contact + * the LLDD to terminate the exchange, thus releasing the FC exchange. + * After terminating the exchange the LLDD will call the transport's + * normal io done path for the request, but it will have an aborted + * status. The done path will return the io request back to the block + * layer with an error status. + */ +static bool +nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) +{ + struct nvme_ctrl *nctrl = data; + struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); + struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); + + __nvme_fc_abort_op(ctrl, op); + return true; +} + +/* + * This routine runs through all outstanding commands on the association + * and aborts them. This routine is typically be called by the + * delete_association routine. It is also called due to an error during + * reconnect. In that scenario, it is most likely a command that initializes + * the controller, including fabric Connect commands on io queues, that + * may have timed out or failed thus the io must be killed for the connect + * thread to see the error. + */ static void -nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) +__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) { - int active; + /* + * If io queues are present, stop them and terminate all outstanding + * ios on them. As FC allocates FC exchange for each io, the + * transport must contact the LLDD to terminate the exchange, + * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() + * to tell us what io's are busy and invoke a transport routine + * to kill them with the LLDD. After terminating the exchange + * the LLDD will call the transport's normal io done path, but it + * will have an aborted status. The done path will return the + * io requests back to the block layer as part of normal completions + * (but with error status). + */ + if (ctrl->ctrl.queue_count > 1) { + nvme_stop_queues(&ctrl->ctrl); + blk_mq_tagset_busy_iter(&ctrl->tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + blk_mq_tagset_wait_completed_request(&ctrl->tag_set); + if (start_queues) + nvme_start_queues(&ctrl->ctrl); + } + + /* + * Other transports, which don't have link-level contexts bound + * to sqe's, would try to gracefully shutdown the controller by + * writing the registers for shutdown and polling (call + * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially + * just aborted and we will wait on those contexts, and given + * there was no indication of how live the controlelr is on the + * link, don't send more io to create more contexts for the + * shutdown. Let the controller fail via keepalive failure if + * its still present. + */ /* - * if an error (io timeout, etc) while (re)connecting, - * it's an error on creating the new association. - * Start the error recovery thread if it hasn't already - * been started. It is expected there could be multiple - * ios hitting this path before things are cleaned up. + * clean up the admin queue. Same thing as above. + */ + blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, + nvme_fc_terminate_exchange, &ctrl->ctrl); + blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); +} + +static void +nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) +{ + /* + * if an error (io timeout, etc) while (re)connecting, the remote + * port requested terminating of the association (disconnect_ls) + * or an error (timeout or abort) occurred on an io while creating + * the controller. Abort any ios on the association and let the + * create_association error path resolve things. */ if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { - active = atomic_xchg(&ctrl->err_work_active, 1); - if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) { - atomic_set(&ctrl->err_work_active, 0); - WARN_ON(1); - } + __nvme_fc_abort_outstanding_ios(ctrl, true); + set_bit(ASSOC_FAILED, &ctrl->flags); return; } @@ -2745,30 +2817,6 @@ nvme_fc_complete_rq(struct request *rq) nvme_fc_ctrl_put(ctrl); } -/* - * This routine is used by the transport when it needs to find active - * io on a queue that is to be terminated. The transport uses - * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke - * this routine to kill them on a 1 by 1 basis. - * - * As FC allocates FC exchange for each io, the transport must contact - * the LLDD to terminate the exchange, thus releasing the FC exchange. - * After terminating the exchange the LLDD will call the transport's - * normal io done path for the request, but it will have an aborted - * status. The done path will return the io request back to the block - * layer with an error status. - */ -static bool -nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) -{ - struct nvme_ctrl *nctrl = data; - struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); - - __nvme_fc_abort_op(ctrl, op); - return true; -} - static const struct blk_mq_ops nvme_fc_mq_ops = { .queue_rq = nvme_fc_queue_rq, @@ -2988,6 +3036,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ctrl->cnum, ctrl->lport->localport.port_name, ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn); + clear_bit(ASSOC_FAILED, &ctrl->flags); + /* * Create the admin queue */ @@ -3016,7 +3066,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) */ ret = nvme_enable_ctrl(&ctrl->ctrl); - if (ret) + if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) goto out_disconnect_admin_queue; ctrl->ctrl.max_segments = ctrl->lport->ops->max_sgl_segments; @@ -3026,7 +3076,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); ret = nvme_init_identify(&ctrl->ctrl); - if (ret) + if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) goto out_disconnect_admin_queue; /* sanity checks */ @@ -3071,9 +3121,9 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ret = nvme_fc_create_io_queues(ctrl); else ret = nvme_fc_recreate_io_queues(ctrl); - if (ret) - goto out_term_aen_ops; } + if (ret || test_bit(ASSOC_FAILED, &ctrl->flags)) + goto out_term_aen_ops; changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -3108,60 +3158,6 @@ out_free_queue: /* - * This routine runs through all outstanding commands on the association - * and aborts them. This routine is typically be called by the - * delete_association routine. It is also called due to an error during - * reconnect. In that scenario, it is most likely a command that initializes - * the controller, including fabric Connect commands on io queues, that - * may have timed out or failed thus the io must be killed for the connect - * thread to see the error. - */ -static void -__nvme_fc_abort_outstanding_ios(struct nvme_fc_ctrl *ctrl, bool start_queues) -{ - /* - * If io queues are present, stop them and terminate all outstanding - * ios on them. As FC allocates FC exchange for each io, the - * transport must contact the LLDD to terminate the exchange, - * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() - * to tell us what io's are busy and invoke a transport routine - * to kill them with the LLDD. After terminating the exchange - * the LLDD will call the transport's normal io done path, but it - * will have an aborted status. The done path will return the - * io requests back to the block layer as part of normal completions - * (but with error status). - */ - if (ctrl->ctrl.queue_count > 1) { - nvme_stop_queues(&ctrl->ctrl); - blk_mq_tagset_busy_iter(&ctrl->tag_set, - nvme_fc_terminate_exchange, &ctrl->ctrl); - blk_mq_tagset_wait_completed_request(&ctrl->tag_set); - if (start_queues) - nvme_start_queues(&ctrl->ctrl); - } - - /* - * Other transports, which don't have link-level contexts bound - * to sqe's, would try to gracefully shutdown the controller by - * writing the registers for shutdown and polling (call - * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially - * just aborted and we will wait on those contexts, and given - * there was no indication of how live the controlelr is on the - * link, don't send more io to create more contexts for the - * shutdown. Let the controller fail via keepalive failure if - * its still present. - */ - - /* - * clean up the admin queue. Same thing as above. - */ - blk_mq_quiesce_queue(ctrl->ctrl.admin_q); - blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, - nvme_fc_terminate_exchange, &ctrl->ctrl); - blk_mq_tagset_wait_completed_request(&ctrl->admin_tag_set); -} - -/* * This routine stops operation of the controller on the host side. * On the host os stack side: Admin and IO queues are stopped, * outstanding ios on them terminated via FC ABTS. @@ -3237,7 +3233,6 @@ nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) { struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); - cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->connect_work); /* * kill the association on the link side. this will block @@ -3292,78 +3287,34 @@ nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) } static void -__nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) +nvme_fc_reset_ctrl_work(struct work_struct *work) { - /* - * if state is CONNECTING - the error occurred as part of a - * reconnect attempt. Abort any ios on the association and - * let the create_association error paths resolve things. - */ - if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { - __nvme_fc_abort_outstanding_ios(ctrl, true); - return; - } - - /* - * For any other state, kill the association. As this routine - * is a common io abort routine for resetting and such, after - * the association is terminated, ensure that the state is set - * to CONNECTING. - */ + struct nvme_fc_ctrl *ctrl = + container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); - nvme_stop_keep_alive(&ctrl->ctrl); + nvme_stop_ctrl(&ctrl->ctrl); /* will block will waiting for io to terminate */ nvme_fc_delete_association(ctrl); - if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && - !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) + if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) dev_err(ctrl->ctrl.device, "NVME-FC{%d}: error_recovery: Couldn't change state " "to CONNECTING\n", ctrl->cnum); -} - -static void -nvme_fc_reset_ctrl_work(struct work_struct *work) -{ - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); - int ret; - __nvme_fc_terminate_io(ctrl); - - nvme_stop_ctrl(&ctrl->ctrl); - - if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) - ret = nvme_fc_create_association(ctrl); - else - ret = -ENOTCONN; - - if (ret) - nvme_fc_reconnect_or_delete(ctrl, ret); - else - dev_info(ctrl->ctrl.device, - "NVME-FC{%d}: controller reset complete\n", - ctrl->cnum); + if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { + if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { + dev_err(ctrl->ctrl.device, + "NVME-FC{%d}: failed to schedule connect " + "after reset\n", ctrl->cnum); + } else { + flush_delayed_work(&ctrl->connect_work); + } + } else { + nvme_fc_reconnect_or_delete(ctrl, -ENOTCONN); + } } -static void -nvme_fc_connect_err_work(struct work_struct *work) -{ - struct nvme_fc_ctrl *ctrl = - container_of(work, struct nvme_fc_ctrl, err_work); - - __nvme_fc_terminate_io(ctrl); - - atomic_set(&ctrl->err_work_active, 0); - - /* - * Rescheduling the connection after recovering - * from the io error is left to the reconnect work - * item, which is what should have stalled waiting on - * the io that had the error that scheduled this work. - */ -} static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { .name = "fc", @@ -3491,7 +3442,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ctrl->dev = lport->dev; ctrl->cnum = idx; ctrl->ioq_live = false; - atomic_set(&ctrl->err_work_active, 0); init_waitqueue_head(&ctrl->ioabort_wait); get_device(ctrl->dev); @@ -3499,7 +3449,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); - INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work); spin_lock_init(&ctrl->lock); /* io queue count */ @@ -3592,7 +3541,6 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, fail_ctrl: nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); cancel_work_sync(&ctrl->ctrl.reset_work); - cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->connect_work); ctrl->ctrl.opts = NULL; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index cc111136a981..bc330bf0d3bd 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -602,6 +602,7 @@ void nvme_stop_queues(struct nvme_ctrl *ctrl); void nvme_start_queues(struct nvme_ctrl *ctrl); void nvme_kill_queues(struct nvme_ctrl *ctrl); void nvme_sync_queues(struct nvme_ctrl *ctrl); +void nvme_sync_io_queues(struct nvme_ctrl *ctrl); void nvme_unfreeze(struct nvme_ctrl *ctrl); void nvme_wait_freeze(struct nvme_ctrl *ctrl); int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index df8f3612107f..0578ff253c47 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -198,6 +198,7 @@ struct nvme_queue { u32 q_depth; u16 cq_vector; u16 sq_tail; + u16 last_sq_tail; u16 cq_head; u16 qid; u8 cq_phase; @@ -455,11 +456,24 @@ static int nvme_pci_map_queues(struct blk_mq_tag_set *set) return 0; } -static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) +/* + * Write sq tail if we are asked to, or if the next command would wrap. + */ +static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) { + if (!write_sq) { + u16 next_tail = nvmeq->sq_tail + 1; + + if (next_tail == nvmeq->q_depth) + next_tail = 0; + if (next_tail != nvmeq->last_sq_tail) + return; + } + if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) writel(nvmeq->sq_tail, nvmeq->q_db); + nvmeq->last_sq_tail = nvmeq->sq_tail; } /** @@ -476,8 +490,7 @@ static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, cmd, sizeof(*cmd)); if (++nvmeq->sq_tail == nvmeq->q_depth) nvmeq->sq_tail = 0; - if (write_sq) - nvme_write_sq_db(nvmeq); + nvme_write_sq_db(nvmeq, write_sq); spin_unlock(&nvmeq->sq_lock); } @@ -486,7 +499,8 @@ static void nvme_commit_rqs(struct blk_mq_hw_ctx *hctx) struct nvme_queue *nvmeq = hctx->driver_data; spin_lock(&nvmeq->sq_lock); - nvme_write_sq_db(nvmeq); + if (nvmeq->sq_tail != nvmeq->last_sq_tail) + nvme_write_sq_db(nvmeq, true); spin_unlock(&nvmeq->sq_lock); } @@ -1496,6 +1510,7 @@ static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) struct nvme_dev *dev = nvmeq->dev; nvmeq->sq_tail = 0; + nvmeq->last_sq_tail = 0; nvmeq->cq_head = 0; nvmeq->cq_phase = 1; nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9a6957b4b700..8b56911dd050 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -122,7 +122,6 @@ struct nvme_rdma_ctrl { struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; - struct mutex teardown_lock; bool use_inline_data; u32 io_queues[HCTX_MAX_TYPES]; }; @@ -1010,8 +1009,8 @@ out_free_io_queues: static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, bool remove) { - mutex_lock(&ctrl->teardown_lock); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + blk_sync_queue(ctrl->ctrl.admin_q); nvme_rdma_stop_queue(&ctrl->queues[0]); if (ctrl->ctrl.admin_tagset) { blk_mq_tagset_busy_iter(ctrl->ctrl.admin_tagset, @@ -1021,16 +1020,15 @@ static void nvme_rdma_teardown_admin_queue(struct nvme_rdma_ctrl *ctrl, if (remove) blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); nvme_rdma_destroy_admin_queue(ctrl, remove); - mutex_unlock(&ctrl->teardown_lock); } static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, bool remove) { - mutex_lock(&ctrl->teardown_lock); if (ctrl->ctrl.queue_count > 1) { nvme_start_freeze(&ctrl->ctrl); nvme_stop_queues(&ctrl->ctrl); + nvme_sync_io_queues(&ctrl->ctrl); nvme_rdma_stop_io_queues(ctrl); if (ctrl->ctrl.tagset) { blk_mq_tagset_busy_iter(ctrl->ctrl.tagset, @@ -1041,7 +1039,6 @@ static void nvme_rdma_teardown_io_queues(struct nvme_rdma_ctrl *ctrl, nvme_start_queues(&ctrl->ctrl); nvme_rdma_destroy_io_queues(ctrl, remove); } - mutex_unlock(&ctrl->teardown_lock); } static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) @@ -1768,6 +1765,14 @@ static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc) return; } + /* sanity checking for received data length */ + if (unlikely(wc->byte_len < len)) { + dev_err(queue->ctrl->ctrl.device, + "Unexpected nvme completion length(%d)\n", wc->byte_len); + nvme_rdma_error_recovery(queue->ctrl); + return; + } + ib_dma_sync_single_for_cpu(ibdev, qe->dma, len, DMA_FROM_DEVICE); /* * AEN requests are special as they don't time out and can @@ -1890,10 +1895,10 @@ static int nvme_rdma_route_resolved(struct nvme_rdma_queue *queue) priv.hsqsize = cpu_to_le16(queue->ctrl->ctrl.sqsize); } - ret = rdma_connect(queue->cm_id, ¶m); + ret = rdma_connect_locked(queue->cm_id, ¶m); if (ret) { dev_err(ctrl->ctrl.device, - "rdma_connect failed (%d).\n", ret); + "rdma_connect_locked failed (%d).\n", ret); goto out_destroy_queue_ib; } @@ -1968,16 +1973,12 @@ static void nvme_rdma_complete_timed_out(struct request *rq) { struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_queue *queue = req->queue; - struct nvme_rdma_ctrl *ctrl = queue->ctrl; - /* fence other contexts that may complete the command */ - mutex_lock(&ctrl->teardown_lock); nvme_rdma_stop_queue(queue); - if (!blk_mq_request_completed(rq)) { + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(rq); } - mutex_unlock(&ctrl->teardown_lock); } static enum blk_eh_timer_return @@ -2312,7 +2313,6 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, return ERR_PTR(-ENOMEM); ctrl->ctrl.opts = opts; INIT_LIST_HEAD(&ctrl->list); - mutex_init(&ctrl->teardown_lock); if (!(opts->mask & NVMF_OPT_TRSVCID)) { opts->trsvcid = diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index d6a3e1487354..c0c33320fe65 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -124,7 +124,6 @@ struct nvme_tcp_ctrl { struct sockaddr_storage src_addr; struct nvme_ctrl ctrl; - struct mutex teardown_lock; struct work_struct err_work; struct delayed_work connect_work; struct nvme_tcp_request async_req; @@ -1886,8 +1885,8 @@ out_free_queue: static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, bool remove) { - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); blk_mq_quiesce_queue(ctrl->admin_q); + blk_sync_queue(ctrl->admin_q); nvme_tcp_stop_queue(ctrl, 0); if (ctrl->admin_tagset) { blk_mq_tagset_busy_iter(ctrl->admin_tagset, @@ -1897,18 +1896,17 @@ static void nvme_tcp_teardown_admin_queue(struct nvme_ctrl *ctrl, if (remove) blk_mq_unquiesce_queue(ctrl->admin_q); nvme_tcp_destroy_admin_queue(ctrl, remove); - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, bool remove) { - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); if (ctrl->queue_count <= 1) - goto out; + return; blk_mq_quiesce_queue(ctrl->admin_q); nvme_start_freeze(ctrl); nvme_stop_queues(ctrl); + nvme_sync_io_queues(ctrl); nvme_tcp_stop_io_queues(ctrl); if (ctrl->tagset) { blk_mq_tagset_busy_iter(ctrl->tagset, @@ -1918,8 +1916,6 @@ static void nvme_tcp_teardown_io_queues(struct nvme_ctrl *ctrl, if (remove) nvme_start_queues(ctrl); nvme_tcp_destroy_io_queues(ctrl, remove); -out: - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static void nvme_tcp_reconnect_or_remove(struct nvme_ctrl *ctrl) @@ -2171,14 +2167,11 @@ static void nvme_tcp_complete_timed_out(struct request *rq) struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq); struct nvme_ctrl *ctrl = &req->queue->ctrl->ctrl; - /* fence other contexts that may complete the command */ - mutex_lock(&to_tcp_ctrl(ctrl)->teardown_lock); nvme_tcp_stop_queue(ctrl, nvme_tcp_queue_id(req->queue)); - if (!blk_mq_request_completed(rq)) { + if (blk_mq_request_started(rq) && !blk_mq_request_completed(rq)) { nvme_req(rq)->status = NVME_SC_HOST_ABORTED_CMD; blk_mq_complete_request(rq); } - mutex_unlock(&to_tcp_ctrl(ctrl)->teardown_lock); } static enum blk_eh_timer_return @@ -2455,7 +2448,6 @@ static struct nvme_ctrl *nvme_tcp_create_ctrl(struct device *dev, nvme_tcp_reconnect_ctrl_work); INIT_WORK(&ctrl->err_work, nvme_tcp_error_recovery_work); INIT_WORK(&ctrl->ctrl.reset_work, nvme_reset_ctrl_work); - mutex_init(&ctrl->teardown_lock); if (!(opts->mask & NVMF_OPT_TRSVCID)) { opts->trsvcid = |