diff options
author | Martin K. Petersen <martin.petersen@oracle.com> | 2024-07-04 23:38:16 -0400 |
---|---|---|
committer | Martin K. Petersen <martin.petersen@oracle.com> | 2024-07-04 23:38:16 -0400 |
commit | 6cd48c8f62ade6260ff9a13724e49b3854392fe4 (patch) | |
tree | bd798670c8fc0491d7c3d9723226f97e25fc9dc6 /drivers/scsi/mpi3mr/mpi3mr_os.c | |
parent | 34438552c933c4567fad5ff316c45afc920fca53 (diff) | |
parent | cf82b9e866b644d5eca38b55403e1efa8f1cc8da (diff) |
Merge patch series "mpi3mr: Support PCI Error Recovery"
Sumit Saxena <sumit.saxena@broadcom.com> says:
This patch series contains the changes done in the driver to support
PCI error recovery. It is rework of older patch series from Ranjan
Kumar, see [1].
[1] https://lore.kernel.org/all/20231214205900.270488-1-ranjan.kumar@broadcom.com/
Link: https://lore.kernel.org/r/20240627101735.18286-1-sumit.saxena@broadcom.com
Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/mpi3mr/mpi3mr_os.c')
-rw-r--r-- | drivers/scsi/mpi3mr/mpi3mr_os.c | 248 |
1 files changed, 243 insertions, 5 deletions
diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c index eac179dc9370..69b14918de59 100644 --- a/drivers/scsi/mpi3mr/mpi3mr_os.c +++ b/drivers/scsi/mpi3mr/mpi3mr_os.c @@ -956,7 +956,7 @@ static int mpi3mr_report_tgtdev_to_host(struct mpi3mr_ioc *mrioc, int retval = 0; struct mpi3mr_tgt_dev *tgtdev; - if (mrioc->reset_in_progress) + if (mrioc->reset_in_progress || mrioc->pci_err_recovery) return -1; tgtdev = mpi3mr_get_tgtdev_by_perst_id(mrioc, perst_id); @@ -2007,6 +2007,7 @@ static void mpi3mr_fwevt_bh(struct mpi3mr_ioc *mrioc, struct mpi3_device_page0 *dev_pg0 = NULL; u16 perst_id, handle, dev_info; struct mpi3_device0_sas_sata_format *sasinf = NULL; + unsigned int timeout; mpi3mr_fwevt_del_from_list(mrioc, fwevt); mrioc->current_event = fwevt; @@ -2097,8 +2098,18 @@ static void mpi3mr_fwevt_bh(struct mpi3mr_ioc *mrioc, } case MPI3_EVENT_WAIT_FOR_DEVICES_TO_REFRESH: { - while (mrioc->device_refresh_on) + timeout = MPI3MR_RESET_TIMEOUT * 2; + while ((mrioc->device_refresh_on || mrioc->block_on_pci_err) && + !mrioc->unrecoverable && !mrioc->pci_err_recovery) { msleep(500); + if (!timeout--) { + mrioc->unrecoverable = 1; + break; + } + } + + if (mrioc->unrecoverable || mrioc->pci_err_recovery) + break; dprint_event_bh(mrioc, "scan for non responding and newly added devices after soft reset started\n"); @@ -3796,6 +3807,13 @@ int mpi3mr_issue_tm(struct mpi3mr_ioc *mrioc, u8 tm_type, mutex_unlock(&drv_cmd->mutex); goto out; } + if (mrioc->block_on_pci_err) { + retval = -1; + dprint_tm(mrioc, "sending task management failed due to\n" + "pci error recovery in progress\n"); + mutex_unlock(&drv_cmd->mutex); + goto out; + } drv_cmd->state = MPI3MR_CMD_PENDING; drv_cmd->is_waiting = 1; @@ -4181,6 +4199,7 @@ static int mpi3mr_eh_bus_reset(struct scsi_cmnd *scmd) struct mpi3mr_sdev_priv_data *sdev_priv_data; u8 dev_type = MPI3_DEVICE_DEVFORM_VD; int retval = FAILED; + unsigned int timeout = MPI3MR_RESET_TIMEOUT; sdev_priv_data = scmd->device->hostdata; if (sdev_priv_data && sdev_priv_data->tgt_priv_data) { @@ -4191,12 +4210,24 @@ static int mpi3mr_eh_bus_reset(struct scsi_cmnd *scmd) if (dev_type == MPI3_DEVICE_DEVFORM_VD) { mpi3mr_wait_for_host_io(mrioc, MPI3MR_RAID_ERRREC_RESET_TIMEOUT); - if (!mpi3mr_get_fw_pending_ios(mrioc)) + if (!mpi3mr_get_fw_pending_ios(mrioc)) { + while (mrioc->reset_in_progress || + mrioc->prepare_for_reset || + mrioc->block_on_pci_err) { + ssleep(1); + if (!timeout--) { + retval = FAILED; + goto out; + } + } retval = SUCCESS; + goto out; + } } if (retval == FAILED) mpi3mr_print_pending_host_io(mrioc); +out: sdev_printk(KERN_INFO, scmd->device, "Bus reset is %s for scmd(%p)\n", ((retval == SUCCESS) ? "SUCCESS" : "FAILED"), scmd); @@ -4879,7 +4910,8 @@ static int mpi3mr_qcmd(struct Scsi_Host *shost, goto out; } - if (mrioc->reset_in_progress) { + if (mrioc->reset_in_progress || mrioc->prepare_for_reset + || mrioc->block_on_pci_err) { retval = SCSI_MLQUEUE_HOST_BUSY; goto out; } @@ -5362,7 +5394,14 @@ static void mpi3mr_remove(struct pci_dev *pdev) while (mrioc->reset_in_progress || mrioc->is_driver_loading) ssleep(1); - if (!pci_device_is_present(mrioc->pdev)) { + if (mrioc->block_on_pci_err) { + mrioc->block_on_pci_err = false; + scsi_unblock_requests(shost); + mrioc->unrecoverable = 1; + } + + if (!pci_device_is_present(mrioc->pdev) || + mrioc->pci_err_recovery) { mrioc->unrecoverable = 1; mpi3mr_flush_cmds_for_unrecovered_controller(mrioc); } @@ -5546,6 +5585,197 @@ mpi3mr_resume(struct device *dev) return 0; } +/** + * mpi3mr_pcierr_error_detected - PCI error detected callback + * @pdev: PCI device instance + * @state: channel state + * + * This function is called by the PCI error recovery driver and + * based on the state passed the driver decides what actions to + * be recommended back to PCI driver. + * + * For all of the states if there is no valid mrioc or scsi host + * references in the PCI device then this function will return + * the result as disconnect. + * + * For normal state, this function will return the result as can + * recover. + * + * For frozen state, this function will block for any pending + * controller initialization or re-initialization to complete, + * stop any new interactions with the controller and return + * status as reset required. + * + * For permanent failure state, this function will mark the + * controller as unrecoverable and return status as disconnect. + * + * Returns: PCI_ERS_RESULT_NEED_RESET or CAN_RECOVER or + * DISCONNECT based on the controller state. + */ +static pci_ers_result_t +mpi3mr_pcierr_error_detected(struct pci_dev *pdev, pci_channel_state_t state) +{ + struct Scsi_Host *shost; + struct mpi3mr_ioc *mrioc; + unsigned int timeout = MPI3MR_RESET_TIMEOUT; + + dev_info(&pdev->dev, "%s: callback invoked state(%d)\n", __func__, + state); + + shost = pci_get_drvdata(pdev); + mrioc = shost_priv(shost); + + switch (state) { + case pci_channel_io_normal: + return PCI_ERS_RESULT_CAN_RECOVER; + case pci_channel_io_frozen: + mrioc->pci_err_recovery = true; + mrioc->block_on_pci_err = true; + do { + if (mrioc->reset_in_progress || mrioc->is_driver_loading) + ssleep(1); + else + break; + } while (--timeout); + + if (!timeout) { + mrioc->pci_err_recovery = true; + mrioc->block_on_pci_err = true; + mrioc->unrecoverable = 1; + mpi3mr_stop_watchdog(mrioc); + mpi3mr_flush_cmds_for_unrecovered_controller(mrioc); + return PCI_ERS_RESULT_DISCONNECT; + } + + scsi_block_requests(mrioc->shost); + mpi3mr_stop_watchdog(mrioc); + mpi3mr_cleanup_resources(mrioc); + return PCI_ERS_RESULT_NEED_RESET; + case pci_channel_io_perm_failure: + mrioc->pci_err_recovery = true; + mrioc->block_on_pci_err = true; + mrioc->unrecoverable = 1; + mpi3mr_stop_watchdog(mrioc); + mpi3mr_flush_cmds_for_unrecovered_controller(mrioc); + return PCI_ERS_RESULT_DISCONNECT; + default: + return PCI_ERS_RESULT_DISCONNECT; + } +} + +/** + * mpi3mr_pcierr_slot_reset - Post slot reset callback + * @pdev: PCI device instance + * + * This function is called by the PCI error recovery driver + * after a slot or link reset issued by it for the recovery, the + * driver is expected to bring back the controller and + * initialize it. + * + * This function restores PCI state and reinitializes controller + * resources and the controller, this blocks for any pending + * reset to complete. + * + * Returns: PCI_ERS_RESULT_DISCONNECT on failure or + * PCI_ERS_RESULT_RECOVERED + */ +static pci_ers_result_t mpi3mr_pcierr_slot_reset(struct pci_dev *pdev) +{ + struct Scsi_Host *shost; + struct mpi3mr_ioc *mrioc; + unsigned int timeout = MPI3MR_RESET_TIMEOUT; + + dev_info(&pdev->dev, "%s: callback invoked\n", __func__); + + shost = pci_get_drvdata(pdev); + mrioc = shost_priv(shost); + + do { + if (mrioc->reset_in_progress) + ssleep(1); + else + break; + } while (--timeout); + + if (!timeout) + goto out_failed; + + pci_restore_state(pdev); + + if (mpi3mr_setup_resources(mrioc)) { + ioc_err(mrioc, "setup resources failed\n"); + goto out_failed; + } + mrioc->unrecoverable = 0; + mrioc->pci_err_recovery = false; + + if (mpi3mr_soft_reset_handler(mrioc, MPI3MR_RESET_FROM_FIRMWARE, 0)) + goto out_failed; + + return PCI_ERS_RESULT_RECOVERED; + +out_failed: + mrioc->unrecoverable = 1; + mrioc->block_on_pci_err = false; + scsi_unblock_requests(shost); + mpi3mr_start_watchdog(mrioc); + return PCI_ERS_RESULT_DISCONNECT; +} + +/** + * mpi3mr_pcierr_resume - PCI error recovery resume + * callback + * @pdev: PCI device instance + * + * This function enables all I/O and IOCTLs post reset issued as + * part of the PCI error recovery + * + * Return: Nothing. + */ +static void mpi3mr_pcierr_resume(struct pci_dev *pdev) +{ + struct Scsi_Host *shost; + struct mpi3mr_ioc *mrioc; + + dev_info(&pdev->dev, "%s: callback invoked\n", __func__); + + shost = pci_get_drvdata(pdev); + mrioc = shost_priv(shost); + + if (mrioc->block_on_pci_err) { + mrioc->block_on_pci_err = false; + scsi_unblock_requests(shost); + mpi3mr_start_watchdog(mrioc); + } +} + +/** + * mpi3mr_pcierr_mmio_enabled - PCI error recovery callback + * @pdev: PCI device instance + * + * This is called only if mpi3mr_pcierr_error_detected returns + * PCI_ERS_RESULT_CAN_RECOVER. + * + * Return: PCI_ERS_RESULT_DISCONNECT when the controller is + * unrecoverable or when the shost/mrioc reference cannot be + * found, else return PCI_ERS_RESULT_RECOVERED + */ +static pci_ers_result_t mpi3mr_pcierr_mmio_enabled(struct pci_dev *pdev) +{ + struct Scsi_Host *shost; + struct mpi3mr_ioc *mrioc; + + dev_info(&pdev->dev, "%s: callback invoked\n", __func__); + + shost = pci_get_drvdata(pdev); + mrioc = shost_priv(shost); + + if (mrioc->unrecoverable) + return PCI_ERS_RESULT_DISCONNECT; + + return PCI_ERS_RESULT_RECOVERED; +} + static const struct pci_device_id mpi3mr_pci_id_table[] = { { PCI_DEVICE_SUB(MPI3_MFGPAGE_VENDORID_BROADCOM, @@ -5563,6 +5793,13 @@ static const struct pci_device_id mpi3mr_pci_id_table[] = { }; MODULE_DEVICE_TABLE(pci, mpi3mr_pci_id_table); +static struct pci_error_handlers mpi3mr_err_handler = { + .error_detected = mpi3mr_pcierr_error_detected, + .mmio_enabled = mpi3mr_pcierr_mmio_enabled, + .slot_reset = mpi3mr_pcierr_slot_reset, + .resume = mpi3mr_pcierr_resume, +}; + static SIMPLE_DEV_PM_OPS(mpi3mr_pm_ops, mpi3mr_suspend, mpi3mr_resume); static struct pci_driver mpi3mr_pci_driver = { @@ -5571,6 +5808,7 @@ static struct pci_driver mpi3mr_pci_driver = { .probe = mpi3mr_probe, .remove = mpi3mr_remove, .shutdown = mpi3mr_shutdown, + .err_handler = &mpi3mr_err_handler, .driver.pm = &mpi3mr_pm_ops, }; |