summaryrefslogtreecommitdiff
path: root/drivers/scsi/mpi3mr/mpi3mr_os.c
diff options
context:
space:
mode:
authorMartin K. Petersen <martin.petersen@oracle.com>2024-07-04 23:38:16 -0400
committerMartin K. Petersen <martin.petersen@oracle.com>2024-07-04 23:38:16 -0400
commit6cd48c8f62ade6260ff9a13724e49b3854392fe4 (patch)
treebd798670c8fc0491d7c3d9723226f97e25fc9dc6 /drivers/scsi/mpi3mr/mpi3mr_os.c
parent34438552c933c4567fad5ff316c45afc920fca53 (diff)
parentcf82b9e866b644d5eca38b55403e1efa8f1cc8da (diff)
Merge patch series "mpi3mr: Support PCI Error Recovery"
Sumit Saxena <sumit.saxena@broadcom.com> says: This patch series contains the changes done in the driver to support PCI error recovery. It is rework of older patch series from Ranjan Kumar, see [1]. [1] https://lore.kernel.org/all/20231214205900.270488-1-ranjan.kumar@broadcom.com/ Link: https://lore.kernel.org/r/20240627101735.18286-1-sumit.saxena@broadcom.com Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
Diffstat (limited to 'drivers/scsi/mpi3mr/mpi3mr_os.c')
-rw-r--r--drivers/scsi/mpi3mr/mpi3mr_os.c248
1 files changed, 243 insertions, 5 deletions
diff --git a/drivers/scsi/mpi3mr/mpi3mr_os.c b/drivers/scsi/mpi3mr/mpi3mr_os.c
index eac179dc9370..69b14918de59 100644
--- a/drivers/scsi/mpi3mr/mpi3mr_os.c
+++ b/drivers/scsi/mpi3mr/mpi3mr_os.c
@@ -956,7 +956,7 @@ static int mpi3mr_report_tgtdev_to_host(struct mpi3mr_ioc *mrioc,
int retval = 0;
struct mpi3mr_tgt_dev *tgtdev;
- if (mrioc->reset_in_progress)
+ if (mrioc->reset_in_progress || mrioc->pci_err_recovery)
return -1;
tgtdev = mpi3mr_get_tgtdev_by_perst_id(mrioc, perst_id);
@@ -2007,6 +2007,7 @@ static void mpi3mr_fwevt_bh(struct mpi3mr_ioc *mrioc,
struct mpi3_device_page0 *dev_pg0 = NULL;
u16 perst_id, handle, dev_info;
struct mpi3_device0_sas_sata_format *sasinf = NULL;
+ unsigned int timeout;
mpi3mr_fwevt_del_from_list(mrioc, fwevt);
mrioc->current_event = fwevt;
@@ -2097,8 +2098,18 @@ static void mpi3mr_fwevt_bh(struct mpi3mr_ioc *mrioc,
}
case MPI3_EVENT_WAIT_FOR_DEVICES_TO_REFRESH:
{
- while (mrioc->device_refresh_on)
+ timeout = MPI3MR_RESET_TIMEOUT * 2;
+ while ((mrioc->device_refresh_on || mrioc->block_on_pci_err) &&
+ !mrioc->unrecoverable && !mrioc->pci_err_recovery) {
msleep(500);
+ if (!timeout--) {
+ mrioc->unrecoverable = 1;
+ break;
+ }
+ }
+
+ if (mrioc->unrecoverable || mrioc->pci_err_recovery)
+ break;
dprint_event_bh(mrioc,
"scan for non responding and newly added devices after soft reset started\n");
@@ -3796,6 +3807,13 @@ int mpi3mr_issue_tm(struct mpi3mr_ioc *mrioc, u8 tm_type,
mutex_unlock(&drv_cmd->mutex);
goto out;
}
+ if (mrioc->block_on_pci_err) {
+ retval = -1;
+ dprint_tm(mrioc, "sending task management failed due to\n"
+ "pci error recovery in progress\n");
+ mutex_unlock(&drv_cmd->mutex);
+ goto out;
+ }
drv_cmd->state = MPI3MR_CMD_PENDING;
drv_cmd->is_waiting = 1;
@@ -4181,6 +4199,7 @@ static int mpi3mr_eh_bus_reset(struct scsi_cmnd *scmd)
struct mpi3mr_sdev_priv_data *sdev_priv_data;
u8 dev_type = MPI3_DEVICE_DEVFORM_VD;
int retval = FAILED;
+ unsigned int timeout = MPI3MR_RESET_TIMEOUT;
sdev_priv_data = scmd->device->hostdata;
if (sdev_priv_data && sdev_priv_data->tgt_priv_data) {
@@ -4191,12 +4210,24 @@ static int mpi3mr_eh_bus_reset(struct scsi_cmnd *scmd)
if (dev_type == MPI3_DEVICE_DEVFORM_VD) {
mpi3mr_wait_for_host_io(mrioc,
MPI3MR_RAID_ERRREC_RESET_TIMEOUT);
- if (!mpi3mr_get_fw_pending_ios(mrioc))
+ if (!mpi3mr_get_fw_pending_ios(mrioc)) {
+ while (mrioc->reset_in_progress ||
+ mrioc->prepare_for_reset ||
+ mrioc->block_on_pci_err) {
+ ssleep(1);
+ if (!timeout--) {
+ retval = FAILED;
+ goto out;
+ }
+ }
retval = SUCCESS;
+ goto out;
+ }
}
if (retval == FAILED)
mpi3mr_print_pending_host_io(mrioc);
+out:
sdev_printk(KERN_INFO, scmd->device,
"Bus reset is %s for scmd(%p)\n",
((retval == SUCCESS) ? "SUCCESS" : "FAILED"), scmd);
@@ -4879,7 +4910,8 @@ static int mpi3mr_qcmd(struct Scsi_Host *shost,
goto out;
}
- if (mrioc->reset_in_progress) {
+ if (mrioc->reset_in_progress || mrioc->prepare_for_reset
+ || mrioc->block_on_pci_err) {
retval = SCSI_MLQUEUE_HOST_BUSY;
goto out;
}
@@ -5362,7 +5394,14 @@ static void mpi3mr_remove(struct pci_dev *pdev)
while (mrioc->reset_in_progress || mrioc->is_driver_loading)
ssleep(1);
- if (!pci_device_is_present(mrioc->pdev)) {
+ if (mrioc->block_on_pci_err) {
+ mrioc->block_on_pci_err = false;
+ scsi_unblock_requests(shost);
+ mrioc->unrecoverable = 1;
+ }
+
+ if (!pci_device_is_present(mrioc->pdev) ||
+ mrioc->pci_err_recovery) {
mrioc->unrecoverable = 1;
mpi3mr_flush_cmds_for_unrecovered_controller(mrioc);
}
@@ -5546,6 +5585,197 @@ mpi3mr_resume(struct device *dev)
return 0;
}
+/**
+ * mpi3mr_pcierr_error_detected - PCI error detected callback
+ * @pdev: PCI device instance
+ * @state: channel state
+ *
+ * This function is called by the PCI error recovery driver and
+ * based on the state passed the driver decides what actions to
+ * be recommended back to PCI driver.
+ *
+ * For all of the states if there is no valid mrioc or scsi host
+ * references in the PCI device then this function will return
+ * the result as disconnect.
+ *
+ * For normal state, this function will return the result as can
+ * recover.
+ *
+ * For frozen state, this function will block for any pending
+ * controller initialization or re-initialization to complete,
+ * stop any new interactions with the controller and return
+ * status as reset required.
+ *
+ * For permanent failure state, this function will mark the
+ * controller as unrecoverable and return status as disconnect.
+ *
+ * Returns: PCI_ERS_RESULT_NEED_RESET or CAN_RECOVER or
+ * DISCONNECT based on the controller state.
+ */
+static pci_ers_result_t
+mpi3mr_pcierr_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+ struct Scsi_Host *shost;
+ struct mpi3mr_ioc *mrioc;
+ unsigned int timeout = MPI3MR_RESET_TIMEOUT;
+
+ dev_info(&pdev->dev, "%s: callback invoked state(%d)\n", __func__,
+ state);
+
+ shost = pci_get_drvdata(pdev);
+ mrioc = shost_priv(shost);
+
+ switch (state) {
+ case pci_channel_io_normal:
+ return PCI_ERS_RESULT_CAN_RECOVER;
+ case pci_channel_io_frozen:
+ mrioc->pci_err_recovery = true;
+ mrioc->block_on_pci_err = true;
+ do {
+ if (mrioc->reset_in_progress || mrioc->is_driver_loading)
+ ssleep(1);
+ else
+ break;
+ } while (--timeout);
+
+ if (!timeout) {
+ mrioc->pci_err_recovery = true;
+ mrioc->block_on_pci_err = true;
+ mrioc->unrecoverable = 1;
+ mpi3mr_stop_watchdog(mrioc);
+ mpi3mr_flush_cmds_for_unrecovered_controller(mrioc);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ scsi_block_requests(mrioc->shost);
+ mpi3mr_stop_watchdog(mrioc);
+ mpi3mr_cleanup_resources(mrioc);
+ return PCI_ERS_RESULT_NEED_RESET;
+ case pci_channel_io_perm_failure:
+ mrioc->pci_err_recovery = true;
+ mrioc->block_on_pci_err = true;
+ mrioc->unrecoverable = 1;
+ mpi3mr_stop_watchdog(mrioc);
+ mpi3mr_flush_cmds_for_unrecovered_controller(mrioc);
+ return PCI_ERS_RESULT_DISCONNECT;
+ default:
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+}
+
+/**
+ * mpi3mr_pcierr_slot_reset - Post slot reset callback
+ * @pdev: PCI device instance
+ *
+ * This function is called by the PCI error recovery driver
+ * after a slot or link reset issued by it for the recovery, the
+ * driver is expected to bring back the controller and
+ * initialize it.
+ *
+ * This function restores PCI state and reinitializes controller
+ * resources and the controller, this blocks for any pending
+ * reset to complete.
+ *
+ * Returns: PCI_ERS_RESULT_DISCONNECT on failure or
+ * PCI_ERS_RESULT_RECOVERED
+ */
+static pci_ers_result_t mpi3mr_pcierr_slot_reset(struct pci_dev *pdev)
+{
+ struct Scsi_Host *shost;
+ struct mpi3mr_ioc *mrioc;
+ unsigned int timeout = MPI3MR_RESET_TIMEOUT;
+
+ dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
+
+ shost = pci_get_drvdata(pdev);
+ mrioc = shost_priv(shost);
+
+ do {
+ if (mrioc->reset_in_progress)
+ ssleep(1);
+ else
+ break;
+ } while (--timeout);
+
+ if (!timeout)
+ goto out_failed;
+
+ pci_restore_state(pdev);
+
+ if (mpi3mr_setup_resources(mrioc)) {
+ ioc_err(mrioc, "setup resources failed\n");
+ goto out_failed;
+ }
+ mrioc->unrecoverable = 0;
+ mrioc->pci_err_recovery = false;
+
+ if (mpi3mr_soft_reset_handler(mrioc, MPI3MR_RESET_FROM_FIRMWARE, 0))
+ goto out_failed;
+
+ return PCI_ERS_RESULT_RECOVERED;
+
+out_failed:
+ mrioc->unrecoverable = 1;
+ mrioc->block_on_pci_err = false;
+ scsi_unblock_requests(shost);
+ mpi3mr_start_watchdog(mrioc);
+ return PCI_ERS_RESULT_DISCONNECT;
+}
+
+/**
+ * mpi3mr_pcierr_resume - PCI error recovery resume
+ * callback
+ * @pdev: PCI device instance
+ *
+ * This function enables all I/O and IOCTLs post reset issued as
+ * part of the PCI error recovery
+ *
+ * Return: Nothing.
+ */
+static void mpi3mr_pcierr_resume(struct pci_dev *pdev)
+{
+ struct Scsi_Host *shost;
+ struct mpi3mr_ioc *mrioc;
+
+ dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
+
+ shost = pci_get_drvdata(pdev);
+ mrioc = shost_priv(shost);
+
+ if (mrioc->block_on_pci_err) {
+ mrioc->block_on_pci_err = false;
+ scsi_unblock_requests(shost);
+ mpi3mr_start_watchdog(mrioc);
+ }
+}
+
+/**
+ * mpi3mr_pcierr_mmio_enabled - PCI error recovery callback
+ * @pdev: PCI device instance
+ *
+ * This is called only if mpi3mr_pcierr_error_detected returns
+ * PCI_ERS_RESULT_CAN_RECOVER.
+ *
+ * Return: PCI_ERS_RESULT_DISCONNECT when the controller is
+ * unrecoverable or when the shost/mrioc reference cannot be
+ * found, else return PCI_ERS_RESULT_RECOVERED
+ */
+static pci_ers_result_t mpi3mr_pcierr_mmio_enabled(struct pci_dev *pdev)
+{
+ struct Scsi_Host *shost;
+ struct mpi3mr_ioc *mrioc;
+
+ dev_info(&pdev->dev, "%s: callback invoked\n", __func__);
+
+ shost = pci_get_drvdata(pdev);
+ mrioc = shost_priv(shost);
+
+ if (mrioc->unrecoverable)
+ return PCI_ERS_RESULT_DISCONNECT;
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
static const struct pci_device_id mpi3mr_pci_id_table[] = {
{
PCI_DEVICE_SUB(MPI3_MFGPAGE_VENDORID_BROADCOM,
@@ -5563,6 +5793,13 @@ static const struct pci_device_id mpi3mr_pci_id_table[] = {
};
MODULE_DEVICE_TABLE(pci, mpi3mr_pci_id_table);
+static struct pci_error_handlers mpi3mr_err_handler = {
+ .error_detected = mpi3mr_pcierr_error_detected,
+ .mmio_enabled = mpi3mr_pcierr_mmio_enabled,
+ .slot_reset = mpi3mr_pcierr_slot_reset,
+ .resume = mpi3mr_pcierr_resume,
+};
+
static SIMPLE_DEV_PM_OPS(mpi3mr_pm_ops, mpi3mr_suspend, mpi3mr_resume);
static struct pci_driver mpi3mr_pci_driver = {
@@ -5571,6 +5808,7 @@ static struct pci_driver mpi3mr_pci_driver = {
.probe = mpi3mr_probe,
.remove = mpi3mr_remove,
.shutdown = mpi3mr_shutdown,
+ .err_handler = &mpi3mr_err_handler,
.driver.pm = &mpi3mr_pm_ops,
};