From 7d7cbeaba5b7aea8e1e4eb988d6b5e7cb3c34490 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 4 Jan 2021 15:02:56 -0800 Subject: PCI/ERR: Clear status of the reporting device Error handling operates on the first Downstream Port above the detected error, but the error may have been reported by a downstream device. Clear the AER status of the device that reported the error rather than the first Downstream Port. Link: https://lore.kernel.org/r/20210104230300.1277180-2-kbusch@kernel.org Tested-by: Hedi Berriche Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Acked-by: Sean V Kelley Acked-by: Hedi Berriche --- drivers/pci/pcie/err.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 510f31f0ef6d..a84f0bf4c1e2 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -231,15 +231,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_walk_bridge(bridge, report_resume, &status); /* - * If we have native control of AER, clear error status in the Root - * Port or Downstream Port that signaled the error. If the - * platform retained control of AER, it is responsible for clearing - * this status. In that case, the signaling device may not even be - * visible to the OS. + * If we have native control of AER, clear error status in the device + * that detected the error. If the platform retained control of AER, + * it is responsible for clearing this status. In that case, the + * signaling device may not even be visible to the OS. */ if (host->native_aer || pcie_ports_native) { - pcie_clear_device_status(bridge); - pci_aer_clear_nonfatal_status(bridge); + pcie_clear_device_status(dev); + pci_aer_clear_nonfatal_status(dev); } pci_info(bridge, "device recovery successful\n"); return status; -- cgit v1.2.3-70-g09d2 From 7a8a22be35a5058366429e311017e05206c43137 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 4 Jan 2021 15:02:57 -0800 Subject: PCI/AER: Clear AER status from Root Port when resetting Downstream Port The pci_dev parameter given to aer_root_reset() may be a Downstream Port rather than the Root Port. Get the Root Port from the provided device in order to clear the root's AER status. Link: https://lore.kernel.org/r/20210104230300.1277180-3-kbusch@kernel.org Tested-by: Hedi Berriche Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Acked-by: Sean V Kelley Acked-by: Hedi Berriche --- drivers/pci/pcie/aer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 77b0f2c45bc0..3fd4aaaa627e 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1388,7 +1388,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) if (type == PCI_EXP_TYPE_RC_END) root = dev->rcec; else - root = dev; + root = pcie_find_root_port(dev); /* * If the platform retained control of AER, an RCiEP may not have -- cgit v1.2.3-70-g09d2 From 387c72cdd7fb6bef650fb078d0f6ae9682abf631 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 4 Jan 2021 15:02:58 -0800 Subject: PCI/ERR: Retain status from error notification Overwriting the frozen detected status with the result of the link reset loses the NEED_RESET result that drivers are depending on for error handling to report the .slot_reset() callback. Retain this status so that subsequent error handling has the correct flow. Link: https://lore.kernel.org/r/20210104230300.1277180-4-kbusch@kernel.org Reported-by: Hinko Kocevar Tested-by: Hedi Berriche Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Acked-by: Sean V Kelley Acked-by: Hedi Berriche --- drivers/pci/pcie/err.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index a84f0bf4c1e2..b576aa890c76 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -198,8 +198,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, pci_dbg(bridge, "broadcast error_detected message\n"); if (state == pci_channel_io_frozen) { pci_walk_bridge(bridge, report_frozen_detected, &status); - status = reset_subordinates(bridge); - if (status != PCI_ERS_RESULT_RECOVERED) { + if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) { pci_warn(bridge, "subordinate device reset failed\n"); goto failed; } -- cgit v1.2.3-70-g09d2 From 33ac78bd3b509d36e7f109a447e28af42e637cb2 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 4 Jan 2021 15:02:59 -0800 Subject: PCI/AER: Specify the type of Port that was reset The AER driver may be called upon to reset either a Downstream or a Root Port. Check which type it is to properly identify it when logging that the reset occurred. Link: https://lore.kernel.org/r/20210104230300.1277180-5-kbusch@kernel.org Tested-by: Hedi Berriche Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Acked-by: Hedi Berriche --- drivers/pci/pcie/aer.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 3fd4aaaa627e..ba22388342d1 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1414,7 +1414,8 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) } } else { rc = pci_bus_error_reset(dev); - pci_info(dev, "Root Port link has been reset (%d)\n", rc); + pci_info(dev, "%s Port link has been reset (%d)\n", + pci_is_root_bus(dev->bus) ? "Root" : "Downstream", rc); } if ((host->native_aer || pcie_ports_native) && aer) { -- cgit v1.2.3-70-g09d2 From ba952824e6c106f979c07814c8e3ef7405dd7b29 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 4 Jan 2021 15:03:00 -0800 Subject: PCI/portdrv: Report reset for frozen channel The PCI error recovery always resets the link for a frozen state, so the port driver should return that a reset is required for its result. This will get the .slot_reset() callback invoked, which is necessary to restore the port's config space. Without this, the driver had been relying on downstream drivers to return this status. Link: https://lore.kernel.org/r/20210104230300.1277180-6-kbusch@kernel.org Tested-by: Hedi Berriche Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Acked-by: Hedi Berriche --- drivers/pci/pcie/portdrv_pci.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/pcie/portdrv_pci.c b/drivers/pci/pcie/portdrv_pci.c index 0b250bc5f405..de141bfb0bc2 100644 --- a/drivers/pci/pcie/portdrv_pci.c +++ b/drivers/pci/pcie/portdrv_pci.c @@ -153,7 +153,8 @@ static void pcie_portdrv_remove(struct pci_dev *dev) static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev, pci_channel_state_t error) { - /* Root Port has no impact. Always recovers. */ + if (error == pci_channel_io_frozen) + return PCI_ERS_RESULT_NEED_RESET; return PCI_ERS_RESULT_CAN_RECOVER; } -- cgit v1.2.3-70-g09d2