From c49ae619905eebd3f54598a84e4cd2bd58ba8fe9 Mon Sep 17 00:00:00 2001 From: Pali Rohár Date: Mon, 14 Feb 2022 12:02:28 +0100 Subject: PCI: mvebu: Fix device enumeration regression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Jan reported that on Turris Omnia (Armada 385), no PCIe devices were detected after upgrading from v5.16.1 to v5.16.3 and identified the cause as the backport of 91a8d79fc797 ("PCI: mvebu: Fix configuring secondary bus of PCIe Root Port via emulated bridge"), which appeared in v5.17-rc1. 91a8d79fc797 was incorrectly applied from mailing list patch [1] to the linux git repository [2] probably due to resolving merge conflicts incorrectly. Fix it now. [1] https://lore.kernel.org/r/20211125124605.25915-12-pali@kernel.org [2] https://git.kernel.org/linus/91a8d79fc797 [bhelgaas: commit log] BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215540 Fixes: 91a8d79fc797 ("PCI: mvebu: Fix configuring secondary bus of PCIe Root Port via emulated bridge") Link: https://lore.kernel.org/r/20220214110228.25825-1-pali@kernel.org Link: https://lore.kernel.org/r/20220127234917.GA150851@bhelgaas Reported-by: Jan Palus Signed-off-by: Pali Rohár Signed-off-by: Bjorn Helgaas --- drivers/pci/controller/pci-mvebu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/pci') diff --git a/drivers/pci/controller/pci-mvebu.c b/drivers/pci/controller/pci-mvebu.c index 71258ea3d35f..f8e82c5e2d87 100644 --- a/drivers/pci/controller/pci-mvebu.c +++ b/drivers/pci/controller/pci-mvebu.c @@ -1329,7 +1329,8 @@ static int mvebu_pcie_probe(struct platform_device *pdev) * indirectly via kernel emulated PCI bridge driver. */ mvebu_pcie_setup_hw(port); - mvebu_pcie_set_local_dev_nr(port, 0); + mvebu_pcie_set_local_dev_nr(port, 1); + mvebu_pcie_set_local_bus_nr(port, 0); } pcie->nports = i; -- cgit v1.2.3-70-g09d2 From ba1366f3d039e7c3ca1fc29ed00ce3ed2b8fd32f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 13 Feb 2022 14:54:05 +0100 Subject: PCI: vmd: Prevent recursive locking on interrupt allocation Tejas reported the following recursive locking issue: swapper/0/1 is trying to acquire lock: ffff8881074fd0a0 (&md->mutex){+.+.}-{3:3}, at: msi_get_virq+0x30/0xc0 but task is already holding lock: ffff8881017cd6a0 (&md->mutex){+.+.}-{3:3}, at: __pci_enable_msi_range+0xf2/0x290 stack backtrace: __mutex_lock+0x9d/0x920 msi_get_virq+0x30/0xc0 pci_irq_vector+0x26/0x30 vmd_msi_init+0xcc/0x210 msi_domain_alloc+0xbf/0x150 msi_domain_alloc_irqs_descs_locked+0x3e/0xb0 __pci_enable_msi_range+0x155/0x290 pci_alloc_irq_vectors_affinity+0xba/0x100 pcie_port_device_register+0x307/0x550 pcie_portdrv_probe+0x3c/0xd0 pci_device_probe+0x95/0x110 This is caused by the VMD MSI code which does a lookup of the Linux interrupt number for an VMD managed MSI[X] vector. The lookup function tries to acquire the already held mutex. Avoid that by caching the Linux interrupt number at initialization time instead of looking it up over and over. Fixes: 82ff8e6b78fc ("PCI/MSI: Use msi_get_virq() in pci_get_vector()") Reported-by: "Surendrakumar Upadhyay, TejaskumarX" Signed-off-by: Thomas Gleixner Tested-by: "Surendrakumar Upadhyay, TejaskumarX" Cc: linux-pci@vger.kernel.org Link: https://lore.kernel.org/r/87a6euub2a.ffs@tglx --- drivers/pci/controller/vmd.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/controller/vmd.c b/drivers/pci/controller/vmd.c index cc166c683638..eb05cceab964 100644 --- a/drivers/pci/controller/vmd.c +++ b/drivers/pci/controller/vmd.c @@ -99,11 +99,13 @@ struct vmd_irq { * @srcu: SRCU struct for local synchronization. * @count: number of child IRQs assigned to this vector; used to track * sharing. + * @virq: The underlying VMD Linux interrupt number */ struct vmd_irq_list { struct list_head irq_list; struct srcu_struct srcu; unsigned int count; + unsigned int virq; }; struct vmd_dev { @@ -253,7 +255,6 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, struct msi_desc *desc = arg->desc; struct vmd_dev *vmd = vmd_from_bus(msi_desc_to_pci_dev(desc)->bus); struct vmd_irq *vmdirq = kzalloc(sizeof(*vmdirq), GFP_KERNEL); - unsigned int index, vector; if (!vmdirq) return -ENOMEM; @@ -261,10 +262,8 @@ static int vmd_msi_init(struct irq_domain *domain, struct msi_domain_info *info, INIT_LIST_HEAD(&vmdirq->node); vmdirq->irq = vmd_next_irq(vmd, desc); vmdirq->virq = virq; - index = index_from_irqs(vmd, vmdirq->irq); - vector = pci_irq_vector(vmd->dev, index); - irq_domain_set_info(domain, virq, vector, info->chip, vmdirq, + irq_domain_set_info(domain, virq, vmdirq->irq->virq, info->chip, vmdirq, handle_untracked_irq, vmd, NULL); return 0; } @@ -685,7 +684,8 @@ static int vmd_alloc_irqs(struct vmd_dev *vmd) return err; INIT_LIST_HEAD(&vmd->irqs[i].irq_list); - err = devm_request_irq(&dev->dev, pci_irq_vector(dev, i), + vmd->irqs[i].virq = pci_irq_vector(dev, i); + err = devm_request_irq(&dev->dev, vmd->irqs[i].virq, vmd_irq, IRQF_NO_THREAD, vmd->name, &vmd->irqs[i]); if (err) @@ -969,7 +969,7 @@ static int vmd_suspend(struct device *dev) int i; for (i = 0; i < vmd->msix_count; i++) - devm_free_irq(dev, pci_irq_vector(pdev, i), &vmd->irqs[i]); + devm_free_irq(dev, vmd->irqs[i].virq, &vmd->irqs[i]); return 0; } @@ -981,7 +981,7 @@ static int vmd_resume(struct device *dev) int err, i; for (i = 0; i < vmd->msix_count; i++) { - err = devm_request_irq(dev, pci_irq_vector(pdev, i), + err = devm_request_irq(dev, vmd->irqs[i].virq, vmd_irq, IRQF_NO_THREAD, vmd->name, &vmd->irqs[i]); if (err) -- cgit v1.2.3-70-g09d2 From 3f1271b54edcc692da5a3663f2aa2a64781f9bc3 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 22 Feb 2022 11:08:01 -0500 Subject: PCI: Mark all AMD Navi10 and Navi14 GPU ATS as broken MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are enough VBIOS escapes without the proper workaround that some users still hit this. Microsoft never productized ATS on Windows so OEM platforms that were Windows-only didn't always validate ATS. The advantages of ATS are not worth it compared to the potential instabilities on harvested boards. Disable ATS on all Navi10 and Navi14 boards. Symptoms include: amdgpu 0000:07:00.0: AMD-Vi: Event logged [IO_PAGE_FAULT domain=0x0007 address=0xffffc02000 flags=0x0000] AMD-Vi: Event logged [IO_PAGE_FAULT device=07:00.0 domain=0x0007 address=0xffffc02000 flags=0x0000] [drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring sdma0 timeout, signaled seq=6047, emitted seq=6049 amdgpu 0000:07:00.0: amdgpu: GPU reset begin! amdgpu 0000:07:00.0: amdgpu: GPU reset succeeded, trying to resume amdgpu 0000:07:00.0: [drm:amdgpu_ring_test_helper [amdgpu]] *ERROR* ring sdma0 test failed (-110) [drm:amdgpu_device_ip_resume_phase2 [amdgpu]] *ERROR* resume of IP block failed -110 amdgpu 0000:07:00.0: amdgpu: GPU reset(1) failed Related commits: e8946a53e2a6 ("PCI: Mark AMD Navi14 GPU ATS as broken") a2da5d8cc0b0 ("PCI: Mark AMD Raven iGPU ATS as broken in some platforms") 45beb31d3afb ("PCI: Mark AMD Navi10 GPU rev 0x00 ATS as broken") 5e89cd303e3a ("PCI: Mark AMD Navi14 GPU rev 0xc5 ATS as broken") d28ca864c493 ("PCI: Mark AMD Stoney Radeon R7 GPU ATS as broken") 9b44b0b09dec ("PCI: Mark AMD Stoney GPU ATS as broken") [bhelgaas: add symptoms and related commits] Bug: https://gitlab.freedesktop.org/drm/amd/-/issues/1760 Link: https://lore.kernel.org/r/20220222160801.841643-1-alexander.deucher@amd.com Signed-off-by: Alex Deucher Signed-off-by: Bjorn Helgaas Acked-by: Christian König Acked-by: Guchun Chen --- drivers/pci/quirks.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'drivers/pci') diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index d2dd6a6cda60..65f7f6b0576c 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -5344,11 +5344,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SERVERWORKS, 0x0422, quirk_no_ext_tags); */ static void quirk_amd_harvest_no_ats(struct pci_dev *pdev) { - if ((pdev->device == 0x7312 && pdev->revision != 0x00) || - (pdev->device == 0x7340 && pdev->revision != 0xc5) || - (pdev->device == 0x7341 && pdev->revision != 0x00)) - return; - if (pdev->device == 0x15d8) { if (pdev->revision == 0xcf && pdev->subsystem_vendor == 0xea50 && @@ -5370,10 +5365,19 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x98e4, quirk_amd_harvest_no_ats); /* AMD Iceland dGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x6900, quirk_amd_harvest_no_ats); /* AMD Navi10 dGPU */ +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7310, quirk_amd_harvest_no_ats); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7312, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7318, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7319, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731a, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731b, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731e, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x731f, quirk_amd_harvest_no_ats); /* AMD Navi14 dGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7340, quirk_amd_harvest_no_ats); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7341, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x7347, quirk_amd_harvest_no_ats); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x734f, quirk_amd_harvest_no_ats); /* AMD Raven platform iGPU */ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x15d8, quirk_amd_harvest_no_ats); #endif /* CONFIG_PCI_ATS */ -- cgit v1.2.3-70-g09d2