From af1f2985038fb2569786f3de64d7dd69f5580e03 Mon Sep 17 00:00:00 2001 From: Tim Huang Date: Thu, 9 Mar 2023 16:27:51 +0800 Subject: drm/amdgpu: skip ASIC reset for APUs when go to S4 For GC IP v11.0.4/11, the PSP TMR needs to be reserved for ASIC mode2 reset. But for S4, PSP suspend destroys the TMR, which causes the ASIC reset to fail. [ 96.006101] amdgpu 0000:62:00.0: amdgpu: MODE2 reset [ 100.409717] amdgpu 0000:62:00.0: amdgpu: SMU: I'm not done with your previous command: SMN_C2PMSG_66:0x00000011 SMN_C2PMSG_82:0x00000002 [ 100.411593] amdgpu 0000:62:00.0: amdgpu: Mode2 reset failed! [ 100.412470] amdgpu 0000:62:00.0: PM: pci_pm_freeze(): amdgpu_pmops_freeze+0x0/0x50 [amdgpu] returns -62 [ 100.414020] amdgpu 0000:62:00.0: PM: dpm_run_callback(): pci_pm_freeze+0x0/0xd0 returns -62 [ 100.415311] amdgpu 0000:62:00.0: PM: pci_pm_freeze+0x0/0xd0 returned -62 after 4623202 usecs [ 100.416608] amdgpu 0000:62:00.0: PM: failed to freeze async: error -62 We can skip the reset on APUs, assuming we can resume them properly. Verified on some GFX11, GFX10 and old GFX9 APUs. 
Signed-off-by: Tim Huang Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index f5ffca24def4..ba5def374368 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -2467,7 +2467,10 @@ static int amdgpu_pmops_freeze(struct device *dev) adev->in_s4 = false; if (r) return r; - return amdgpu_asic_reset(adev); + + if (amdgpu_acpi_should_gpu_reset(adev)) + return amdgpu_asic_reset(adev); + return 0; } static int amdgpu_pmops_thaw(struct device *dev) -- cgit v1.2.3-70-g09d2 From 120ceaf78e28f20ec7244c021d24d1e409572be1 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 27 Mar 2023 22:35:33 +0530 Subject: drm/amd/amdgpu: Fix error do not initialise globals to 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Global variables do not need to be initialized to 0 and checkpatch flags this error in drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c: ERROR: do not initialise globals to 0 +int amdgpu_no_queue_eviction_on_vm_fault = 0; Fix this checkpatch error. 
Cc: Christian König Cc: Alex Deucher Cc: Mario Limonciello Signed-off-by: Srinivasan Shanmugam Reviewed-by: Hamza Mahfooz Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ba5def374368..13cd524ad99b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -822,7 +822,7 @@ MODULE_PARM_DESC(no_system_mem_limit, "disable system memory limit (false = defa * DOC: no_queue_eviction_on_vm_fault (int) * If set, process queues will not be evicted on gpuvm fault. This is to keep the wavefront context for debugging (0 = queue eviction, 1 = no queue eviction). The default is 0 (queue eviction). */ -int amdgpu_no_queue_eviction_on_vm_fault = 0; +int amdgpu_no_queue_eviction_on_vm_fault; MODULE_PARM_DESC(no_queue_eviction_on_vm_fault, "No queue eviction on VM fault (0 = queue eviction, 1 = no queue eviction)"); module_param_named(no_queue_eviction_on_vm_fault, amdgpu_no_queue_eviction_on_vm_fault, int, 0444); #endif -- cgit v1.2.3-70-g09d2 From e86c30e951b50301fbc80e0ba7f6ffdc16e4fe85 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 27 Mar 2023 23:28:47 +0530 Subject: drm/amd/amdgpu: Remove initialisation of globals to 0 or NULL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Global variables do not need to be initialized to 0 or NULL and checkpatch flags this error in drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c: ERROR: do not initialise globals to NULL +char *amdgpu_disable_cu = NULL; +char *amdgpu_virtual_display = NULL; Fix this checkpatch error. 
Cc: Christian König Cc: Alex Deucher Cc: Mario Limonciello Cc: Hamza Mahfooz Signed-off-by: Srinivasan Shanmugam Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 13cd524ad99b..b4189d669b54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -148,8 +148,8 @@ uint amdgpu_pcie_lane_cap; u64 amdgpu_cg_mask = 0xffffffffffffffff; uint amdgpu_pg_mask = 0xffffffff; uint amdgpu_sdma_phase_quantum = 32; -char *amdgpu_disable_cu = NULL; -char *amdgpu_virtual_display = NULL; +char *amdgpu_disable_cu; +char *amdgpu_virtual_display; /* * OverDrive(bit 14) disabled by default -- cgit v1.2.3-70-g09d2 From 11f25c844e29f85abb0b3ffdb360a2f82a2c4ed0 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 5 Apr 2023 20:41:09 +0530 Subject: drm/amd/amdgpu: Drop the hang limit parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver doesn't resubmit jobs on hangs any more, hence drop the hang limit parameter - amdgpu_job_hang_limit, wherever it is used. 
Suggested-by: Christian König Cc: Alex Deucher Cc: Mario Limonciello Cc: Kent Russell Signed-off-by: Srinivasan Shanmugam Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 -------- 3 files changed, 1 insertion(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 8cf2cc50b3de..833996291dee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -185,7 +185,6 @@ extern char *amdgpu_disable_cu; extern char *amdgpu_virtual_display; extern uint amdgpu_pp_feature_mask; extern uint amdgpu_force_long_training; -extern int amdgpu_job_hang_limit; extern int amdgpu_lbpw; extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fac9312b1695..4819b3f86750 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2365,7 +2365,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) } r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, - ring->num_hw_submission, amdgpu_job_hang_limit, + ring->num_hw_submission, 0, timeout, adev->reset_domain->wq, ring->sched_score, ring->name, adev->dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b4189d669b54..b1ca1ab6d6ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -157,7 +157,6 @@ char *amdgpu_virtual_display; */ uint amdgpu_pp_feature_mask = 0xfff7bfff; uint amdgpu_force_long_training; -int amdgpu_job_hang_limit; int amdgpu_lbpw = -1; int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ @@ -520,13 +519,6 @@ 
MODULE_PARM_DESC(virtual_display, "Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)"); module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444); -/** - * DOC: job_hang_limit (int) - * Set how much time allow a job hang and not drop it. The default is 0. - */ -MODULE_PARM_DESC(job_hang_limit, "how much time allow a job hang and not drop it (default 0)"); -module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444); - /** * DOC: lbpw (int) * Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled). -- cgit v1.2.3-70-g09d2 From e61f67749b351c19455ce3085af2ae9af80023bc Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 6 Jun 2023 11:14:04 -0400 Subject: drm/amdgpu: add missing radeon secondary PCI ID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 0x5b70 is a missing RV370 secondary id. Add it so we don't try and probe it with amdgpu. Cc: michel@daenzer.net Reviewed-by: Michel Dänzer Tested-by: Michel Dänzer Signed-off-by: Alex Deucher Cc: stable@vger.kernel.org --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index b1ca1ab6d6ad..393b6fb7a71d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1615,6 +1615,7 @@ static const u16 amdgpu_unsupported_pciidlist[] = { 0x5874, 0x5940, 0x5941, + 0x5b70, 0x5b72, 0x5b73, 0x5b74, -- cgit v1.2.3-70-g09d2