From 7504d3bbec7da70516a13e34415b92bf5203399a Mon Sep 17 00:00:00 2001 From: Liu ChengZhe Date: Tue, 9 Jun 2020 16:44:43 +0800 Subject: drm/amd/amdgpu: handle return value of amdgpu_driver_load_kms if guest driver failed to enter full GPU access, amdgpu_driver_load_kms will unload kms and free dev->dev_private, drm_dev_register would access null pointer. Driver will enter an error state and can't be unloaded. Signed-off-by: Liu ChengZhe Reviewed-by: Madhav Chauhan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 126e74758a34..75bcd1789185 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1111,7 +1111,9 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, dev); - amdgpu_driver_load_kms(dev, ent->driver_data); + ret = amdgpu_driver_load_kms(dev, ent->driver_data); + if (ret) + goto err_pci; retry_init: ret = drm_dev_register(dev, ent->driver_data); -- cgit v1.2.3-70-g09d2 From b205795677c034a1975e75a466ad158950fde4b4 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Thu, 11 Jun 2020 23:19:37 -0400 Subject: drm/amdkfd: Add eviction debug messages Use WARN to print messages with backtrace when evictions are triggered. This can help determine the root cause of evictions and help spot driver bugs triggering evictions unintentionally, or help with performance tuning by avoiding conditions that cause evictions in a specific workload. The messages are controlled by a new module parameter that can be changed at runtime: echo Y > /sys/module/amdgpu/parameters/debug_evictions echo N > /sys/module/amdgpu/parameters/debug_evictions Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++ drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 +++++ 5 files changed, 20 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 905cf0bac100..3d2625beacf7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -186,8 +186,10 @@ extern int amdgpu_noretry; extern int amdgpu_force_asic_type; #ifdef CONFIG_HSA_AMD extern int sched_policy; +extern bool debug_evictions; #else static const int sched_policy = KFD_SCHED_POLICY_HWS; +static const bool debug_evictions; /* = false */ #endif extern int amdgpu_tmz; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 75bcd1789185..653a377dd342 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -705,6 +705,14 @@ MODULE_PARM_DESC(hws_gws_support, "Assume MEC2 FW supports GWS barriers (false = int queue_preemption_timeout_ms = 9000; module_param(queue_preemption_timeout_ms, int, 0644); MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)"); + +/** + * DOC: debug_evictions(bool) + * Enable extra debug messages to help determine the cause of evictions + */ +bool debug_evictions; +module_param(debug_evictions, bool, 0644); +MODULE_PARM_DESC(debug_evictions, "enable eviction debug messages (false = default)"); #endif /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index b87ca171986a..072f0e1185a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -275,6 +275,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, continue; } + WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, + "Adding eviction fence to sync obj"); r = amdgpu_sync_fence(sync, f, false); if (r) break; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 314c4b99671d..7f6d0958ed62 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -935,6 +935,7 @@ int kgd2kfd_quiesce_mm(struct mm_struct *mm) if (!p) return -ESRCH; + WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid); r = kfd_process_evict_queues(p); kfd_unref_process(p); @@ -1002,6 +1003,8 @@ int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm, /* During process initialization eviction_work.dwork is initialized * to kfd_evict_bo_worker */ + WARN(debug_evictions, "Scheduling eviction of pid %d in %ld jiffies", + p->lead_thread->pid, delay_jiffies); schedule_delayed_work(&p->eviction_work, delay_jiffies); out: kfd_unref_process(p); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index 3a4fbb6a9aca..308e96f1dab5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -177,6 +177,11 @@ extern bool hws_gws_support; */ extern int queue_preemption_timeout_ms; +/* + * Enable eviction debug messages + */ +extern bool debug_evictions; + enum cache_policy { cache_policy_coherent, cache_policy_noncoherent -- cgit v1.2.3-70-g09d2 From 5509ac65f2fe5aa3c0003237ec629ca55024307c Mon Sep 17 00:00:00 2001 From: Navid Emamdoost Date: Sun, 14 Jun 2020 02:14:50 -0500 Subject: drm/amd/display: fix ref count leak in amdgpu_drm_ioctl in amdgpu_drm_ioctl the call to pm_runtime_get_sync increments the counter even in case of failure, leading to incorrect ref count. In case of failure, decrement the ref count before returning. Signed-off-by: Navid Emamdoost Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 653a377dd342..8db76677615c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1383,11 +1383,12 @@ long amdgpu_drm_ioctl(struct file *filp, dev = file_priv->minor->dev; ret = pm_runtime_get_sync(dev->dev); if (ret < 0) - return ret; + goto out; ret = drm_ioctl(filp, cmd, arg); pm_runtime_mark_last_busy(dev->dev); +out: pm_runtime_put_autosuspend(dev->dev); return ret; } -- cgit v1.2.3-70-g09d2 From 174b328bc89b5fa9cfec57831670e8100a1e0da0 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 27 May 2020 10:31:08 +0200 Subject: drm/amdgpu: remove distinction between explicit and implicit sync (v2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Marek a pipeline sync should be inserted for implicit syncs well. v2: bump the driver version Signed-off-by: Christian König Tested-by: Marek Olšák Signed-off-by: Marek Olšák Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 12 ++++----- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 15 +++++------- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 31 +++++++----------------- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 6 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 2 +- 9 files changed, 33 insertions(+), 50 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 68e6e1bc8f3a..c408936e8f98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -395,7 +395,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return amdgpu_sync_fence(sync, vm->last_update, false); + return amdgpu_sync_fence(sync, vm->last_update); } static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) @@ -785,7 +785,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - amdgpu_sync_fence(sync, bo_va->last_pt_update, false); + amdgpu_sync_fence(sync, bo_va->last_pt_update); return 0; } @@ -804,7 +804,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, return ret; } - return amdgpu_sync_fence(sync, bo_va->last_pt_update, false); + return amdgpu_sync_fence(sync, bo_va->last_pt_update); } static int map_bo_to_gpuvm(struct amdgpu_device *adev, @@ -2102,7 +2102,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Validate BOs failed. Try again\n"); goto validate_map_fail; } - ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false); + ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving); if (ret) { pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); goto validate_map_fail; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 19070226a945..ffbcaf4bfb8b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -992,7 +992,7 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, dma_fence_put(old); } - r = amdgpu_sync_fence(&p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence); dma_fence_put(fence); if (r) return r; @@ -1014,7 +1014,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, return r; } - r = amdgpu_sync_fence(&p->job->sync, fence, true); + r = amdgpu_sync_fence(&p->job->sync, fence); dma_fence_put(fence); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 8db76677615c..2f12f3ae3c7f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -87,9 +87,10 @@ * - 3.36.0 - Allow reading more status registers on si/cik * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness * - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC + * - 3.39.0 - DMABUF implicit sync does a full pipeline sync */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 38 +#define KMS_DRIVER_MINOR 39 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index b91853fd66d3..4ffc32b78745 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -178,7 +178,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, need_ctx_switch = ring->current_ctx != fence_ctx; if (ring->funcs->emit_pipeline_sync && job && - ((tmp = amdgpu_sync_get_fence(&job->sched_sync, NULL)) || + ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) || (amdgpu_sriov_vf(adev) && need_ctx_switch) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index fe92dcd94d4a..267fa45ddb66 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -206,7 +206,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, int r; if (ring->vmid_wait && !dma_fence_is_signaled(ring->vmid_wait)) - return amdgpu_sync_fence(sync, ring->vmid_wait, false); + return amdgpu_sync_fence(sync, ring->vmid_wait); fences = kmalloc_array(sizeof(void *), id_mgr->num_ids, GFP_KERNEL); if (!fences) @@ -241,7 +241,7 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm, return -ENOMEM; } - r = amdgpu_sync_fence(sync, &array->base, false); + r = amdgpu_sync_fence(sync, &array->base); dma_fence_put(ring->vmid_wait); ring->vmid_wait = &array->base; return r; @@ -294,7 +294,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, tmp = amdgpu_sync_peek_fence(&(*id)->active, ring); if (tmp) { *id = NULL; - r = amdgpu_sync_fence(sync, tmp, false); + r = amdgpu_sync_fence(sync, tmp); return r; } needs_flush = true; @@ -303,7 +303,7 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm, /* Good we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(&(*id)->active, fence, false); + r = amdgpu_sync_fence(&(*id)->active, fence); if (r) return r; @@ -375,7 +375,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm, /* Good, we can use this VMID. Remember this submission as * user of the VMID. */ - r = amdgpu_sync_fence(&(*id)->active, fence, false); + r = amdgpu_sync_fence(&(*id)->active, fence); if (r) return r; @@ -435,7 +435,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, id = idle; /* Remember this submission as user of the VMID */ - r = amdgpu_sync_fence(&id->active, fence, false); + r = amdgpu_sync_fence(&id->active, fence); if (r) goto error; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 47207188c569..2975c4a6e581 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -183,16 +183,13 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, struct amdgpu_job *job = to_amdgpu_job(sched_job); struct amdgpu_vm *vm = job->vm; struct dma_fence *fence; - bool explicit = false; int r; - fence = amdgpu_sync_get_fence(&job->sync, &explicit); - if (fence && explicit) { - if (drm_sched_dependency_optimized(fence, s_entity)) { - r = amdgpu_sync_fence(&job->sched_sync, fence, false); - if (r) - DRM_ERROR("Error adding fence (%d)\n", r); - } + fence = amdgpu_sync_get_fence(&job->sync); + if (fence && drm_sched_dependency_optimized(fence, s_entity)) { + r = amdgpu_sync_fence(&job->sched_sync, fence); + if (r) + DRM_ERROR("Error adding fence (%d)\n", r); } while (fence == NULL && vm && !job->vmid) { @@ -202,7 +199,7 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job, if (r) DRM_ERROR("Error getting VM ID (%d)\n", r); - fence = amdgpu_sync_get_fence(&job->sync, NULL); + fence = amdgpu_sync_get_fence(&job->sync); } return fence; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index 072f0e1185a8..8ea6c49529e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -35,7 +35,6 @@ struct amdgpu_sync_entry { struct hlist_node node; struct dma_fence *fence; - bool explicit; }; static struct kmem_cache *amdgpu_sync_slab; @@ -129,8 +128,7 @@ static void amdgpu_sync_keep_later(struct dma_fence **keep, * Tries to add the fence to an existing hash entry. Returns true when an entry * was found, false otherwise. */ -static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, - bool explicit) +static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f) { struct amdgpu_sync_entry *e; @@ -139,10 +137,6 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, continue; amdgpu_sync_keep_later(&e->fence, f); - - /* Preserve eplicit flag to not loose pipe line sync */ - e->explicit |= explicit; - return true; } return false; @@ -153,27 +147,23 @@ static bool amdgpu_sync_add_later(struct amdgpu_sync *sync, struct dma_fence *f, * * @sync: sync object to add fence to * @f: fence to sync to - * @explicit: if this is an explicit dependency * * Add the fence to the sync object. */ -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, - bool explicit) +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f) { struct amdgpu_sync_entry *e; if (!f) return 0; - if (amdgpu_sync_add_later(sync, f, explicit)) + if (amdgpu_sync_add_later(sync, f)) return 0; e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL); if (!e) return -ENOMEM; - e->explicit = explicit; - hash_add(sync->fences, &e->node, f->context); e->fence = dma_fence_get(f); return 0; @@ -194,7 +184,7 @@ int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence) return 0; amdgpu_sync_keep_later(&sync->last_vm_update, fence); - return amdgpu_sync_fence(sync, fence, false); + return amdgpu_sync_fence(sync, fence); } /** @@ -221,7 +211,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, /* always sync to the exclusive fence */ f = dma_resv_get_excl(resv); - r = amdgpu_sync_fence(sync, f, false); + r = amdgpu_sync_fence(sync, f); flist = dma_resv_get_list(resv); if (!flist || r) @@ -237,7 +227,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, /* Always sync to moves, no matter what */ if (fence_owner == AMDGPU_FENCE_OWNER_UNDEFINED) { - r = amdgpu_sync_fence(sync, f, false); + r = amdgpu_sync_fence(sync, f); if (r) break; } @@ -277,7 +267,7 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, WARN(debug_evictions && fence_owner == AMDGPU_FENCE_OWNER_KFD, "Adding eviction fence to sync obj"); - r = amdgpu_sync_fence(sync, f, false); + r = amdgpu_sync_fence(sync, f); if (r) break; } @@ -332,11 +322,10 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, * amdgpu_sync_get_fence - get the next fence from the sync object * * @sync: sync object to use - * @explicit: true if the next fence is explicit * * Get and removes the next fence from the sync object not signaled yet. */ -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit) +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync) { struct amdgpu_sync_entry *e; struct hlist_node *tmp; @@ -345,8 +334,6 @@ struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, bool *explicit hash_for_each_safe(sync->fences, i, tmp, e, node) { f = e->fence; - if (explicit) - *explicit = e->explicit; hash_del(&e->node); kmem_cache_free(amdgpu_sync_slab, e); @@ -378,7 +365,7 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone) hash_for_each_safe(source->fences, i, tmp, e, node) { f = e->fence; if (!dma_fence_is_signaled(f)) { - r = amdgpu_sync_fence(clone, f, e->explicit); + r = amdgpu_sync_fence(clone, f); if (r) return r; } else { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index cfbe5788b8b9..7c0fe20c470d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -47,16 +47,14 @@ struct amdgpu_sync { }; void amdgpu_sync_create(struct amdgpu_sync *sync); -int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f, - bool explicit); +int amdgpu_sync_fence(struct amdgpu_sync *sync, struct dma_fence *f); int amdgpu_sync_vm_fence(struct amdgpu_sync *sync, struct dma_fence *fence); int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct dma_resv *resv, enum amdgpu_sync_mode mode, void *owner); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); -struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync, - bool *explicit); +struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone); int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr); void amdgpu_sync_free(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 8d9c6feba660..28bdfb3ac33d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -208,7 +208,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, int r; /* Wait for PD/PT moves to be completed */ - r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving, false); + r = amdgpu_sync_fence(&p->job->sync, bo->tbo.moving); if (r) return r; -- cgit v1.2.3-70-g09d2 From 273da6ff7ce8727e02d6e67f77eb98df4627f60b Mon Sep 17 00:00:00 2001 From: Wenhui Sheng Date: Tue, 14 Jul 2020 16:29:18 +0800 Subject: drm/amdgpu: add module parameter choose reset mode Default value is auto, doesn't change original reset method logic. v2: change to use parameter reset_method v3: add warn msg if specified mode isn't supported Signed-off-by: Likun Gao Signed-off-by: Wenhui Sheng Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/cik.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/nv.c | 8 ++++++++ drivers/gpu/drm/amd/amdgpu/si.c | 5 +++++ drivers/gpu/drm/amd/amdgpu/soc15.c | 9 +++++++++ drivers/gpu/drm/amd/amdgpu/vi.c | 8 ++++++++ 7 files changed, 47 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c99df80d62eb..327a0daf4a1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -193,6 +193,7 @@ static const bool debug_evictions; /* = false */ #endif extern int amdgpu_tmz; +extern int amdgpu_reset_method; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 2f12f3ae3c7f..2eacf1f51bbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -149,6 +149,7 @@ int amdgpu_mes = 0; int amdgpu_noretry; int amdgpu_force_asic_type = -1; int amdgpu_tmz = 0; +int amdgpu_reset_method = -1; /* auto */ struct amdgpu_mgpu_info mgpu_info = { .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), @@ -757,6 +758,13 @@ module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444); MODULE_PARM_DESC(tmz, "Enable TMZ feature (-1 = auto, 0 = off (default), 1 = on)"); module_param_named(tmz, amdgpu_tmz, int, 0444); +/** + * DOC: reset_method (int) + * GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco) + */ +MODULE_PARM_DESC(reset_method, "GPU reset method (-1 = auto (default), 0 = legacy, 1 = mode0, 2 = mode1, 3 = mode2, 4 = baco)"); +module_param_named(reset_method, amdgpu_reset_method, int, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index fe306d0f73f7..c2c67ab68a43 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1326,6 +1326,14 @@ cik_asic_reset_method(struct amdgpu_device *adev) { bool baco_reset; + if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + switch (adev->asic_type) { case CHIP_BONAIRE: case CHIP_HAWAII: diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 9f1240bd0310..aa3101ab70eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -311,6 +311,14 @@ nv_asic_reset_method(struct amdgpu_device *adev) { struct smu_context *smu = &adev->smu; + if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + if (smu_baco_is_support(smu)) return AMD_RESET_METHOD_BACO; else diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index 9d7b4ccd17b8..1b449291f068 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -1229,6 +1229,11 @@ static bool si_asic_supports_baco(struct amdgpu_device *adev) static enum amd_reset_method si_asic_reset_method(struct amdgpu_device *adev) { + if (amdgpu_reset_method != AMD_RESET_METHOD_LEGACY && + amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + return AMD_RESET_METHOD_LEGACY; } diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8c739b285915..84d811b6e48b 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -532,6 +532,15 @@ soc15_asic_reset_method(struct amdgpu_device *adev) bool baco_reset = false; struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + if (amdgpu_reset_method == AMD_RESET_METHOD_MODE1 || + amdgpu_reset_method == AMD_RESET_METHOD_MODE2 || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + switch (adev->asic_type) { case CHIP_RAVEN: case CHIP_RENOIR: diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 4e5e91888d87..f6f2ed0830b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -710,6 +710,14 @@ vi_asic_reset_method(struct amdgpu_device *adev) { bool baco_reset; + if (amdgpu_reset_method == AMD_RESET_METHOD_LEGACY || + amdgpu_reset_method == AMD_RESET_METHOD_BACO) + return amdgpu_reset_method; + + if (amdgpu_reset_method != -1) + dev_warn(adev->dev, "Specified reset method:%d isn't supported, using AUTO instead.\n", + amdgpu_reset_method); + switch (adev->asic_type) { case CHIP_FIJI: case CHIP_TONGA: -- cgit v1.2.3-70-g09d2 From 05cac1ae8ffbc5d835213ff2f9d7b8998c1f532f Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 10 Jul 2020 16:15:40 +0200 Subject: drm/amdgpu: do not disable SMU on vm reboot For passthrough device, we do baco reset after 1st vm boot so if we disable SMU on 1st VM shutdown baco reset will fail for 2nd vm boot. Signed-off-by: Nirmoy Das Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 2eacf1f51bbf..26127c7d2f32 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1186,7 +1186,8 @@ amdgpu_pci_shutdown(struct pci_dev *pdev) * unfortunately we can't detect certain * hypervisors so just do this all the time. */ - adev->mp1_state = PP_MP1_STATE_UNLOAD; + if (!amdgpu_passthrough(adev)) + adev->mp1_state = PP_MP1_STATE_UNLOAD; amdgpu_device_ip_suspend(adev); adev->mp1_state = PP_MP1_STATE_NONE; } -- cgit v1.2.3-70-g09d2