diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 40 |
1 files changed, 29 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index f7f6df40875e..d7eb6ac37f62 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -930,7 +930,6 @@ static void uninitialize(struct device_queue_manager *dqm) for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++) kfree(dqm->mqd_mgrs[i]); mutex_destroy(&dqm->lock_hidden); - kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem); } static int start_nocpsch(struct device_queue_manager *dqm) @@ -947,12 +946,19 @@ static int start_nocpsch(struct device_queue_manager *dqm) static int stop_nocpsch(struct device_queue_manager *dqm) { if (dqm->dev->device_info->asic_family == CHIP_HAWAII) - pm_uninit(&dqm->packets); + pm_uninit(&dqm->packets, false); dqm->sched_running = false; return 0; } +static void pre_reset(struct device_queue_manager *dqm) +{ + dqm_lock(dqm); + dqm->is_resetting = true; + dqm_unlock(dqm); +} + static int allocate_sdma_queue(struct device_queue_manager *dqm, struct queue *q) { @@ -1100,6 +1106,7 @@ static int start_cpsch(struct device_queue_manager *dqm) dqm_lock(dqm); /* clear hang status when driver try to start the hw scheduler */ dqm->is_hws_hang = false; + dqm->is_resetting = false; dqm->sched_running = true; execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0); dqm_unlock(dqm); @@ -1107,20 +1114,24 @@ static int start_cpsch(struct device_queue_manager *dqm) return 0; fail_allocate_vidmem: fail_set_sched_resources: - pm_uninit(&dqm->packets); + pm_uninit(&dqm->packets, false); fail_packet_manager_init: return retval; } static int stop_cpsch(struct device_queue_manager *dqm) { + bool hanging; + dqm_lock(dqm); - unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + if (!dqm->is_hws_hang) + unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0); + hanging = dqm->is_hws_hang || dqm->is_resetting; dqm->sched_running = false; dqm_unlock(dqm); kfd_gtt_sa_free(dqm->dev, dqm->fence_mem); - pm_uninit(&dqm->packets); + pm_uninit(&dqm->packets, hanging); return 0; } @@ -1352,8 +1363,17 @@ static int unmap_queues_cpsch(struct device_queue_manager *dqm, /* should be timed out */ retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED, queue_preemption_timeout_ms); - if (retval) + if (retval) { + pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); + dqm->is_hws_hang = true; + /* It's possible we're detecting a HWS hang in the + * middle of a GPU reset. No need to schedule another + * reset in this case. + */ + if (!dqm->is_resetting) + schedule_work(&dqm->hw_exception_work); return retval; + } pm_release_ib(&dqm->packets); dqm->active_runlist = false; @@ -1371,12 +1391,8 @@ static int execute_queues_cpsch(struct device_queue_manager *dqm, if (dqm->is_hws_hang) return -EIO; retval = unmap_queues_cpsch(dqm, filter, filter_param); - if (retval) { - pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n"); - dqm->is_hws_hang = true; - schedule_work(&dqm->hw_exception_work); + if (retval) return retval; - } return map_queues_cpsch(dqm); } @@ -1770,6 +1786,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) dqm->ops.initialize = initialize_cpsch; dqm->ops.start = start_cpsch; dqm->ops.stop = stop_cpsch; + dqm->ops.pre_reset = pre_reset; dqm->ops.destroy_queue = destroy_queue_cpsch; dqm->ops.update_queue = update_queue; dqm->ops.register_process = register_process; @@ -1788,6 +1805,7 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev) /* initialize dqm for no cp scheduling */ dqm->ops.start = start_nocpsch; dqm->ops.stop = stop_nocpsch; + dqm->ops.pre_reset = pre_reset; dqm->ops.create_queue = create_queue_nocpsch; dqm->ops.destroy_queue = destroy_queue_nocpsch; dqm->ops.update_queue = update_queue; |