diff options
author | Jonathan Kim <jonathan.kim@amd.com> | 2022-04-22 12:26:18 -0400 |
---|---|---|
committer | Alex Deucher <alexander.deucher@amd.com> | 2023-06-09 12:36:17 -0400 |
commit | 12fb1ad70d65edc3405884792d044fa79df7244f (patch) | |
tree | 4718cd398489ee00a9275922a2372b86f791cfea /drivers/gpu/drm/amd/amdkfd/kfd_debug.c | |
parent | a37d23f816b18a324c24d066d5bc453308913bf9 (diff) |
drm/amdkfd: update process interrupt handling for debug events
The debugger must be notified by any debugger subscribed exception
that comes from hardware interrupts.
If a debugger session exits, any exceptions it subscribed to may still
have interrupts in the interrupt ring buffer or KGD/KFD pipeline.
To prevent a new session from inheriting stale interrupts, when a new
queue is created, open an interrupt drain and allow the IH ring to drain
from a timestamped checkpoint. Then inject a custom IV so that once
the custom IV is picked up by the KFD, it's safe to close the drain
and proceed with queue creation.
The drain must also be on debug disable as SW interrupts may still
be processed. Drain at this time and clear all the exception status.
The debugger may also not be attached nor subscibed to certain
exceptions so forward them directly to the runtime.
GFX10 also requires its own IV processing, hence the creation of
kfd_int_process_v10.c. This is because the IV from SQ interrupts are
packed into a new continguous format unlike GFX9. To make this clear,
a separate interrupting handling code file was created.
Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_debug.c | 84 |
1 files changed, 84 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c index 17e8e9edccbf..68b657398d41 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c @@ -125,6 +125,64 @@ bool kfd_dbg_ev_raise(uint64_t event_mask, return is_subscribed; } +/* set pending event queue entry from ring entry */ +bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev, + unsigned int pasid, + uint32_t doorbell_id, + uint64_t trap_mask, + void *exception_data, + size_t exception_data_size) +{ + struct kfd_process *p; + bool signaled_to_debugger_or_runtime = false; + + p = kfd_lookup_process_by_pasid(pasid); + + if (!p) + return false; + + if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true, + exception_data, exception_data_size)) { + struct process_queue_manager *pqm; + struct process_queue_node *pqn; + + if (!!(trap_mask & KFD_EC_MASK_QUEUE) && + p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) { + mutex_lock(&p->mutex); + + pqm = &p->pqm; + list_for_each_entry(pqn, &pqm->queues, + process_queue_list) { + + if (!(pqn->q && pqn->q->device == dev && + pqn->q->doorbell_id == doorbell_id)) + continue; + + kfd_send_exception_to_runtime(p, pqn->q->properties.queue_id, + trap_mask); + + signaled_to_debugger_or_runtime = true; + + break; + } + + mutex_unlock(&p->mutex); + } else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) { + kfd_dqm_evict_pasid(dev->dqm, p->pasid); + kfd_signal_vm_fault_event(dev, p->pasid, NULL, + exception_data); + + signaled_to_debugger_or_runtime = true; + } + } else { + signaled_to_debugger_or_runtime = true; + } + + kfd_unref_process(p); + + return signaled_to_debugger_or_runtime; +} + int kfd_dbg_send_exception_to_runtime(struct kfd_process *p, unsigned int dev_id, unsigned int queue_id, @@ -281,6 +339,31 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind kfd_dbg_set_workaround(target, false); } +static void kfd_dbg_clean_exception_status(struct kfd_process *target) +{ + struct process_queue_manager *pqm; + struct process_queue_node *pqn; + int i; + + for (i = 0; i < target->n_pdds; i++) { + struct kfd_process_device *pdd = target->pdds[i]; + + kfd_process_drain_interrupts(pdd); + + pdd->exception_status = 0; + } + + pqm = &target->pqm; + list_for_each_entry(pqn, &pqm->queues, process_queue_list) { + if (!pqn->q) + continue; + + pqn->q->properties.exception_status = 0; + } + + target->exception_status = 0; +} + int kfd_dbg_trap_disable(struct kfd_process *target) { if (!target->debug_trap_enabled) @@ -304,6 +387,7 @@ int kfd_dbg_trap_disable(struct kfd_process *target) } target->debug_trap_enabled = false; + kfd_dbg_clean_exception_status(target); kfd_unref_process(target); return 0; |