summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
diff options
context:
space:
mode:
authorJonathan Kim <jonathan.kim@amd.com>2022-04-22 12:26:18 -0400
committerAlex Deucher <alexander.deucher@amd.com>2023-06-09 12:36:17 -0400
commit12fb1ad70d65edc3405884792d044fa79df7244f (patch)
tree4718cd398489ee00a9275922a2372b86f791cfea /drivers/gpu/drm/amd/amdkfd/kfd_debug.c
parenta37d23f816b18a324c24d066d5bc453308913bf9 (diff)
drm/amdkfd: update process interrupt handling for debug events
The debugger must be notified by any debugger subscribed exception that comes from hardware interrupts. If a debugger session exits, any exceptions it subscribed to may still have interrupts in the interrupt ring buffer or KGD/KFD pipeline. To prevent a new session from inheriting stale interrupts, when a new queue is created, open an interrupt drain and allow the IH ring to drain from a timestamped checkpoint. Then inject a custom IV so that once the custom IV is picked up by the KFD, it's safe to close the drain and proceed with queue creation. The drain must also be on debug disable as SW interrupts may still be processed. Drain at this time and clear all the exception status. The debugger may also not be attached nor subscibed to certain exceptions so forward them directly to the runtime. GFX10 also requires its own IV processing, hence the creation of kfd_int_process_v10.c. This is because the IV from SQ interrupts are packed into a new continguous format unlike GFX9. To make this clear, a separate interrupting handling code file was created. Signed-off-by: Jonathan Kim <jonathan.kim@amd.com> Reviewed-by: Felix Kuehling <felix.kuehling@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_debug.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_debug.c84
1 files changed, 84 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 17e8e9edccbf..68b657398d41 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -125,6 +125,64 @@ bool kfd_dbg_ev_raise(uint64_t event_mask,
return is_subscribed;
}
+/* set pending event queue entry from ring entry */
+bool kfd_set_dbg_ev_from_interrupt(struct kfd_node *dev,
+ unsigned int pasid,
+ uint32_t doorbell_id,
+ uint64_t trap_mask,
+ void *exception_data,
+ size_t exception_data_size)
+{
+ struct kfd_process *p;
+ bool signaled_to_debugger_or_runtime = false;
+
+ p = kfd_lookup_process_by_pasid(pasid);
+
+ if (!p)
+ return false;
+
+ if (!kfd_dbg_ev_raise(trap_mask, p, dev, doorbell_id, true,
+ exception_data, exception_data_size)) {
+ struct process_queue_manager *pqm;
+ struct process_queue_node *pqn;
+
+ if (!!(trap_mask & KFD_EC_MASK_QUEUE) &&
+ p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED) {
+ mutex_lock(&p->mutex);
+
+ pqm = &p->pqm;
+ list_for_each_entry(pqn, &pqm->queues,
+ process_queue_list) {
+
+ if (!(pqn->q && pqn->q->device == dev &&
+ pqn->q->doorbell_id == doorbell_id))
+ continue;
+
+ kfd_send_exception_to_runtime(p, pqn->q->properties.queue_id,
+ trap_mask);
+
+ signaled_to_debugger_or_runtime = true;
+
+ break;
+ }
+
+ mutex_unlock(&p->mutex);
+ } else if (trap_mask & KFD_EC_MASK(EC_DEVICE_MEMORY_VIOLATION)) {
+ kfd_dqm_evict_pasid(dev->dqm, p->pasid);
+ kfd_signal_vm_fault_event(dev, p->pasid, NULL,
+ exception_data);
+
+ signaled_to_debugger_or_runtime = true;
+ }
+ } else {
+ signaled_to_debugger_or_runtime = true;
+ }
+
+ kfd_unref_process(p);
+
+ return signaled_to_debugger_or_runtime;
+}
+
int kfd_dbg_send_exception_to_runtime(struct kfd_process *p,
unsigned int dev_id,
unsigned int queue_id,
@@ -281,6 +339,31 @@ void kfd_dbg_trap_deactivate(struct kfd_process *target, bool unwind, int unwind
kfd_dbg_set_workaround(target, false);
}
+static void kfd_dbg_clean_exception_status(struct kfd_process *target)
+{
+ struct process_queue_manager *pqm;
+ struct process_queue_node *pqn;
+ int i;
+
+ for (i = 0; i < target->n_pdds; i++) {
+ struct kfd_process_device *pdd = target->pdds[i];
+
+ kfd_process_drain_interrupts(pdd);
+
+ pdd->exception_status = 0;
+ }
+
+ pqm = &target->pqm;
+ list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
+ if (!pqn->q)
+ continue;
+
+ pqn->q->properties.exception_status = 0;
+ }
+
+ target->exception_status = 0;
+}
+
int kfd_dbg_trap_disable(struct kfd_process *target)
{
if (!target->debug_trap_enabled)
@@ -304,6 +387,7 @@ int kfd_dbg_trap_disable(struct kfd_process *target)
}
target->debug_trap_enabled = false;
+ kfd_dbg_clean_exception_status(target);
kfd_unref_process(target);
return 0;