summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2018-07-30 10:23:40 +1000
committerDave Airlie <airlied@redhat.com>2018-07-30 10:23:50 +1000
commitecd7963f7cf967009882fd56eaee1e87a229bea2 (patch)
tree21d8c2583d94f26894202433adf46b98d25bf698 /drivers/gpu/drm/amd/amdgpu
parent6d52aacd92c60331ec8c3117522f4301b5195e28 (diff)
parentb5aa3f4aef724e9c0f626dcf69948b22efcc5176 (diff)
Merge tag 'drm-amdkfd-next-2018-07-28' of git://people.freedesktop.org/~gabbayo/linux into drm-next
This is amdkfd pull for 4.19. The major changes are: - Add Raven support. Raven refers to Ryzen APUs with integrated GFXv9 GPU. - Integrate GPU reset support In addition, there are a couple of small fixes and improvements, such as: - Better handling and reporting to user of VM faults - Fix race upon context restore - Allow the user to use specific Compute Units - Basic power management Signed-off-by: Dave Airlie <airlied@redhat.com> Link: https://patchwork.freedesktop.org/patch/msgid/20180728122306.GA5235@ogabbay-vm
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c14
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c33
11 files changed, 183 insertions, 8 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index e3ed08dca7b7..f8bbbb3a9504 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -243,6 +243,33 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
return r;
}
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->kfd)
+ r = kgd2kfd->pre_reset(adev->kfd);
+
+ return r;
+}
+
+int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
+{
+ int r = 0;
+
+ if (adev->kfd)
+ r = kgd2kfd->post_reset(adev->kfd);
+
+ return r;
+}
+
+void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ amdgpu_device_gpu_recover(adev, NULL, false);
+}
+
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr)
@@ -461,6 +488,14 @@ err:
return ret;
}
+void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+
+ amdgpu_dpm_switch_power_profile(adev,
+ PP_SMC_POWER_PROFILE_COMPUTE, !idle);
+}
+
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
if (adev->kfd) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a8418a3f4e9d..2f379c183ed2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -119,6 +119,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
+void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
@@ -126,6 +127,12 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
+int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
+
+int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
+
+void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
+
/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
@@ -183,6 +190,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+ struct kfd_vm_fault_info *info);
+
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index ea79908dac4c..ea3f698aef5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
@@ -216,6 +217,10 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
+ .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
+ .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
+ .gpu_recover = amdgpu_amdkfd_gpu_reset,
+ .set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@@ -571,6 +576,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
unsigned long flags, end_jiffies;
int retry;
+ if (adev->in_gpu_reset)
+ return -EIO;
+
acquire_queue(kgd, pipe_id, queue_id);
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
@@ -882,6 +890,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid;
unsigned int tmp;
+ if (adev->in_gpu_reset)
+ return -EIO;
+
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;
@@ -911,3 +922,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
RREG32(mmVM_INVALIDATE_RESPONSE);
return 0;
}
+
+ /**
+ * read_vmid_from_vmfault_reg - read vmid from register
+ *
+ * adev: amdgpu_device pointer
+ * @vmid: vmid pointer
+ * read vmid from register (CIK).
+ */
+static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
+{
+ struct amdgpu_device *adev = get_amdgpu_device(kgd);
+
+ uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
+
+ return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 19dd665e7307..f6e53e9352bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -176,6 +176,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
+ .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
+ .gpu_recover = amdgpu_amdkfd_gpu_reset,
+ .set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@@ -568,6 +571,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
int retry;
struct vi_mqd *m = get_mqd(mqd);
+ if (adev->in_gpu_reset)
+ return -EIO;
+
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
@@ -844,6 +850,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid;
unsigned int tmp;
+ if (adev->in_gpu_reset)
+ return -EIO;
+
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 1db60aa5b7f0..8efedfcb9dfc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -213,6 +213,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
+ .gpu_recover = amdgpu_amdkfd_gpu_reset,
+ .set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
@@ -679,6 +681,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
uint32_t temp;
struct v9_mqd *m = get_mqd(mqd);
+ if (adev->in_gpu_reset)
+ return -EIO;
+
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
@@ -866,6 +871,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
+ if (adev->in_gpu_reset)
+ return -EIO;
+
if (ring->ready)
return invalidate_tlbs_with_kiq(adev, pasid);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index fa38a960ce00..8a707d8bbb1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1621,6 +1621,20 @@ bo_reserve_failed:
return ret;
}
+int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
+ struct kfd_vm_fault_info *mem)
+{
+ struct amdgpu_device *adev;
+
+ adev = (struct amdgpu_device *)kgd;
+ if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
+ *mem = *adev->gmc.vm_fault_info;
+ mb();
+ atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+ }
+ return 0;
+}
+
/* Evict a userptr BO by stopping the queues if necessary
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index ec53d8f96d06..13acef526c5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3307,6 +3307,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
atomic_inc(&adev->gpu_reset_counter);
adev->in_gpu_reset = 1;
+ /* Block kfd */
+ amdgpu_amdkfd_pre_reset(adev);
+
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
@@ -3322,7 +3325,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (job && job->base.sched == &ring->sched)
continue;
- drm_sched_hw_job_reset(&ring->sched, &job->base);
+ drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
@@ -3363,6 +3366,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
}
+ /*unlock kfd */
+ amdgpu_amdkfd_post_reset(adev);
amdgpu_vf_error_trans_all(adev);
adev->in_gpu_reset = 0;
mutex_unlock(&adev->lock_reset);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 6cb4948233cb..bb5a47a45790 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -105,6 +105,8 @@ struct amdgpu_gmc {
/* protects concurrent invalidation */
spinlock_t invalidate_lock;
bool translate_further;
+ struct kfd_vm_fault_info *vm_fault_info;
+ atomic_t vm_fault_info_updated;
const struct amdgpu_gmc_funcs *gmc_funcs;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 5d7d7900ccab..9eedc9810004 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -495,11 +495,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
eaddr = eaddr & ((1 << shift) - 1);
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
+ if (vm->root.base.bo->shadow)
+ flags |= AMDGPU_GEM_CREATE_SHADOW;
if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else
- flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
- AMDGPU_GEM_CREATE_SHADOW);
+ flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
/* walk over the address space and allocate the page tables */
for (pt_idx = from; pt_idx <= to; ++pt_idx) {
@@ -2587,7 +2588,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- else
+ else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE)
flags |= AMDGPU_GEM_CREATE_SHADOW;
size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
@@ -2662,8 +2663,7 @@ error_free_sched_entity:
* - pasid (old PASID is released, because compute manages its own PASIDs)
*
* Reinitializes the page directory to reflect the changed ATS
- * setting. May leave behind an unused shadow BO for the page
- * directory when switching from SDMA updates to CPU updates.
+ * setting.
*
* Returns:
* 0 for success, -errno for errors.
@@ -2713,6 +2713,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->pasid = 0;
}
+ /* Free the shadow bo for compute VM */
+ amdgpu_bo_unref(&vm->root.base.bo->shadow);
+
error:
amdgpu_bo_unreserve(vm->root.base.bo);
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 10920f0bd85f..36dc367c4b45 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -28,6 +28,7 @@
#include "cik.h"
#include "gmc_v7_0.h"
#include "amdgpu_ucode.h"
+#include "amdgpu_amdkfd.h"
#include "bif/bif_4_1_d.h"
#include "bif/bif_4_1_sh_mask.h"
@@ -1078,6 +1079,12 @@ static int gmc_v7_0_sw_init(void *handle)
adev->vm_manager.vram_base_offset = 0;
}
+ adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
+ GFP_KERNEL);
+ if (!adev->gmc.vm_fault_info)
+ return -ENOMEM;
+ atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+
return 0;
}
@@ -1087,6 +1094,7 @@ static int gmc_v7_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
+ kfree(adev->gmc.vm_fault_info);
gmc_v7_0_gart_fini(adev);
amdgpu_bo_fini(adev);
release_firmware(adev->gmc.fw);
@@ -1276,7 +1284,7 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- u32 addr, status, mc_client;
+ u32 addr, status, mc_client, vmid;
addr = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR);
status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
@@ -1301,6 +1309,29 @@ static int gmc_v7_0_process_interrupt(struct amdgpu_device *adev,
entry->pasid);
}
+ vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ VMID);
+ if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+ && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
+ u32 protections = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ PROTECTIONS);
+
+ info->vmid = vmid;
+ info->mc_id = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_ID);
+ info->status = status;
+ info->page_addr = addr;
+ info->prot_valid = protections & 0x7 ? true : false;
+ info->prot_read = protections & 0x8 ? true : false;
+ info->prot_write = protections & 0x10 ? true : false;
+ info->prot_exec = protections & 0x20 ? true : false;
+ mb();
+ atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ }
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 75f3ffb2891e..70fc97b59b4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -26,6 +26,7 @@
#include "amdgpu.h"
#include "gmc_v8_0.h"
#include "amdgpu_ucode.h"
+#include "amdgpu_amdkfd.h"
#include "gmc/gmc_8_1_d.h"
#include "gmc/gmc_8_1_sh_mask.h"
@@ -1182,6 +1183,12 @@ static int gmc_v8_0_sw_init(void *handle)
adev->vm_manager.vram_base_offset = 0;
}
+ adev->gmc.vm_fault_info = kmalloc(sizeof(struct kfd_vm_fault_info),
+ GFP_KERNEL);
+ if (!adev->gmc.vm_fault_info)
+ return -ENOMEM;
+ atomic_set(&adev->gmc.vm_fault_info_updated, 0);
+
return 0;
}
@@ -1191,6 +1198,7 @@ static int gmc_v8_0_sw_fini(void *handle)
amdgpu_gem_force_release(adev);
amdgpu_vm_manager_fini(adev);
+ kfree(adev->gmc.vm_fault_info);
gmc_v8_0_gart_fini(adev);
amdgpu_bo_fini(adev);
release_firmware(adev->gmc.fw);
@@ -1426,7 +1434,7 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- u32 addr, status, mc_client;
+ u32 addr, status, mc_client, vmid;
if (amdgpu_sriov_vf(adev)) {
dev_err(adev->dev, "GPU fault detected: %d 0x%08x\n",
@@ -1463,6 +1471,29 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
entry->pasid);
}
+ vmid = REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ VMID);
+ if (amdgpu_amdkfd_is_kfd_vmid(adev, vmid)
+ && !atomic_read(&adev->gmc.vm_fault_info_updated)) {
+ struct kfd_vm_fault_info *info = adev->gmc.vm_fault_info;
+ u32 protections = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ PROTECTIONS);
+
+ info->vmid = vmid;
+ info->mc_id = REG_GET_FIELD(status,
+ VM_CONTEXT1_PROTECTION_FAULT_STATUS,
+ MEMORY_CLIENT_ID);
+ info->status = status;
+ info->page_addr = addr;
+ info->prot_valid = protections & 0x7 ? true : false;
+ info->prot_read = protections & 0x8 ? true : false;
+ info->prot_write = protections & 0x10 ? true : false;
+ info->prot_exec = protections & 0x20 ? true : false;
+ mb();
+ atomic_set(&adev->gmc.vm_fault_info_updated, 1);
+ }
+
return 0;
}