summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2021-05-21 15:29:34 +1000
committerDave Airlie <airlied@redhat.com>2021-05-21 15:29:40 +1000
commitc99c4d0ca57c978dcc2a2f41ab8449684ea154cc (patch)
tree3fd20557381e99063293ae5d399a54d0108bcdde /drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
parent2ba047855096fff551402a87272b520fe97323f5 (diff)
parent2bb5b5f688cbbd5030629905d3ed8032ab46e79f (diff)
Merge tag 'amd-drm-next-5.14-2021-05-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.14-2021-05-19: amdgpu: - Aldebaran updates - More LTTPR display work - Vangogh updates - SDMA 5.x GCR fixes - RAS fixes - PCIe ASPM support - Modifier fixes - Enable TMZ on Renoir - Buffer object code cleanup - Display overlay fixes - Initial support for multiple eDP panels - Initial SR-IOV support for Aldebaran - DP link training refactor - Misc code cleanups and bug fixes - SMU regression fixes for variable sized arrays - MAINTAINERS fixes for amdgpu amdkfd: - Initial SR-IOV support for Aldebaran - Topology fixes - Initial HMM SVM support - Misc code cleanups and bug fixes radeon: - Misc code cleanups and bug fixes - SMU regression fixes for variable sized arrays - Flickering fix for Oland with multiple 4K displays UAPI: - amdgpu: Drop AMDGPU_GEM_CREATE_SHADOW flag. This was always a kernel internal flag and userspace use of it has always been blocked. It's no longer needed so remove it. - amdkfd: HMM SVM support Overview: https://patchwork.freedesktop.org/series/85562/ Proposed userspace: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip Signed-off-by: Dave Airlie <airlied@redhat.com> From: Alex Deucher <alexander.deucher@amd.com> Link: https://patchwork.freedesktop.org/patch/msgid/20210520031258.231896-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c61
1 files changed, 56 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c39ed9eb0987..a129ecc73869 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -333,6 +333,17 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
}
/**
+ * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
+ *
+ * @addr: 48 bit physical address, page aligned (36 significant bits)
+ * @pasid: 16 bit process address space identifier
+ */
+static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
+{
+ return addr << 4 | pasid;
+}
+
+/**
* amdgpu_gmc_filter_faults - filter VM faults
*
* @adev: amdgpu device structure
@@ -348,8 +359,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid, uint64_t timestamp)
{
struct amdgpu_gmc *gmc = &adev->gmc;
-
- uint64_t stamp, key = addr << 4 | pasid;
+ uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
struct amdgpu_gmc_fault *fault;
uint32_t hash;
@@ -365,7 +375,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
while (fault->timestamp >= stamp) {
uint64_t tmp;
- if (fault->key == key)
+ if (atomic64_read(&fault->key) == key)
return true;
tmp = fault->timestamp;
@@ -378,7 +388,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
/* Add the fault to the ring */
fault = &gmc->fault_ring[gmc->last_fault];
- fault->key = key;
+ atomic64_set(&fault->key, key);
fault->timestamp = timestamp;
/* And update the hash */
@@ -387,6 +397,36 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
return false;
}
+/**
+ * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
+ *
+ * @adev: amdgpu device structure
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ *
+ * Clear the matching entry's key in the fault filter so that a future VM fault
+ * on this address is passed on to the retry fault handler for recovery.
+ */
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+ uint16_t pasid)
+{
+ struct amdgpu_gmc *gmc = &adev->gmc;
+ uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
+ struct amdgpu_gmc_fault *fault;
+ uint32_t hash;
+ uint64_t tmp;
+
+ hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+ fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+ do {
+ if (atomic64_cmpxchg(&fault->key, key, 0) == key)
+ break;
+
+ tmp = fault->timestamp;
+ fault = &gmc->fault_ring[fault->next];
+ } while (fault->timestamp < tmp);
+}
+
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
int r;
@@ -415,6 +455,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->ras_late_init) {
+ r = adev->hdp.ras_funcs->ras_late_init(adev);
+ if (r)
+ return r;
+ }
+
return 0;
}
@@ -426,11 +473,15 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->ras_fini)
- amdgpu_mmhub_ras_fini(adev);
+ adev->mmhub.ras_funcs->ras_fini(adev);
if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_fini)
adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->ras_fini)
+ adev->hdp.ras_funcs->ras_fini(adev);
}
/*