Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 26
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c | 20
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 98
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 21
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 61
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c | 169
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c | 69
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c | 86
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 72
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 73
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 199
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 99
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 10
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 124
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c | 232
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h | 6
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/dce_virtual.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 17
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 644
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 13
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 31
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 25
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c | 4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/nv.c | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 35
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c | 40
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c | 8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c | 18
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/soc15.c | 12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/ta_ras_if.h | 7
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 1
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vi.c | 2
58 files changed, 1707 insertions(+), 681 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index d216b7ecb5d1..cc36570b0d2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -56,7 +56,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
- amdgpu_fw_attestation.o amdgpu_securedisplay.o
+ amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o
amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 6cabecc74007..ad5f508924b8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1075,7 +1075,8 @@ struct amdgpu_device {
atomic_t throttling_logging_enabled;
struct ratelimit_state throttling_logging_rs;
- uint32_t ras_features;
+ uint32_t ras_hw_enabled;
+ uint32_t ras_enabled;
bool in_pci_err_recovery;
struct pci_saved_state *pci_state;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 2e9b16fb3fcd..bf2939b6eb43 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -76,7 +76,7 @@ struct amdgpu_atif {
/**
* amdgpu_atif_call - call an ATIF method
*
- * @handle: acpi handle
+ * @atif: acpi handle
* @function: the ATIF function to execute
* @params: ATIF function params
*
@@ -166,7 +166,6 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas
/**
* amdgpu_atif_verify_interface - verify ATIF
*
- * @handle: acpi handle
* @atif: amdgpu atif struct
*
* Execute the ATIF_FUNCTION_VERIFY_INTERFACE ATIF function
@@ -240,8 +239,7 @@ out:
/**
* amdgpu_atif_get_notification_params - determine notify configuration
*
- * @handle: acpi handle
- * @n: atif notification configuration struct
+ * @atif: acpi handle
*
* Execute the ATIF_FUNCTION_GET_SYSTEM_PARAMETERS ATIF function
* to determine if a notifier is used and if so which one
@@ -304,7 +302,7 @@ out:
/**
* amdgpu_atif_query_backlight_caps - get min and max backlight input signal
*
- * @handle: acpi handle
+ * @atif: acpi handle
*
* Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function
* to determine the acceptable range of backlight values
@@ -363,7 +361,7 @@ out:
/**
* amdgpu_atif_get_sbios_requests - get requested sbios event
*
- * @handle: acpi handle
+ * @atif: acpi handle
* @req: atif sbios request struct
*
* Execute the ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS ATIF function
@@ -899,6 +897,8 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
/**
* amdgpu_acpi_is_s0ix_supported
*
+ * @adev: amdgpu_device pointer
+ *
* returns true if supported, false if not.
*/
bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 5ffb07b02810..313ee49b9f17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence {
struct mm_struct *mm;
spinlock_t lock;
char timeline_name[TASK_COMM_LEN];
+ struct svm_range_bo *svm_bo;
};
struct amdgpu_kfd_dev {
@@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
int queue_bit);
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
- struct mm_struct *mm);
+ struct mm_struct *mm,
+ struct svm_range_bo *svm_bo);
#if IS_ENABLED(CONFIG_HSA_AMD)
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
@@ -234,22 +236,27 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s
})
/* GPUVM API */
+#define drm_priv_to_vm(drm_priv) \
+ (&((struct amdgpu_fpriv *) \
+ ((struct drm_file *)(drm_priv))->driver_priv)->vm)
+
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp, u32 pasid,
- void **vm, void **process_info,
+ void **process_info,
struct dma_fence **ef);
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm);
-uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm);
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv);
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
- void *vm, struct kgd_mem **mem,
+ void *drm_priv, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags);
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size);
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
+ uint64_t *size);
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *vm);
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv);
int amdgpu_amdkfd_gpuvm_sync_memory(
struct kgd_dev *kgd, struct kgd_mem *mem, bool intr);
int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
@@ -260,7 +267,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
struct dma_buf *dmabuf,
- uint64_t va, void *vm,
+ uint64_t va, void *drm_priv,
struct kgd_mem **mem, uint64_t *size,
uint64_t *mmap_offset);
int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
@@ -270,6 +277,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size);
#else
static inline
void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
index 9ef9f3ddad48..6409d6b1b2df 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -25,6 +25,7 @@
#include <linux/firmware.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
+#include "amdgpu_amdkfd_arcturus.h"
#include "sdma0/sdma0_4_2_2_offset.h"
#include "sdma0/sdma0_4_2_2_sh_mask.h"
#include "sdma1/sdma1_4_2_2_offset.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
index 5af464933976..1d0dbff87d3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/sched/mm.h>
#include "amdgpu_amdkfd.h"
+#include "kfd_svm.h"
static const struct dma_fence_ops amdkfd_fence_ops;
static atomic_t fence_seq = ATOMIC_INIT(0);
@@ -60,7 +61,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0);
*/
struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
- struct mm_struct *mm)
+ struct mm_struct *mm,
+ struct svm_range_bo *svm_bo)
{
struct amdgpu_amdkfd_fence *fence;
@@ -73,7 +75,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
fence->mm = mm;
get_task_comm(fence->timeline_name, current);
spin_lock_init(&fence->lock);
-
+ fence->svm_bo = svm_bo;
dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock,
context, atomic_inc_return(&fence_seq));
@@ -111,6 +113,8 @@ static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f)
* a KFD BO and schedules a job to move the BO.
* If fence is already signaled return true.
* If fence is not signaled schedule an evict KFD process work item.
+ *
+ * @f: dma_fence
*/
static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
{
@@ -122,16 +126,20 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
if (dma_fence_is_signaled(f))
return true;
- if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
- return true;
-
+ if (!fence->svm_bo) {
+ if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
+ return true;
+ } else {
+ if (!svm_range_schedule_evict_svm_bo(fence))
+ return true;
+ }
return false;
}
/**
* amdkfd_fence_release - callback that fence can be freed
*
- * @fence: fence
+ * @f: dma_fence
*
* This function is called when the reference count becomes zero.
* Drops the mm_struct reference and RCU schedules freeing up the fence.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index b43e68fc1378..ed3014fbb563 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -719,7 +719,7 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev)
}
/**
- * @get_wave_count: Read device registers to get number of waves in flight for
+ * get_wave_count: Read device registers to get number of waves in flight for
* a particular queue. The method also returns the VMID associated with the
* queue.
*
@@ -755,19 +755,19 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx,
}
/**
- * @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
+ * kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each
* shader engine and aggregates the number of waves that are in flight for the
* process whose pasid is provided as a parameter. The process could have ZERO
* or more queues running and submitting waves to compute units.
*
* @kgd: Handle of device from which to get number of waves in flight
* @pasid: Identifies the process for which this query call is invoked
- * @wave_cnt: Output parameter updated with number of waves in flight that
+ * @pasid_wave_cnt: Output parameter updated with number of waves in flight that
* belong to process with given pasid
* @max_waves_per_cu: Output parameter updated with maximum number of waves
* possible per Compute Unit
*
- * @note: It's possible that the device has too many queues (oversubscription)
+ * Note: It's possible that the device has too many queues (oversubscription)
* in which case a VMID could be remapped to a different PASID. This could lead
* to an inaccurate wave count. Following is a high-level sequence:
* Time T1: vmid = getVmid(); vmid is associated with Pasid P1
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 7d4118c8128a..dfa025d694f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -33,9 +33,6 @@
#include <uapi/linux/kfd_ioctl.h>
#include "amdgpu_xgmi.h"
-/* BO flag to indicate a KFD userptr BO */
-#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63)
-
/* Userptr restore delay, just long enough to allow consecutive VM
* changes to accumulate
*/
@@ -108,6 +105,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
(kfd_mem_limit.max_ttm_mem_limit >> 20));
}
+void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
+{
+ kfd_mem_limit.system_mem_used += size;
+}
+
/* Estimate page table size needed to represent a given memory size
*
* With 4KB pages, we need one 8 byte PTE for each 4KB of memory
@@ -217,7 +219,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
u32 domain = bo->preferred_domains;
bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
- if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
+ if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) {
domain = AMDGPU_GEM_DOMAIN_CPU;
sg = false;
}
@@ -967,7 +969,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
info->eviction_fence =
amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1),
- current->mm);
+ current->mm,
+ NULL);
if (!info->eviction_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
@@ -1036,15 +1039,19 @@ create_evict_fence_fail:
int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
struct file *filp, u32 pasid,
- void **vm, void **process_info,
+ void **process_info,
struct dma_fence **ef)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct drm_file *drm_priv = filp->private_data;
- struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv;
- struct amdgpu_vm *avm = &drv_priv->vm;
+ struct amdgpu_fpriv *drv_priv;
+ struct amdgpu_vm *avm;
int ret;
+ ret = amdgpu_file_to_fpriv(filp, &drv_priv);
+ if (ret)
+ return ret;
+ avm = &drv_priv->vm;
+
/* Already a compute VM? */
if (avm->process_info)
return -EINVAL;
@@ -1059,7 +1066,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd,
if (ret)
return ret;
- *vm = (void *)avm;
+ amdgpu_vm_set_task_info(avm);
return 0;
}
@@ -1100,15 +1107,17 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
}
}
-void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
+void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ struct amdgpu_vm *avm;
- if (WARN_ON(!kgd || !vm))
+ if (WARN_ON(!kgd || !drm_priv))
return;
- pr_debug("Releasing process vm %p\n", vm);
+ avm = drm_priv_to_vm(drm_priv);
+
+ pr_debug("Releasing process vm %p\n", avm);
/* The original pasid of amdgpu vm has already been
* released during making a amdgpu vm to a compute vm
@@ -1119,9 +1128,9 @@ void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm)
amdgpu_vm_release_compute(adev, avm);
}
-uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
+uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
{
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
struct amdgpu_bo *pd = avm->root.base.bo;
struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev);
@@ -1132,11 +1141,11 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm)
int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
struct kgd_dev *kgd, uint64_t va, uint64_t size,
- void *vm, struct kgd_mem **mem,
+ void *drm_priv, struct kgd_mem **mem,
uint64_t *offset, uint32_t flags)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
enum ttm_bo_type bo_type = ttm_bo_type_device;
struct sg_table *sg = NULL;
uint64_t user_addr = 0;
@@ -1216,6 +1225,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
domain_string(alloc_domain), ret);
goto err_bo_create;
}
+ ret = drm_vma_node_allow(&gobj->vma_node, drm_priv);
+ if (ret) {
+ pr_debug("Failed to allow vma node access. ret %d\n", ret);
+ goto err_node_allow;
+ }
bo = gem_to_amdgpu_bo(gobj);
if (bo_type == ttm_bo_type_sg) {
bo->tbo.sg = sg;
@@ -1224,7 +1238,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
bo->kfd_bo = *mem;
(*mem)->bo = bo;
if (user_addr)
- bo->flags |= AMDGPU_AMDKFD_USERPTR_BO;
+ bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO;
(*mem)->va = va;
(*mem)->domain = domain;
@@ -1245,6 +1259,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
allocate_init_user_pages_failed:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
+ drm_vma_node_revoke(&gobj->vma_node, drm_priv);
+err_node_allow:
amdgpu_bo_unref(&bo);
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
@@ -1262,7 +1278,8 @@ err:
}
int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size)
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv,
+ uint64_t *size)
{
struct amdkfd_process_info *process_info = mem->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
@@ -1339,6 +1356,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
/* Free the BO*/
+ drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv);
drm_gem_object_put(&mem->bo->tbo.base);
mutex_destroy(&mem->lock);
kfree(mem);
@@ -1347,10 +1365,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
}
int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
int ret;
struct amdgpu_bo *bo;
uint32_t domain;
@@ -1391,9 +1409,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n",
mem->va,
mem->va + bo_size * (1 + mem->aql_queue),
- vm, domain_string(domain));
+ avm, domain_string(domain));
- ret = reserve_bo_and_vm(mem, vm, &ctx);
+ ret = reserve_bo_and_vm(mem, avm, &ctx);
if (unlikely(ret))
goto out;
@@ -1437,7 +1455,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
}
list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
- if (entry->bo_va->base.vm == vm && !entry->is_mapped) {
+ if (entry->bo_va->base.vm == avm && !entry->is_mapped) {
pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n",
entry->va, entry->va + bo_size,
entry);
@@ -1449,7 +1467,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
goto map_bo_to_gpuvm_failed;
}
- ret = vm_update_pds(vm, ctx.sync);
+ ret = vm_update_pds(avm, ctx.sync);
if (ret) {
pr_err("Failed to update page directories\n");
goto map_bo_to_gpuvm_failed;
@@ -1485,11 +1503,11 @@ out:
}
int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
- struct kgd_dev *kgd, struct kgd_mem *mem, void *vm)
+ struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
- struct amdkfd_process_info *process_info =
- ((struct amdgpu_vm *)vm)->process_info;
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
+ struct amdkfd_process_info *process_info = avm->process_info;
unsigned long bo_size = mem->bo->tbo.base.size;
struct kfd_bo_va_list *entry;
struct bo_vm_reservation_context ctx;
@@ -1497,7 +1515,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
mutex_lock(&mem->lock);
- ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx);
+ ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx);
if (unlikely(ret))
goto out;
/* If no VMs were reserved, it means the BO wasn't actually mapped */
@@ -1506,17 +1524,17 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
goto unreserve_out;
}
- ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm);
+ ret = vm_validate_pt_pd_bos(avm);
if (unlikely(ret))
goto unreserve_out;
pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n",
mem->va,
mem->va + bo_size * (1 + mem->aql_queue),
- vm);
+ avm);
list_for_each_entry(entry, &mem->bo_va_list, bo_list) {
- if (entry->bo_va->base.vm == vm && entry->is_mapped) {
+ if (entry->bo_va->base.vm == avm && entry->is_mapped) {
pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n",
entry->va,
entry->va + bo_size,
@@ -1642,14 +1660,15 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
struct dma_buf *dma_buf,
- uint64_t va, void *vm,
+ uint64_t va, void *drm_priv,
struct kgd_mem **mem, uint64_t *size,
uint64_t *mmap_offset)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+ struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
struct drm_gem_object *obj;
struct amdgpu_bo *bo;
- struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+ int ret;
if (dma_buf->ops != &amdgpu_dmabuf_ops)
/* Can't handle non-graphics buffers */
@@ -1670,6 +1689,12 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
if (!*mem)
return -ENOMEM;
+ ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
+ if (ret) {
+ kfree(mem);
+ return ret;
+ }
+
if (size)
*size = amdgpu_bo_size(bo);
@@ -2135,7 +2160,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
*/
new_fence = amdgpu_amdkfd_fence_create(
process_info->eviction_fence->base.context,
- process_info->eviction_fence->mm);
+ process_info->eviction_fence->mm,
+ NULL);
if (!new_fence) {
pr_err("Failed to create eviction fence\n");
ret = -ENOMEM;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b5c766998045..90136f9dedd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -672,7 +672,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
}
/**
- * cs_parser_fini() - clean parser states
+ * amdgpu_cs_parser_fini() - clean parser states
* @parser: parser structure holding parsing context.
* @error: error number
* @backoff: indicator to backoff the reservation
@@ -1488,7 +1488,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
}
/**
- * amdgpu_cs_wait_all_fence - wait on all fences to signal
+ * amdgpu_cs_wait_all_fences - wait on all fences to signal
*
* @adev: amdgpu device
* @filp: file private
@@ -1639,7 +1639,7 @@ err_free_fences:
}
/**
- * amdgpu_cs_find_bo_va - find bo_va for VM address
+ * amdgpu_cs_find_mapping - find bo_va for VM address
*
* @parser: command submission parser context
* @addr: VM address
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 8b2a37bf2adf..2360a9c518eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2856,7 +2856,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
AMD_IP_BLOCK_TYPE_IH,
};
- for (i = 0; i < ARRAY_SIZE(ip_order); i++) {
+ for (i = 0; i < adev->num_ip_blocks; i++) {
int j;
struct amdgpu_ip_block *block;
@@ -3179,8 +3179,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
int ret = 0;
/*
- * By default timeout for non compute jobs is 10000.
- * And there is no timeout enforced on compute jobs.
+ * By default timeout for non compute jobs is 10000
+ * and 60000 for compute jobs.
* In SR-IOV or passthrough mode, timeout for compute
* jobs are 60000 by default.
*/
@@ -3189,10 +3189,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
if (amdgpu_sriov_vf(adev))
adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ?
msecs_to_jiffies(60000) : msecs_to_jiffies(10000);
- else if (amdgpu_passthrough(adev))
- adev->compute_timeout = msecs_to_jiffies(60000);
else
- adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
+ adev->compute_timeout = msecs_to_jiffies(60000);
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) {
while ((timeout_setting = strsep(&input, ",")) &&
@@ -3741,7 +3739,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
{
struct amdgpu_device *adev = drm_to_adev(dev);
- int r;
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
@@ -3756,7 +3753,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
amdgpu_ras_suspend(adev);
- r = amdgpu_device_ip_suspend_phase1(adev);
+ amdgpu_device_ip_suspend_phase1(adev);
if (!adev->in_s0ix)
amdgpu_amdkfd_suspend(adev, adev->in_runpm);
@@ -3766,7 +3763,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
amdgpu_fence_driver_suspend(adev);
- r = amdgpu_device_ip_suspend_phase2(adev);
+ amdgpu_device_ip_suspend_phase2(adev);
/* evict remaining vram memory
* This second call to evict vram is to evict the gart page table
* using the CPU.
@@ -5124,7 +5121,8 @@ int amdgpu_device_baco_enter(struct drm_device *dev)
if (!amdgpu_device_supports_baco(adev_to_drm(adev)))
return -ENOTSUPP;
- if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
+ if (ras && adev->ras_enabled &&
+ adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
return amdgpu_dpm_baco_enter(adev);
@@ -5143,7 +5141,8 @@ int amdgpu_device_baco_exit(struct drm_device *dev)
if (ret)
return ret;
- if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt)
+ if (ras && adev->ras_enabled &&
+ adev->nbio.funcs->enable_doorbell_interrupt)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 6fd20ea2935b..1ed9748b9bc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -288,9 +288,9 @@ module_param_named(msi, amdgpu_msi, int, 0444);
* for SDMA and Video.
*
* By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video)
- * jobs is 10000. And there is no timeout enforced on compute jobs.
+ * jobs is 10000. The timeout for compute is 60000.
*/
-MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; "
+MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; "
"for passthrough or sriov, 10000 for all jobs."
" 0: keep default value. negative: infinity timeout), "
"format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; "
@@ -641,7 +641,8 @@ module_param_named(mes, amdgpu_mes, int, 0444);
/**
* DOC: noretry (int)
- * Disable retry faults in the GPU memory controller.
+ * Disable XNACK retry in the SQ by default on GFXv9 hardware. On ASICs that
+ * do not support per-process XNACK this also disables retry page faults.
* (0 = retry enabled, 1 = retry disabled, -1 auto (default))
*/
MODULE_PARM_DESC(noretry,
@@ -1186,6 +1187,7 @@ static const struct pci_device_id pciidlist[] = {
{0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
{0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
{0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
+ {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT},
{0, 0, 0}
};
@@ -1598,17 +1600,15 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
if (amdgpu_device_has_dc_support(adev)) {
struct drm_crtc *crtc;
- drm_modeset_lock_all(drm_dev);
-
drm_for_each_crtc(crtc, drm_dev) {
- if (crtc->state->active) {
+ drm_modeset_lock(&crtc->mutex, NULL);
+ if (crtc->state->active)
ret = -EBUSY;
+ drm_modeset_unlock(&crtc->mutex);
+ if (ret < 0)
break;
- }
}
- drm_modeset_unlock_all(drm_dev);
-
} else {
struct drm_connector *list_connector;
struct drm_connector_list_iter iter;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 47ea46859618..30772608eac6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -434,6 +434,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
*
* @ring: ring to init the fence driver on
* @num_hw_submission: number of entries on the hardware queue
+ * @sched_score: optional score atomic shared with other schedulers
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
index c5a9a4fb10d2..5562b5c90c03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c
@@ -60,7 +60,7 @@
*/
/**
- * amdgpu_dummy_page_init - init dummy page used by the driver
+ * amdgpu_gart_dummy_page_init - init dummy page used by the driver
*
* @adev: amdgpu_device pointer
*
@@ -86,7 +86,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev)
}
/**
- * amdgpu_dummy_page_fini - free dummy page used by the driver
+ * amdgpu_gart_dummy_page_fini - free dummy page used by the driver
*
* @adev: amdgpu_device pointer
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index c39ed9eb0987..a129ecc73869 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -333,6 +333,17 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc)
}
/**
+ * amdgpu_gmc_fault_key - get hash key from vm fault address and pasid
+ *
+ * @addr: 48 bit physical address, page aligned (36 significant bits)
+ * @pasid: 16 bit process address space identifier
+ */
+static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid)
+{
+ return addr << 4 | pasid;
+}
+
+/**
* amdgpu_gmc_filter_faults - filter VM faults
*
* @adev: amdgpu device structure
@@ -348,8 +359,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid, uint64_t timestamp)
{
struct amdgpu_gmc *gmc = &adev->gmc;
-
- uint64_t stamp, key = addr << 4 | pasid;
+ uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid);
struct amdgpu_gmc_fault *fault;
uint32_t hash;
@@ -365,7 +375,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
while (fault->timestamp >= stamp) {
uint64_t tmp;
- if (fault->key == key)
+ if (atomic64_read(&fault->key) == key)
return true;
tmp = fault->timestamp;
@@ -378,7 +388,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
/* Add the fault to the ring */
fault = &gmc->fault_ring[gmc->last_fault];
- fault->key = key;
+ atomic64_set(&fault->key, key);
fault->timestamp = timestamp;
/* And update the hash */
@@ -387,6 +397,36 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
return false;
}
+/**
+ * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter
+ *
+ * @adev: amdgpu device structure
+ * @addr: address of the VM fault
+ * @pasid: PASID of the process causing the fault
+ *
+ * Remove the address from the fault filter so that a future vm fault on this
+ * address is passed to the retry fault handler to recover.
+ */
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+ uint16_t pasid)
+{
+ struct amdgpu_gmc *gmc = &adev->gmc;
+ uint64_t key = amdgpu_gmc_fault_key(addr, pasid);
+ struct amdgpu_gmc_fault *fault;
+ uint32_t hash;
+ uint64_t tmp;
+
+ hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER);
+ fault = &gmc->fault_ring[gmc->fault_hash[hash].idx];
+ do {
+ if (atomic64_cmpxchg(&fault->key, key, 0) == key)
+ break;
+
+ tmp = fault->timestamp;
+ fault = &gmc->fault_ring[fault->next];
+ } while (fault->timestamp < tmp);
+}
+
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
{
int r;
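
As a quick illustration of the key layout used by amdgpu_gmc_fault_key() above (hypothetical values, not part of the patch): the faulting address is page aligned, so its low 12 bits are zero, and shifting it left by 4 leaves the low 16 bits free for the 16-bit PASID.

/*
 * addr  = 0x0000123456789000   (page aligned, low 12 bits zero)
 * pasid = 0x8001
 * key   = addr << 4 | pasid
 *       = 0x0001234567890000 | 0x8001
 *       = 0x0001234567898001
 *
 * amdgpu_gmc_filter_faults_remove() clears an entry with
 * atomic64_cmpxchg(&fault->key, key, 0), so a later fault on the same
 * (addr, pasid) pair is no longer filtered and reaches the retry fault
 * handler again.
 */
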
@@ -415,6 +455,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev)
return r;
}
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->ras_late_init) {
+ r = adev->hdp.ras_funcs->ras_late_init(adev);
+ if (r)
+ return r;
+ }
+
return 0;
}
@@ -426,11 +473,15 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev)
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->ras_fini)
- amdgpu_mmhub_ras_fini(adev);
+ adev->mmhub.ras_funcs->ras_fini(adev);
if (adev->gmc.xgmi.ras_funcs &&
adev->gmc.xgmi.ras_funcs->ras_fini)
adev->gmc.xgmi.ras_funcs->ras_fini(adev);
+
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->ras_fini)
+ adev->hdp.ras_funcs->ras_fini(adev);
}
/*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 9d11c02a3938..6aa1d52d3aee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -66,9 +66,9 @@ struct firmware;
* GMC page fault information
*/
struct amdgpu_gmc_fault {
- uint64_t timestamp;
+ uint64_t timestamp:48;
uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER;
- uint64_t key:52;
+ atomic64_t key;
};
/*
@@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc);
bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr,
uint16_t pasid, uint64_t timestamp);
+void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr,
+ uint16_t pasid);
int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev);
void amdgpu_gmc_ras_fini(struct amdgpu_device *adev);
int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index 72962de4c04c..c026972ca9a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -24,7 +24,8 @@
#include "amdgpu.h"
-static inline struct amdgpu_gtt_mgr *to_gtt_mgr(struct ttm_resource_manager *man)
+static inline struct amdgpu_gtt_mgr *
+to_gtt_mgr(struct ttm_resource_manager *man)
{
return container_of(man, struct amdgpu_gtt_mgr, manager);
}
@@ -43,12 +44,14 @@ struct amdgpu_gtt_node {
* the GTT block, in bytes
*/
static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+ struct ttm_resource_manager *man;
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE);
}
@@ -61,12 +64,14 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev,
* size of the GTT block, in bytes
*/
static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
+ struct ttm_resource_manager *man;
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man));
}
@@ -75,80 +80,6 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO,
static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO,
amdgpu_mem_info_gtt_used_show, NULL);
-static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func;
-/**
- * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
- *
- * @adev: amdgpu_device pointer
- * @gtt_size: maximum size of GTT
- *
- * Allocate and initialize the GTT manager.
- */
-int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
-{
- struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
- struct ttm_resource_manager *man = &mgr->manager;
- uint64_t start, size;
- int ret;
-
- man->use_tt = true;
- man->func = &amdgpu_gtt_mgr_func;
-
- ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
-
- start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
- size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
- drm_mm_init(&mgr->mm, start, size);
- spin_lock_init(&mgr->lock);
- atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
-
- ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
- if (ret) {
- DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
- return ret;
- }
- ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
- if (ret) {
- DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
- return ret;
- }
-
- ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
- ttm_resource_manager_set_used(man, true);
- return 0;
-}
-
-/**
- * amdgpu_gtt_mgr_fini - free and destroy GTT manager
- *
- * @adev: amdgpu_device pointer
- *
- * Destroy and free the GTT manager, returns -EBUSY if ranges are still
- * allocated inside it.
- */
-void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
-{
- struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
- struct ttm_resource_manager *man = &mgr->manager;
- int ret;
-
- ttm_resource_manager_set_used(man, false);
-
- ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
- if (ret)
- return;
-
- spin_lock(&mgr->lock);
- drm_mm_takedown(&mgr->mm);
- spin_unlock(&mgr->lock);
-
- device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
- device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
-
- ttm_resource_manager_cleanup(man);
- ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
-}
-
/**
* amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space
*
@@ -265,6 +196,13 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man)
return (result > 0 ? result : 0) * PAGE_SIZE;
}
+/**
+ * amdgpu_gtt_mgr_recover - re-init gart
+ *
+ * @man: TTM memory type manager
+ *
+ * Re-init the gart for each known BO in the GTT.
+ */
int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man)
{
struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man);
@@ -311,3 +249,76 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = {
.free = amdgpu_gtt_mgr_del,
.debug = amdgpu_gtt_mgr_debug
};
+
+/**
+ * amdgpu_gtt_mgr_init - init GTT manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ * @gtt_size: maximum size of GTT
+ *
+ * Allocate and initialize the GTT manager.
+ */
+int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size)
+{
+ struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ uint64_t start, size;
+ int ret;
+
+ man->use_tt = true;
+ man->func = &amdgpu_gtt_mgr_func;
+
+ ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT);
+
+ start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS;
+ size = (adev->gmc.gart_size >> PAGE_SHIFT) - start;
+ drm_mm_init(&mgr->mm, start, size);
+ spin_lock_init(&mgr->lock);
+ atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT);
+
+ ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total);
+ if (ret) {
+ DRM_ERROR("Failed to create device file mem_info_gtt_total\n");
+ return ret;
+ }
+ ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used);
+ if (ret) {
+ DRM_ERROR("Failed to create device file mem_info_gtt_used\n");
+ return ret;
+ }
+
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_gtt_mgr_fini - free and destroy GTT manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the GTT manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int ret;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ spin_lock(&mgr->lock);
+ drm_mm_takedown(&mgr->mm);
+ spin_unlock(&mgr->lock);
+
+ device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total);
+ device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
new file mode 100644
index 000000000000..1d50d534d77c
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "amdgpu.h"
+#include "amdgpu_ras.h"
+
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev)
+{
+ int r;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+ struct ras_fs_if fs_info = {
+ .sysfs_name = "hdp_err_count",
+ };
+
+ if (!adev->hdp.ras_if) {
+ adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+ if (!adev->hdp.ras_if)
+ return -ENOMEM;
+ adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP;
+ adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
+ adev->hdp.ras_if->sub_block_index = 0;
+ strcpy(adev->hdp.ras_if->name, "hdp");
+ }
+ ih_info.head = fs_info.head = *adev->hdp.ras_if;
+ r = amdgpu_ras_late_init(adev, adev->hdp.ras_if,
+ &fs_info, &ih_info);
+ if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) {
+ kfree(adev->hdp.ras_if);
+ adev->hdp.ras_if = NULL;
+ }
+
+ return r;
+}
+
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev)
+{
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) &&
+ adev->hdp.ras_if) {
+ struct ras_common_if *ras_if = adev->hdp.ras_if;
+ struct ras_ih_if ih_info = {
+ .cb = NULL,
+ };
+
+ amdgpu_ras_late_fini(adev, ras_if, &ih_info);
+ kfree(ras_if);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
index 43caf9f8cc11..7ec99d591584 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h
@@ -23,18 +23,29 @@
#ifndef __AMDGPU_HDP_H__
#define __AMDGPU_HDP_H__
+struct amdgpu_hdp_ras_funcs {
+ int (*ras_late_init)(struct amdgpu_device *adev);
+ void (*ras_fini)(struct amdgpu_device *adev);
+ void (*query_ras_error_count)(struct amdgpu_device *adev,
+ void *ras_error_status);
+ void (*reset_ras_error_count)(struct amdgpu_device *adev);
+};
+
struct amdgpu_hdp_funcs {
void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
void (*invalidate_hdp)(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
- void (*reset_ras_error_count)(struct amdgpu_device *adev);
void (*update_clock_gating)(struct amdgpu_device *adev, bool enable);
void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
void (*init_registers)(struct amdgpu_device *adev);
};
struct amdgpu_hdp {
+ struct ras_common_if *ras_if;
const struct amdgpu_hdp_funcs *funcs;
+ const struct amdgpu_hdp_ras_funcs *ras_funcs;
};
+int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev);
+void amdgpu_hdp_ras_fini(struct amdgpu_device *adev);
#endif /* __AMDGPU_HDP_H__ */
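
The amdgpu_hdp_ras_funcs table above is filled in by the IP-specific HDP code; the hdp_v4_0.c hunks are listed in the diffstat but not reproduced in this section. A hedged sketch of how such a wiring might look (the query/reset callback names here are assumptions, not taken from this patch):

/* Sketch only: callback names for the HDP 4.0 implementation are assumed. */
const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
	.ras_late_init = amdgpu_hdp_ras_late_init,
	.ras_fini = amdgpu_hdp_ras_fini,
	.query_ras_error_count = hdp_v4_0_query_ras_error_count,
	.reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
};
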
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index a2fe2dac32c1..2e6789a7dc46 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -328,7 +328,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev)
for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) {
if (i == AMDGPU_IB_POOL_DIRECT)
- size = PAGE_SIZE * 2;
+ size = PAGE_SIZE * 6;
else
size = AMDGPU_IB_POOL_SIZE;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index faaa6aa2faaf..a36e191cf086 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -175,7 +175,9 @@ static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev,
cur_rptr += ih->ptr_mask + 1;
*prev_rptr = cur_rptr;
- return cur_rptr >= checkpoint_wptr;
+ /* check ring is empty to workaround missing wptr overflow flag */
+ return cur_rptr >= checkpoint_wptr ||
+ (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 39ee88d29cca..8d12e474745a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -986,7 +986,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!ras)
return -EINVAL;
- ras_mask = (uint64_t)ras->supported << 32 | ras->features;
+ ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features;
return copy_to_user(out, &ras_mask,
min_t(u64, size, sizeof(ras_mask))) ?
@@ -1114,7 +1114,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
dev_warn(adev->dev, "No more PASIDs available!");
pasid = 0;
}
- r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid);
+
+ r = amdgpu_vm_init(adev, &fpriv->vm, pasid);
if (r)
goto error_pasid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 11aa29933c1f..b27fcbccce2b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs {
void *ras_error_status);
void (*query_ras_error_status)(struct amdgpu_device *adev);
void (*reset_ras_error_count)(struct amdgpu_device *adev);
+ void (*reset_ras_error_status)(struct amdgpu_device *adev);
};
struct amdgpu_mmhub_funcs {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 828b5167ff12..2741c28ff1b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -155,3 +155,89 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
mmu_interval_notifier_remove(&bo->notifier);
bo->notifier.mm = NULL;
}
+
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+ struct mm_struct *mm, struct page **pages,
+ uint64_t start, uint64_t npages,
+ struct hmm_range **phmm_range, bool readonly,
+ bool mmap_locked)
+{
+ struct hmm_range *hmm_range;
+ unsigned long timeout;
+ unsigned long i;
+ unsigned long *pfns;
+ int r = 0;
+
+ hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL);
+ if (unlikely(!hmm_range))
+ return -ENOMEM;
+
+ pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
+ if (unlikely(!pfns)) {
+ r = -ENOMEM;
+ goto out_free_range;
+ }
+
+ hmm_range->notifier = notifier;
+ hmm_range->default_flags = HMM_PFN_REQ_FAULT;
+ if (!readonly)
+ hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
+ hmm_range->hmm_pfns = pfns;
+ hmm_range->start = start;
+ hmm_range->end = start + npages * PAGE_SIZE;
+
+ /* Assuming 512MB takes maximum 1 second to fault page address */
+ timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
+ timeout = jiffies + msecs_to_jiffies(timeout);
+
+retry:
+ hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+
+ if (likely(!mmap_locked))
+ mmap_read_lock(mm);
+
+ r = hmm_range_fault(hmm_range);
+
+ if (likely(!mmap_locked))
+ mmap_read_unlock(mm);
+ if (unlikely(r)) {
+ /*
+ * FIXME: This timeout should encompass the retry from
+ * mmu_interval_read_retry() as well.
+ */
+ if (r == -EBUSY && !time_after(jiffies, timeout))
+ goto retry;
+ goto out_free_pfns;
+ }
+
+ /*
+ * Due to default_flags, all pages are HMM_PFN_VALID or
+ * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
+ * the notifier_lock, and mmu_interval_read_retry() must be done first.
+ */
+ for (i = 0; pages && i < npages; i++)
+ pages[i] = hmm_pfn_to_page(pfns[i]);
+
+ *phmm_range = hmm_range;
+
+ return 0;
+
+out_free_pfns:
+ kvfree(pfns);
+out_free_range:
+ kfree(hmm_range);
+
+ return r;
+}
+
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+{
+ int r;
+
+ r = mmu_interval_read_retry(hmm_range->notifier,
+ hmm_range->notifier_seq);
+ kvfree(hmm_range->hmm_pfns);
+ kfree(hmm_range);
+
+ return r;
+}
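
A minimal sketch of how a caller could use the two helpers added above (hypothetical caller, error handling trimmed; not part of the patch). The faulted pages stay usable only as long as amdgpu_hmm_range_get_pages_done(), which wraps mmu_interval_read_retry(), reports that the range was not invalidated:

/* Hypothetical caller; illustration only. */
static int example_fault_user_pages(struct mmu_interval_notifier *notifier,
				    struct mm_struct *mm, uint64_t start,
				    uint64_t npages, struct page **pages)
{
	struct hmm_range *range;
	int r;

	r = amdgpu_hmm_range_get_pages(notifier, mm, pages, start, npages,
				       &range, false /* readonly */,
				       false /* mmap_locked */);
	if (r)
		return r;

	/* ... program GPU mappings from pages[] ... */

	/* Frees the pfn array and the range; a non-zero return means the
	 * range was invalidated in the meantime and the caller must retry.
	 */
	return amdgpu_hmm_range_get_pages_done(range);
}
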
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
index a292238f75eb..7f7d37a457c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
@@ -30,6 +30,13 @@
#include <linux/workqueue.h>
#include <linux/interval_tree.h>
+int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
+ struct mm_struct *mm, struct page **pages,
+ uint64_t start, uint64_t npages,
+ struct hmm_range **phmm_range, bool readonly,
+ bool mmap_locked);
+int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+
#if defined(CONFIG_HMM_MIRROR)
int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
void amdgpu_mn_unregister(struct amdgpu_bo *bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 0adffcace326..8714d50c5b20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -491,7 +491,18 @@ bool amdgpu_bo_support_uswc(u64 bo_flags)
#endif
}
-static int amdgpu_bo_do_create(struct amdgpu_device *adev,
+/**
+ * amdgpu_bo_create - create an &amdgpu_bo buffer object
+ * @adev: amdgpu device object
+ * @bp: parameters to be used for the buffer object
+ * @bo_ptr: pointer to the buffer object pointer
+ *
+ * Creates an &amdgpu_bo buffer object.
+ *
+ * Returns:
+ * 0 for success or a negative error code on failure.
+ */
+int amdgpu_bo_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
{
@@ -601,9 +612,9 @@ fail_unreserve:
return r;
}
-static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
- unsigned long size,
- struct amdgpu_bo *bo)
+int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
+ unsigned long size,
+ struct amdgpu_bo *bo)
{
struct amdgpu_bo_param bp;
int r;
@@ -614,13 +625,12 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.domain = AMDGPU_GEM_DOMAIN_GTT;
- bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC |
- AMDGPU_GEM_CREATE_SHADOW;
+ bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
bp.type = ttm_bo_type_kernel;
bp.resv = bo->tbo.base.resv;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
- r = amdgpu_bo_do_create(adev, &bp, &bo->shadow);
+ r = amdgpu_bo_create(adev, &bp, &bo->shadow);
if (!r) {
bo->shadow->parent = amdgpu_bo_ref(bo);
mutex_lock(&adev->shadow_list_lock);
@@ -632,50 +642,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
}
/**
- * amdgpu_bo_create - create an &amdgpu_bo buffer object
- * @adev: amdgpu device object
- * @bp: parameters to be used for the buffer object
- * @bo_ptr: pointer to the buffer object pointer
- *
- * Creates an &amdgpu_bo buffer object; and if requested, also creates a
- * shadow object.
- * Shadow object is used to backup the original buffer object, and is always
- * in GTT.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_create(struct amdgpu_device *adev,
- struct amdgpu_bo_param *bp,
- struct amdgpu_bo **bo_ptr)
-{
- u64 flags = bp->flags;
- int r;
-
- bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
-
- r = amdgpu_bo_do_create(adev, bp, bo_ptr);
- if (r)
- return r;
-
- if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) {
- if (!bp->resv)
- WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv,
- NULL));
-
- r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr);
-
- if (!bp->resv)
- dma_resv_unlock((*bo_ptr)->tbo.base.resv);
-
- if (r)
- amdgpu_bo_unref(bo_ptr);
- }
-
- return r;
-}
-
-/**
* amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object
* @adev: amdgpu device object
* @bp: parameters to be used for the buffer object
@@ -694,9 +660,8 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
struct amdgpu_bo *bo_ptr;
int r;
- bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW;
bp->bo_ptr_size = sizeof(struct amdgpu_bo_user);
- r = amdgpu_bo_do_create(adev, bp, &bo_ptr);
+ r = amdgpu_bo_create(adev, bp, &bo_ptr);
if (r)
return r;
@@ -1595,7 +1560,6 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS);
amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC);
amdgpu_bo_print_flag(m, bo, VRAM_CLEARED);
- amdgpu_bo_print_flag(m, bo, SHADOW);
amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS);
amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID);
amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC);
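
As a usage note for the now directly exported amdgpu_bo_create() path, a minimal caller sketch (hypothetical, mirroring the amdgpu_bo_param setup visible in amdgpu_bo_create_shadow() above) might look like:

/* Hypothetical caller sketch; not part of the patch. */
static int example_create_gtt_bo(struct amdgpu_device *adev,
				 unsigned long size,
				 struct amdgpu_bo **bo)
{
	struct amdgpu_bo_param bp;

	memset(&bp, 0, sizeof(bp));
	bp.size = size;
	bp.domain = AMDGPU_GEM_DOMAIN_GTT;
	bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	bp.type = ttm_bo_type_kernel;
	bp.resv = NULL;
	bp.bo_ptr_size = sizeof(struct amdgpu_bo);

	return amdgpu_bo_create(adev, &bp, bo);
}
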
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index b37d36ac6b5a..46d22ab85492 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -37,6 +37,10 @@
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
#define AMDGPU_BO_MAX_PLACEMENTS 3
+/* BO flag to indicate a KFD userptr BO */
+#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63)
+#define AMDGPU_AMDKFD_CREATE_SVM_BO (1ULL << 62)
+
#define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo)
struct amdgpu_bo_param {
@@ -267,6 +271,9 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev,
struct amdgpu_bo_user **ubo_ptr);
void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
void **cpu_addr);
+int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
+ unsigned long size,
+ struct amdgpu_bo *bo);
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr);
void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a09483beb968..f7bbb04d01ee 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -417,31 +417,12 @@ static int psp_tmr_init(struct psp_context *psp)
return ret;
}
-static int psp_clear_vf_fw(struct psp_context *psp)
-{
- int ret;
- struct psp_gfx_cmd_resp *cmd;
-
- if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12)
- return 0;
-
- cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
- if (!cmd)
- return -ENOMEM;
-
- cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW;
-
- ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
- kfree(cmd);
-
- return ret;
-}
-
static bool psp_skip_tmr(struct psp_context *psp)
{
switch (psp->adev->asic_type) {
case CHIP_NAVI12:
case CHIP_SIENNA_CICHLID:
+ case CHIP_ALDEBARAN:
return true;
default:
return false;
@@ -1037,6 +1018,13 @@ static int psp_ras_load(struct psp_context *psp)
memset(psp->fw_pri_buf, 0, PSP_1_MEG);
memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size);
+ ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
+
+ if (psp->adev->gmc.xgmi.connected_to_cpu)
+ ras_cmd->ras_in_message.init_flags.poison_mode_en = 1;
+ else
+ ras_cmd->ras_in_message.init_flags.dgpu_mode = 1;
+
psp_prep_ta_load_cmd_buf(cmd,
psp->fw_pri_mc_addr,
psp->ta_ras_ucode_size,
@@ -1046,8 +1034,6 @@ static int psp_ras_load(struct psp_context *psp)
ret = psp_cmd_submit_buf(psp, NULL, cmd,
psp->fence_buf_mc_addr);
- ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf;
-
if (!ret) {
psp->ras.session_id = cmd->resp.session_id;
@@ -1128,6 +1114,31 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
return ret;
}
+static int psp_ras_status_to_errno(struct amdgpu_device *adev,
+ enum ta_ras_status ras_status)
+{
+ int ret = -EINVAL;
+
+ switch (ras_status) {
+ case TA_RAS_STATUS__SUCCESS:
+ ret = 0;
+ break;
+ case TA_RAS_STATUS__RESET_NEEDED:
+ ret = -EAGAIN;
+ break;
+ case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
+ dev_warn(adev->dev, "RAS WARN: ras function unavailable\n");
+ break;
+ case TA_RAS_STATUS__ERROR_ASD_READ_WRITE:
+ dev_warn(adev->dev, "RAS WARN: asd read or write failed\n");
+ break;
+ default:
+ dev_err(adev->dev, "RAS ERROR: ras function failed ret 0x%X\n", ret);
+ }
+
+ return ret;
+}
+
int psp_ras_enable_features(struct psp_context *psp,
union ta_ras_cmd_input *info, bool enable)
{
@@ -1151,7 +1162,7 @@ int psp_ras_enable_features(struct psp_context *psp,
if (ret)
return -EINVAL;
- return ras_cmd->ras_status;
+ return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status);
}
static int psp_ras_terminate(struct psp_context *psp)
@@ -1234,7 +1245,7 @@ int psp_ras_trigger_error(struct psp_context *psp,
if (amdgpu_ras_intr_triggered())
return 0;
- return ras_cmd->ras_status;
+ return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status);
}
// ras end
@@ -1920,12 +1931,6 @@ static int psp_hw_start(struct psp_context *psp)
return ret;
}
- ret = psp_clear_vf_fw(psp);
- if (ret) {
- DRM_ERROR("PSP clear vf fw!\n");
- return ret;
- }
-
ret = psp_boot_config_set(adev);
if (ret) {
DRM_WARN("PSP set boot config@\n");
@@ -2166,7 +2171,7 @@ static int psp_load_smu_fw(struct psp_context *psp)
return 0;
if ((amdgpu_in_reset(adev) &&
- ras && ras->supported &&
+ ras && adev->ras_enabled &&
(adev->asic_type == CHIP_ARCTURUS ||
adev->asic_type == CHIP_VEGA20)) ||
(adev->in_runpm &&
@@ -2434,7 +2439,6 @@ static int psp_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct psp_context *psp = &adev->psp;
- int ret;
if (psp->adev->psp.ta_fw) {
psp_ras_terminate(psp);
@@ -2445,11 +2449,6 @@ static int psp_hw_fini(void *handle)
}
psp_asd_unload(psp);
- ret = psp_clear_vf_fw(psp);
- if (ret) {
- DRM_ERROR("PSP clear vf fw!\n");
- return ret;
- }
psp_tmr_terminate(psp);
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index b0d2fc9454ca..b1c57a5b6e89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -33,6 +33,7 @@
#include "amdgpu_atomfirmware.h"
#include "amdgpu_xgmi.h"
#include "ivsrcid/nbio/irqsrcs_nbif_7_4.h"
+#include "atom.h"
static const char *RAS_FS_NAME = "ras";
@@ -320,11 +321,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f,
* "disable" requires only the block.
* "enable" requires the block and error type.
* "inject" requires the block, error type, address, and value.
+ *
* The block is one of: umc, sdma, gfx, etc.
* see ras_block_string[] for details
+ *
* The error type is one of: ue, ce, where,
* ue is multi-uncorrectable
* ce is single-correctable
+ *
 * The sub-block is the sub-block index, pass 0 if there is no sub-block.
* The address and value are hexadecimal numbers, leading 0x is optional.
*
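
As an illustrative aside, a minimal userspace sketch of driving this control file, assuming debugfs is mounted at /sys/kernel/debug and the file is exposed as /sys/kernel/debug/dri/<N>/ras/ras_ctrl (card index 0 and the specific inject arguments below are assumptions); the command string follows the format described in the comment above.

#include <stdio.h>

int main(void)
{
	/* inject an uncorrectable error into the umc block:
	 * sub-block 0, address 0x0, value 0x0 */
	FILE *f = fopen("/sys/kernel/debug/dri/0/ras/ras_ctrl", "w");

	if (!f)
		return 1;
	fprintf(f, "inject umc ue 0x0 0x0 0x0\n");
	fclose(f);
	return 0;
}
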
@@ -531,7 +535,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev,
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- if (!adev->ras_features || !con)
+ if (!adev->ras_enabled || !con)
return NULL;
if (head->block >= AMDGPU_RAS_BLOCK_COUNT)
@@ -558,7 +562,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
struct ras_manager *obj;
int i;
- if (!adev->ras_features || !con)
+ if (!adev->ras_enabled || !con)
return NULL;
if (head) {
@@ -585,36 +589,11 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev,
}
/* obj end */
-static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev,
- const char* invoke_type,
- const char* block_name,
- enum ta_ras_status ret)
-{
- switch (ret) {
- case TA_RAS_STATUS__SUCCESS:
- return;
- case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE:
- dev_warn(adev->dev,
- "RAS WARN: %s %s currently unavailable\n",
- invoke_type,
- block_name);
- break;
- default:
- dev_err(adev->dev,
- "RAS ERROR: %s %s error failed ret 0x%X\n",
- invoke_type,
- block_name,
- ret);
- }
-}
-
/* feature ctl begin */
static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev,
- struct ras_common_if *head)
+ struct ras_common_if *head)
{
- struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
-
- return con->hw_supported & BIT(head->block);
+ return adev->ras_hw_enabled & BIT(head->block);
}
static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev,
@@ -658,11 +637,7 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev,
con->features |= BIT(head->block);
} else {
if (obj && amdgpu_ras_is_feature_enabled(adev, head)) {
- /* skip clean gfx ras context feature for VEGA20 Gaming.
- * will clean later
- */
- if (!(!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)))
- con->features &= ~BIT(head->block);
+ con->features &= ~BIT(head->block);
put_obj(obj);
}
}
@@ -708,15 +683,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!amdgpu_ras_intr_triggered()) {
ret = psp_ras_enable_features(&adev->psp, info, enable);
if (ret) {
- amdgpu_ras_parse_status_code(adev,
- enable ? "enable":"disable",
- ras_block_str(head->block),
- (enum ta_ras_status)ret);
- if (ret == TA_RAS_STATUS__RESET_NEEDED)
- ret = -EAGAIN;
- else
- ret = -EINVAL;
-
+ dev_err(adev->dev, "ras %s %s failed %d\n",
+ enable ? "enable":"disable",
+ ras_block_str(head->block),
+ ret);
goto out;
}
}
@@ -770,6 +740,10 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev,
con->features |= BIT(head->block);
ret = amdgpu_ras_feature_enable(adev, head, 0);
+
+ /* clean gfx block ras features flag */
+ if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX)
+ con->features &= ~BIT(head->block);
}
} else
ret = amdgpu_ras_feature_enable(adev, head, enable);
@@ -890,6 +864,11 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
adev->gmc.xgmi.ras_funcs->query_ras_error_count)
adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data);
break;
+ case AMDGPU_RAS_BLOCK__HDP:
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->query_ras_error_count)
+ adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data);
+ break;
default:
break;
}
@@ -901,17 +880,42 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
info->ce_count = obj->err_data.ce_count;
if (err_data.ce_count) {
- dev_info(adev->dev, "%ld correctable hardware errors "
+ if (adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ dev_info(adev->dev, "socket: %d, die: %d "
+ "%ld correctable hardware errors "
+ "detected in %s block, no user "
+ "action is needed.\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ obj->err_data.ce_count,
+ ras_block_str(info->head.block));
+ } else {
+ dev_info(adev->dev, "%ld correctable hardware errors "
"detected in %s block, no user "
"action is needed.\n",
obj->err_data.ce_count,
ras_block_str(info->head.block));
+ }
}
if (err_data.ue_count) {
- dev_info(adev->dev, "%ld uncorrectable hardware errors "
+ if (adev->smuio.funcs &&
+ adev->smuio.funcs->get_socket_id &&
+ adev->smuio.funcs->get_die_id) {
+ dev_info(adev->dev, "socket: %d, die: %d "
+ "%ld uncorrectable hardware errors "
+ "detected in %s block\n",
+ adev->smuio.funcs->get_socket_id(adev),
+ adev->smuio.funcs->get_die_id(adev),
+ obj->err_data.ue_count,
+ ras_block_str(info->head.block));
+ } else {
+ dev_info(adev->dev, "%ld uncorrectable hardware errors "
"detected in %s block\n",
obj->err_data.ue_count,
ras_block_str(info->head.block));
+ }
}
return 0;
@@ -937,11 +941,20 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev,
if (adev->mmhub.ras_funcs &&
adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+
+ if (adev->mmhub.ras_funcs &&
+ adev->mmhub.ras_funcs->reset_ras_error_status)
+ adev->mmhub.ras_funcs->reset_ras_error_status(adev);
break;
case AMDGPU_RAS_BLOCK__SDMA:
if (adev->sdma.funcs->reset_ras_error_count)
adev->sdma.funcs->reset_ras_error_count(adev);
break;
+ case AMDGPU_RAS_BLOCK__HDP:
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->reset_ras_error_count)
+ adev->hdp.ras_funcs->reset_ras_error_count(adev);
+ break;
default:
break;
}
@@ -1022,10 +1035,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev,
ret = -EINVAL;
}
- amdgpu_ras_parse_status_code(adev,
- "inject",
- ras_block_str(info->head.block),
- (enum ta_ras_status)ret);
+ if (ret)
+ dev_err(adev->dev, "ras inject %s failed %d\n",
+ ras_block_str(info->head.block), ret);
return ret;
}
@@ -1038,7 +1050,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev,
struct ras_manager *obj;
struct ras_err_data data = {0, 0};
- if (!adev->ras_features || !con)
+ if (!adev->ras_enabled || !con)
return 0;
list_for_each_entry(obj, &con->head, node) {
@@ -1265,8 +1277,8 @@ static int amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev)
static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- struct dentry *dir;
- struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct drm_minor *minor = adev_to_drm(adev)->primary;
+ struct dentry *dir;
dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root);
debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev,
@@ -1275,6 +1287,8 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *
&amdgpu_ras_debugfs_eeprom_ops);
debugfs_create_u32("bad_page_cnt_threshold", 0444, dir,
&con->bad_page_cnt_threshold);
+ debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled);
+ debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled);
/*
* After one uncorrectable error happens, usually GPU recovery will
@@ -1561,7 +1575,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- if (!adev->ras_features || !con)
+ if (!adev->ras_enabled || !con)
return;
list_for_each_entry(obj, &con->head, node) {
@@ -1611,7 +1625,7 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj;
- if (!adev->ras_features || !con)
+ if (!adev->ras_enabled || !con)
return;
list_for_each_entry(obj, &con->head, node) {
@@ -1925,7 +1939,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
bool exc_err_limit = false;
int ret;
- if (adev->ras_features && con)
+ if (adev->ras_enabled && con)
data = &con->eh_data;
else
return 0;
@@ -2029,6 +2043,23 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
}
/*
+ * this is a workaround for the vega20 workstation sku:
+ * force enable gfx ras and ignore the vbios gfx ras flag,
+ * since GC EDC can not be written
+ */
+static void amdgpu_ras_get_quirks(struct amdgpu_device *adev)
+{
+ struct atom_context *ctx = adev->mode_info.atom_context;
+
+ if (!ctx)
+ return;
+
+ if (strnstr(ctx->vbios_version, "D16406",
+ sizeof(ctx->vbios_version)))
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX);
+}
+
+/*
* check hardware's ras ability which will be saved in hw_supported.
 * if hardware does not support ras, we can skip some ras initialization and
* forbid some ras operations from IP.
@@ -2037,11 +2068,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev)
 * we have to initialize ras as normal, but need to check if the operation is
* allowed or not in each function.
*/
-static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
- uint32_t *hw_supported, uint32_t *supported)
+static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
{
- *hw_supported = 0;
- *supported = 0;
+ adev->ras_hw_enabled = adev->ras_enabled = 0;
if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw ||
!amdgpu_ras_asic_supported(adev))
@@ -2050,33 +2079,34 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev,
if (!adev->gmc.xgmi.connected_to_cpu) {
if (amdgpu_atomfirmware_mem_ecc_supported(adev)) {
dev_info(adev->dev, "MEM ECC is active.\n");
- *hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC |
- 1 << AMDGPU_RAS_BLOCK__DF);
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
} else {
dev_info(adev->dev, "MEM ECC is not presented.\n");
}
if (amdgpu_atomfirmware_sram_ecc_supported(adev)) {
dev_info(adev->dev, "SRAM ECC is active.\n");
- *hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
- 1 << AMDGPU_RAS_BLOCK__DF);
+ adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
+ 1 << AMDGPU_RAS_BLOCK__DF);
} else {
dev_info(adev->dev, "SRAM ECC is not presented.\n");
}
} else {
 /* driver only manages RAS features for a few IP blocks
 * when the GPU is connected to the CPU through XGMI */
- *hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX |
- 1 << AMDGPU_RAS_BLOCK__SDMA |
- 1 << AMDGPU_RAS_BLOCK__MMHUB);
+ adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX |
+ 1 << AMDGPU_RAS_BLOCK__SDMA |
+ 1 << AMDGPU_RAS_BLOCK__MMHUB);
}
+ amdgpu_ras_get_quirks(adev);
+
/* hw_supported needs to be aligned with RAS block mask. */
- *hw_supported &= AMDGPU_RAS_BLOCK_MASK;
+ adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK;
- *supported = amdgpu_ras_enable == 0 ?
- 0 : *hw_supported & amdgpu_ras_mask;
- adev->ras_features = *supported;
+ adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 :
+ adev->ras_hw_enabled & amdgpu_ras_mask;
}
int amdgpu_ras_init(struct amdgpu_device *adev)
@@ -2097,13 +2127,13 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
amdgpu_ras_set_context(adev, con);
- amdgpu_ras_check_supported(adev, &con->hw_supported,
- &con->supported);
- if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) {
+ amdgpu_ras_check_supported(adev);
+
+ if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) {
/* set gfx block ras context feature for VEGA20 Gaming
* send ras disable cmd to ras ta during ras late init.
*/
- if (!adev->ras_features && adev->asic_type == CHIP_VEGA20) {
+ if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) {
con->features |= BIT(AMDGPU_RAS_BLOCK__GFX);
return 0;
@@ -2153,8 +2183,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev)
}
dev_info(adev->dev, "RAS INFO: ras initialized successfully, "
- "hardware ability[%x] ras_mask[%x]\n",
- con->hw_supported, con->supported);
+ "hardware ability[%x] ras_mask[%x]\n",
+ adev->ras_hw_enabled, adev->ras_enabled);
+
return 0;
release_con:
amdgpu_ras_set_context(adev, NULL);
@@ -2268,7 +2299,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev)
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
struct ras_manager *obj, *tmp;
- if (!adev->ras_features || !con) {
+ if (!adev->ras_enabled || !con) {
 /* clean ras context for VEGA20 Gaming after sending the ras disable cmd */
amdgpu_release_ras_context(adev);
@@ -2314,7 +2345,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- if (!adev->ras_features || !con)
+ if (!adev->ras_enabled || !con)
return;
amdgpu_ras_disable_all_features(adev, 0);
@@ -2328,7 +2359,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- if (!adev->ras_features || !con)
+ if (!adev->ras_enabled || !con)
return 0;
/* Need disable ras on all IPs here before ip [hw/sw]fini */
@@ -2341,7 +2372,7 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
{
struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
- if (!adev->ras_features || !con)
+ if (!adev->ras_enabled || !con)
return 0;
amdgpu_ras_fs_fini(adev);
@@ -2360,10 +2391,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev)
void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev)
{
- uint32_t hw_supported, supported;
-
- amdgpu_ras_check_supported(adev, &hw_supported, &supported);
- if (!hw_supported)
+ amdgpu_ras_check_supported(adev);
+ if (!adev->ras_hw_enabled)
return;
if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) {
@@ -2392,7 +2421,7 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev)
if (!con)
return;
- if (!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
+ if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) {
con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX);
amdgpu_ras_set_context(adev, NULL);
kfree(con);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 60df268a0c66..201fbdee1d09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -313,9 +313,6 @@ struct ras_common_if {
struct amdgpu_ras {
/* ras infrastructure */
/* for ras itself. */
- uint32_t hw_supported;
- /* for IP to check its ras ability. */
- uint32_t supported;
uint32_t features;
struct list_head head;
/* sysfs */
@@ -478,7 +475,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev,
if (block >= AMDGPU_RAS_BLOCK_COUNT)
return 0;
- return ras && (ras->supported & (1 << block));
+ return ras && (adev->ras_enabled & (1 << block));
}
int amdgpu_ras_recovery_init(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 688624ebe421..7b634a1517f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -158,6 +158,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
* @irq_src: interrupt source to use for this ring
* @irq_type: interrupt type to use for this ring
* @hw_prio: ring priority (NORMAL/HIGH)
+ * @sched_score: optional score atomic shared with other schedulers
*
* Initialize the driver information for the selected ring (all asics).
* Returns 0 on success, error on failure.
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
index b860ec913ac5..484bb3dcec47 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h
@@ -29,6 +29,7 @@ struct amdgpu_smuio_funcs {
void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable);
void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags);
u32 (*get_die_id)(struct amdgpu_device *adev);
+ u32 (*get_socket_id)(struct amdgpu_device *adev);
bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev);
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8c7ec09eb1a4..10391fcff343 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -32,7 +32,6 @@
#include <linux/dma-mapping.h>
#include <linux/iommu.h>
-#include <linux/hmm.h>
#include <linux/pagemap.h>
#include <linux/sched/task.h>
#include <linux/sched/mm.h>
@@ -112,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
}
abo = ttm_to_amdgpu_bo(bo);
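+	/*
+	 * Presumably the exclusive fence here is the KFD eviction fence;
+	 * enabling software signaling lets the SVM code react to the
+	 * eviction, while returning no placements keeps TTM from migrating
+	 * the pages itself.
+	 */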
+ if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) {
+ struct dma_fence *fence;
+ struct dma_resv *resv = &bo->base._resv;
+
+ rcu_read_lock();
+ fence = rcu_dereference(resv->fence_excl);
+ if (fence && !fence->ops->signaled)
+ dma_fence_enable_sw_signaling(fence);
+
+ placement->num_placement = 0;
+ placement->num_busy_placement = 0;
+ rcu_read_unlock();
+ return;
+ }
switch (bo->mem.mem_type) {
case AMDGPU_PL_GDS:
case AMDGPU_PL_GWS:
@@ -165,13 +178,6 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
- /*
- * Don't verify access for KFD BOs. They don't have a GEM
- * object associated with them.
- */
- if (abo->kfd_bo)
- return 0;
-
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
return -EPERM;
return drm_vma_node_verify_access(&abo->tbo.base.vma_node,
@@ -288,7 +294,7 @@ error_free:
}
/**
- * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ * amdgpu_ttm_copy_mem_to_mem - Helper function for copy
* @adev: amdgpu device
* @src: buffer/address where to read from
* @dst: buffer/address where to write to
@@ -670,10 +676,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
struct amdgpu_ttm_tt *gtt = (void *)ttm;
unsigned long start = gtt->userptr;
struct vm_area_struct *vma;
- struct hmm_range *range;
- unsigned long timeout;
struct mm_struct *mm;
- unsigned long i;
+ bool readonly;
int r = 0;
mm = bo->notifier.mm;
@@ -689,76 +693,26 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
if (!mmget_not_zero(mm)) /* Happens during process shutdown */
return -ESRCH;
- range = kzalloc(sizeof(*range), GFP_KERNEL);
- if (unlikely(!range)) {
- r = -ENOMEM;
- goto out;
- }
- range->notifier = &bo->notifier;
- range->start = bo->notifier.interval_tree.start;
- range->end = bo->notifier.interval_tree.last + 1;
- range->default_flags = HMM_PFN_REQ_FAULT;
- if (!amdgpu_ttm_tt_is_readonly(ttm))
- range->default_flags |= HMM_PFN_REQ_WRITE;
-
- range->hmm_pfns = kvmalloc_array(ttm->num_pages,
- sizeof(*range->hmm_pfns), GFP_KERNEL);
- if (unlikely(!range->hmm_pfns)) {
- r = -ENOMEM;
- goto out_free_ranges;
- }
-
mmap_read_lock(mm);
vma = find_vma(mm, start);
+ mmap_read_unlock(mm);
if (unlikely(!vma || start < vma->vm_start)) {
r = -EFAULT;
- goto out_unlock;
+ goto out_putmm;
}
if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) &&
vma->vm_file)) {
r = -EPERM;
- goto out_unlock;
- }
- mmap_read_unlock(mm);
- timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
-
-retry:
- range->notifier_seq = mmu_interval_read_begin(&bo->notifier);
-
- mmap_read_lock(mm);
- r = hmm_range_fault(range);
- mmap_read_unlock(mm);
- if (unlikely(r)) {
- /*
- * FIXME: This timeout should encompass the retry from
- * mmu_interval_read_retry() as well.
- */
- if (r == -EBUSY && !time_after(jiffies, timeout))
- goto retry;
- goto out_free_pfns;
+ goto out_putmm;
}
- /*
- * Due to default_flags, all pages are HMM_PFN_VALID or
- * hmm_range_fault() fails. FIXME: The pages cannot be touched outside
- * the notifier_lock, and mmu_interval_read_retry() must be done first.
- */
- for (i = 0; i < ttm->num_pages; i++)
- pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]);
-
- gtt->range = range;
+ readonly = amdgpu_ttm_tt_is_readonly(ttm);
+ r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
+ ttm->num_pages, &gtt->range, readonly,
+ false);
+out_putmm:
mmput(mm);
- return 0;
-
-out_unlock:
- mmap_read_unlock(mm);
-out_free_pfns:
- kvfree(range->hmm_pfns);
-out_free_ranges:
- kfree(range);
-out:
- mmput(mm);
return r;
}
@@ -787,10 +741,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
* FIXME: Must always hold notifier_lock for this, and must
* not ignore the return code.
*/
- r = mmu_interval_read_retry(gtt->range->notifier,
- gtt->range->notifier_seq);
- kvfree(gtt->range->hmm_pfns);
- kfree(gtt->range);
+ r = amdgpu_hmm_range_get_pages_done(gtt->range);
gtt->range = NULL;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 0c9c5255aa42..a57842689d42 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -50,9 +50,12 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
struct drm_device *ddev = adev_to_drm(adev);
/* enable virtual display */
- if (adev->mode_info.num_crtc == 0)
- adev->mode_info.num_crtc = 1;
- adev->enable_virtual_display = true;
+ if (adev->asic_type != CHIP_ALDEBARAN &&
+ adev->asic_type != CHIP_ARCTURUS) {
+ if (adev->mode_info.num_crtc == 0)
+ adev->mode_info.num_crtc = 1;
+ adev->enable_virtual_display = true;
+ }
ddev->driver_features &= ~DRIVER_ATOMIC;
adev->cg_flags = 0;
adev->pg_flags = 0;
@@ -679,6 +682,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
case CHIP_VEGA10:
case CHIP_VEGA20:
case CHIP_ARCTURUS:
+ case CHIP_ALDEBARAN:
soc15_set_virt_ops(adev);
break;
case CHIP_NAVI10:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 4a3e3f72e127..edc63d3e087e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -38,6 +38,7 @@
#include "amdgpu_gmc.h"
#include "amdgpu_xgmi.h"
#include "amdgpu_dma_buf.h"
+#include "kfd_svm.h"
/**
* DOC: GPUVM
@@ -850,35 +851,60 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
}
/**
- * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation
+ * amdgpu_vm_pt_create - create bo for PD/PT
*
* @adev: amdgpu_device pointer
* @vm: requesting vm
* @level: the page table level
 * @immediate: use an immediate update
- * @bp: resulting BO allocation parameters
+ * @bo: pointer to the buffer object pointer
*/
-static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+static int amdgpu_vm_pt_create(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
int level, bool immediate,
- struct amdgpu_bo_param *bp)
+ struct amdgpu_bo **bo)
{
- memset(bp, 0, sizeof(*bp));
+ struct amdgpu_bo_param bp;
+ int r;
- bp->size = amdgpu_vm_bo_size(adev, level);
- bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
- bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
- bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
- bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
+ memset(&bp, 0, sizeof(bp));
+
+ bp.size = amdgpu_vm_bo_size(adev, level);
+ bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
+ bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+ bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain);
+ bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_CPU_GTT_USWC;
- bp->bo_ptr_size = sizeof(struct amdgpu_bo);
+ bp.bo_ptr_size = sizeof(struct amdgpu_bo);
if (vm->use_cpu_for_update)
- bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
- else if (!vm->root.base.bo || vm->root.base.bo->shadow)
- bp->flags |= AMDGPU_GEM_CREATE_SHADOW;
- bp->type = ttm_bo_type_kernel;
- bp->no_wait_gpu = immediate;
+ bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
+
+ bp.type = ttm_bo_type_kernel;
+ bp.no_wait_gpu = immediate;
if (vm->root.base.bo)
- bp->resv = vm->root.base.bo->tbo.base.resv;
+ bp.resv = vm->root.base.bo->tbo.base.resv;
+
+ r = amdgpu_bo_create(adev, &bp, bo);
+ if (r)
+ return r;
+
+	if (vm->is_compute_context || (adev->flags & AMD_IS_APU))
+ return 0;
+
+ if (!bp.resv)
+ WARN_ON(dma_resv_lock((*bo)->tbo.base.resv,
+ NULL));
+ r = amdgpu_bo_create_shadow(adev, bp.size, *bo);
+
+ if (!bp.resv)
+ dma_resv_unlock((*bo)->tbo.base.resv);
+
+ if (r) {
+ amdgpu_bo_unref(bo);
+ return r;
+ }
+
+ return 0;
}
/**
@@ -901,7 +927,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
bool immediate)
{
struct amdgpu_vm_pt *entry = cursor->entry;
- struct amdgpu_bo_param bp;
struct amdgpu_bo *pt;
int r;
@@ -919,9 +944,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
if (entry->base.bo)
return 0;
- amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp);
-
- r = amdgpu_bo_create(adev, &bp, &pt);
+ r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
if (r)
return r;
@@ -1593,15 +1616,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
* Returns:
* 0 for success, -EINVAL for failure.
*/
-static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
- struct amdgpu_device *bo_adev,
- struct amdgpu_vm *vm, bool immediate,
- bool unlocked, struct dma_resv *resv,
- uint64_t start, uint64_t last,
- uint64_t flags, uint64_t offset,
- struct drm_mm_node *nodes,
- dma_addr_t *pages_addr,
- struct dma_fence **fence)
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev,
+ struct amdgpu_vm *vm, bool immediate,
+ bool unlocked, struct dma_resv *resv,
+ uint64_t start, uint64_t last,
+ uint64_t flags, uint64_t offset,
+ struct drm_mm_node *nodes,
+ dma_addr_t *pages_addr,
+ struct dma_fence **fence)
{
struct amdgpu_vm_update_params params;
enum amdgpu_sync_mode sync_mode;
@@ -2818,7 +2841,6 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
*
* @adev: amdgpu_device pointer
* @vm: requested vm
- * @vm_context: Indicates if it GFX or Compute context
* @pasid: Process address space identifier
*
* Init @vm fields.
@@ -2826,10 +2848,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
* Returns:
* 0 for success, error for failure.
*/
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int vm_context, u32 pasid)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid)
{
- struct amdgpu_bo_param bp;
struct amdgpu_bo *root;
int r, i;
@@ -2861,16 +2881,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
vm->pte_support_ats = false;
vm->is_compute_context = false;
- if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) {
- vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
- AMDGPU_VM_USE_CPU_FOR_COMPUTE);
+ vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
+ AMDGPU_VM_USE_CPU_FOR_GFX);
- if (adev->asic_type == CHIP_RAVEN)
- vm->pte_support_ats = true;
- } else {
- vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
- AMDGPU_VM_USE_CPU_FOR_GFX);
- }
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update &&
@@ -2887,10 +2900,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
mutex_init(&vm->eviction_lock);
vm->evicting = false;
- amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp);
- if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE)
- bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW;
- r = amdgpu_bo_create(adev, &bp, &root);
+ r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
+ false, &root);
if (r)
goto error_free_delayed;
@@ -3349,6 +3360,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
uint64_t addr)
{
+ bool is_compute_context = false;
struct amdgpu_bo *root;
uint64_t value, flags;
struct amdgpu_vm *vm;
@@ -3356,15 +3368,25 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
spin_lock(&adev->vm_manager.pasid_lock);
vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
- if (vm)
+ if (vm) {
root = amdgpu_bo_ref(vm->root.base.bo);
- else
+ is_compute_context = vm->is_compute_context;
+ } else {
root = NULL;
+ }
spin_unlock(&adev->vm_manager.pasid_lock);
if (!root)
return false;
+ addr /= AMDGPU_GPU_PAGE_SIZE;
+
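+	/*
+	 * svm_range_restore_pages() presumably returns 0 once the SVM code
+	 * has handled the retry fault, in which case nothing more is needed
+	 * here.
+	 */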
+ if (is_compute_context &&
+ !svm_range_restore_pages(adev, pasid, addr)) {
+ amdgpu_bo_unref(&root);
+ return true;
+ }
+
r = amdgpu_bo_reserve(root, true);
if (r)
goto error_unref;
@@ -3378,18 +3400,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
if (!vm)
goto error_unlock;
- addr /= AMDGPU_GPU_PAGE_SIZE;
flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
AMDGPU_PTE_SYSTEM;
- if (vm->is_compute_context) {
+ if (is_compute_context) {
/* Intentionally setting invalid PTE flag
* combination to force a no-retry-fault
*/
flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
AMDGPU_PTE_TF;
value = 0;
-
} else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
/* Redirect the access to the dummy page */
value = adev->dummy_page_addr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 6fd7dad0540a..ea60ec122b51 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -121,9 +121,6 @@ struct amdgpu_bo_list_entry;
/* max vmids dedicated for process */
#define AMDGPU_VM_MAX_RESERVED_VMID 1
-#define AMDGPU_VM_CONTEXT_GFX 0
-#define AMDGPU_VM_CONTEXT_COMPUTE 1
-
/* See vm_update_mode */
#define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
#define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
@@ -367,6 +364,8 @@ struct amdgpu_vm_manager {
spinlock_t pasid_lock;
};
+struct amdgpu_bo_va_mapping;
+
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
@@ -378,8 +377,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
- int vm_context, u32 pasid);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid);
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
@@ -398,6 +396,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct dma_fence **fence);
int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
+int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+ struct amdgpu_device *bo_adev,
+ struct amdgpu_vm *vm, bool immediate,
+ bool unlocked, struct dma_resv *resv,
+ uint64_t start, uint64_t last,
+ uint64_t flags, uint64_t offset,
+ struct drm_mm_node *nodes,
+ dma_addr_t *pages_addr,
+ struct dma_fence **fence);
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
bool clear);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index f7235438535f..f78d21910e07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -29,12 +29,14 @@
#include "amdgpu_atomfirmware.h"
#include "atom.h"
-static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man)
+static inline struct amdgpu_vram_mgr *
+to_vram_mgr(struct ttm_resource_manager *man)
{
return container_of(man, struct amdgpu_vram_mgr, manager);
}
-static inline struct amdgpu_device *to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
+static inline struct amdgpu_device *
+to_amdgpu_device(struct amdgpu_vram_mgr *mgr)
{
return container_of(mgr, struct amdgpu_device, mman.vram_mgr);
}
@@ -82,12 +84,14 @@ static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev,
* amount of currently used VRAM in bytes
*/
static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ struct ttm_resource_manager *man;
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man));
}
@@ -100,18 +104,28 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev,
* amount of currently used visible VRAM in bytes
*/
static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
- struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
+ struct ttm_resource_manager *man;
+ man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man));
}
+/**
+ * DOC: mem_info_vram_vendor
+ *
+ * The amdgpu driver provides a sysfs API for reporting the vendor of the
+ * installed VRAM.
+ * The file mem_info_vram_vendor is used for this and returns the name of the
+ * vendor.
+ */
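
A brief usage sketch, assuming the attribute is exposed through the device's sysfs directory; the /sys/class/drm/card0/device path below is an assumption for the first card.

#include <stdio.h>

int main(void)
{
	char vendor[32] = {0};
	FILE *f = fopen("/sys/class/drm/card0/device/mem_info_vram_vendor", "r");

	if (!f)
		return 1;
	if (fgets(vendor, sizeof(vendor), f))
		printf("VRAM vendor: %s", vendor);
	fclose(f);
	return 0;
}
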
static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+ struct device_attribute *attr,
+ char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = drm_to_adev(ddev);
@@ -162,78 +176,6 @@ static const struct attribute *amdgpu_vram_mgr_attributes[] = {
NULL
};
-static const struct ttm_resource_manager_func amdgpu_vram_mgr_func;
-
-/**
- * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
- *
- * @adev: amdgpu_device pointer
- *
- * Allocate and initialize the VRAM manager.
- */
-int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
-{
- struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
- struct ttm_resource_manager *man = &mgr->manager;
- int ret;
-
- ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);
-
- man->func = &amdgpu_vram_mgr_func;
-
- drm_mm_init(&mgr->mm, 0, man->size);
- spin_lock_init(&mgr->lock);
- INIT_LIST_HEAD(&mgr->reservations_pending);
- INIT_LIST_HEAD(&mgr->reserved_pages);
-
- /* Add the two VRAM-related sysfs files */
- ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
- if (ret)
- DRM_ERROR("Failed to register sysfs\n");
-
- ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
- ttm_resource_manager_set_used(man, true);
- return 0;
-}
-
-/**
- * amdgpu_vram_mgr_fini - free and destroy VRAM manager
- *
- * @adev: amdgpu_device pointer
- *
- * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
- * allocated inside it.
- */
-void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
-{
- struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
- struct ttm_resource_manager *man = &mgr->manager;
- int ret;
- struct amdgpu_vram_reservation *rsv, *temp;
-
- ttm_resource_manager_set_used(man, false);
-
- ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
- if (ret)
- return;
-
- spin_lock(&mgr->lock);
- list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
- kfree(rsv);
-
- list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
- drm_mm_remove_node(&rsv->mm_node);
- kfree(rsv);
- }
- drm_mm_takedown(&mgr->mm);
- spin_unlock(&mgr->lock);
-
- sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
-
- ttm_resource_manager_cleanup(man);
- ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
-}
-
/**
* amdgpu_vram_mgr_vis_size - Calculate visible node size
*
@@ -283,6 +225,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo)
return usage;
}
+/* Commit the reservation of VRAM pages */
static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
@@ -415,13 +358,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
const struct ttm_place *place,
struct ttm_resource *mem)
{
+ unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
+ uint64_t vis_usage = 0, mem_bytes, max_bytes;
struct drm_mm *mm = &mgr->mm;
- struct drm_mm_node *nodes;
enum drm_mm_insert_mode mode;
- unsigned long lpfn, num_nodes, pages_per_node, pages_left;
- uint64_t vis_usage = 0, mem_bytes, max_bytes;
+ struct drm_mm_node *nodes;
unsigned i;
int r;
@@ -448,10 +391,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
pages_per_node = HPAGE_PMD_NR;
#else
/* default to 2MB */
- pages_per_node = (2UL << (20UL - PAGE_SHIFT));
+ pages_per_node = 2UL << (20UL - PAGE_SHIFT);
#endif
- pages_per_node = max((uint32_t)pages_per_node,
- tbo->page_alignment);
+ pages_per_node = max_t(uint32_t, pages_per_node,
+ tbo->page_alignment);
num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node);
}
@@ -469,42 +412,37 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
mem->start = 0;
pages_left = mem->num_pages;
- spin_lock(&mgr->lock);
- for (i = 0; pages_left >= pages_per_node; ++i) {
- unsigned long pages = rounddown_pow_of_two(pages_left);
+ /* Limit maximum size to 2GB due to SG table limitations */
+ pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
- /* Limit maximum size to 2GB due to SG table limitations */
- pages = min(pages, (2UL << (30 - PAGE_SHIFT)));
-
- r = drm_mm_insert_node_in_range(mm, &nodes[i], pages,
- pages_per_node, 0,
- place->fpfn, lpfn,
- mode);
- if (unlikely(r))
- break;
-
- vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
- amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
- pages_left -= pages;
- }
-
- for (; pages_left; ++i) {
- unsigned long pages = min(pages_left, pages_per_node);
+ i = 0;
+ spin_lock(&mgr->lock);
+ while (pages_left) {
uint32_t alignment = tbo->page_alignment;
- if (pages == pages_per_node)
+ if (pages >= pages_per_node)
alignment = pages_per_node;
- r = drm_mm_insert_node_in_range(mm, &nodes[i],
- pages, alignment, 0,
- place->fpfn, lpfn,
- mode);
- if (unlikely(r))
+ r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, alignment,
+ 0, place->fpfn, lpfn, mode);
+ if (unlikely(r)) {
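+			/*
+			 * Could not fit this chunk; retry with a smaller
+			 * power-of-two size before giving up.
+			 */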
+ if (pages > pages_per_node) {
+ if (is_power_of_2(pages))
+ pages = pages / 2;
+ else
+ pages = rounddown_pow_of_two(pages);
+ continue;
+ }
goto error;
+ }
vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]);
amdgpu_vram_mgr_virt_start(mem, &nodes[i]);
pages_left -= pages;
+ ++i;
+
+ if (pages > pages_left)
+ pages = pages_left;
}
spin_unlock(&mgr->lock);
@@ -728,3 +666,73 @@ static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
.free = amdgpu_vram_mgr_del,
.debug = amdgpu_vram_mgr_debug
};
+
+/**
+ * amdgpu_vram_mgr_init - init VRAM manager and DRM MM
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Allocate and initialize the VRAM manager.
+ */
+int amdgpu_vram_mgr_init(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int ret;
+
+ ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT);
+
+ man->func = &amdgpu_vram_mgr_func;
+
+ drm_mm_init(&mgr->mm, 0, man->size);
+ spin_lock_init(&mgr->lock);
+ INIT_LIST_HEAD(&mgr->reservations_pending);
+ INIT_LIST_HEAD(&mgr->reserved_pages);
+
+ /* Add the two VRAM-related sysfs files */
+ ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
+ if (ret)
+ DRM_ERROR("Failed to register sysfs\n");
+
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
+ ttm_resource_manager_set_used(man, true);
+ return 0;
+}
+
+/**
+ * amdgpu_vram_mgr_fini - free and destroy VRAM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Destroy and free the VRAM manager, returns -EBUSY if ranges are still
+ * allocated inside it.
+ */
+void amdgpu_vram_mgr_fini(struct amdgpu_device *adev)
+{
+ struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
+ struct ttm_resource_manager *man = &mgr->manager;
+ int ret;
+ struct amdgpu_vram_reservation *rsv, *temp;
+
+ ttm_resource_manager_set_used(man, false);
+
+ ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man);
+ if (ret)
+ return;
+
+ spin_lock(&mgr->lock);
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
+ kfree(rsv);
+
+ list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
+ drm_mm_remove_node(&rsv->mm_node);
+ kfree(rsv);
+ }
+ drm_mm_takedown(&mgr->mm);
+ spin_unlock(&mgr->lock);
+
+ sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes);
+
+ ttm_resource_manager_cleanup(man);
+ ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index 1a8f6d4baab2..befd0b4b7bea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -98,9 +98,9 @@ union amd_sriov_msg_feature_flags {
union amd_sriov_reg_access_flags {
struct {
- uint32_t vf_reg_access_ih : 1;
- uint32_t vf_reg_access_mmhub : 1;
- uint32_t vf_reg_access_gc : 1;
+ uint32_t vf_reg_psp_access_ih : 1;
+ uint32_t vf_reg_rlc_access_mmhub : 1;
+ uint32_t vf_reg_rlc_access_gc : 1;
uint32_t reserved : 29;
} flags;
uint32_t all;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 5c11144da051..33324427b555 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -421,6 +421,11 @@ static int dce_virtual_sw_init(void *handle)
static int dce_virtual_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ int i = 0;
+
+ for (i = 0; i < adev->mode_info.num_crtc; i++)
+ if (adev->mode_info.crtcs[i])
+ hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
kfree(adev->mode_info.bios_hardcoded_edid);
@@ -480,13 +485,6 @@ static int dce_virtual_hw_init(void *handle)
static int dce_virtual_hw_fini(void *handle)
{
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- int i = 0;
-
- for (i = 0; i<adev->mode_info.num_crtc; i++)
- if (adev->mode_info.crtcs[i])
- hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer);
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 0d8459d63bac..36ba229576d8 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -219,11 +219,11 @@ static void df_v3_6_query_hashes(struct amdgpu_device *adev)
adev->df.hash_status.hash_2m = false;
adev->df.hash_status.hash_1g = false;
- if (adev->asic_type != CHIP_ARCTURUS)
- return;
-
- /* encoding for hash-enabled on Arcturus */
- if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) {
+ /* encoding for hash-enabled on Arcturus and Aldebaran */
+ if ((adev->asic_type == CHIP_ARCTURUS &&
+ adev->df.funcs->get_fb_channel_number(adev) == 0xe) ||
+ (adev->asic_type == CHIP_ALDEBARAN &&
+ adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) {
tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl);
adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp,
DF_CS_UMC_AON0_DfGlobalCtrl,
@@ -278,7 +278,12 @@ static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev)
u32 tmp;
tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0);
- tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ tmp &=
+ ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+ else
+ tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK;
+
tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT;
return tmp;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a078a38c2cee..22608c45f07c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -3937,7 +3937,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev)
{
u32 tmp;
- if (adev->asic_type != CHIP_ARCTURUS)
+ if (adev->asic_type != CHIP_ARCTURUS &&
+ adev->asic_type != CHIP_ALDEBARAN)
return;
tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG);
@@ -4559,8 +4560,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
if (!ring->sched.ready)
return 0;
- if (adev->asic_type == CHIP_ARCTURUS ||
- adev->asic_type == CHIP_ALDEBARAN) {
+ if (adev->asic_type == CHIP_ARCTURUS) {
vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus;
vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus);
vgpr_init_regs_ptr = vgpr_init_regs_arcturus;
@@ -4745,7 +4745,11 @@ static int gfx_v9_0_ecc_late_init(void *handle)
}
/* requires IBs so do in late init after IB pool is initialized */
- r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+ if (adev->asic_type == CHIP_ALDEBARAN)
+ r = gfx_v9_4_2_do_edc_gpr_workarounds(adev);
+ else
+ r = gfx_v9_0_do_edc_gpr_workarounds(adev);
+
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index a30c7c10cd9a..dbad9ef002d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -22,6 +22,7 @@
*/
#include "amdgpu.h"
#include "soc15.h"
+#include "soc15d.h"
#include "gc/gc_9_4_2_offset.h"
#include "gc/gc_9_4_2_sh_mask.h"
@@ -31,6 +32,11 @@
#include "amdgpu_ras.h"
#include "amdgpu_gfx.h"
+#define SE_ID_MAX 8
+#define CU_ID_MAX 16
+#define SIMD_ID_MAX 4
+#define WAVE_ID_MAX 10
+
enum gfx_v9_4_2_utc_type {
VML2_MEM,
VML2_WALKER_MEM,
@@ -79,6 +85,634 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = {
SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20),
};
+/*
+ * This shader is used to clear VGPRS and LDS, and also write the input
+ * pattern into the write back buffer, which will be used by the driver to
+ * check whether all SIMDs have been covered.
+ */
+static const u32 vgpr_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xd3d94000,
+ 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003,
+ 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006,
+ 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009,
+ 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c,
+ 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f,
+ 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012,
+ 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015,
+ 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018,
+ 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b,
+ 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e,
+ 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021,
+ 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024,
+ 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027,
+ 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a,
+ 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d,
+ 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030,
+ 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033,
+ 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036,
+ 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039,
+ 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c,
+ 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f,
+ 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042,
+ 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045,
+ 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048,
+ 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b,
+ 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e,
+ 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051,
+ 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054,
+ 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057,
+ 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a,
+ 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d,
+ 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060,
+ 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063,
+ 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066,
+ 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069,
+ 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c,
+ 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f,
+ 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072,
+ 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075,
+ 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078,
+ 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b,
+ 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e,
+ 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081,
+ 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084,
+ 0x18000080, 0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087,
+ 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a,
+ 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d,
+ 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090,
+ 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093,
+ 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096,
+ 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099,
+ 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c,
+ 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f,
+ 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2,
+ 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5,
+ 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8,
+ 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab,
+ 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae,
+ 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1,
+ 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4,
+ 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7,
+ 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba,
+ 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd,
+ 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0,
+ 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3,
+ 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6,
+ 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9,
+ 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc,
+ 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf,
+ 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2,
+ 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5,
+ 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8,
+ 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db,
+ 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de,
+ 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1,
+ 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4,
+ 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7,
+ 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea,
+ 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed,
+ 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0,
+ 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3,
+ 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6,
+ 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9,
+ 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc,
+ 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff,
+ 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280,
+ 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280,
+ 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001,
+ 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xbe8b0004, 0xb78b4000,
+ 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201,
+ 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8,
+ 0xbf810000,
+};
+
+static const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400006 }, /* 64KB LDS */
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+/*
+ * The shaders below are used to clear the SGPRs and to write the input
+ * pattern into the write-back buffer. The first two dispatches should be
+ * scheduled simultaneously to make sure that all SGPRs can be allocated,
+ * so dispatch 1 needs to check the write-back buffer before it is scheduled,
+ * to make sure that the waves of dispatch 0 are spread evenly across all
+ * SIMDs. Both dispatch 0 and dispatch 1 should be halted until all waves
+ * are dispatched; the driver then writes a pattern to the shared memory to
+ * let all waves continue.
+ */
+static const u32 sgpr112_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+ 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+ 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+ 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+ 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+ 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+ 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+ 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+ 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+ 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+ 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+ 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+ 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
+ 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
+ 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
+ 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
+ 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
+ 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
+ 0xbeda0080, 0xbedb0080, 0xbedc0080, 0xbedd0080, 0xbede0080, 0xbedf0080,
+ 0xbee00080, 0xbee10080, 0xbee20080, 0xbee30080, 0xbee40080, 0xbee50080,
+ 0xbf810000
+};
+
+const struct soc15_reg_entry sgpr112_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x340 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static const u32 sgpr96_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+ 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+ 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+ 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+ 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+ 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+ 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+ 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+ 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+ 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+ 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+ 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+ 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080,
+ 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080,
+ 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080,
+ 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080,
+ 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080,
+ 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080,
+ 0xbf810000,
+};
+
+const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0xc },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x2c0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+/*
+ * This shader is used to clear the uninitialized SGPRs left after the above
+ * two dispatches; because of a hardware limitation, dispatch 0 cannot clear
+ * the top-hole SGPRs. Therefore 4 waves per SIMD are needed to cover them.
+ */
+static const u32 sgpr64_init_compute_shader_aldebaran[] = {
+ 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280,
+ 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208,
+ 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200,
+ 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102,
+ 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080,
+ 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080,
+ 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080,
+ 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080,
+ 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080,
+ 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080,
+ 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080,
+ 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080,
+ 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080,
+ 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080,
+ 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbf810000,
+};
+
+const struct soc15_reg_entry sgpr64_init_regs_aldebaran[] = {
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0x10 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x1c0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff },
+ { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff },
+};
+
+static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring,
+ struct amdgpu_ib *ib,
+ const u32 *shader_ptr, u32 shader_size,
+ const struct soc15_reg_entry *init_regs, u32 regs_size,
+ u32 compute_dim_x, u64 wb_gpu_addr, u32 pattern,
+ struct dma_fence **fence_ptr)
+{
+ int r, i;
+ uint32_t total_size, shader_offset;
+ u64 gpu_addr;
+
+ total_size = (regs_size * 3 + 4 + 5 + 5) * 4;
+ total_size = ALIGN(total_size, 256);
+ shader_offset = total_size;
+ total_size += ALIGN(shader_size, 256);
+
+ /* allocate an indirect buffer to put the commands in */
+ memset(ib, 0, sizeof(*ib));
+ r = amdgpu_ib_get(adev, NULL, total_size,
+ AMDGPU_IB_POOL_DIRECT, ib);
+ if (r) {
+ dev_err(adev->dev, "failed to get ib (%d).\n", r);
+ return r;
+ }
+
+ /* load the compute shaders */
+ for (i = 0; i < shader_size/sizeof(u32); i++)
+ ib->ptr[i + (shader_offset / 4)] = shader_ptr[i];
+
+ /* init the ib length to 0 */
+ ib->length_dw = 0;
+
+ /* write the register state for the compute dispatch */
+ for (i = 0; i < regs_size; i++) {
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1);
+ ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i])
+ - PACKET3_SET_SH_REG_START;
+ ib->ptr[ib->length_dw++] = init_regs[i].reg_value;
+ }
+
+ /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */
+ gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8;
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2);
+ ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO)
+ - PACKET3_SET_SH_REG_START;
+ ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr);
+ ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr);
+
+ /* write the wb buffer address */
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3);
+ ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0)
+ - PACKET3_SET_SH_REG_START;
+ ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr);
+ ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr);
+ ib->ptr[ib->length_dw++] = pattern;
+
+ /* write dispatch packet */
+ ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3);
+ ib->ptr[ib->length_dw++] = compute_dim_x; /* x */
+ ib->ptr[ib->length_dw++] = 1; /* y */
+ ib->ptr[ib->length_dw++] = 1; /* z */
+ ib->ptr[ib->length_dw++] =
+ REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1);
+
+	/* schedule the ib on the ring */
+ r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr);
+ if (r) {
+ dev_err(adev->dev, "ib submit failed (%d).\n", r);
+ amdgpu_ib_free(adev, ib, NULL);
+ }
+ return r;
+}
+
+static void gfx_v9_4_2_log_wave_assignment(struct amdgpu_device *adev, uint32_t *wb_ptr)
+{
+ uint32_t se, cu, simd, wave;
+ uint32_t offset = 0;
+ char *str;
+ int size;
+
+ str = kmalloc(256, GFP_KERNEL);
+ if (!str)
+ return;
+
+ dev_dbg(adev->dev, "wave assignment:\n");
+
+ for (se = 0; se < adev->gfx.config.max_shader_engines; se++) {
+ for (cu = 0; cu < CU_ID_MAX; cu++) {
+ memset(str, 0, 256);
+ size = sprintf(str, "SE[%02d]CU[%02d]: ", se, cu);
+ for (simd = 0; simd < SIMD_ID_MAX; simd++) {
+ size += sprintf(str + size, "[");
+ for (wave = 0; wave < WAVE_ID_MAX; wave++) {
+ size += sprintf(str + size, "%x", wb_ptr[offset]);
+ offset++;
+ }
+ size += sprintf(str + size, "] ");
+ }
+ dev_dbg(adev->dev, "%s\n", str);
+ }
+ }
+
+ kfree(str);
+}
+
+static int gfx_v9_4_2_wait_for_waves_assigned(struct amdgpu_device *adev,
+ uint32_t *wb_ptr, uint32_t mask,
+ uint32_t pattern, uint32_t num_wave, bool wait)
+{
+ uint32_t se, cu, simd, wave;
+ uint32_t loop = 0;
+ uint32_t wave_cnt;
+ uint32_t offset;
+
+ do {
+ wave_cnt = 0;
+ offset = 0;
+
+ for (se = 0; se < adev->gfx.config.max_shader_engines; se++)
+ for (cu = 0; cu < CU_ID_MAX; cu++)
+ for (simd = 0; simd < SIMD_ID_MAX; simd++)
+ for (wave = 0; wave < WAVE_ID_MAX; wave++) {
+ if (((1 << wave) & mask) &&
+ (wb_ptr[offset] == pattern))
+ wave_cnt++;
+
+ offset++;
+ }
+
+ if (wave_cnt == num_wave)
+ return 0;
+
+ mdelay(1);
+ } while (++loop < 2000 && wait);
+
+ dev_err(adev->dev, "actual wave num: %d, expected wave num: %d\n",
+ wave_cnt, num_wave);
+
+ gfx_v9_4_2_log_wave_assignment(adev, wb_ptr);
+
+ return -EBADSLT;
+}
+
+static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev)
+{
+ int r;
+ int wb_size = adev->gfx.config.max_shader_engines *
+ CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
+ struct amdgpu_ib wb_ib;
+ struct amdgpu_ib disp_ibs[3];
+ struct dma_fence *fences[3];
+ u32 pattern[3] = { 0x1, 0x5, 0xa };
+
+ /* bail if the compute ring is not ready */
+ if (!adev->gfx.compute_ring[0].sched.ready ||
+ !adev->gfx.compute_ring[1].sched.ready)
+ return 0;
+
+ /* allocate the write-back buffer from IB */
+ memset(&wb_ib, 0, sizeof(wb_ib));
+ r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
+ AMDGPU_IB_POOL_DIRECT, &wb_ib);
+ if (r) {
+ dev_err(adev->dev, "failed to get ib (%d) for wb\n", r);
+ return r;
+ }
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ibs[0],
+ sgpr112_init_compute_shader_aldebaran,
+ sizeof(sgpr112_init_compute_shader_aldebaran),
+ sgpr112_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr112_init_regs_aldebaran),
+ adev->gfx.cu_info.number,
+ wb_ib.gpu_addr, pattern[0], &fences[0]);
+ if (r) {
+ dev_err(adev->dev, "failed to clear first 224 sgprs\n");
+ goto pro_end;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b11,
+ pattern[0],
+ adev->gfx.cu_info.number * SIMD_ID_MAX * 2,
+ true);
+ if (r) {
+ dev_err(adev->dev, "wave coverage failed when clear first 224 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp0_failed;
+ }
+
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[1],
+ &disp_ibs[1],
+ sgpr96_init_compute_shader_aldebaran,
+ sizeof(sgpr96_init_compute_shader_aldebaran),
+ sgpr96_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr96_init_regs_aldebaran),
+ adev->gfx.cu_info.number * 2,
+ wb_ib.gpu_addr, pattern[1], &fences[1]);
+ if (r) {
+ dev_err(adev->dev, "failed to clear next 576 sgprs\n");
+ goto disp0_failed;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b11111100,
+ pattern[1], adev->gfx.cu_info.number * SIMD_ID_MAX * 6,
+ true);
+ if (r) {
+ dev_err(adev->dev, "wave coverage failed when clear first 576 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp1_failed;
+ }
+
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(fences[0], false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear first 224 sgprs\n");
+ goto disp1_failed;
+ }
+
+ r = dma_fence_wait(fences[1], false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear first 576 sgprs\n");
+ goto disp1_failed;
+ }
+
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ibs[2],
+ sgpr64_init_compute_shader_aldebaran,
+ sizeof(sgpr64_init_compute_shader_aldebaran),
+ sgpr64_init_regs_aldebaran,
+ ARRAY_SIZE(sgpr64_init_regs_aldebaran),
+ adev->gfx.cu_info.number,
+ wb_ib.gpu_addr, pattern[2], &fences[2]);
+ if (r) {
+ dev_err(adev->dev, "failed to clear first 256 sgprs\n");
+ goto disp1_failed;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b1111,
+ pattern[2],
+ adev->gfx.cu_info.number * SIMD_ID_MAX * 4,
+ true);
+ if (r) {
+ dev_err(adev->dev, "wave coverage failed when clear first 256 sgprs\n");
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+ goto disp2_failed;
+ }
+
+ wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */
+
+ r = dma_fence_wait(fences[2], false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear first 256 sgprs\n");
+ goto disp2_failed;
+ }
+
+disp2_failed:
+ amdgpu_ib_free(adev, &disp_ibs[2], NULL);
+ dma_fence_put(fences[2]);
+disp1_failed:
+ amdgpu_ib_free(adev, &disp_ibs[1], NULL);
+ dma_fence_put(fences[1]);
+disp0_failed:
+ amdgpu_ib_free(adev, &disp_ibs[0], NULL);
+ dma_fence_put(fences[0]);
+pro_end:
+ amdgpu_ib_free(adev, &wb_ib, NULL);
+
+ if (r)
+ dev_info(adev->dev, "Init SGPRS Failed\n");
+ else
+ dev_info(adev->dev, "Init SGPRS Successfully\n");
+
+ return r;
+}
+
+static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev)
+{
+ int r;
+	/* CU_ID: 0~15, SIMD_ID: 0~3, WAVE_ID: 0~9 */
+ int wb_size = adev->gfx.config.max_shader_engines *
+ CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX;
+ struct amdgpu_ib wb_ib;
+ struct amdgpu_ib disp_ib;
+ struct dma_fence *fence;
+ u32 pattern = 0xa;
+
+ /* bail if the compute ring is not ready */
+ if (!adev->gfx.compute_ring[0].sched.ready)
+ return 0;
+
+ /* allocate the write-back buffer from IB */
+ memset(&wb_ib, 0, sizeof(wb_ib));
+ r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t),
+ AMDGPU_IB_POOL_DIRECT, &wb_ib);
+ if (r) {
+ dev_err(adev->dev, "failed to get ib (%d) for wb.\n", r);
+ return r;
+ }
+ memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t));
+
+ r = gfx_v9_4_2_run_shader(adev,
+ &adev->gfx.compute_ring[0],
+ &disp_ib,
+ vgpr_init_compute_shader_aldebaran,
+ sizeof(vgpr_init_compute_shader_aldebaran),
+ vgpr_init_regs_aldebaran,
+ ARRAY_SIZE(vgpr_init_regs_aldebaran),
+ adev->gfx.cu_info.number,
+ wb_ib.gpu_addr, pattern, &fence);
+ if (r) {
+ dev_err(adev->dev, "failed to clear vgprs\n");
+ goto pro_end;
+ }
+
+ /* wait for the GPU to finish processing the IB */
+ r = dma_fence_wait(fence, false);
+ if (r) {
+ dev_err(adev->dev, "timeout to clear vgprs\n");
+ goto disp_failed;
+ }
+
+ r = gfx_v9_4_2_wait_for_waves_assigned(adev,
+ &wb_ib.ptr[1], 0b1,
+ pattern,
+ adev->gfx.cu_info.number * SIMD_ID_MAX,
+ false);
+ if (r) {
+ dev_err(adev->dev, "failed to cover all simds when clearing vgprs\n");
+ goto disp_failed;
+ }
+
+disp_failed:
+ amdgpu_ib_free(adev, &disp_ib, NULL);
+ dma_fence_put(fence);
+pro_end:
+ amdgpu_ib_free(adev, &wb_ib, NULL);
+
+ if (r)
+ dev_info(adev->dev, "Init VGPRS Failed\n");
+ else
+ dev_info(adev->dev, "Init VGPRS Successfully\n");
+
+ return r;
+}
+
+int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev)
+{
+ /* only support when RAS is enabled */
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX))
+ return 0;
+
+ gfx_v9_4_2_do_sgprs_init(adev);
+
+ gfx_v9_4_2_do_vgprs_init(adev);
+
+ return 0;
+}
+
static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev);
static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev);
@@ -808,8 +1442,9 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
};
-static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs =
- { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 };
+static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = {
+ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16
+};
static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
const struct soc15_reg_entry *reg,
@@ -1039,13 +1674,16 @@ static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev)
static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
{
uint32_t i, j;
+ uint32_t value;
+
+ value = REG_SET_FIELD(0, GCEA_ERR_STATUS, CLEAR_ERROR_STATUS, 0x1);
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
j++) {
gfx_v9_4_2_select_se_sh(adev, i, 0, j);
- WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
+ WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), value);
}
}
gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
index 81c5833b6b9f..6db1f88509af 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h
@@ -29,6 +29,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev,
void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev,
uint32_t die_id);
void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev);
+int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev);
extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 1e4678cb98f0..a03fdd41212b 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -283,10 +283,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
- /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ /* Send no-retry XNACK on fault to suppress VM fault storm.
+ * On Aldebaran, XNACK can be enabled in the SQ per-process.
+ * Retry faults need to be enabled for that to work.
+ */
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
- !adev->gmc.noretry);
+ !adev->gmc.noretry ||
+ adev->asic_type == CHIP_ALDEBARAN);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 455bb91060d0..093ab98c31bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -53,6 +53,7 @@
#include "mmhub_v1_7.h"
#include "umc_v6_1.h"
#include "umc_v6_0.h"
+#include "hdp_v4_0.h"
#include "ivsrcid/vmc/irqsrcs_vmc_1_0.h"
@@ -1210,6 +1211,11 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev)
adev->gfxhub.funcs = &gfxhub_v1_0_funcs;
}
+static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev)
+{
+ adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs;
+}
+
static int gmc_v9_0_early_init(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -1230,6 +1236,7 @@ static int gmc_v9_0_early_init(void *handle)
gmc_v9_0_set_mmhub_funcs(adev);
gmc_v9_0_set_mmhub_ras_funcs(adev);
gmc_v9_0_set_gfxhub_funcs(adev);
+ gmc_v9_0_set_hdp_ras_funcs(adev);
adev->gmc.shared_aperture_start = 0x2000000000000000ULL;
adev->gmc.shared_aperture_end =
@@ -1255,7 +1262,7 @@ static int gmc_v9_0_late_init(void *handle)
* writes, while disables HBM ECC for vega10.
*/
if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) {
- if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) {
+ if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) {
if (adev->df.funcs->enable_ecc_force_par_wr_rmw)
adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false);
}
@@ -1265,6 +1272,10 @@ static int gmc_v9_0_late_init(void *handle)
adev->mmhub.ras_funcs->reset_ras_error_count)
adev->mmhub.ras_funcs->reset_ras_error_count(adev);
+ if (adev->hdp.ras_funcs &&
+ adev->hdp.ras_funcs->reset_ras_error_count)
+ adev->hdp.ras_funcs->reset_ras_error_count(adev);
+
r = amdgpu_gmc_ras_late_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
index edbd35d293eb..74b90cc2bf48 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
@@ -59,12 +59,31 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
}
+static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+ void *ras_error_status)
+{
+ struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
+
+ err_data->ue_count = 0;
+ err_data->ce_count = 0;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
+ return;
+
+ /* HDP SRAM errors are uncorrectable ones (i.e. fatal errors) */
+ err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+}
+
static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev)
{
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP))
return;
- /*read back hdp ras counter to reset it to 0 */
- RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
+
+ if (adev->asic_type >= CHIP_ALDEBARAN)
+ WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0);
+ else
+		/* read back hdp ras counter to reset it to 0 */
+ RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT);
}
static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev,
@@ -130,10 +149,16 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev)
WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40));
}
+const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = {
+ .ras_late_init = amdgpu_hdp_ras_late_init,
+ .ras_fini = amdgpu_hdp_ras_fini,
+ .query_ras_error_count = hdp_v4_0_query_ras_error_count,
+ .reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
+};
+
const struct amdgpu_hdp_funcs hdp_v4_0_funcs = {
.flush_hdp = hdp_v4_0_flush_hdp,
.invalidate_hdp = hdp_v4_0_invalidate_hdp,
- .reset_ras_error_count = hdp_v4_0_reset_ras_error_count,
.update_clock_gating = hdp_v4_0_update_clock_gating,
.get_clock_gating_state = hdp_v4_0_get_clockgating_state,
.init_registers = hdp_v4_0_init_registers,
diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
index d1e6399e8c46..dc3a1b81dd62 100644
--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h
@@ -27,5 +27,6 @@
#include "soc15_common.h"
extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs;
+extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index 0103a5ab28e6..6264934b67ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -296,10 +296,12 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev)
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
PAGE_TABLE_BLOCK_SIZE,
block_size);
- /* Send no-retry XNACK on fault to suppress VM fault storm. */
+ /* On Aldebaran, XNACK can be enabled in the SQ per-process.
+ * Retry faults need to be enabled for that to work.
+ */
tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
RETRY_PERMISSION_OR_INVALID_PAGE_FAULT,
- !adev->gmc.noretry);
+ 1);
WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL,
i * hub->ctx_distance, tmp);
WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32,
@@ -1313,12 +1315,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
}
}
+static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t reg_value;
+
+ if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
+ reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
+ mmhub_v1_7_ea_err_status_regs[i]));
+ reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS,
+ CLEAR_ERROR_STATUS, 0x01);
+ WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]),
+ reg_value);
+ }
+}
+
const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = {
.ras_late_init = amdgpu_mmhub_ras_late_init,
.ras_fini = amdgpu_mmhub_ras_fini,
.query_ras_error_count = mmhub_v1_7_query_ras_error_count,
.reset_ras_error_count = mmhub_v1_7_reset_ras_error_count,
.query_ras_error_status = mmhub_v1_7_query_ras_error_status,
+ .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status,
};
const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
index a9899335d0b1..709ac576ac7e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -569,9 +569,9 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev,
return 0;
mmhub_v2_3_update_medium_grain_clock_gating(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
mmhub_v2_3_update_medium_grain_light_sleep(adev,
- state == AMD_CG_STATE_GATE ? true : false);
+ state == AMD_CG_STATE_GATE);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index d290ca0b06da..75d1f9b939b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -598,7 +598,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev)
static void nv_program_aspm(struct amdgpu_device *adev)
{
- if (amdgpu_aspm != 1)
+ if (!amdgpu_aspm)
return;
if (!(adev->flags & AMD_IS_APU) &&
@@ -1068,6 +1068,7 @@ static int nv_common_early_init(void *handle)
case CHIP_SIENNA_CICHLID:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_MC_MGCG |
AMD_CG_SUPPORT_VCN_MGCG |
@@ -1091,6 +1092,7 @@ static int nv_common_early_init(void *handle)
case CHIP_NAVY_FLOUNDER:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |
@@ -1121,6 +1123,8 @@ static int nv_common_early_init(void *handle)
AMD_CG_SUPPORT_MC_LS |
AMD_CG_SUPPORT_GFX_FGCG |
AMD_CG_SUPPORT_VCN_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
AMD_CG_SUPPORT_JPEG_MGCG;
adev->pg_flags = AMD_PG_SUPPORT_GFX_PG |
AMD_PG_SUPPORT_VCN |
@@ -1132,6 +1136,7 @@ static int nv_common_early_init(void *handle)
case CHIP_DIMGREY_CAVEFISH:
adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG |
AMD_CG_SUPPORT_GFX_CGCG |
+ AMD_CG_SUPPORT_GFX_CGLS |
AMD_CG_SUPPORT_GFX_3D_CGCG |
AMD_CG_SUPPORT_VCN_MGCG |
AMD_CG_SUPPORT_JPEG_MGCG |
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 96064c343163..f6d3180febc4 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -97,7 +97,6 @@ enum psp_gfx_cmd_id
GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */
GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */
GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */
- GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. */
GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F, /* Query GPUVA of the Fw Attestation DB */
/* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */
GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 5715be6770ec..d197185f7789 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1109,6 +1109,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
if (adev->asic_type == CHIP_ARCTURUS &&
adev->sdma.instance[i].fw_version >= 14)
WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable);
+ /* Extend page fault timeout to avoid interrupt storm */
+ WREG32_SDMA(i, mmSDMA0_UTCL1_TIMEOUT, 0x00800080);
}
}
@@ -2227,7 +2229,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev,
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
- dev_info(adev->dev,
+ dev_dbg_ratelimited(adev->dev,
"[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u "
"pasid:%u, for process %s pid %d thread %s pid %d\n",
instance, addr, entry->src_id, entry->ring_id, entry->vmid,
@@ -2240,7 +2242,7 @@ static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- dev_err(adev->dev, "MC or SEM address in VM hole\n");
+ dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n");
sdma_v4_0_print_iv_entry(adev, entry);
return 0;
}
@@ -2249,7 +2251,7 @@ static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- dev_err(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
+ dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n");
sdma_v4_0_print_iv_entry(adev, entry);
return 0;
}
@@ -2258,7 +2260,7 @@ static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- dev_err(adev->dev,
+ dev_dbg_ratelimited(adev->dev,
"Polling register/memory timeout executing POLL_REG/MEM with finite timer\n");
sdma_v4_0_print_iv_entry(adev, entry);
return 0;
@@ -2268,7 +2270,7 @@ static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
- dev_err(adev->dev,
+ dev_dbg_ratelimited(adev->dev,
"SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n");
sdma_v4_0_print_iv_entry(adev, entry);
return 0;
@@ -2597,27 +2599,18 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = {
static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev)
{
+ adev->sdma.trap_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.ecc_irq.num_types = adev->sdma.num_instances;
+	/* For Arcturus and Aldebaran, add another 4 irq handlers */
switch (adev->sdma.num_instances) {
- case 1:
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
- adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1;
- break;
case 5:
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
- adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
- break;
case 8:
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
- adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
- adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5;
- adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
- adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
- adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_LAST;
+ adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances;
+ adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances;
break;
- case 2:
default:
- adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
- adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2;
break;
}
adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs;
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
index 920fc6d4a127..04c68a79eca4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
@@ -405,18 +405,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid);
- /* Invalidate L2, because if we don't do it, we might get stale cache
- * lines from previous IBs.
- */
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV |
- SDMA_GCR_GL2_WB |
- SDMA_GCR_GLM_INV |
- SDMA_GCR_GLM_WB) << 16);
- amdgpu_ring_write(ring, 0xffffff80);
- amdgpu_ring_write(ring, 0xffff);
-
/* An IB packet must end on a 8 DW boundary--the next dword
* must be on a 8-dword boundary. Our IB packet below is 6
* dwords long, thus add x number of NOPs, such that, in
@@ -438,6 +426,33 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
}
/**
+ * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse
+ *
+ * @ring: amdgpu ring pointer
+ *
+ * Flush the IB by issuing a graphics cache rinse (GCR) request.
+ */
+static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring)
+{
+ uint32_t gcr_cntl =
+ SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV |
+ SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV |
+ SDMA_GCR_GLI_INV(1);
+
+	/* flush the entire L0/L1/L2 caches; this can be optimized later if performance requires */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) |
+ SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16));
+ amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) |
+ SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0));
+}
+
+/**
* sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring
*
* @ring: amdgpu ring pointer
@@ -1643,6 +1658,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */
.emit_ib = sdma_v5_0_ring_emit_ib,
+ .emit_mem_sync = sdma_v5_0_ring_emit_mem_sync,
.emit_fence = sdma_v5_0_ring_emit_fence,
.emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v5_0_ring_emit_vm_flush,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index b1ad9e52b234..7c4e0586e26d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -1556,6 +1556,10 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
+
+ if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH)
+ adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG;
+
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
/* Enable sdma clock gating */
def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL));
@@ -1589,6 +1593,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev
int i;
for (i = 0; i < adev->sdma.num_instances; i++) {
+
+ if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH)
+ adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS;
+
if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
/* Enable sdma mem light sleep */
def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL));
diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
index 079b094c48ad..3c47c94846d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c
@@ -89,6 +89,23 @@ static u32 smuio_v13_0_get_die_id(struct amdgpu_device *adev)
}
/**
+ * smuio_v13_0_get_socket_id - query socket id from FCH
+ *
+ * @adev: amdgpu device pointer
+ *
+ * Returns socket id
+ */
+static u32 smuio_v13_0_get_socket_id(struct amdgpu_device *adev)
+{
+ u32 data, socket_id;
+
+ data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG);
+ socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID);
+
+ return socket_id;
+}
+
+/**
* smuio_v13_0_supports_host_gpu_xgmi - detect xgmi interface between cpu and gpu/s.
*
* @adev: amdgpu device pointer
@@ -115,6 +132,7 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = {
.get_rom_index_offset = smuio_v13_0_get_rom_index_offset,
.get_rom_data_offset = smuio_v13_0_get_rom_data_offset,
.get_die_id = smuio_v13_0_get_die_id,
+ .get_socket_id = smuio_v13_0_get_socket_id,
.is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported,
.update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating,
.get_clock_gating_state = smuio_v13_0_get_clock_gating_state,
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 8e1b9a40839f..4b660b2d1c22 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -655,7 +655,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
int ret = 0;
/* avoid NBIF got stuck when do RAS recovery in BACO reset */
- if (ras && ras->supported)
+ if (ras && adev->ras_enabled)
adev->nbio.funcs->enable_doorbell_interrupt(adev, false);
ret = amdgpu_dpm_baco_reset(adev);
@@ -663,7 +663,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev)
return ret;
/* re-enable doorbell interrupt after BACO exit */
- if (ras && ras->supported)
+ if (ras && adev->ras_enabled)
adev->nbio.funcs->enable_doorbell_interrupt(adev, true);
return 0;
@@ -710,7 +710,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev)
* 1. PMFW version > 0x284300: all cases use baco
* 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco
*/
- if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400)
+ if (ras && adev->ras_enabled &&
+ adev->pm.fw_version <= 0x283400)
baco_reset = false;
break;
case CHIP_ALDEBARAN:
@@ -816,7 +817,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev)
static void soc15_program_aspm(struct amdgpu_device *adev)
{
- if (amdgpu_aspm != 1)
+ if (!amdgpu_aspm)
return;
if (!(adev->flags & AMD_IS_APU) &&
@@ -1522,9 +1523,6 @@ static int soc15_common_late_init(void *handle)
if (amdgpu_sriov_vf(adev))
xgpu_ai_mailbox_get_irq(adev);
- if (adev->hdp.funcs->reset_ras_error_count)
- adev->hdp.funcs->reset_ras_error_count(adev);
-
if (adev->nbio.ras_funcs &&
adev->nbio.ras_funcs->ras_late_init)
r = adev->nbio.ras_funcs->ras_late_init(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
index 745ed0fba1ed..0f214a398dd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h
@@ -105,6 +105,12 @@ struct ta_ras_trigger_error_input {
uint64_t value; // method if error injection. i.e persistent, coherent etc.
};
+struct ta_ras_init_flags
+{
+ uint8_t poison_mode_en;
+ uint8_t dgpu_mode;
+};
+
struct ta_ras_output_flags
{
uint8_t ras_init_success_flag;
@@ -115,6 +121,7 @@ struct ta_ras_output_flags
/* Common input structure for RAS callbacks */
/**********************************************************/
union ta_ras_cmd_input {
+ struct ta_ras_init_flags init_flags;
struct ta_ras_enable_features_input enable_features;
struct ta_ras_disable_features_input disable_features;
struct ta_ras_trigger_error_input trigger_error;
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index ca8efa5c6978..2f17c8a57015 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev,
tmp = RREG32(ih_regs->ih_rb_cntl);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0));
+ tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1);
/* enable_intr field is only valid in ring0 */
if (ih == &adev->irq.ih)
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0));
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 735ebbd1148f..3d21c0799037 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1136,7 +1136,7 @@ static void vi_program_aspm(struct amdgpu_device *adev)
bool bL1SS = false;
bool bClkReqSupport = true;
- if (amdgpu_aspm != 1)
+ if (!amdgpu_aspm)
return;
if (adev->flags & AMD_IS_APU ||