author     Dave Airlie <airlied@redhat.com>    2021-05-21 15:29:34 +1000
committer  Dave Airlie <airlied@redhat.com>    2021-05-21 15:29:40 +1000
commit     c99c4d0ca57c978dcc2a2f41ab8449684ea154cc
tree       3fd20557381e99063293ae5d399a54d0108bcdde /drivers/gpu/drm/amd/amdgpu
parent     2ba047855096fff551402a87272b520fe97323f5
parent     2bb5b5f688cbbd5030629905d3ed8032ab46e79f
Merge tag 'amd-drm-next-5.14-2021-05-19' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-5.14-2021-05-19:
amdgpu:
- Aldebaran updates
- More LTTPR display work
- Vangogh updates
- SDMA 5.x GCR fixes
- RAS fixes
- PCIe ASPM support
- Modifier fixes
- Enable TMZ on Renoir
- Buffer object code cleanup
- Display overlay fixes
- Initial support for multiple eDP panels
- Initial SR-IOV support for Aldebaran
- DP link training refactor
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- MAINTAINERS fixes for amdgpu
amdkfd:
- Initial SR-IOV support for Aldebaran
- Topology fixes
- Initial HMM SVM support
- Misc code cleanups and bug fixes
radeon:
- Misc code cleanups and bug fixes
- SMU regression fixes for variable sized arrays
- Flickering fix for Oland with multiple 4K displays
UAPI:
- amdgpu: Drop AMDGPU_GEM_CREATE_SHADOW flag.
This was always a kernel-internal flag, and userspace use of it has always been blocked.
It is no longer needed, so remove it (a minimal flag-validation sketch follows this list).
- amdkfd: HMM SVM support
Overview: https://patchwork.freedesktop.org/series/85562/
Proposed userspace: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/tree/fxkamd/hmm-wip (a caller sketch for the new in-kernel HMM range helpers also follows below)
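For context on the AMDGPU_GEM_CREATE_SHADOW removal, the following is a minimal sketch of how a GEM-create path can whitelist the flags userspace may pass: any kernel-internal bit outside the whitelist is rejected with -EINVAL. The helper name and the exact set of allowed flags are illustrative, not the driver's actual code; the flag macros themselves are real amdgpu UAPI flags.

/* Hypothetical helper, not the actual amdgpu ioctl code. */
static int amdgpu_gem_check_user_flags(u64 flags)
{
	/* Illustrative subset of the userspace-visible creation flags.
	 * Kernel-internal bits such as the old AMDGPU_GEM_CREATE_SHADOW
	 * are never listed here, so userspace could never set them.
	 */
	const u64 allowed = AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
			    AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
			    AMDGPU_GEM_CREATE_CPU_GTT_USWC |
			    AMDGPU_GEM_CREATE_VRAM_CLEARED |
			    AMDGPU_GEM_CREATE_VM_ALWAYS_VALID |
			    AMDGPU_GEM_CREATE_EXPLICIT_SYNC;

	return (flags & ~allowed) ? -EINVAL : 0;
}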
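The amdkfd HMM SVM work builds on two helpers added to amdgpu_mn.c in the diff below, amdgpu_hmm_range_get_pages() and amdgpu_hmm_range_get_pages_done(). Here is a hypothetical caller sketch that uses only the signatures visible in this diff; the buffer object, mm_struct, address range and pages[] array are assumed to be set up by the caller.

/* Hypothetical caller, error handling trimmed. */
static int example_fault_user_pages(struct amdgpu_bo *bo, struct mm_struct *mm,
				    struct page **pages, uint64_t start,
				    uint64_t npages)
{
	struct hmm_range *range;
	int r;

	/* Fault in the pages and collect them; 'false, false' means a
	 * writable mapping and that the caller does not already hold the
	 * mmap lock.
	 */
	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
				       npages, &range, false, false);
	if (r)
		return r;

	/* ... consume pages[] under the driver's notifier lock ... */

	/* Non-zero return means the range was invalidated concurrently
	 * and the whole sequence must be retried.
	 */
	return amdgpu_hmm_range_get_pages_done(range);
}

Per the implementation in the diff, amdgpu_hmm_range_get_pages_done() wraps mmu_interval_read_retry() and frees the range bookkeeping itself, so the caller only reacts to its return value.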
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20210520031258.231896-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
58 files changed, 1707 insertions, 681 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index d216b7ecb5d1..cc36570b0d2f 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -56,7 +56,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_gmc.o amdgpu_mmhub.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \ amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \ - amdgpu_fw_attestation.o amdgpu_securedisplay.o + amdgpu_fw_attestation.o amdgpu_securedisplay.o amdgpu_hdp.o amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 6cabecc74007..ad5f508924b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1075,7 +1075,8 @@ struct amdgpu_device { atomic_t throttling_logging_enabled; struct ratelimit_state throttling_logging_rs; - uint32_t ras_features; + uint32_t ras_hw_enabled; + uint32_t ras_enabled; bool in_pci_err_recovery; struct pci_saved_state *pci_state; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 2e9b16fb3fcd..bf2939b6eb43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -76,7 +76,7 @@ struct amdgpu_atif { /** * amdgpu_atif_call - call an ATIF method * - * @handle: acpi handle + * @atif: acpi handle * @function: the ATIF function to execute * @params: ATIF function params * @@ -166,7 +166,6 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas /** * amdgpu_atif_verify_interface - verify ATIF * - * @handle: acpi handle * @atif: amdgpu atif struct * * Execute the ATIF_FUNCTION_VERIFY_INTERFACE ATIF function @@ -240,8 +239,7 @@ out: /** * amdgpu_atif_get_notification_params - determine notify configuration * - * @handle: acpi handle - * @n: atif notification configuration struct + * @atif: acpi handle * * Execute the ATIF_FUNCTION_GET_SYSTEM_PARAMETERS ATIF function * to determine if a notifier is used and if so which one @@ -304,7 +302,7 @@ out: /** * amdgpu_atif_query_backlight_caps - get min and max backlight input signal * - * @handle: acpi handle + * @atif: acpi handle * * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function * to determine the acceptable range of backlight values @@ -363,7 +361,7 @@ out: /** * amdgpu_atif_get_sbios_requests - get requested sbios event * - * @handle: acpi handle + * @atif: acpi handle * @req: atif sbios request struct * * Execute the ATIF_FUNCTION_GET_SYSTEM_BIOS_REQUESTS ATIF function @@ -899,6 +897,8 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev) /** * amdgpu_acpi_is_s0ix_supported * + * @adev: amdgpu_device_pointer + * * returns true if supported, false if not. 
*/ bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index 5ffb07b02810..313ee49b9f17 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -75,6 +75,7 @@ struct amdgpu_amdkfd_fence { struct mm_struct *mm; spinlock_t lock; char timeline_name[TASK_COMM_LEN]; + struct svm_range_bo *svm_bo; }; struct amdgpu_kfd_dev { @@ -148,7 +149,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev, int queue_bit); struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm); + struct mm_struct *mm, + struct svm_range_bo *svm_bo); #if IS_ENABLED(CONFIG_HSA_AMD) bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f); @@ -234,22 +236,27 @@ uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *s }) /* GPUVM API */ +#define drm_priv_to_vm(drm_priv) \ + (&((struct amdgpu_fpriv *) \ + ((struct drm_file *)(drm_priv))->driver_priv)->vm) + int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, - void **vm, void **process_info, + void **process_info, struct dma_fence **ef); -void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm); -uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv); +uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, - void *vm, struct kgd_mem **mem, + void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags); int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, + uint64_t *size); int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm); + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv); int amdgpu_amdkfd_gpuvm_sync_memory( struct kgd_dev *kgd, struct kgd_mem *mem, bool intr); int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd, @@ -260,7 +267,7 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct dma_buf *dmabuf, - uint64_t va, void *vm, + uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset); int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, @@ -270,6 +277,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void); void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); +void amdgpu_amdkfd_reserve_system_mem(uint64_t size); #else static inline void amdgpu_amdkfd_gpuvm_init_mem_limits(void) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 9ef9f3ddad48..6409d6b1b2df 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -25,6 +25,7 @@ #include <linux/firmware.h> #include "amdgpu.h" 
#include "amdgpu_amdkfd.h" +#include "amdgpu_amdkfd_arcturus.h" #include "sdma0/sdma0_4_2_2_offset.h" #include "sdma0/sdma0_4_2_2_sh_mask.h" #include "sdma1/sdma1_4_2_2_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c index 5af464933976..1d0dbff87d3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_fence.c @@ -28,6 +28,7 @@ #include <linux/slab.h> #include <linux/sched/mm.h> #include "amdgpu_amdkfd.h" +#include "kfd_svm.h" static const struct dma_fence_ops amdkfd_fence_ops; static atomic_t fence_seq = ATOMIC_INIT(0); @@ -60,7 +61,8 @@ static atomic_t fence_seq = ATOMIC_INIT(0); */ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, - struct mm_struct *mm) + struct mm_struct *mm, + struct svm_range_bo *svm_bo) { struct amdgpu_amdkfd_fence *fence; @@ -73,7 +75,7 @@ struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, fence->mm = mm; get_task_comm(fence->timeline_name, current); spin_lock_init(&fence->lock); - + fence->svm_bo = svm_bo; dma_fence_init(&fence->base, &amdkfd_fence_ops, &fence->lock, context, atomic_inc_return(&fence_seq)); @@ -111,6 +113,8 @@ static const char *amdkfd_fence_get_timeline_name(struct dma_fence *f) * a KFD BO and schedules a job to move the BO. * If fence is already signaled return true. * If fence is not signaled schedule a evict KFD process work item. + * + * @f: dma_fence */ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) { @@ -122,16 +126,20 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f) if (dma_fence_is_signaled(f)) return true; - if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) - return true; - + if (!fence->svm_bo) { + if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f)) + return true; + } else { + if (!svm_range_schedule_evict_svm_bo(fence)) + return true; + } return false; } /** * amdkfd_fence_release - callback that fence can be freed * - * @fence: fence + * @f: dma_fence * * This function is called when the reference count becomes zero. * Drops the mm_struct reference and RCU schedules freeing up the fence. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index b43e68fc1378..ed3014fbb563 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -719,7 +719,7 @@ static void unlock_spi_csq_mutexes(struct amdgpu_device *adev) } /** - * @get_wave_count: Read device registers to get number of waves in flight for + * get_wave_count: Read device registers to get number of waves in flight for * a particular queue. The method also returns the VMID associated with the * queue. * @@ -755,19 +755,19 @@ static void get_wave_count(struct amdgpu_device *adev, int queue_idx, } /** - * @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each + * kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each * shader engine and aggregates the number of waves that are in flight for the * process whose pasid is provided as a parameter. The process could have ZERO * or more queues running and submitting waves to compute units. 
* * @kgd: Handle of device from which to get number of waves in flight * @pasid: Identifies the process for which this query call is invoked - * @wave_cnt: Output parameter updated with number of waves in flight that + * @pasid_wave_cnt: Output parameter updated with number of waves in flight that * belong to process with given pasid * @max_waves_per_cu: Output parameter updated with maximum number of waves * possible per Compute Unit * - * @note: It's possible that the device has too many queues (oversubscription) + * Note: It's possible that the device has too many queues (oversubscription) * in which case a VMID could be remapped to a different PASID. This could lead * to an iaccurate wave count. Following is a high-level sequence: * Time T1: vmid = getVmid(); vmid is associated with Pasid P1 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 7d4118c8128a..dfa025d694f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -33,9 +33,6 @@ #include <uapi/linux/kfd_ioctl.h> #include "amdgpu_xgmi.h" -/* BO flag to indicate a KFD userptr BO */ -#define AMDGPU_AMDKFD_USERPTR_BO (1ULL << 63) - /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate */ @@ -108,6 +105,11 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) (kfd_mem_limit.max_ttm_mem_limit >> 20)); } +void amdgpu_amdkfd_reserve_system_mem(uint64_t size) +{ + kfd_mem_limit.system_mem_used += size; +} + /* Estimate page table size needed to represent a given memory size * * With 4KB pages, we need one 8 byte PTE for each 4KB of memory @@ -217,7 +219,7 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) u32 domain = bo->preferred_domains; bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); - if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { + if (bo->flags & AMDGPU_AMDKFD_CREATE_USERPTR_BO) { domain = AMDGPU_GEM_DOMAIN_CPU; sg = false; } @@ -967,7 +969,8 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info, info->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), - current->mm); + current->mm, + NULL); if (!info->eviction_fence) { pr_err("Failed to create eviction fence\n"); ret = -ENOMEM; @@ -1036,15 +1039,19 @@ create_evict_fence_fail: int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, struct file *filp, u32 pasid, - void **vm, void **process_info, + void **process_info, struct dma_fence **ef) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct drm_file *drm_priv = filp->private_data; - struct amdgpu_fpriv *drv_priv = drm_priv->driver_priv; - struct amdgpu_vm *avm = &drv_priv->vm; + struct amdgpu_fpriv *drv_priv; + struct amdgpu_vm *avm; int ret; + ret = amdgpu_file_to_fpriv(filp, &drv_priv); + if (ret) + return ret; + avm = &drv_priv->vm; + /* Already a compute VM? 
*/ if (avm->process_info) return -EINVAL; @@ -1059,7 +1066,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, if (ret) return ret; - *vm = (void *)avm; + amdgpu_vm_set_task_info(avm); return 0; } @@ -1100,15 +1107,17 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, } } -void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) +void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *drm_priv) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm; - if (WARN_ON(!kgd || !vm)) + if (WARN_ON(!kgd || !drm_priv)) return; - pr_debug("Releasing process vm %p\n", vm); + avm = drm_priv_to_vm(drm_priv); + + pr_debug("Releasing process vm %p\n", avm); /* The original pasid of amdgpu vm has already been * released during making a amdgpu vm to a compute vm @@ -1119,9 +1128,9 @@ void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) amdgpu_vm_release_compute(adev, avm); } -uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv) { - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct amdgpu_bo *pd = avm->root.base.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); @@ -1132,11 +1141,11 @@ uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, - void *vm, struct kgd_mem **mem, + void *drm_priv, struct kgd_mem **mem, uint64_t *offset, uint32_t flags) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); enum ttm_bo_type bo_type = ttm_bo_type_device; struct sg_table *sg = NULL; uint64_t user_addr = 0; @@ -1216,6 +1225,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( domain_string(alloc_domain), ret); goto err_bo_create; } + ret = drm_vma_node_allow(&gobj->vma_node, drm_priv); + if (ret) { + pr_debug("Failed to allow vma node access. 
ret %d\n", ret); + goto err_node_allow; + } bo = gem_to_amdgpu_bo(gobj); if (bo_type == ttm_bo_type_sg) { bo->tbo.sg = sg; @@ -1224,7 +1238,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bo->kfd_bo = *mem; (*mem)->bo = bo; if (user_addr) - bo->flags |= AMDGPU_AMDKFD_USERPTR_BO; + bo->flags |= AMDGPU_AMDKFD_CREATE_USERPTR_BO; (*mem)->va = va; (*mem)->domain = domain; @@ -1245,6 +1259,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); + drm_vma_node_revoke(&gobj->vma_node, drm_priv); +err_node_allow: amdgpu_bo_unref(&bo); /* Don't unreserve system mem limit twice */ goto err_reserve_limit; @@ -1262,7 +1278,8 @@ err: } int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, uint64_t *size) + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv, + uint64_t *size) { struct amdkfd_process_info *process_info = mem->process_info; unsigned long bo_size = mem->bo->tbo.base.size; @@ -1339,6 +1356,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } /* Free the BO*/ + drm_vma_node_revoke(&mem->bo->tbo.base.vma_node, drm_priv); drm_gem_object_put(&mem->bo->tbo.base); mutex_destroy(&mem->lock); kfree(mem); @@ -1347,10 +1365,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); int ret; struct amdgpu_bo *bo; uint32_t domain; @@ -1391,9 +1409,9 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( pr_debug("Map VA 0x%llx - 0x%llx to vm %p domain %s\n", mem->va, mem->va + bo_size * (1 + mem->aql_queue), - vm, domain_string(domain)); + avm, domain_string(domain)); - ret = reserve_bo_and_vm(mem, vm, &ctx); + ret = reserve_bo_and_vm(mem, avm, &ctx); if (unlikely(ret)) goto out; @@ -1437,7 +1455,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( } list_for_each_entry(entry, &mem->bo_va_list, bo_list) { - if (entry->bo_va->base.vm == vm && !entry->is_mapped) { + if (entry->bo_va->base.vm == avm && !entry->is_mapped) { pr_debug("\t map VA 0x%llx - 0x%llx in entry %p\n", entry->va, entry->va + bo_size, entry); @@ -1449,7 +1467,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( goto map_bo_to_gpuvm_failed; } - ret = vm_update_pds(vm, ctx.sync); + ret = vm_update_pds(avm, ctx.sync); if (ret) { pr_err("Failed to update page directories\n"); goto map_bo_to_gpuvm_failed; @@ -1485,11 +1503,11 @@ out: } int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( - struct kgd_dev *kgd, struct kgd_mem *mem, void *vm) + struct kgd_dev *kgd, struct kgd_mem *mem, void *drm_priv) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - struct amdkfd_process_info *process_info = - ((struct amdgpu_vm *)vm)->process_info; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); + struct amdkfd_process_info *process_info = avm->process_info; unsigned long bo_size = mem->bo->tbo.base.size; struct kfd_bo_va_list *entry; struct bo_vm_reservation_context ctx; @@ -1497,7 +1515,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( mutex_lock(&mem->lock); - ret = reserve_bo_and_cond_vms(mem, vm, BO_VM_MAPPED, &ctx); + ret = reserve_bo_and_cond_vms(mem, avm, BO_VM_MAPPED, &ctx); if (unlikely(ret)) goto out; /* If no VMs were reserved, it means the BO wasn't actually mapped */ @@ -1506,17 +1524,17 @@ int 
amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( goto unreserve_out; } - ret = vm_validate_pt_pd_bos((struct amdgpu_vm *)vm); + ret = vm_validate_pt_pd_bos(avm); if (unlikely(ret)) goto unreserve_out; pr_debug("Unmap VA 0x%llx - 0x%llx from vm %p\n", mem->va, mem->va + bo_size * (1 + mem->aql_queue), - vm); + avm); list_for_each_entry(entry, &mem->bo_va_list, bo_list) { - if (entry->bo_va->base.vm == vm && entry->is_mapped) { + if (entry->bo_va->base.vm == avm && entry->is_mapped) { pr_debug("\t unmap VA 0x%llx - 0x%llx from entry %p\n", entry->va, entry->va + bo_size, @@ -1642,14 +1660,15 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, struct dma_buf *dma_buf, - uint64_t va, void *vm, + uint64_t va, void *drm_priv, struct kgd_mem **mem, uint64_t *size, uint64_t *mmap_offset) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); struct drm_gem_object *obj; struct amdgpu_bo *bo; - struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + int ret; if (dma_buf->ops != &amdgpu_dmabuf_ops) /* Can't handle non-graphics buffers */ @@ -1670,6 +1689,12 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, if (!*mem) return -ENOMEM; + ret = drm_vma_node_allow(&obj->vma_node, drm_priv); + if (ret) { + kfree(mem); + return ret; + } + if (size) *size = amdgpu_bo_size(bo); @@ -2135,7 +2160,8 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) */ new_fence = amdgpu_amdkfd_fence_create( process_info->eviction_fence->base.context, - process_info->eviction_fence->mm); + process_info->eviction_fence->mm, + NULL); if (!new_fence) { pr_err("Failed to create eviction fence\n"); ret = -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b5c766998045..90136f9dedd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -672,7 +672,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) } /** - * cs_parser_fini() - clean parser states + * amdgpu_cs_parser_fini() - clean parser states * @parser: parser structure holding parsing context. * @error: error number * @backoff: indicator to backoff the reservation @@ -1488,7 +1488,7 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, } /** - * amdgpu_cs_wait_all_fence - wait on all fences to signal + * amdgpu_cs_wait_all_fences - wait on all fences to signal * * @adev: amdgpu device * @filp: file private @@ -1639,7 +1639,7 @@ err_free_fences: } /** - * amdgpu_cs_find_bo_va - find bo_va for VM address + * amdgpu_cs_find_mapping - find bo_va for VM address * * @parser: command submission parser context * @addr: VM address diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8b2a37bf2adf..2360a9c518eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2856,7 +2856,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev) AMD_IP_BLOCK_TYPE_IH, }; - for (i = 0; i < ARRAY_SIZE(ip_order); i++) { + for (i = 0; i < adev->num_ip_blocks; i++) { int j; struct amdgpu_ip_block *block; @@ -3179,8 +3179,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) int ret = 0; /* - * By default timeout for non compute jobs is 10000. - * And there is no timeout enforced on compute jobs. 
+ * By default timeout for non compute jobs is 10000 + * and 60000 for compute jobs. * In SR-IOV or passthrough mode, timeout for compute * jobs are 60000 by default. */ @@ -3189,10 +3189,8 @@ static int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) adev->compute_timeout = amdgpu_sriov_is_pp_one_vf(adev) ? msecs_to_jiffies(60000) : msecs_to_jiffies(10000); - else if (amdgpu_passthrough(adev)) - adev->compute_timeout = msecs_to_jiffies(60000); else - adev->compute_timeout = MAX_SCHEDULE_TIMEOUT; + adev->compute_timeout = msecs_to_jiffies(60000); if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENGTH)) { while ((timeout_setting = strsep(&input, ",")) && @@ -3741,7 +3739,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev) int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) { struct amdgpu_device *adev = drm_to_adev(dev); - int r; if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; @@ -3756,7 +3753,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_ras_suspend(adev); - r = amdgpu_device_ip_suspend_phase1(adev); + amdgpu_device_ip_suspend_phase1(adev); if (!adev->in_s0ix) amdgpu_amdkfd_suspend(adev, adev->in_runpm); @@ -3766,7 +3763,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon) amdgpu_fence_driver_suspend(adev); - r = amdgpu_device_ip_suspend_phase2(adev); + amdgpu_device_ip_suspend_phase2(adev); /* evict remaining vram memory * This second call to evict vram is to evict the gart page table * using the CPU. @@ -5124,7 +5121,8 @@ int amdgpu_device_baco_enter(struct drm_device *dev) if (!amdgpu_device_supports_baco(adev_to_drm(adev))) return -ENOTSUPP; - if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt) + if (ras && adev->ras_enabled && + adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); return amdgpu_dpm_baco_enter(adev); @@ -5143,7 +5141,8 @@ int amdgpu_device_baco_exit(struct drm_device *dev) if (ret) return ret; - if (ras && ras->supported && adev->nbio.funcs->enable_doorbell_interrupt) + if (ras && adev->ras_enabled && + adev->nbio.funcs->enable_doorbell_interrupt) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6fd20ea2935b..1ed9748b9bc7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -288,9 +288,9 @@ module_param_named(msi, amdgpu_msi, int, 0444); * for SDMA and Video. * * By default(with no lockup_timeout settings), the timeout for all non-compute(GFX, SDMA and Video) - * jobs is 10000. And there is no timeout enforced on compute jobs. + * jobs is 10000. The timeout for compute is 60000. */ -MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and infinity timeout for compute jobs; " +MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: for bare metal 10000 for non-compute jobs and 60000 for compute jobs; " "for passthrough or sriov, 10000 for all jobs." " 0: keep default value. negative: infinity timeout), " "format: for bare metal [Non-Compute] or [GFX,Compute,SDMA,Video]; " @@ -641,7 +641,8 @@ module_param_named(mes, amdgpu_mes, int, 0444); /** * DOC: noretry (int) - * Disable retry faults in the GPU memory controller. + * Disable XNACK retry in the SQ by default on GFXv9 hardware. 
On ASICs that + * do not support per-process XNACK this also disables retry page faults. * (0 = retry enabled, 1 = retry disabled, -1 auto (default)) */ MODULE_PARM_DESC(noretry, @@ -1186,6 +1187,7 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x7408, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, {0x1002, 0x740C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, {0x1002, 0x740F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x7410, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_ALDEBARAN|AMD_EXP_HW_SUPPORT}, {0, 0, 0} }; @@ -1598,17 +1600,15 @@ static int amdgpu_pmops_runtime_idle(struct device *dev) if (amdgpu_device_has_dc_support(adev)) { struct drm_crtc *crtc; - drm_modeset_lock_all(drm_dev); - drm_for_each_crtc(crtc, drm_dev) { - if (crtc->state->active) { + drm_modeset_lock(&crtc->mutex, NULL); + if (crtc->state->active) ret = -EBUSY; + drm_modeset_unlock(&crtc->mutex); + if (ret < 0) break; - } } - drm_modeset_unlock_all(drm_dev); - } else { struct drm_connector *list_connector; struct drm_connector_list_iter iter; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 47ea46859618..30772608eac6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -434,6 +434,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, * * @ring: ring to init the fence driver on * @num_hw_submission: number of entries on the hardware queue + * @sched_score: optional score atomic shared with other schedulers * * Init the fence driver for the requested ring (all asics). * Helper function for amdgpu_fence_driver_init(). diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index c5a9a4fb10d2..5562b5c90c03 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -60,7 +60,7 @@ */ /** - * amdgpu_dummy_page_init - init dummy page used by the driver + * amdgpu_gart_dummy_page_init - init dummy page used by the driver * * @adev: amdgpu_device pointer * @@ -86,7 +86,7 @@ static int amdgpu_gart_dummy_page_init(struct amdgpu_device *adev) } /** - * amdgpu_dummy_page_fini - free dummy page used by the driver + * amdgpu_gart_dummy_page_fini - free dummy page used by the driver * * @adev: amdgpu_device pointer * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c index c39ed9eb0987..a129ecc73869 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -333,6 +333,17 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) } /** + * amdgpu_gmc_fault_key - get hask key from vm fault address and pasid + * + * @addr: 48 bit physical address, page aligned (36 significant bits) + * @pasid: 16 bit process address space identifier + */ +static inline uint64_t amdgpu_gmc_fault_key(uint64_t addr, uint16_t pasid) +{ + return addr << 4 | pasid; +} + +/** * amdgpu_gmc_filter_faults - filter VM faults * * @adev: amdgpu device structure @@ -348,8 +359,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid, uint64_t timestamp) { struct amdgpu_gmc *gmc = &adev->gmc; - - uint64_t stamp, key = addr << 4 | pasid; + uint64_t stamp, key = amdgpu_gmc_fault_key(addr, pasid); struct amdgpu_gmc_fault *fault; uint32_t hash; @@ -365,7 +375,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, while 
(fault->timestamp >= stamp) { uint64_t tmp; - if (fault->key == key) + if (atomic64_read(&fault->key) == key) return true; tmp = fault->timestamp; @@ -378,7 +388,7 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, /* Add the fault to the ring */ fault = &gmc->fault_ring[gmc->last_fault]; - fault->key = key; + atomic64_set(&fault->key, key); fault->timestamp = timestamp; /* And update the hash */ @@ -387,6 +397,36 @@ bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, return false; } +/** + * amdgpu_gmc_filter_faults_remove - remove address from VM faults filter + * + * @adev: amdgpu device structure + * @addr: address of the VM fault + * @pasid: PASID of the process causing the fault + * + * Remove the address from fault filter, then future vm fault on this address + * will pass to retry fault handler to recover. + */ +void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, + uint16_t pasid) +{ + struct amdgpu_gmc *gmc = &adev->gmc; + uint64_t key = amdgpu_gmc_fault_key(addr, pasid); + struct amdgpu_gmc_fault *fault; + uint32_t hash; + uint64_t tmp; + + hash = hash_64(key, AMDGPU_GMC_FAULT_HASH_ORDER); + fault = &gmc->fault_ring[gmc->fault_hash[hash].idx]; + do { + if (atomic64_cmpxchg(&fault->key, key, 0) == key) + break; + + tmp = fault->timestamp; + fault = &gmc->fault_ring[fault->next]; + } while (fault->timestamp < tmp); +} + int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) { int r; @@ -415,6 +455,13 @@ int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev) return r; } + if (adev->hdp.ras_funcs && + adev->hdp.ras_funcs->ras_late_init) { + r = adev->hdp.ras_funcs->ras_late_init(adev); + if (r) + return r; + } + return 0; } @@ -426,11 +473,15 @@ void amdgpu_gmc_ras_fini(struct amdgpu_device *adev) if (adev->mmhub.ras_funcs && adev->mmhub.ras_funcs->ras_fini) - amdgpu_mmhub_ras_fini(adev); + adev->mmhub.ras_funcs->ras_fini(adev); if (adev->gmc.xgmi.ras_funcs && adev->gmc.xgmi.ras_funcs->ras_fini) adev->gmc.xgmi.ras_funcs->ras_fini(adev); + + if (adev->hdp.ras_funcs && + adev->hdp.ras_funcs->ras_fini) + adev->hdp.ras_funcs->ras_fini(adev); } /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index 9d11c02a3938..6aa1d52d3aee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -66,9 +66,9 @@ struct firmware; * GMC page fault information */ struct amdgpu_gmc_fault { - uint64_t timestamp; + uint64_t timestamp:48; uint64_t next:AMDGPU_GMC_FAULT_RING_ORDER; - uint64_t key:52; + atomic64_t key; }; /* @@ -318,6 +318,8 @@ void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc); bool amdgpu_gmc_filter_faults(struct amdgpu_device *adev, uint64_t addr, uint16_t pasid, uint64_t timestamp); +void amdgpu_gmc_filter_faults_remove(struct amdgpu_device *adev, uint64_t addr, + uint16_t pasid); int amdgpu_gmc_ras_late_init(struct amdgpu_device *adev); void amdgpu_gmc_ras_fini(struct amdgpu_device *adev); int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 72962de4c04c..c026972ca9a1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -24,7 +24,8 @@ #include "amdgpu.h" -static inline struct amdgpu_gtt_mgr *to_gtt_mgr(struct ttm_resource_manager *man) +static inline struct amdgpu_gtt_mgr * +to_gtt_mgr(struct ttm_resource_manager *man) { 
return container_of(man, struct amdgpu_gtt_mgr, manager); } @@ -43,12 +44,14 @@ struct amdgpu_gtt_node { * the GTT block, in bytes */ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); return sysfs_emit(buf, "%llu\n", man->size * PAGE_SIZE); } @@ -61,12 +64,14 @@ static ssize_t amdgpu_mem_info_gtt_total_show(struct device *dev, * size of the GTT block, in bytes */ static ssize_t amdgpu_mem_info_gtt_used_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT); return sysfs_emit(buf, "%llu\n", amdgpu_gtt_mgr_usage(man)); } @@ -75,80 +80,6 @@ static DEVICE_ATTR(mem_info_gtt_total, S_IRUGO, static DEVICE_ATTR(mem_info_gtt_used, S_IRUGO, amdgpu_mem_info_gtt_used_show, NULL); -static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func; -/** - * amdgpu_gtt_mgr_init - init GTT manager and DRM MM - * - * @adev: amdgpu_device pointer - * @gtt_size: maximum size of GTT - * - * Allocate and initialize the GTT manager. - */ -int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) -{ - struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; - struct ttm_resource_manager *man = &mgr->manager; - uint64_t start, size; - int ret; - - man->use_tt = true; - man->func = &amdgpu_gtt_mgr_func; - - ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT); - - start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; - size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; - drm_mm_init(&mgr->mm, start, size); - spin_lock_init(&mgr->lock); - atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT); - - ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_gtt_total\n"); - return ret; - } - ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used); - if (ret) { - DRM_ERROR("Failed to create device file mem_info_gtt_used\n"); - return ret; - } - - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); - ttm_resource_manager_set_used(man, true); - return 0; -} - -/** - * amdgpu_gtt_mgr_fini - free and destroy GTT manager - * - * @adev: amdgpu_device pointer - * - * Destroy and free the GTT manager, returns -EBUSY if ranges are still - * allocated inside it. 
- */ -void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev) -{ - struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; - struct ttm_resource_manager *man = &mgr->manager; - int ret; - - ttm_resource_manager_set_used(man, false); - - ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); - if (ret) - return; - - spin_lock(&mgr->lock); - drm_mm_takedown(&mgr->mm); - spin_unlock(&mgr->lock); - - device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total); - device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used); - - ttm_resource_manager_cleanup(man); - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL); -} - /** * amdgpu_gtt_mgr_has_gart_addr - Check if mem has address space * @@ -265,6 +196,13 @@ uint64_t amdgpu_gtt_mgr_usage(struct ttm_resource_manager *man) return (result > 0 ? result : 0) * PAGE_SIZE; } +/** + * amdgpu_gtt_mgr_recover - re-init gart + * + * @man: TTM memory type manager + * + * Re-init the gart for each known BO in the GTT. + */ int amdgpu_gtt_mgr_recover(struct ttm_resource_manager *man) { struct amdgpu_gtt_mgr *mgr = to_gtt_mgr(man); @@ -311,3 +249,76 @@ static const struct ttm_resource_manager_func amdgpu_gtt_mgr_func = { .free = amdgpu_gtt_mgr_del, .debug = amdgpu_gtt_mgr_debug }; + +/** + * amdgpu_gtt_mgr_init - init GTT manager and DRM MM + * + * @adev: amdgpu_device pointer + * @gtt_size: maximum size of GTT + * + * Allocate and initialize the GTT manager. + */ +int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size) +{ + struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; + struct ttm_resource_manager *man = &mgr->manager; + uint64_t start, size; + int ret; + + man->use_tt = true; + man->func = &amdgpu_gtt_mgr_func; + + ttm_resource_manager_init(man, gtt_size >> PAGE_SHIFT); + + start = AMDGPU_GTT_MAX_TRANSFER_SIZE * AMDGPU_GTT_NUM_TRANSFER_WINDOWS; + size = (adev->gmc.gart_size >> PAGE_SHIFT) - start; + drm_mm_init(&mgr->mm, start, size); + spin_lock_init(&mgr->lock); + atomic64_set(&mgr->available, gtt_size >> PAGE_SHIFT); + + ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_total); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_gtt_total\n"); + return ret; + } + ret = device_create_file(adev->dev, &dev_attr_mem_info_gtt_used); + if (ret) { + DRM_ERROR("Failed to create device file mem_info_gtt_used\n"); + return ret; + } + + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, &mgr->manager); + ttm_resource_manager_set_used(man, true); + return 0; +} + +/** + * amdgpu_gtt_mgr_fini - free and destroy GTT manager + * + * @adev: amdgpu_device pointer + * + * Destroy and free the GTT manager, returns -EBUSY if ranges are still + * allocated inside it. 
+ */ +void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev) +{ + struct amdgpu_gtt_mgr *mgr = &adev->mman.gtt_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); + if (ret) + return; + + spin_lock(&mgr->lock); + drm_mm_takedown(&mgr->mm); + spin_unlock(&mgr->lock); + + device_remove_file(adev->dev, &dev_attr_mem_info_gtt_total); + device_remove_file(adev->dev, &dev_attr_mem_info_gtt_used); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_TT, NULL); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c new file mode 100644 index 000000000000..1d50d534d77c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.c @@ -0,0 +1,69 @@ +/* + * Copyright 2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ + +#include "amdgpu.h" +#include "amdgpu_ras.h" + +int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev) +{ + int r; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + struct ras_fs_if fs_info = { + .sysfs_name = "hdp_err_count", + }; + + if (!adev->hdp.ras_if) { + adev->hdp.ras_if = kmalloc(sizeof(struct ras_common_if), GFP_KERNEL); + if (!adev->hdp.ras_if) + return -ENOMEM; + adev->hdp.ras_if->block = AMDGPU_RAS_BLOCK__HDP; + adev->hdp.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE; + adev->hdp.ras_if->sub_block_index = 0; + strcpy(adev->hdp.ras_if->name, "hdp"); + } + ih_info.head = fs_info.head = *adev->hdp.ras_if; + r = amdgpu_ras_late_init(adev, adev->hdp.ras_if, + &fs_info, &ih_info); + if (r || !amdgpu_ras_is_supported(adev, adev->hdp.ras_if->block)) { + kfree(adev->hdp.ras_if); + adev->hdp.ras_if = NULL; + } + + return r; +} + +void amdgpu_hdp_ras_fini(struct amdgpu_device *adev) +{ + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP) && + adev->hdp.ras_if) { + struct ras_common_if *ras_if = adev->hdp.ras_if; + struct ras_ih_if ih_info = { + .cb = NULL, + }; + + amdgpu_ras_late_fini(adev, ras_if, &ih_info); + kfree(ras_if); + } +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h index 43caf9f8cc11..7ec99d591584 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -23,18 +23,29 @@ #ifndef __AMDGPU_HDP_H__ #define __AMDGPU_HDP_H__ +struct amdgpu_hdp_ras_funcs { + int (*ras_late_init)(struct amdgpu_device *adev); + void (*ras_fini)(struct amdgpu_device *adev); + void (*query_ras_error_count)(struct amdgpu_device *adev, + void *ras_error_status); + void (*reset_ras_error_count)(struct amdgpu_device *adev); +}; + struct amdgpu_hdp_funcs { void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); - void (*reset_ras_error_count)(struct amdgpu_device *adev); void (*update_clock_gating)(struct amdgpu_device *adev, bool enable); void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); void (*init_registers)(struct amdgpu_device *adev); }; struct amdgpu_hdp { + struct ras_common_if *ras_if; const struct amdgpu_hdp_funcs *funcs; + const struct amdgpu_hdp_ras_funcs *ras_funcs; }; +int amdgpu_hdp_ras_late_init(struct amdgpu_device *adev); +void amdgpu_hdp_ras_fini(struct amdgpu_device *adev); #endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index a2fe2dac32c1..2e6789a7dc46 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -328,7 +328,7 @@ int amdgpu_ib_pool_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_IB_POOL_MAX; i++) { if (i == AMDGPU_IB_POOL_DIRECT) - size = PAGE_SIZE * 2; + size = PAGE_SIZE * 6; else size = AMDGPU_IB_POOL_SIZE; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index faaa6aa2faaf..a36e191cf086 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -175,7 +175,9 @@ static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev, cur_rptr += ih->ptr_mask + 1; *prev_rptr = cur_rptr; - return cur_rptr >= checkpoint_wptr; + /* check ring is empty to workaround missing wptr overflow flag */ + return cur_rptr >= checkpoint_wptr || + (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih); } /** diff --git 
a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 39ee88d29cca..8d12e474745a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -986,7 +986,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!ras) return -EINVAL; - ras_mask = (uint64_t)ras->supported << 32 | ras->features; + ras_mask = (uint64_t)adev->ras_enabled << 32 | ras->features; return copy_to_user(out, &ras_mask, min_t(u64, size, sizeof(ras_mask))) ? @@ -1114,7 +1114,8 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) dev_warn(adev->dev, "No more PASIDs available!"); pasid = 0; } - r = amdgpu_vm_init(adev, &fpriv->vm, AMDGPU_VM_CONTEXT_GFX, pasid); + + r = amdgpu_vm_init(adev, &fpriv->vm, pasid); if (r) goto error_pasid; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h index 11aa29933c1f..b27fcbccce2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h @@ -28,6 +28,7 @@ struct amdgpu_mmhub_ras_funcs { void *ras_error_status); void (*query_ras_error_status)(struct amdgpu_device *adev); void (*reset_ras_error_count)(struct amdgpu_device *adev); + void (*reset_ras_error_status)(struct amdgpu_device *adev); }; struct amdgpu_mmhub_funcs { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index 828b5167ff12..2741c28ff1b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -155,3 +155,89 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo) mmu_interval_notifier_remove(&bo->notifier); bo->notifier.mm = NULL; } + +int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, + struct mm_struct *mm, struct page **pages, + uint64_t start, uint64_t npages, + struct hmm_range **phmm_range, bool readonly, + bool mmap_locked) +{ + struct hmm_range *hmm_range; + unsigned long timeout; + unsigned long i; + unsigned long *pfns; + int r = 0; + + hmm_range = kzalloc(sizeof(*hmm_range), GFP_KERNEL); + if (unlikely(!hmm_range)) + return -ENOMEM; + + pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL); + if (unlikely(!pfns)) { + r = -ENOMEM; + goto out_free_range; + } + + hmm_range->notifier = notifier; + hmm_range->default_flags = HMM_PFN_REQ_FAULT; + if (!readonly) + hmm_range->default_flags |= HMM_PFN_REQ_WRITE; + hmm_range->hmm_pfns = pfns; + hmm_range->start = start; + hmm_range->end = start + npages * PAGE_SIZE; + + /* Assuming 512MB takes maxmium 1 second to fault page address */ + timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT; + timeout = jiffies + msecs_to_jiffies(timeout); + +retry: + hmm_range->notifier_seq = mmu_interval_read_begin(notifier); + + if (likely(!mmap_locked)) + mmap_read_lock(mm); + + r = hmm_range_fault(hmm_range); + + if (likely(!mmap_locked)) + mmap_read_unlock(mm); + if (unlikely(r)) { + /* + * FIXME: This timeout should encompass the retry from + * mmu_interval_read_retry() as well. + */ + if (r == -EBUSY && !time_after(jiffies, timeout)) + goto retry; + goto out_free_pfns; + } + + /* + * Due to default_flags, all pages are HMM_PFN_VALID or + * hmm_range_fault() fails. FIXME: The pages cannot be touched outside + * the notifier_lock, and mmu_interval_read_retry() must be done first. 
+ */ + for (i = 0; pages && i < npages; i++) + pages[i] = hmm_pfn_to_page(pfns[i]); + + *phmm_range = hmm_range; + + return 0; + +out_free_pfns: + kvfree(pfns); +out_free_range: + kfree(hmm_range); + + return r; +} + +int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range) +{ + int r; + + r = mmu_interval_read_retry(hmm_range->notifier, + hmm_range->notifier_seq); + kvfree(hmm_range->hmm_pfns); + kfree(hmm_range); + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h index a292238f75eb..7f7d37a457c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h @@ -30,6 +30,13 @@ #include <linux/workqueue.h> #include <linux/interval_tree.h> +int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, + struct mm_struct *mm, struct page **pages, + uint64_t start, uint64_t npages, + struct hmm_range **phmm_range, bool readonly, + bool mmap_locked); +int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range); + #if defined(CONFIG_HMM_MIRROR) int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr); void amdgpu_mn_unregister(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 0adffcace326..8714d50c5b20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -491,7 +491,18 @@ bool amdgpu_bo_support_uswc(u64 bo_flags) #endif } -static int amdgpu_bo_do_create(struct amdgpu_device *adev, +/** + * amdgpu_bo_create - create an &amdgpu_bo buffer object + * @adev: amdgpu device object + * @bp: parameters to be used for the buffer object + * @bo_ptr: pointer to the buffer object pointer + * + * Creates an &amdgpu_bo buffer object. + * + * Returns: + * 0 for success or a negative error code on failure. + */ +int amdgpu_bo_create(struct amdgpu_device *adev, struct amdgpu_bo_param *bp, struct amdgpu_bo **bo_ptr) { @@ -601,9 +612,9 @@ fail_unreserve: return r; } -static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, - unsigned long size, - struct amdgpu_bo *bo) +int amdgpu_bo_create_shadow(struct amdgpu_device *adev, + unsigned long size, + struct amdgpu_bo *bo) { struct amdgpu_bo_param bp; int r; @@ -614,13 +625,12 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, memset(&bp, 0, sizeof(bp)); bp.size = size; bp.domain = AMDGPU_GEM_DOMAIN_GTT; - bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | - AMDGPU_GEM_CREATE_SHADOW; + bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC; bp.type = ttm_bo_type_kernel; bp.resv = bo->tbo.base.resv; bp.bo_ptr_size = sizeof(struct amdgpu_bo); - r = amdgpu_bo_do_create(adev, &bp, &bo->shadow); + r = amdgpu_bo_create(adev, &bp, &bo->shadow); if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); mutex_lock(&adev->shadow_list_lock); @@ -632,50 +642,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, } /** - * amdgpu_bo_create - create an &amdgpu_bo buffer object - * @adev: amdgpu device object - * @bp: parameters to be used for the buffer object - * @bo_ptr: pointer to the buffer object pointer - * - * Creates an &amdgpu_bo buffer object; and if requested, also creates a - * shadow object. - * Shadow object is used to backup the original buffer object, and is always - * in GTT. - * - * Returns: - * 0 for success or a negative error code on failure. 
- */ -int amdgpu_bo_create(struct amdgpu_device *adev, - struct amdgpu_bo_param *bp, - struct amdgpu_bo **bo_ptr) -{ - u64 flags = bp->flags; - int r; - - bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; - - r = amdgpu_bo_do_create(adev, bp, bo_ptr); - if (r) - return r; - - if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) { - if (!bp->resv) - WARN_ON(dma_resv_lock((*bo_ptr)->tbo.base.resv, - NULL)); - - r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr); - - if (!bp->resv) - dma_resv_unlock((*bo_ptr)->tbo.base.resv); - - if (r) - amdgpu_bo_unref(bo_ptr); - } - - return r; -} - -/** * amdgpu_bo_create_user - create an &amdgpu_bo_user buffer object * @adev: amdgpu device object * @bp: parameters to be used for the buffer object @@ -694,9 +660,8 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev, struct amdgpu_bo *bo_ptr; int r; - bp->flags = bp->flags & ~AMDGPU_GEM_CREATE_SHADOW; bp->bo_ptr_size = sizeof(struct amdgpu_bo_user); - r = amdgpu_bo_do_create(adev, bp, &bo_ptr); + r = amdgpu_bo_create(adev, bp, &bo_ptr); if (r) return r; @@ -1595,7 +1560,6 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m) amdgpu_bo_print_flag(m, bo, NO_CPU_ACCESS); amdgpu_bo_print_flag(m, bo, CPU_GTT_USWC); amdgpu_bo_print_flag(m, bo, VRAM_CLEARED); - amdgpu_bo_print_flag(m, bo, SHADOW); amdgpu_bo_print_flag(m, bo, VRAM_CONTIGUOUS); amdgpu_bo_print_flag(m, bo, VM_ALWAYS_VALID); amdgpu_bo_print_flag(m, bo, EXPLICIT_SYNC); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index b37d36ac6b5a..46d22ab85492 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -37,6 +37,10 @@ #define AMDGPU_BO_INVALID_OFFSET LONG_MAX #define AMDGPU_BO_MAX_PLACEMENTS 3 +/* BO flag to indicate a KFD userptr BO */ +#define AMDGPU_AMDKFD_CREATE_USERPTR_BO (1ULL << 63) +#define AMDGPU_AMDKFD_CREATE_SVM_BO (1ULL << 62) + #define to_amdgpu_bo_user(abo) container_of((abo), struct amdgpu_bo_user, bo) struct amdgpu_bo_param { @@ -267,6 +271,9 @@ int amdgpu_bo_create_user(struct amdgpu_device *adev, struct amdgpu_bo_user **ubo_ptr); void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr, void **cpu_addr); +int amdgpu_bo_create_shadow(struct amdgpu_device *adev, + unsigned long size, + struct amdgpu_bo *bo); int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr); void *amdgpu_bo_kptr(struct amdgpu_bo *bo); void amdgpu_bo_kunmap(struct amdgpu_bo *bo); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a09483beb968..f7bbb04d01ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -417,31 +417,12 @@ static int psp_tmr_init(struct psp_context *psp) return ret; } -static int psp_clear_vf_fw(struct psp_context *psp) -{ - int ret; - struct psp_gfx_cmd_resp *cmd; - - if (!amdgpu_sriov_vf(psp->adev) || psp->adev->asic_type != CHIP_NAVI12) - return 0; - - cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); - if (!cmd) - return -ENOMEM; - - cmd->cmd_id = GFX_CMD_ID_CLEAR_VF_FW; - - ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - kfree(cmd); - - return ret; -} - static bool psp_skip_tmr(struct psp_context *psp) { switch (psp->adev->asic_type) { case CHIP_NAVI12: case CHIP_SIENNA_CICHLID: + case CHIP_ALDEBARAN: return true; default: return false; @@ -1037,6 +1018,13 @@ static int psp_ras_load(struct psp_context *psp) memset(psp->fw_pri_buf, 0, PSP_1_MEG); 
memcpy(psp->fw_pri_buf, psp->ta_ras_start_addr, psp->ta_ras_ucode_size); + ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; + + if (psp->adev->gmc.xgmi.connected_to_cpu) + ras_cmd->ras_in_message.init_flags.poison_mode_en = 1; + else + ras_cmd->ras_in_message.init_flags.dgpu_mode = 1; + psp_prep_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, psp->ta_ras_ucode_size, @@ -1046,8 +1034,6 @@ static int psp_ras_load(struct psp_context *psp) ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr); - ras_cmd = (struct ta_ras_shared_memory *)psp->ras.ras_shared_buf; - if (!ret) { psp->ras.session_id = cmd->resp.session_id; @@ -1128,6 +1114,31 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) return ret; } +static int psp_ras_status_to_errno(struct amdgpu_device *adev, + enum ta_ras_status ras_status) +{ + int ret = -EINVAL; + + switch (ras_status) { + case TA_RAS_STATUS__SUCCESS: + ret = 0; + break; + case TA_RAS_STATUS__RESET_NEEDED: + ret = -EAGAIN; + break; + case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE: + dev_warn(adev->dev, "RAS WARN: ras function unavailable\n"); + break; + case TA_RAS_STATUS__ERROR_ASD_READ_WRITE: + dev_warn(adev->dev, "RAS WARN: asd read or write failed\n"); + break; + default: + dev_err(adev->dev, "RAS ERROR: ras function failed ret 0x%X\n", ret); + } + + return ret; +} + int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable) { @@ -1151,7 +1162,7 @@ int psp_ras_enable_features(struct psp_context *psp, if (ret) return -EINVAL; - return ras_cmd->ras_status; + return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status); } static int psp_ras_terminate(struct psp_context *psp) @@ -1234,7 +1245,7 @@ int psp_ras_trigger_error(struct psp_context *psp, if (amdgpu_ras_intr_triggered()) return 0; - return ras_cmd->ras_status; + return psp_ras_status_to_errno(psp->adev, ras_cmd->ras_status); } // ras end @@ -1920,12 +1931,6 @@ static int psp_hw_start(struct psp_context *psp) return ret; } - ret = psp_clear_vf_fw(psp); - if (ret) { - DRM_ERROR("PSP clear vf fw!\n"); - return ret; - } - ret = psp_boot_config_set(adev); if (ret) { DRM_WARN("PSP set boot config@\n"); @@ -2166,7 +2171,7 @@ static int psp_load_smu_fw(struct psp_context *psp) return 0; if ((amdgpu_in_reset(adev) && - ras && ras->supported && + ras && adev->ras_enabled && (adev->asic_type == CHIP_ARCTURUS || adev->asic_type == CHIP_VEGA20)) || (adev->in_runpm && @@ -2434,7 +2439,6 @@ static int psp_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct psp_context *psp = &adev->psp; - int ret; if (psp->adev->psp.ta_fw) { psp_ras_terminate(psp); @@ -2445,11 +2449,6 @@ static int psp_hw_fini(void *handle) } psp_asd_unload(psp); - ret = psp_clear_vf_fw(psp); - if (ret) { - DRM_ERROR("PSP clear vf fw!\n"); - return ret; - } psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index b0d2fc9454ca..b1c57a5b6e89 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -33,6 +33,7 @@ #include "amdgpu_atomfirmware.h" #include "amdgpu_xgmi.h" #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" +#include "atom.h" static const char *RAS_FS_NAME = "ras"; @@ -320,11 +321,14 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, * "disable" requires only the block. * "enable" requires the block and error type. * "inject" requires the block, error type, address, and value. 
+ * * The block is one of: umc, sdma, gfx, etc. * see ras_block_string[] for details + * * The error type is one of: ue, ce, where, * ue is multi-uncorrectable * ce is single-correctable + * * The sub-block is a the sub-block index, pass 0 if there is no sub-block. * The address and value are hexadecimal numbers, leading 0x is optional. * @@ -531,7 +535,7 @@ static struct ras_manager *amdgpu_ras_create_obj(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return NULL; if (head->block >= AMDGPU_RAS_BLOCK_COUNT) @@ -558,7 +562,7 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, struct ras_manager *obj; int i; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return NULL; if (head) { @@ -585,36 +589,11 @@ struct ras_manager *amdgpu_ras_find_obj(struct amdgpu_device *adev, } /* obj end */ -static void amdgpu_ras_parse_status_code(struct amdgpu_device *adev, - const char* invoke_type, - const char* block_name, - enum ta_ras_status ret) -{ - switch (ret) { - case TA_RAS_STATUS__SUCCESS: - return; - case TA_RAS_STATUS__ERROR_RAS_NOT_AVAILABLE: - dev_warn(adev->dev, - "RAS WARN: %s %s currently unavailable\n", - invoke_type, - block_name); - break; - default: - dev_err(adev->dev, - "RAS ERROR: %s %s error failed ret 0x%X\n", - invoke_type, - block_name, - ret); - } -} - /* feature ctl begin */ static int amdgpu_ras_is_feature_allowed(struct amdgpu_device *adev, - struct ras_common_if *head) + struct ras_common_if *head) { - struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - - return con->hw_supported & BIT(head->block); + return adev->ras_hw_enabled & BIT(head->block); } static int amdgpu_ras_is_feature_enabled(struct amdgpu_device *adev, @@ -658,11 +637,7 @@ static int __amdgpu_ras_feature_enable(struct amdgpu_device *adev, con->features |= BIT(head->block); } else { if (obj && amdgpu_ras_is_feature_enabled(adev, head)) { - /* skip clean gfx ras context feature for VEGA20 Gaming. - * will clean later - */ - if (!(!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX))) - con->features &= ~BIT(head->block); + con->features &= ~BIT(head->block); put_obj(obj); } } @@ -708,15 +683,10 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev, if (!amdgpu_ras_intr_triggered()) { ret = psp_ras_enable_features(&adev->psp, info, enable); if (ret) { - amdgpu_ras_parse_status_code(adev, - enable ? "enable":"disable", - ras_block_str(head->block), - (enum ta_ras_status)ret); - if (ret == TA_RAS_STATUS__RESET_NEEDED) - ret = -EAGAIN; - else - ret = -EINVAL; - + dev_err(adev->dev, "ras %s %s failed %d\n", + enable ? 
"enable":"disable", + ras_block_str(head->block), + ret); goto out; } } @@ -770,6 +740,10 @@ int amdgpu_ras_feature_enable_on_boot(struct amdgpu_device *adev, con->features |= BIT(head->block); ret = amdgpu_ras_feature_enable(adev, head, 0); + + /* clean gfx block ras features flag */ + if (adev->ras_enabled && head->block == AMDGPU_RAS_BLOCK__GFX) + con->features &= ~BIT(head->block); } } else ret = amdgpu_ras_feature_enable(adev, head, enable); @@ -890,6 +864,11 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, adev->gmc.xgmi.ras_funcs->query_ras_error_count) adev->gmc.xgmi.ras_funcs->query_ras_error_count(adev, &err_data); break; + case AMDGPU_RAS_BLOCK__HDP: + if (adev->hdp.ras_funcs && + adev->hdp.ras_funcs->query_ras_error_count) + adev->hdp.ras_funcs->query_ras_error_count(adev, &err_data); + break; default: break; } @@ -901,17 +880,42 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, info->ce_count = obj->err_data.ce_count; if (err_data.ce_count) { - dev_info(adev->dev, "%ld correctable hardware errors " + if (adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { + dev_info(adev->dev, "socket: %d, die: %d " + "%ld correctable hardware errors " + "detected in %s block, no user " + "action is needed.\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + obj->err_data.ce_count, + ras_block_str(info->head.block)); + } else { + dev_info(adev->dev, "%ld correctable hardware errors " "detected in %s block, no user " "action is needed.\n", obj->err_data.ce_count, ras_block_str(info->head.block)); + } } if (err_data.ue_count) { - dev_info(adev->dev, "%ld uncorrectable hardware errors " + if (adev->smuio.funcs && + adev->smuio.funcs->get_socket_id && + adev->smuio.funcs->get_die_id) { + dev_info(adev->dev, "socket: %d, die: %d " + "%ld uncorrectable hardware errors " + "detected in %s block\n", + adev->smuio.funcs->get_socket_id(adev), + adev->smuio.funcs->get_die_id(adev), + obj->err_data.ue_count, + ras_block_str(info->head.block)); + } else { + dev_info(adev->dev, "%ld uncorrectable hardware errors " "detected in %s block\n", obj->err_data.ue_count, ras_block_str(info->head.block)); + } } return 0; @@ -937,11 +941,20 @@ int amdgpu_ras_reset_error_status(struct amdgpu_device *adev, if (adev->mmhub.ras_funcs && adev->mmhub.ras_funcs->reset_ras_error_count) adev->mmhub.ras_funcs->reset_ras_error_count(adev); + + if (adev->mmhub.ras_funcs && + adev->mmhub.ras_funcs->reset_ras_error_status) + adev->mmhub.ras_funcs->reset_ras_error_status(adev); break; case AMDGPU_RAS_BLOCK__SDMA: if (adev->sdma.funcs->reset_ras_error_count) adev->sdma.funcs->reset_ras_error_count(adev); break; + case AMDGPU_RAS_BLOCK__HDP: + if (adev->hdp.ras_funcs && + adev->hdp.ras_funcs->reset_ras_error_count) + adev->hdp.ras_funcs->reset_ras_error_count(adev); + break; default: break; } @@ -1022,10 +1035,9 @@ int amdgpu_ras_error_inject(struct amdgpu_device *adev, ret = -EINVAL; } - amdgpu_ras_parse_status_code(adev, - "inject", - ras_block_str(info->head.block), - (enum ta_ras_status)ret); + if (ret) + dev_err(adev->dev, "ras inject %s failed %d\n", + ras_block_str(info->head.block), ret); return ret; } @@ -1038,7 +1050,7 @@ unsigned long amdgpu_ras_query_error_count(struct amdgpu_device *adev, struct ras_manager *obj; struct ras_err_data data = {0, 0}; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return 0; list_for_each_entry(obj, &con->head, node) { @@ -1265,8 +1277,8 @@ static int 
amdgpu_ras_sysfs_remove_all(struct amdgpu_device *adev) static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - struct dentry *dir; - struct drm_minor *minor = adev_to_drm(adev)->primary; + struct drm_minor *minor = adev_to_drm(adev)->primary; + struct dentry *dir; dir = debugfs_create_dir(RAS_FS_NAME, minor->debugfs_root); debugfs_create_file("ras_ctrl", S_IWUGO | S_IRUGO, dir, adev, @@ -1275,6 +1287,8 @@ static struct dentry *amdgpu_ras_debugfs_create_ctrl_node(struct amdgpu_device * &amdgpu_ras_debugfs_eeprom_ops); debugfs_create_u32("bad_page_cnt_threshold", 0444, dir, &con->bad_page_cnt_threshold); + debugfs_create_x32("ras_hw_enabled", 0444, dir, &adev->ras_hw_enabled); + debugfs_create_x32("ras_enabled", 0444, dir, &adev->ras_enabled); /* * After one uncorrectable error happens, usually GPU recovery will @@ -1561,7 +1575,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return; list_for_each_entry(obj, &con->head, node) { @@ -1611,7 +1625,7 @@ static void amdgpu_ras_query_err_status(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return; list_for_each_entry(obj, &con->head, node) { @@ -1925,7 +1939,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev) bool exc_err_limit = false; int ret; - if (adev->ras_features && con) + if (adev->ras_enabled && con) data = &con->eh_data; else return 0; @@ -2029,6 +2043,23 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) } /* + * this is workaround for vega20 workstation sku, + * force enable gfx ras, ignore vbios gfx ras flag + * due to GC EDC can not write + */ +static void amdgpu_ras_get_quirks(struct amdgpu_device *adev) +{ + struct atom_context *ctx = adev->mode_info.atom_context; + + if (!ctx) + return; + + if (strnstr(ctx->vbios_version, "D16406", + sizeof(ctx->vbios_version))) + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX); +} + +/* * check hardware's ras ability which will be saved in hw_supported. * if hardware does not support ras, we can skip some ras initializtion and * forbid some ras operations from IP. @@ -2037,11 +2068,9 @@ static bool amdgpu_ras_asic_supported(struct amdgpu_device *adev) * we have to initialize ras as normal. but need check if operation is * allowed or not in each function. 
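 *
 * In short, after this change adev->ras_hw_enabled is the per-block mask the
 * hardware/VBIOS reports as RAS capable, while adev->ras_enabled is the subset
 * actually enabled once the amdgpu_ras_enable and amdgpu_ras_mask module
 * parameters are applied.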
*/ -static void amdgpu_ras_check_supported(struct amdgpu_device *adev, - uint32_t *hw_supported, uint32_t *supported) +static void amdgpu_ras_check_supported(struct amdgpu_device *adev) { - *hw_supported = 0; - *supported = 0; + adev->ras_hw_enabled = adev->ras_enabled = 0; if (amdgpu_sriov_vf(adev) || !adev->is_atom_fw || !amdgpu_ras_asic_supported(adev)) @@ -2050,33 +2079,34 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev, if (!adev->gmc.xgmi.connected_to_cpu) { if (amdgpu_atomfirmware_mem_ecc_supported(adev)) { dev_info(adev->dev, "MEM ECC is active.\n"); - *hw_supported |= (1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); } else { dev_info(adev->dev, "MEM ECC is not presented.\n"); } if (amdgpu_atomfirmware_sram_ecc_supported(adev)) { dev_info(adev->dev, "SRAM ECC is active.\n"); - *hw_supported |= ~(1 << AMDGPU_RAS_BLOCK__UMC | - 1 << AMDGPU_RAS_BLOCK__DF); + adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC | + 1 << AMDGPU_RAS_BLOCK__DF); } else { dev_info(adev->dev, "SRAM ECC is not presented.\n"); } } else { /* driver only manages a few IP blocks RAS feature * when GPU is connected cpu through XGMI */ - *hw_supported |= (1 << AMDGPU_RAS_BLOCK__GFX | - 1 << AMDGPU_RAS_BLOCK__SDMA | - 1 << AMDGPU_RAS_BLOCK__MMHUB); + adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__GFX | + 1 << AMDGPU_RAS_BLOCK__SDMA | + 1 << AMDGPU_RAS_BLOCK__MMHUB); } + amdgpu_ras_get_quirks(adev); + /* hw_supported needs to be aligned with RAS block mask. */ - *hw_supported &= AMDGPU_RAS_BLOCK_MASK; + adev->ras_hw_enabled &= AMDGPU_RAS_BLOCK_MASK; - *supported = amdgpu_ras_enable == 0 ? - 0 : *hw_supported & amdgpu_ras_mask; - adev->ras_features = *supported; + adev->ras_enabled = amdgpu_ras_enable == 0 ? 0 : + adev->ras_hw_enabled & amdgpu_ras_mask; } int amdgpu_ras_init(struct amdgpu_device *adev) @@ -2097,13 +2127,13 @@ int amdgpu_ras_init(struct amdgpu_device *adev) amdgpu_ras_set_context(adev, con); - amdgpu_ras_check_supported(adev, &con->hw_supported, - &con->supported); - if (!con->hw_supported || (adev->asic_type == CHIP_VEGA10)) { + amdgpu_ras_check_supported(adev); + + if (!adev->ras_enabled || adev->asic_type == CHIP_VEGA10) { /* set gfx block ras context feature for VEGA20 Gaming * send ras disable cmd to ras ta during ras late init. 
*/ - if (!adev->ras_features && adev->asic_type == CHIP_VEGA20) { + if (!adev->ras_enabled && adev->asic_type == CHIP_VEGA20) { con->features |= BIT(AMDGPU_RAS_BLOCK__GFX); return 0; @@ -2153,8 +2183,9 @@ int amdgpu_ras_init(struct amdgpu_device *adev) } dev_info(adev->dev, "RAS INFO: ras initialized successfully, " - "hardware ability[%x] ras_mask[%x]\n", - con->hw_supported, con->supported); + "hardware ability[%x] ras_mask[%x]\n", + adev->ras_hw_enabled, adev->ras_enabled); + return 0; release_con: amdgpu_ras_set_context(adev, NULL); @@ -2268,7 +2299,7 @@ void amdgpu_ras_resume(struct amdgpu_device *adev) struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj, *tmp; - if (!adev->ras_features || !con) { + if (!adev->ras_enabled || !con) { /* clean ras context for VEGA20 Gaming after send ras disable cmd */ amdgpu_release_ras_context(adev); @@ -2314,7 +2345,7 @@ void amdgpu_ras_suspend(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return; amdgpu_ras_disable_all_features(adev, 0); @@ -2328,7 +2359,7 @@ int amdgpu_ras_pre_fini(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return 0; /* Need disable ras on all IPs here before ip [hw/sw]fini */ @@ -2341,7 +2372,7 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); - if (!adev->ras_features || !con) + if (!adev->ras_enabled || !con) return 0; amdgpu_ras_fs_fini(adev); @@ -2360,10 +2391,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { - uint32_t hw_supported, supported; - - amdgpu_ras_check_supported(adev, &hw_supported, &supported); - if (!hw_supported) + amdgpu_ras_check_supported(adev); + if (!adev->ras_hw_enabled) return; if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { @@ -2392,7 +2421,7 @@ void amdgpu_release_ras_context(struct amdgpu_device *adev) if (!con) return; - if (!adev->ras_features && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) { + if (!adev->ras_enabled && con->features & BIT(AMDGPU_RAS_BLOCK__GFX)) { con->features &= ~BIT(AMDGPU_RAS_BLOCK__GFX); amdgpu_ras_set_context(adev, NULL); kfree(con); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 60df268a0c66..201fbdee1d09 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -313,9 +313,6 @@ struct ras_common_if { struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ - uint32_t hw_supported; - /* for IP to check its ras ability. 
*/ - uint32_t supported; uint32_t features; struct list_head head; /* sysfs */ @@ -478,7 +475,7 @@ static inline int amdgpu_ras_is_supported(struct amdgpu_device *adev, if (block >= AMDGPU_RAS_BLOCK_COUNT) return 0; - return ras && (ras->supported & (1 << block)); + return ras && (adev->ras_enabled & (1 << block)); } int amdgpu_ras_recovery_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 688624ebe421..7b634a1517f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -158,6 +158,7 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) * @irq_src: interrupt source to use for this ring * @irq_type: interrupt type to use for this ring * @hw_prio: ring priority (NORMAL/HIGH) + * @sched_score: optional score atomic shared with other schedulers * * Initialize the driver information for the selected ring (all asics). * Returns 0 on success, error on failure. diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h index b860ec913ac5..484bb3dcec47 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_smuio.h @@ -29,6 +29,7 @@ struct amdgpu_smuio_funcs { void (*update_rom_clock_gating)(struct amdgpu_device *adev, bool enable); void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); u32 (*get_die_id)(struct amdgpu_device *adev); + u32 (*get_socket_id)(struct amdgpu_device *adev); bool (*is_host_gpu_xgmi_supported)(struct amdgpu_device *adev); }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 8c7ec09eb1a4..10391fcff343 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -32,7 +32,6 @@ #include <linux/dma-mapping.h> #include <linux/iommu.h> -#include <linux/hmm.h> #include <linux/pagemap.h> #include <linux/sched/task.h> #include <linux/sched/mm.h> @@ -112,6 +111,20 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, } abo = ttm_to_amdgpu_bo(bo); + if (abo->flags & AMDGPU_AMDKFD_CREATE_SVM_BO) { + struct dma_fence *fence; + struct dma_resv *resv = &bo->base._resv; + + rcu_read_lock(); + fence = rcu_dereference(resv->fence_excl); + if (fence && !fence->ops->signaled) + dma_fence_enable_sw_signaling(fence); + + placement->num_placement = 0; + placement->num_busy_placement = 0; + rcu_read_unlock(); + return; + } switch (bo->mem.mem_type) { case AMDGPU_PL_GDS: case AMDGPU_PL_GWS: @@ -165,13 +178,6 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); - /* - * Don't verify access for KFD BOs. They don't have a GEM - * object associated with them. 
- */ - if (abo->kfd_bo) - return 0; - if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; return drm_vma_node_verify_access(&abo->tbo.base.vma_node, @@ -288,7 +294,7 @@ error_free: } /** - * amdgpu_copy_ttm_mem_to_mem - Helper function for copy + * amdgpu_ttm_copy_mem_to_mem - Helper function for copy * @adev: amdgpu device * @src: buffer/address where to read from * @dst: buffer/address where to write to @@ -670,10 +676,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) struct amdgpu_ttm_tt *gtt = (void *)ttm; unsigned long start = gtt->userptr; struct vm_area_struct *vma; - struct hmm_range *range; - unsigned long timeout; struct mm_struct *mm; - unsigned long i; + bool readonly; int r = 0; mm = bo->notifier.mm; @@ -689,76 +693,26 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages) if (!mmget_not_zero(mm)) /* Happens during process shutdown */ return -ESRCH; - range = kzalloc(sizeof(*range), GFP_KERNEL); - if (unlikely(!range)) { - r = -ENOMEM; - goto out; - } - range->notifier = &bo->notifier; - range->start = bo->notifier.interval_tree.start; - range->end = bo->notifier.interval_tree.last + 1; - range->default_flags = HMM_PFN_REQ_FAULT; - if (!amdgpu_ttm_tt_is_readonly(ttm)) - range->default_flags |= HMM_PFN_REQ_WRITE; - - range->hmm_pfns = kvmalloc_array(ttm->num_pages, - sizeof(*range->hmm_pfns), GFP_KERNEL); - if (unlikely(!range->hmm_pfns)) { - r = -ENOMEM; - goto out_free_ranges; - } - mmap_read_lock(mm); vma = find_vma(mm, start); + mmap_read_unlock(mm); if (unlikely(!vma || start < vma->vm_start)) { r = -EFAULT; - goto out_unlock; + goto out_putmm; } if (unlikely((gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) && vma->vm_file)) { r = -EPERM; - goto out_unlock; - } - mmap_read_unlock(mm); - timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); - -retry: - range->notifier_seq = mmu_interval_read_begin(&bo->notifier); - - mmap_read_lock(mm); - r = hmm_range_fault(range); - mmap_read_unlock(mm); - if (unlikely(r)) { - /* - * FIXME: This timeout should encompass the retry from - * mmu_interval_read_retry() as well. - */ - if (r == -EBUSY && !time_after(jiffies, timeout)) - goto retry; - goto out_free_pfns; + goto out_putmm; } - /* - * Due to default_flags, all pages are HMM_PFN_VALID or - * hmm_range_fault() fails. FIXME: The pages cannot be touched outside - * the notifier_lock, and mmu_interval_read_retry() must be done first. - */ - for (i = 0; i < ttm->num_pages; i++) - pages[i] = hmm_pfn_to_page(range->hmm_pfns[i]); - - gtt->range = range; + readonly = amdgpu_ttm_tt_is_readonly(ttm); + r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start, + ttm->num_pages, >t->range, readonly, + false); +out_putmm: mmput(mm); - return 0; - -out_unlock: - mmap_read_unlock(mm); -out_free_pfns: - kvfree(range->hmm_pfns); -out_free_ranges: - kfree(range); -out: - mmput(mm); return r; } @@ -787,10 +741,7 @@ bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm) * FIXME: Must always hold notifier_lock for this, and must * not ignore the return code. 
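 *
 * Note: the open-coded hmm_range_fault() retry loop that previously lived in
 * amdgpu_ttm_tt_get_user_pages() is now factored out into the
 * amdgpu_hmm_range_get_pages()/amdgpu_hmm_range_get_pages_done() helpers,
 * presumably so the same logic can be shared with the new KFD SVM code.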
*/ - r = mmu_interval_read_retry(gtt->range->notifier, - gtt->range->notifier_seq); - kvfree(gtt->range->hmm_pfns); - kfree(gtt->range); + r = amdgpu_hmm_range_get_pages_done(gtt->range); gtt->range = NULL; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 0c9c5255aa42..a57842689d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -50,9 +50,12 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) struct drm_device *ddev = adev_to_drm(adev); /* enable virtual display */ - if (adev->mode_info.num_crtc == 0) - adev->mode_info.num_crtc = 1; - adev->enable_virtual_display = true; + if (adev->asic_type != CHIP_ALDEBARAN && + adev->asic_type != CHIP_ARCTURUS) { + if (adev->mode_info.num_crtc == 0) + adev->mode_info.num_crtc = 1; + adev->enable_virtual_display = true; + } ddev->driver_features &= ~DRIVER_ATOMIC; adev->cg_flags = 0; adev->pg_flags = 0; @@ -679,6 +682,7 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) case CHIP_VEGA10: case CHIP_VEGA20: case CHIP_ARCTURUS: + case CHIP_ALDEBARAN: soc15_set_virt_ops(adev); break; case CHIP_NAVI10: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4a3e3f72e127..edc63d3e087e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -38,6 +38,7 @@ #include "amdgpu_gmc.h" #include "amdgpu_xgmi.h" #include "amdgpu_dma_buf.h" +#include "kfd_svm.h" /** * DOC: GPUVM @@ -850,35 +851,60 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, } /** - * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation + * amdgpu_vm_pt_create - create bo for PD/PT * * @adev: amdgpu_device pointer * @vm: requesting vm * @level: the page table level * @immediate: use a immediate update - * @bp: resulting BO allocation parameters + * @bo: pointer to the buffer object pointer */ -static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, +static int amdgpu_vm_pt_create(struct amdgpu_device *adev, + struct amdgpu_vm *vm, int level, bool immediate, - struct amdgpu_bo_param *bp) + struct amdgpu_bo **bo) { - memset(bp, 0, sizeof(*bp)); + struct amdgpu_bo_param bp; + int r; - bp->size = amdgpu_vm_bo_size(adev, level); - bp->byte_align = AMDGPU_GPU_PAGE_SIZE; - bp->domain = AMDGPU_GEM_DOMAIN_VRAM; - bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain); - bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + memset(&bp, 0, sizeof(bp)); + + bp.size = amdgpu_vm_bo_size(adev, level); + bp.byte_align = AMDGPU_GPU_PAGE_SIZE; + bp.domain = AMDGPU_GEM_DOMAIN_VRAM; + bp.domain = amdgpu_bo_get_preferred_pin_domain(adev, bp.domain); + bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | AMDGPU_GEM_CREATE_CPU_GTT_USWC; - bp->bo_ptr_size = sizeof(struct amdgpu_bo); + bp.bo_ptr_size = sizeof(struct amdgpu_bo); if (vm->use_cpu_for_update) - bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - else if (!vm->root.base.bo || vm->root.base.bo->shadow) - bp->flags |= AMDGPU_GEM_CREATE_SHADOW; - bp->type = ttm_bo_type_kernel; - bp->no_wait_gpu = immediate; + bp.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + + bp.type = ttm_bo_type_kernel; + bp.no_wait_gpu = immediate; if (vm->root.base.bo) - bp->resv = vm->root.base.bo->tbo.base.resv; + bp.resv = vm->root.base.bo->tbo.base.resv; + + r = amdgpu_bo_create(adev, &bp, bo); + if (r) + return r; + + if (vm->is_compute_context && (adev->flags & AMD_IS_APU)) + return 0; + + if (!bp.resv) + 
WARN_ON(dma_resv_lock((*bo)->tbo.base.resv, + NULL)); + r = amdgpu_bo_create_shadow(adev, bp.size, *bo); + + if (!bp.resv) + dma_resv_unlock((*bo)->tbo.base.resv); + + if (r) { + amdgpu_bo_unref(bo); + return r; + } + + return 0; } /** @@ -901,7 +927,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, bool immediate) { struct amdgpu_vm_pt *entry = cursor->entry; - struct amdgpu_bo_param bp; struct amdgpu_bo *pt; int r; @@ -919,9 +944,7 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, if (entry->base.bo) return 0; - amdgpu_vm_bo_param(adev, vm, cursor->level, immediate, &bp); - - r = amdgpu_bo_create(adev, &bp, &pt); + r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt); if (r) return r; @@ -1593,15 +1616,15 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, * Returns: * 0 for success, -EINVAL for failure. */ -static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, - struct amdgpu_device *bo_adev, - struct amdgpu_vm *vm, bool immediate, - bool unlocked, struct dma_resv *resv, - uint64_t start, uint64_t last, - uint64_t flags, uint64_t offset, - struct drm_mm_node *nodes, - dma_addr_t *pages_addr, - struct dma_fence **fence) +int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev, + struct amdgpu_vm *vm, bool immediate, + bool unlocked, struct dma_resv *resv, + uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, + struct drm_mm_node *nodes, + dma_addr_t *pages_addr, + struct dma_fence **fence) { struct amdgpu_vm_update_params params; enum amdgpu_sync_mode sync_mode; @@ -2818,7 +2841,6 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * * @adev: amdgpu_device pointer * @vm: requested vm - * @vm_context: Indicates if it GFX or Compute context * @pasid: Process address space identifier * * Init @vm fields. @@ -2826,10 +2848,8 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) * Returns: * 0 for success, error for failure. */ -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int vm_context, u32 pasid) +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid) { - struct amdgpu_bo_param bp; struct amdgpu_bo *root; int r, i; @@ -2861,16 +2881,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->pte_support_ats = false; vm->is_compute_context = false; - if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) { - vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & - AMDGPU_VM_USE_CPU_FOR_COMPUTE); + vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & + AMDGPU_VM_USE_CPU_FOR_GFX); - if (adev->asic_type == CHIP_RAVEN) - vm->pte_support_ats = true; - } else { - vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & - AMDGPU_VM_USE_CPU_FOR_GFX); - } DRM_DEBUG_DRIVER("VM update mode is %s\n", vm->use_cpu_for_update ? 
"CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && @@ -2887,10 +2900,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, mutex_init(&vm->eviction_lock); vm->evicting = false; - amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, false, &bp); - if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) - bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; - r = amdgpu_bo_create(adev, &bp, &root); + r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level, + false, &root); if (r) goto error_free_delayed; @@ -3349,6 +3360,7 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, uint64_t addr) { + bool is_compute_context = false; struct amdgpu_bo *root; uint64_t value, flags; struct amdgpu_vm *vm; @@ -3356,15 +3368,25 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, spin_lock(&adev->vm_manager.pasid_lock); vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (vm) + if (vm) { root = amdgpu_bo_ref(vm->root.base.bo); - else + is_compute_context = vm->is_compute_context; + } else { root = NULL; + } spin_unlock(&adev->vm_manager.pasid_lock); if (!root) return false; + addr /= AMDGPU_GPU_PAGE_SIZE; + + if (is_compute_context && + !svm_range_restore_pages(adev, pasid, addr)) { + amdgpu_bo_unref(&root); + return true; + } + r = amdgpu_bo_reserve(root, true); if (r) goto error_unref; @@ -3378,18 +3400,16 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid, if (!vm) goto error_unlock; - addr /= AMDGPU_GPU_PAGE_SIZE; flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED | AMDGPU_PTE_SYSTEM; - if (vm->is_compute_context) { + if (is_compute_context) { /* Intentionally setting invalid PTE flag * combination to force a no-retry-fault */ flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE | AMDGPU_PTE_TF; value = 0; - } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) { /* Redirect the access to the dummy page */ value = adev->dummy_page_addr; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6fd7dad0540a..ea60ec122b51 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -121,9 +121,6 @@ struct amdgpu_bo_list_entry; /* max vmids dedicated for process */ #define AMDGPU_VM_MAX_RESERVED_VMID 1 -#define AMDGPU_VM_CONTEXT_GFX 0 -#define AMDGPU_VM_CONTEXT_COMPUTE 1 - /* See vm_update_mode */ #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0) #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1) @@ -367,6 +364,8 @@ struct amdgpu_vm_manager { spinlock_t pasid_lock; }; +struct amdgpu_bo_va_mapping; + #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) @@ -378,8 +377,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, - int vm_context, u32 pasid); +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid); int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, u32 pasid); void amdgpu_vm_release_compute(struct 
amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); @@ -398,6 +396,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct dma_fence **fence); int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev, + struct amdgpu_vm *vm, bool immediate, + bool unlocked, struct dma_resv *resv, + uint64_t start, uint64_t last, + uint64_t flags, uint64_t offset, + struct drm_mm_node *nodes, + dma_addr_t *pages_addr, + struct dma_fence **fence); int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va, bool clear); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index f7235438535f..f78d21910e07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -29,12 +29,14 @@ #include "amdgpu_atomfirmware.h" #include "atom.h" -static inline struct amdgpu_vram_mgr *to_vram_mgr(struct ttm_resource_manager *man) +static inline struct amdgpu_vram_mgr * +to_vram_mgr(struct ttm_resource_manager *man) { return container_of(man, struct amdgpu_vram_mgr, manager); } -static inline struct amdgpu_device *to_amdgpu_device(struct amdgpu_vram_mgr *mgr) +static inline struct amdgpu_device * +to_amdgpu_device(struct amdgpu_vram_mgr *mgr) { return container_of(mgr, struct amdgpu_device, mman.vram_mgr); } @@ -82,12 +84,14 @@ static ssize_t amdgpu_mem_info_vis_vram_total_show(struct device *dev, * amount of currently used VRAM in bytes */ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_usage(man)); } @@ -100,18 +104,28 @@ static ssize_t amdgpu_mem_info_vram_used_show(struct device *dev, * amount of currently used visible VRAM in bytes */ static ssize_t amdgpu_mem_info_vis_vram_used_show(struct device *dev, - struct device_attribute *attr, char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); - struct ttm_resource_manager *man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); + struct ttm_resource_manager *man; + man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM); return sysfs_emit(buf, "%llu\n", amdgpu_vram_mgr_vis_usage(man)); } +/** + * DOC: mem_info_vram_vendor + * + * The amdgpu driver provides a sysfs API for reporting the vendor of the + * installed VRAM + * The file mem_info_vram_vendor is used for this and returns the name of the + * vendor. 
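+ *
+ * Illustrative read (assuming a single card at the default sysfs location):
+ *	$ cat /sys/class/drm/card0/device/mem_info_vram_vendor
+ *	samsung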
+ */ static ssize_t amdgpu_mem_info_vram_vendor(struct device *dev, - struct device_attribute *attr, - char *buf) + struct device_attribute *attr, + char *buf) { struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); @@ -162,78 +176,6 @@ static const struct attribute *amdgpu_vram_mgr_attributes[] = { NULL }; -static const struct ttm_resource_manager_func amdgpu_vram_mgr_func; - -/** - * amdgpu_vram_mgr_init - init VRAM manager and DRM MM - * - * @adev: amdgpu_device pointer - * - * Allocate and initialize the VRAM manager. - */ -int amdgpu_vram_mgr_init(struct amdgpu_device *adev) -{ - struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; - struct ttm_resource_manager *man = &mgr->manager; - int ret; - - ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT); - - man->func = &amdgpu_vram_mgr_func; - - drm_mm_init(&mgr->mm, 0, man->size); - spin_lock_init(&mgr->lock); - INIT_LIST_HEAD(&mgr->reservations_pending); - INIT_LIST_HEAD(&mgr->reserved_pages); - - /* Add the two VRAM-related sysfs files */ - ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); - if (ret) - DRM_ERROR("Failed to register sysfs\n"); - - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); - ttm_resource_manager_set_used(man, true); - return 0; -} - -/** - * amdgpu_vram_mgr_fini - free and destroy VRAM manager - * - * @adev: amdgpu_device pointer - * - * Destroy and free the VRAM manager, returns -EBUSY if ranges are still - * allocated inside it. - */ -void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) -{ - struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; - struct ttm_resource_manager *man = &mgr->manager; - int ret; - struct amdgpu_vram_reservation *rsv, *temp; - - ttm_resource_manager_set_used(man, false); - - ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); - if (ret) - return; - - spin_lock(&mgr->lock); - list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) - kfree(rsv); - - list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { - drm_mm_remove_node(&rsv->mm_node); - kfree(rsv); - } - drm_mm_takedown(&mgr->mm); - spin_unlock(&mgr->lock); - - sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); - - ttm_resource_manager_cleanup(man); - ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); -} - /** * amdgpu_vram_mgr_vis_size - Calculate visible node size * @@ -283,6 +225,7 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) return usage; } +/* Commit the reservation of VRAM pages */ static void amdgpu_vram_mgr_do_reserve(struct ttm_resource_manager *man) { struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); @@ -415,13 +358,13 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, const struct ttm_place *place, struct ttm_resource *mem) { + unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages; struct amdgpu_vram_mgr *mgr = to_vram_mgr(man); struct amdgpu_device *adev = to_amdgpu_device(mgr); + uint64_t vis_usage = 0, mem_bytes, max_bytes; struct drm_mm *mm = &mgr->mm; - struct drm_mm_node *nodes; enum drm_mm_insert_mode mode; - unsigned long lpfn, num_nodes, pages_per_node, pages_left; - uint64_t vis_usage = 0, mem_bytes, max_bytes; + struct drm_mm_node *nodes; unsigned i; int r; @@ -448,10 +391,10 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, pages_per_node = HPAGE_PMD_NR; #else /* default to 2MB */ - pages_per_node = (2UL << (20UL - PAGE_SHIFT)); + pages_per_node = 2UL << (20UL - PAGE_SHIFT); #endif - 
pages_per_node = max((uint32_t)pages_per_node, - tbo->page_alignment); + pages_per_node = max_t(uint32_t, pages_per_node, + tbo->page_alignment); num_nodes = DIV_ROUND_UP(mem->num_pages, pages_per_node); } @@ -469,42 +412,37 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man, mem->start = 0; pages_left = mem->num_pages; - spin_lock(&mgr->lock); - for (i = 0; pages_left >= pages_per_node; ++i) { - unsigned long pages = rounddown_pow_of_two(pages_left); + /* Limit maximum size to 2GB due to SG table limitations */ + pages = min(pages_left, 2UL << (30 - PAGE_SHIFT)); - /* Limit maximum size to 2GB due to SG table limitations */ - pages = min(pages, (2UL << (30 - PAGE_SHIFT))); - - r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, - pages_per_node, 0, - place->fpfn, lpfn, - mode); - if (unlikely(r)) - break; - - vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); - amdgpu_vram_mgr_virt_start(mem, &nodes[i]); - pages_left -= pages; - } - - for (; pages_left; ++i) { - unsigned long pages = min(pages_left, pages_per_node); + i = 0; + spin_lock(&mgr->lock); + while (pages_left) { uint32_t alignment = tbo->page_alignment; - if (pages == pages_per_node) + if (pages >= pages_per_node) alignment = pages_per_node; - r = drm_mm_insert_node_in_range(mm, &nodes[i], - pages, alignment, 0, - place->fpfn, lpfn, - mode); - if (unlikely(r)) + r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, alignment, + 0, place->fpfn, lpfn, mode); + if (unlikely(r)) { + if (pages > pages_per_node) { + if (is_power_of_2(pages)) + pages = pages / 2; + else + pages = rounddown_pow_of_two(pages); + continue; + } goto error; + } vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); amdgpu_vram_mgr_virt_start(mem, &nodes[i]); pages_left -= pages; + ++i; + + if (pages > pages_left) + pages = pages_left; } spin_unlock(&mgr->lock); @@ -728,3 +666,73 @@ static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = { .free = amdgpu_vram_mgr_del, .debug = amdgpu_vram_mgr_debug }; + +/** + * amdgpu_vram_mgr_init - init VRAM manager and DRM MM + * + * @adev: amdgpu_device pointer + * + * Allocate and initialize the VRAM manager. + */ +int amdgpu_vram_mgr_init(struct amdgpu_device *adev) +{ + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + + ttm_resource_manager_init(man, adev->gmc.real_vram_size >> PAGE_SHIFT); + + man->func = &amdgpu_vram_mgr_func; + + drm_mm_init(&mgr->mm, 0, man->size); + spin_lock_init(&mgr->lock); + INIT_LIST_HEAD(&mgr->reservations_pending); + INIT_LIST_HEAD(&mgr->reserved_pages); + + /* Add the two VRAM-related sysfs files */ + ret = sysfs_create_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); + if (ret) + DRM_ERROR("Failed to register sysfs\n"); + + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager); + ttm_resource_manager_set_used(man, true); + return 0; +} + +/** + * amdgpu_vram_mgr_fini - free and destroy VRAM manager + * + * @adev: amdgpu_device pointer + * + * Destroy and free the VRAM manager, returns -EBUSY if ranges are still + * allocated inside it. 
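+ *
+ * (On the allocation rework above: amdgpu_vram_mgr_new() now starts from the
+ * largest chunk that fits, capped at 2GB for SG-table reasons, and when
+ * drm_mm_insert_node_in_range() fails it halves the chunk, or rounds it down
+ * to a power of two, and retries, so large buffers can still be placed in a
+ * fragmented mm.)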
+ */ +void amdgpu_vram_mgr_fini(struct amdgpu_device *adev) +{ + struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr; + struct ttm_resource_manager *man = &mgr->manager; + int ret; + struct amdgpu_vram_reservation *rsv, *temp; + + ttm_resource_manager_set_used(man, false); + + ret = ttm_resource_manager_evict_all(&adev->mman.bdev, man); + if (ret) + return; + + spin_lock(&mgr->lock); + list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) + kfree(rsv); + + list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) { + drm_mm_remove_node(&rsv->mm_node); + kfree(rsv); + } + drm_mm_takedown(&mgr->mm); + spin_unlock(&mgr->lock); + + sysfs_remove_files(&adev->dev->kobj, amdgpu_vram_mgr_attributes); + + ttm_resource_manager_cleanup(man); + ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h index 1a8f6d4baab2..befd0b4b7bea 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h @@ -98,9 +98,9 @@ union amd_sriov_msg_feature_flags { union amd_sriov_reg_access_flags { struct { - uint32_t vf_reg_access_ih : 1; - uint32_t vf_reg_access_mmhub : 1; - uint32_t vf_reg_access_gc : 1; + uint32_t vf_reg_psp_access_ih : 1; + uint32_t vf_reg_rlc_access_mmhub : 1; + uint32_t vf_reg_rlc_access_gc : 1; uint32_t reserved : 29; } flags; uint32_t all; diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 5c11144da051..33324427b555 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -421,6 +421,11 @@ static int dce_virtual_sw_init(void *handle) static int dce_virtual_sw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int i = 0; + + for (i = 0; i < adev->mode_info.num_crtc; i++) + if (adev->mode_info.crtcs[i]) + hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); kfree(adev->mode_info.bios_hardcoded_edid); @@ -480,13 +485,6 @@ static int dce_virtual_hw_init(void *handle) static int dce_virtual_hw_fini(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i = 0; - - for (i = 0; i<adev->mode_info.num_crtc; i++) - if (adev->mode_info.crtcs[i]) - hrtimer_cancel(&adev->mode_info.crtcs[i]->vblank_timer); - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c index 0d8459d63bac..36ba229576d8 100644 --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c @@ -219,11 +219,11 @@ static void df_v3_6_query_hashes(struct amdgpu_device *adev) adev->df.hash_status.hash_2m = false; adev->df.hash_status.hash_1g = false; - if (adev->asic_type != CHIP_ARCTURUS) - return; - - /* encoding for hash-enabled on Arcturus */ - if (adev->df.funcs->get_fb_channel_number(adev) == 0xe) { + /* encoding for hash-enabled on Arcturus and Aldebaran */ + if ((adev->asic_type == CHIP_ARCTURUS && + adev->df.funcs->get_fb_channel_number(adev) == 0xe) || + (adev->asic_type == CHIP_ALDEBARAN && + adev->df.funcs->get_fb_channel_number(adev) == 0x1e)) { tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DfGlobalCtrl); adev->df.hash_status.hash_64k = REG_GET_FIELD(tmp, DF_CS_UMC_AON0_DfGlobalCtrl, @@ -278,7 +278,12 @@ static u32 df_v3_6_get_fb_channel_number(struct amdgpu_device *adev) u32 tmp; tmp = RREG32_SOC15(DF, 0, mmDF_CS_UMC_AON0_DramBaseAddress0); - tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK; + if (adev->asic_type == 
CHIP_ALDEBARAN) + tmp &= + ALDEBARAN_DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK; + else + tmp &= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan_MASK; + tmp >>= DF_CS_UMC_AON0_DramBaseAddress0__IntLvNumChan__SHIFT; return tmp; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index a078a38c2cee..22608c45f07c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3937,7 +3937,8 @@ static void gfx_v9_0_init_tcp_config(struct amdgpu_device *adev) { u32 tmp; - if (adev->asic_type != CHIP_ARCTURUS) + if (adev->asic_type != CHIP_ARCTURUS && + adev->asic_type != CHIP_ALDEBARAN) return; tmp = RREG32_SOC15(GC, 0, mmTCP_ADDR_CONFIG); @@ -4559,8 +4560,7 @@ static int gfx_v9_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) if (!ring->sched.ready) return 0; - if (adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_ALDEBARAN) { + if (adev->asic_type == CHIP_ARCTURUS) { vgpr_init_shader_ptr = vgpr_init_compute_shader_arcturus; vgpr_init_shader_size = sizeof(vgpr_init_compute_shader_arcturus); vgpr_init_regs_ptr = vgpr_init_regs_arcturus; @@ -4745,7 +4745,11 @@ static int gfx_v9_0_ecc_late_init(void *handle) } /* requires IBs so do in late init after IB pool is initialized */ - r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (adev->asic_type == CHIP_ALDEBARAN) + r = gfx_v9_4_2_do_edc_gpr_workarounds(adev); + else + r = gfx_v9_0_do_edc_gpr_workarounds(adev); + if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c index a30c7c10cd9a..dbad9ef002d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c @@ -22,6 +22,7 @@ */ #include "amdgpu.h" #include "soc15.h" +#include "soc15d.h" #include "gc/gc_9_4_2_offset.h" #include "gc/gc_9_4_2_sh_mask.h" @@ -31,6 +32,11 @@ #include "amdgpu_ras.h" #include "amdgpu_gfx.h" +#define SE_ID_MAX 8 +#define CU_ID_MAX 16 +#define SIMD_ID_MAX 4 +#define WAVE_ID_MAX 10 + enum gfx_v9_4_2_utc_type { VML2_MEM, VML2_WALKER_MEM, @@ -79,6 +85,634 @@ static const struct soc15_reg_golden golden_settings_gc_9_4_2_alde[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, regTCI_CNTL_3, 0xff, 0x20), }; +/** + * This shader is used to clear VGPRS and LDS, and also write the input + * pattern into the write back buffer, which will be used by driver to + * check whether all SIMDs have been covered. 
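+ *
+ * The hex words that follow are the raw GFX9 ISA encoding of that shader;
+ * gfx_v9_4_2_run_shader() further down copies it into an indirect buffer,
+ * programs the dispatch state from vgpr_init_regs_aldebaran and submits it,
+ * after which the write-back buffer is compared against the input pattern to
+ * confirm that every SIMD ran the shader.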
+*/ +static const u32 vgpr_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xd3d94000, + 0x18000080, 0xd3d94001, 0x18000080, 0xd3d94002, 0x18000080, 0xd3d94003, + 0x18000080, 0xd3d94004, 0x18000080, 0xd3d94005, 0x18000080, 0xd3d94006, + 0x18000080, 0xd3d94007, 0x18000080, 0xd3d94008, 0x18000080, 0xd3d94009, + 0x18000080, 0xd3d9400a, 0x18000080, 0xd3d9400b, 0x18000080, 0xd3d9400c, + 0x18000080, 0xd3d9400d, 0x18000080, 0xd3d9400e, 0x18000080, 0xd3d9400f, + 0x18000080, 0xd3d94010, 0x18000080, 0xd3d94011, 0x18000080, 0xd3d94012, + 0x18000080, 0xd3d94013, 0x18000080, 0xd3d94014, 0x18000080, 0xd3d94015, + 0x18000080, 0xd3d94016, 0x18000080, 0xd3d94017, 0x18000080, 0xd3d94018, + 0x18000080, 0xd3d94019, 0x18000080, 0xd3d9401a, 0x18000080, 0xd3d9401b, + 0x18000080, 0xd3d9401c, 0x18000080, 0xd3d9401d, 0x18000080, 0xd3d9401e, + 0x18000080, 0xd3d9401f, 0x18000080, 0xd3d94020, 0x18000080, 0xd3d94021, + 0x18000080, 0xd3d94022, 0x18000080, 0xd3d94023, 0x18000080, 0xd3d94024, + 0x18000080, 0xd3d94025, 0x18000080, 0xd3d94026, 0x18000080, 0xd3d94027, + 0x18000080, 0xd3d94028, 0x18000080, 0xd3d94029, 0x18000080, 0xd3d9402a, + 0x18000080, 0xd3d9402b, 0x18000080, 0xd3d9402c, 0x18000080, 0xd3d9402d, + 0x18000080, 0xd3d9402e, 0x18000080, 0xd3d9402f, 0x18000080, 0xd3d94030, + 0x18000080, 0xd3d94031, 0x18000080, 0xd3d94032, 0x18000080, 0xd3d94033, + 0x18000080, 0xd3d94034, 0x18000080, 0xd3d94035, 0x18000080, 0xd3d94036, + 0x18000080, 0xd3d94037, 0x18000080, 0xd3d94038, 0x18000080, 0xd3d94039, + 0x18000080, 0xd3d9403a, 0x18000080, 0xd3d9403b, 0x18000080, 0xd3d9403c, + 0x18000080, 0xd3d9403d, 0x18000080, 0xd3d9403e, 0x18000080, 0xd3d9403f, + 0x18000080, 0xd3d94040, 0x18000080, 0xd3d94041, 0x18000080, 0xd3d94042, + 0x18000080, 0xd3d94043, 0x18000080, 0xd3d94044, 0x18000080, 0xd3d94045, + 0x18000080, 0xd3d94046, 0x18000080, 0xd3d94047, 0x18000080, 0xd3d94048, + 0x18000080, 0xd3d94049, 0x18000080, 0xd3d9404a, 0x18000080, 0xd3d9404b, + 0x18000080, 0xd3d9404c, 0x18000080, 0xd3d9404d, 0x18000080, 0xd3d9404e, + 0x18000080, 0xd3d9404f, 0x18000080, 0xd3d94050, 0x18000080, 0xd3d94051, + 0x18000080, 0xd3d94052, 0x18000080, 0xd3d94053, 0x18000080, 0xd3d94054, + 0x18000080, 0xd3d94055, 0x18000080, 0xd3d94056, 0x18000080, 0xd3d94057, + 0x18000080, 0xd3d94058, 0x18000080, 0xd3d94059, 0x18000080, 0xd3d9405a, + 0x18000080, 0xd3d9405b, 0x18000080, 0xd3d9405c, 0x18000080, 0xd3d9405d, + 0x18000080, 0xd3d9405e, 0x18000080, 0xd3d9405f, 0x18000080, 0xd3d94060, + 0x18000080, 0xd3d94061, 0x18000080, 0xd3d94062, 0x18000080, 0xd3d94063, + 0x18000080, 0xd3d94064, 0x18000080, 0xd3d94065, 0x18000080, 0xd3d94066, + 0x18000080, 0xd3d94067, 0x18000080, 0xd3d94068, 0x18000080, 0xd3d94069, + 0x18000080, 0xd3d9406a, 0x18000080, 0xd3d9406b, 0x18000080, 0xd3d9406c, + 0x18000080, 0xd3d9406d, 0x18000080, 0xd3d9406e, 0x18000080, 0xd3d9406f, + 0x18000080, 0xd3d94070, 0x18000080, 0xd3d94071, 0x18000080, 0xd3d94072, + 0x18000080, 0xd3d94073, 0x18000080, 0xd3d94074, 0x18000080, 0xd3d94075, + 0x18000080, 0xd3d94076, 0x18000080, 0xd3d94077, 0x18000080, 0xd3d94078, + 0x18000080, 0xd3d94079, 0x18000080, 0xd3d9407a, 0x18000080, 0xd3d9407b, + 0x18000080, 0xd3d9407c, 0x18000080, 0xd3d9407d, 0x18000080, 0xd3d9407e, + 0x18000080, 0xd3d9407f, 0x18000080, 0xd3d94080, 0x18000080, 0xd3d94081, + 0x18000080, 0xd3d94082, 0x18000080, 0xd3d94083, 0x18000080, 0xd3d94084, + 0x18000080, 
0xd3d94085, 0x18000080, 0xd3d94086, 0x18000080, 0xd3d94087, + 0x18000080, 0xd3d94088, 0x18000080, 0xd3d94089, 0x18000080, 0xd3d9408a, + 0x18000080, 0xd3d9408b, 0x18000080, 0xd3d9408c, 0x18000080, 0xd3d9408d, + 0x18000080, 0xd3d9408e, 0x18000080, 0xd3d9408f, 0x18000080, 0xd3d94090, + 0x18000080, 0xd3d94091, 0x18000080, 0xd3d94092, 0x18000080, 0xd3d94093, + 0x18000080, 0xd3d94094, 0x18000080, 0xd3d94095, 0x18000080, 0xd3d94096, + 0x18000080, 0xd3d94097, 0x18000080, 0xd3d94098, 0x18000080, 0xd3d94099, + 0x18000080, 0xd3d9409a, 0x18000080, 0xd3d9409b, 0x18000080, 0xd3d9409c, + 0x18000080, 0xd3d9409d, 0x18000080, 0xd3d9409e, 0x18000080, 0xd3d9409f, + 0x18000080, 0xd3d940a0, 0x18000080, 0xd3d940a1, 0x18000080, 0xd3d940a2, + 0x18000080, 0xd3d940a3, 0x18000080, 0xd3d940a4, 0x18000080, 0xd3d940a5, + 0x18000080, 0xd3d940a6, 0x18000080, 0xd3d940a7, 0x18000080, 0xd3d940a8, + 0x18000080, 0xd3d940a9, 0x18000080, 0xd3d940aa, 0x18000080, 0xd3d940ab, + 0x18000080, 0xd3d940ac, 0x18000080, 0xd3d940ad, 0x18000080, 0xd3d940ae, + 0x18000080, 0xd3d940af, 0x18000080, 0xd3d940b0, 0x18000080, 0xd3d940b1, + 0x18000080, 0xd3d940b2, 0x18000080, 0xd3d940b3, 0x18000080, 0xd3d940b4, + 0x18000080, 0xd3d940b5, 0x18000080, 0xd3d940b6, 0x18000080, 0xd3d940b7, + 0x18000080, 0xd3d940b8, 0x18000080, 0xd3d940b9, 0x18000080, 0xd3d940ba, + 0x18000080, 0xd3d940bb, 0x18000080, 0xd3d940bc, 0x18000080, 0xd3d940bd, + 0x18000080, 0xd3d940be, 0x18000080, 0xd3d940bf, 0x18000080, 0xd3d940c0, + 0x18000080, 0xd3d940c1, 0x18000080, 0xd3d940c2, 0x18000080, 0xd3d940c3, + 0x18000080, 0xd3d940c4, 0x18000080, 0xd3d940c5, 0x18000080, 0xd3d940c6, + 0x18000080, 0xd3d940c7, 0x18000080, 0xd3d940c8, 0x18000080, 0xd3d940c9, + 0x18000080, 0xd3d940ca, 0x18000080, 0xd3d940cb, 0x18000080, 0xd3d940cc, + 0x18000080, 0xd3d940cd, 0x18000080, 0xd3d940ce, 0x18000080, 0xd3d940cf, + 0x18000080, 0xd3d940d0, 0x18000080, 0xd3d940d1, 0x18000080, 0xd3d940d2, + 0x18000080, 0xd3d940d3, 0x18000080, 0xd3d940d4, 0x18000080, 0xd3d940d5, + 0x18000080, 0xd3d940d6, 0x18000080, 0xd3d940d7, 0x18000080, 0xd3d940d8, + 0x18000080, 0xd3d940d9, 0x18000080, 0xd3d940da, 0x18000080, 0xd3d940db, + 0x18000080, 0xd3d940dc, 0x18000080, 0xd3d940dd, 0x18000080, 0xd3d940de, + 0x18000080, 0xd3d940df, 0x18000080, 0xd3d940e0, 0x18000080, 0xd3d940e1, + 0x18000080, 0xd3d940e2, 0x18000080, 0xd3d940e3, 0x18000080, 0xd3d940e4, + 0x18000080, 0xd3d940e5, 0x18000080, 0xd3d940e6, 0x18000080, 0xd3d940e7, + 0x18000080, 0xd3d940e8, 0x18000080, 0xd3d940e9, 0x18000080, 0xd3d940ea, + 0x18000080, 0xd3d940eb, 0x18000080, 0xd3d940ec, 0x18000080, 0xd3d940ed, + 0x18000080, 0xd3d940ee, 0x18000080, 0xd3d940ef, 0x18000080, 0xd3d940f0, + 0x18000080, 0xd3d940f1, 0x18000080, 0xd3d940f2, 0x18000080, 0xd3d940f3, + 0x18000080, 0xd3d940f4, 0x18000080, 0xd3d940f5, 0x18000080, 0xd3d940f6, + 0x18000080, 0xd3d940f7, 0x18000080, 0xd3d940f8, 0x18000080, 0xd3d940f9, + 0x18000080, 0xd3d940fa, 0x18000080, 0xd3d940fb, 0x18000080, 0xd3d940fc, + 0x18000080, 0xd3d940fd, 0x18000080, 0xd3d940fe, 0x18000080, 0xd3d940ff, + 0x18000080, 0xb07c0000, 0xbe8a00ff, 0x000000f8, 0xbf11080a, 0x7e000280, + 0x7e020280, 0x7e040280, 0x7e060280, 0x7e080280, 0x7e0a0280, 0x7e0c0280, + 0x7e0e0280, 0x808a880a, 0xbe80320a, 0xbf84fff5, 0xbf9c0000, 0xd28c0001, + 0x0001007f, 0xd28d0001, 0x0002027e, 0x10020288, 0xbe8b0004, 0xb78b4000, + 0xd1196a01, 0x00001701, 0xbe8a0087, 0xbefc00c1, 0xd89c4000, 0x00020201, + 0xd89cc080, 0x00040401, 0x320202ff, 0x00000800, 0x808a810a, 0xbf84fff8, + 0xbf810000, +}; + +const struct soc15_reg_entry vgpr_init_regs_aldebaran[] = { + { 
SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 4 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0xbf }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x400006 }, /* 64KB LDS */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x3F }, /* 63 - accum-offset = 256 */ + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +/** + * The below shaders are used to clear SGPRS, and also write the input + * pattern into the write back buffer. The first two dispatch should be + * scheduled simultaneously which make sure that all SGPRS could be + * allocated, so the dispatch 1 need check write back buffer before scheduled, + * make sure that waves of dispatch 0 are all dispacthed to all simds + * balanced. both dispatch 0 and dispatch 1 should be halted until all waves + * are dispatched, and then driver write a pattern to the shared memory to make + * all waves continue. +*/ +static const u32 sgpr112_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200, + 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102, + 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080, + 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080, + 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080, + 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080, + 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080, + 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080, + 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080, + 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080, + 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080, + 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080, + 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080, + 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080, + 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080, + 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080, + 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080, + 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080, + 0xbeda0080, 0xbedb0080, 0xbedc0080, 0xbedd0080, 0xbede0080, 0xbedf0080, + 0xbee00080, 0xbee10080, 0xbee20080, 0xbee30080, 0xbee40080, 0xbee50080, + 0xbf810000 +}; + +const struct soc15_reg_entry sgpr112_init_regs_aldebaran[] = { + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { 
SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 8 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x340 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +static const u32 sgpr96_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200, + 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102, + 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080, + 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080, + 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080, + 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080, + 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080, + 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080, + 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080, + 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080, + 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080, + 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080, + 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbeba0080, 0xbebb0080, + 0xbebc0080, 0xbebd0080, 0xbebe0080, 0xbebf0080, 0xbec00080, 0xbec10080, + 0xbec20080, 0xbec30080, 0xbec40080, 0xbec50080, 0xbec60080, 0xbec70080, + 0xbec80080, 0xbec90080, 0xbeca0080, 0xbecb0080, 0xbecc0080, 0xbecd0080, + 0xbece0080, 0xbecf0080, 0xbed00080, 0xbed10080, 0xbed20080, 0xbed30080, + 0xbed40080, 0xbed50080, 0xbed60080, 0xbed70080, 0xbed80080, 0xbed90080, + 0xbf810000, +}; + +const struct soc15_reg_entry sgpr96_init_regs_aldebaran[] = { + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0xc }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x2c0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 
0xffffffff }, +}; + +/* + * This shader is used to clear the uninitialized SGPRs after the above + * two dispatches. Due to a hardware limitation, dispatch 0 cannot clear + * the top-hole SGPRs, so 4 waves per SIMD are needed to cover them. + */ +static const u32 sgpr64_init_compute_shader_aldebaran[] = { + 0xb8840904, 0xb8851a04, 0xb8861344, 0xb8831804, 0x9208ff06, 0x00000280, + 0x9209a805, 0x920a8a04, 0x81080908, 0x81080a08, 0x81080308, 0x8e078208, + 0x81078407, 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8e003f, 0xc0030200, + 0x00000000, 0xbf8c0000, 0xbf06ff08, 0xdeadbeaf, 0xbf84fff9, 0x81028102, + 0xc0410080, 0x00000007, 0xbf8c0000, 0xbf8a0000, 0xbefc0080, 0xbeea0080, + 0xbeeb0080, 0xbf00f280, 0xbee60080, 0xbee70080, 0xbee80080, 0xbee90080, + 0xbefe0080, 0xbeff0080, 0xbe880080, 0xbe890080, 0xbe8a0080, 0xbe8b0080, + 0xbe8c0080, 0xbe8d0080, 0xbe8e0080, 0xbe8f0080, 0xbe900080, 0xbe910080, + 0xbe920080, 0xbe930080, 0xbe940080, 0xbe950080, 0xbe960080, 0xbe970080, + 0xbe980080, 0xbe990080, 0xbe9a0080, 0xbe9b0080, 0xbe9c0080, 0xbe9d0080, + 0xbe9e0080, 0xbe9f0080, 0xbea00080, 0xbea10080, 0xbea20080, 0xbea30080, + 0xbea40080, 0xbea50080, 0xbea60080, 0xbea70080, 0xbea80080, 0xbea90080, + 0xbeaa0080, 0xbeab0080, 0xbeac0080, 0xbead0080, 0xbeae0080, 0xbeaf0080, + 0xbeb00080, 0xbeb10080, 0xbeb20080, 0xbeb30080, 0xbeb40080, 0xbeb50080, + 0xbeb60080, 0xbeb70080, 0xbeb80080, 0xbeb90080, 0xbf810000, +}; + +const struct soc15_reg_entry sgpr64_init_regs_aldebaran[] = { + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_RESOURCE_LIMITS), 0x0000000 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_X), 0x40 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Y), 0x10 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_NUM_THREAD_Z), 1 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC1), 0x1c0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC2), 0x6 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_PGM_RSRC3), 0x0 }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE0), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE1), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE2), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE3), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE4), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE5), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE6), 0xffffffff }, + { SOC15_REG_ENTRY(GC, 0, regCOMPUTE_STATIC_THREAD_MGMT_SE7), 0xffffffff }, +}; + +static int gfx_v9_4_2_run_shader(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_ib *ib, + const u32 *shader_ptr, u32 shader_size, + const struct soc15_reg_entry *init_regs, u32 regs_size, + u32 compute_dim_x, u64 wb_gpu_addr, u32 pattern, + struct dma_fence **fence_ptr) +{ + int r, i; + uint32_t total_size, shader_offset; + u64 gpu_addr; + + total_size = (regs_size * 3 + 4 + 5 + 5) * 4; + total_size = ALIGN(total_size, 256); + shader_offset = total_size; + total_size += ALIGN(shader_size, 256); + + /* allocate an indirect buffer to put the commands in */ + memset(ib, 0, sizeof(*ib)); + r = amdgpu_ib_get(adev, NULL, total_size, + AMDGPU_IB_POOL_DIRECT, ib); + if (r) { + dev_err(adev->dev, "failed to get ib (%d).\n", r); + return r; + } + + /* load the compute shaders */ + for (i = 0; i < shader_size/sizeof(u32); i++) + ib->ptr[i + (shader_offset / 4)] = shader_ptr[i]; + + /* init the ib length to 0 */ + ib->length_dw = 0; + + /* write the register state for the compute dispatch */ + for (i = 0; 
i < regs_size; i++) { + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 1); + ib->ptr[ib->length_dw++] = SOC15_REG_ENTRY_OFFSET(init_regs[i]) + - PACKET3_SET_SH_REG_START; + ib->ptr[ib->length_dw++] = init_regs[i].reg_value; + } + + /* write the shader start address: mmCOMPUTE_PGM_LO, mmCOMPUTE_PGM_HI */ + gpu_addr = (ib->gpu_addr + (u64)shader_offset) >> 8; + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 2); + ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_PGM_LO) + - PACKET3_SET_SH_REG_START; + ib->ptr[ib->length_dw++] = lower_32_bits(gpu_addr); + ib->ptr[ib->length_dw++] = upper_32_bits(gpu_addr); + + /* write the wb buffer address */ + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_SET_SH_REG, 3); + ib->ptr[ib->length_dw++] = SOC15_REG_OFFSET(GC, 0, regCOMPUTE_USER_DATA_0) + - PACKET3_SET_SH_REG_START; + ib->ptr[ib->length_dw++] = lower_32_bits(wb_gpu_addr); + ib->ptr[ib->length_dw++] = upper_32_bits(wb_gpu_addr); + ib->ptr[ib->length_dw++] = pattern; + + /* write dispatch packet */ + ib->ptr[ib->length_dw++] = PACKET3(PACKET3_DISPATCH_DIRECT, 3); + ib->ptr[ib->length_dw++] = compute_dim_x; /* x */ + ib->ptr[ib->length_dw++] = 1; /* y */ + ib->ptr[ib->length_dw++] = 1; /* z */ + ib->ptr[ib->length_dw++] = + REG_SET_FIELD(0, COMPUTE_DISPATCH_INITIATOR, COMPUTE_SHADER_EN, 1); + + /* schedule the ib on the ring */ + r = amdgpu_ib_schedule(ring, 1, ib, NULL, fence_ptr); + if (r) { + dev_err(adev->dev, "ib submit failed (%d).\n", r); + amdgpu_ib_free(adev, ib, NULL); + } + return r; +} + +static void gfx_v9_4_2_log_wave_assignment(struct amdgpu_device *adev, uint32_t *wb_ptr) +{ + uint32_t se, cu, simd, wave; + uint32_t offset = 0; + char *str; + int size; + + str = kmalloc(256, GFP_KERNEL); + if (!str) + return; + + dev_dbg(adev->dev, "wave assignment:\n"); + + for (se = 0; se < adev->gfx.config.max_shader_engines; se++) { + for (cu = 0; cu < CU_ID_MAX; cu++) { + memset(str, 0, 256); + size = sprintf(str, "SE[%02d]CU[%02d]: ", se, cu); + for (simd = 0; simd < SIMD_ID_MAX; simd++) { + size += sprintf(str + size, "["); + for (wave = 0; wave < WAVE_ID_MAX; wave++) { + size += sprintf(str + size, "%x", wb_ptr[offset]); + offset++; + } + size += sprintf(str + size, "] "); + } + dev_dbg(adev->dev, "%s\n", str); + } + } + + kfree(str); +} + +static int gfx_v9_4_2_wait_for_waves_assigned(struct amdgpu_device *adev, + uint32_t *wb_ptr, uint32_t mask, + uint32_t pattern, uint32_t num_wave, bool wait) +{ + uint32_t se, cu, simd, wave; + uint32_t loop = 0; + uint32_t wave_cnt; + uint32_t offset; + + do { + wave_cnt = 0; + offset = 0; + + for (se = 0; se < adev->gfx.config.max_shader_engines; se++) + for (cu = 0; cu < CU_ID_MAX; cu++) + for (simd = 0; simd < SIMD_ID_MAX; simd++) + for (wave = 0; wave < WAVE_ID_MAX; wave++) { + if (((1 << wave) & mask) && + (wb_ptr[offset] == pattern)) + wave_cnt++; + + offset++; + } + + if (wave_cnt == num_wave) + return 0; + + mdelay(1); + } while (++loop < 2000 && wait); + + dev_err(adev->dev, "actual wave num: %d, expected wave num: %d\n", + wave_cnt, num_wave); + + gfx_v9_4_2_log_wave_assignment(adev, wb_ptr); + + return -EBADSLT; +} + +static int gfx_v9_4_2_do_sgprs_init(struct amdgpu_device *adev) +{ + int r; + int wb_size = adev->gfx.config.max_shader_engines * + CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX; + struct amdgpu_ib wb_ib; + struct amdgpu_ib disp_ibs[3]; + struct dma_fence *fences[3]; + u32 pattern[3] = { 0x1, 0x5, 0xa }; + + /* bail if the compute ring is not ready */ + if (!adev->gfx.compute_ring[0].sched.ready || + 
!adev->gfx.compute_ring[1].sched.ready) + return 0; + + /* allocate the write-back buffer from IB */ + memset(&wb_ib, 0, sizeof(wb_ib)); + r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t), + AMDGPU_IB_POOL_DIRECT, &wb_ib); + if (r) { + dev_err(adev->dev, "failed to get ib (%d) for wb\n", r); + return r; + } + memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t)); + + r = gfx_v9_4_2_run_shader(adev, + &adev->gfx.compute_ring[0], + &disp_ibs[0], + sgpr112_init_compute_shader_aldebaran, + sizeof(sgpr112_init_compute_shader_aldebaran), + sgpr112_init_regs_aldebaran, + ARRAY_SIZE(sgpr112_init_regs_aldebaran), + adev->gfx.cu_info.number, + wb_ib.gpu_addr, pattern[0], &fences[0]); + if (r) { + dev_err(adev->dev, "failed to clear first 224 sgprs\n"); + goto pro_end; + } + + r = gfx_v9_4_2_wait_for_waves_assigned(adev, + &wb_ib.ptr[1], 0b11, + pattern[0], + adev->gfx.cu_info.number * SIMD_ID_MAX * 2, + true); + if (r) { + dev_err(adev->dev, "wave coverage failed when clear first 224 sgprs\n"); + wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */ + goto disp0_failed; + } + + r = gfx_v9_4_2_run_shader(adev, + &adev->gfx.compute_ring[1], + &disp_ibs[1], + sgpr96_init_compute_shader_aldebaran, + sizeof(sgpr96_init_compute_shader_aldebaran), + sgpr96_init_regs_aldebaran, + ARRAY_SIZE(sgpr96_init_regs_aldebaran), + adev->gfx.cu_info.number * 2, + wb_ib.gpu_addr, pattern[1], &fences[1]); + if (r) { + dev_err(adev->dev, "failed to clear next 576 sgprs\n"); + goto disp0_failed; + } + + r = gfx_v9_4_2_wait_for_waves_assigned(adev, + &wb_ib.ptr[1], 0b11111100, + pattern[1], adev->gfx.cu_info.number * SIMD_ID_MAX * 6, + true); + if (r) { + dev_err(adev->dev, "wave coverage failed when clear first 576 sgprs\n"); + wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */ + goto disp1_failed; + } + + wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */ + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(fences[0], false); + if (r) { + dev_err(adev->dev, "timeout to clear first 224 sgprs\n"); + goto disp1_failed; + } + + r = dma_fence_wait(fences[1], false); + if (r) { + dev_err(adev->dev, "timeout to clear first 576 sgprs\n"); + goto disp1_failed; + } + + memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t)); + r = gfx_v9_4_2_run_shader(adev, + &adev->gfx.compute_ring[0], + &disp_ibs[2], + sgpr64_init_compute_shader_aldebaran, + sizeof(sgpr64_init_compute_shader_aldebaran), + sgpr64_init_regs_aldebaran, + ARRAY_SIZE(sgpr64_init_regs_aldebaran), + adev->gfx.cu_info.number, + wb_ib.gpu_addr, pattern[2], &fences[2]); + if (r) { + dev_err(adev->dev, "failed to clear first 256 sgprs\n"); + goto disp1_failed; + } + + r = gfx_v9_4_2_wait_for_waves_assigned(adev, + &wb_ib.ptr[1], 0b1111, + pattern[2], + adev->gfx.cu_info.number * SIMD_ID_MAX * 4, + true); + if (r) { + dev_err(adev->dev, "wave coverage failed when clear first 256 sgprs\n"); + wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */ + goto disp2_failed; + } + + wb_ib.ptr[0] = 0xdeadbeaf; /* stop waves */ + + r = dma_fence_wait(fences[2], false); + if (r) { + dev_err(adev->dev, "timeout to clear first 256 sgprs\n"); + goto disp2_failed; + } + +disp2_failed: + amdgpu_ib_free(adev, &disp_ibs[2], NULL); + dma_fence_put(fences[2]); +disp1_failed: + amdgpu_ib_free(adev, &disp_ibs[1], NULL); + dma_fence_put(fences[1]); +disp0_failed: + amdgpu_ib_free(adev, &disp_ibs[0], NULL); + dma_fence_put(fences[0]); +pro_end: + amdgpu_ib_free(adev, &wb_ib, NULL); + + if (r) + dev_info(adev->dev, "Init SGPRS Failed\n"); + else + dev_info(adev->dev, "Init SGPRS Successfully\n"); 
+ + return r; +} + +static int gfx_v9_4_2_do_vgprs_init(struct amdgpu_device *adev) +{ + int r; + /* CU_ID: 0~15, SIMD_ID: 0~3, WAVE_ID: 0 ~ 9 */ + int wb_size = adev->gfx.config.max_shader_engines * + CU_ID_MAX * SIMD_ID_MAX * WAVE_ID_MAX; + struct amdgpu_ib wb_ib; + struct amdgpu_ib disp_ib; + struct dma_fence *fence; + u32 pattern = 0xa; + + /* bail if the compute ring is not ready */ + if (!adev->gfx.compute_ring[0].sched.ready) + return 0; + + /* allocate the write-back buffer from IB */ + memset(&wb_ib, 0, sizeof(wb_ib)); + r = amdgpu_ib_get(adev, NULL, (1 + wb_size) * sizeof(uint32_t), + AMDGPU_IB_POOL_DIRECT, &wb_ib); + if (r) { + dev_err(adev->dev, "failed to get ib (%d) for wb.\n", r); + return r; + } + memset(wb_ib.ptr, 0, (1 + wb_size) * sizeof(uint32_t)); + + r = gfx_v9_4_2_run_shader(adev, + &adev->gfx.compute_ring[0], + &disp_ib, + vgpr_init_compute_shader_aldebaran, + sizeof(vgpr_init_compute_shader_aldebaran), + vgpr_init_regs_aldebaran, + ARRAY_SIZE(vgpr_init_regs_aldebaran), + adev->gfx.cu_info.number, + wb_ib.gpu_addr, pattern, &fence); + if (r) { + dev_err(adev->dev, "failed to clear vgprs\n"); + goto pro_end; + } + + /* wait for the GPU to finish processing the IB */ + r = dma_fence_wait(fence, false); + if (r) { + dev_err(adev->dev, "timeout to clear vgprs\n"); + goto disp_failed; + } + + r = gfx_v9_4_2_wait_for_waves_assigned(adev, + &wb_ib.ptr[1], 0b1, + pattern, + adev->gfx.cu_info.number * SIMD_ID_MAX, + false); + if (r) { + dev_err(adev->dev, "failed to cover all simds when clearing vgprs\n"); + goto disp_failed; + } + +disp_failed: + amdgpu_ib_free(adev, &disp_ib, NULL); + dma_fence_put(fence); +pro_end: + amdgpu_ib_free(adev, &wb_ib, NULL); + + if (r) + dev_info(adev->dev, "Init VGPRS Failed\n"); + else + dev_info(adev->dev, "Init VGPRS Successfully\n"); + + return r; +} + +int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev) +{ + /* only support when RAS is enabled */ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + return 0; + + gfx_v9_4_2_do_sgprs_init(adev); + + gfx_v9_4_2_do_vgprs_init(adev); + + return 0; +} + static void gfx_v9_4_2_query_sq_timeout_status(struct amdgpu_device *adev); static void gfx_v9_4_2_reset_sq_timeout_status(struct amdgpu_device *adev); @@ -808,8 +1442,9 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = { REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) }, }; -static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = - { SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 }; +static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs = { + SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 +}; static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev, const struct soc15_reg_entry *reg, @@ -1039,13 +1674,16 @@ static void gfx_v9_4_2_reset_utc_err_status(struct amdgpu_device *adev) static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev) { uint32_t i, j; + uint32_t value; + + value = REG_SET_FIELD(0, GCEA_ERR_STATUS, CLEAR_ERROR_STATUS, 0x1); mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) { for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance; j++) { gfx_v9_4_2_select_se_sh(adev, i, 0, j); - WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10); + WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), value); } } gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h 
b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h index 81c5833b6b9f..6db1f88509af 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.h @@ -29,6 +29,7 @@ void gfx_v9_4_2_debug_trap_config_init(struct amdgpu_device *adev, void gfx_v9_4_2_init_golden_registers(struct amdgpu_device *adev, uint32_t die_id); void gfx_v9_4_2_set_power_brake_sequence(struct amdgpu_device *adev); +int gfx_v9_4_2_do_edc_gpr_workarounds(struct amdgpu_device *adev); extern const struct amdgpu_gfx_ras_funcs gfx_v9_4_2_ras_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index 1e4678cb98f0..a03fdd41212b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -283,10 +283,14 @@ static void gfxhub_v1_0_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, block_size); - /* Send no-retry XNACK on fault to suppress VM fault storm. */ + /* Send no-retry XNACK on fault to suppress VM fault storm. + * On Aldebaran, XNACK can be enabled in the SQ per-process. + * Retry faults need to be enabled for that to work. + */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !adev->gmc.noretry); + !adev->gmc.noretry || + adev->asic_type == CHIP_ALDEBARAN); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 455bb91060d0..093ab98c31bb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -53,6 +53,7 @@ #include "mmhub_v1_7.h" #include "umc_v6_1.h" #include "umc_v6_0.h" +#include "hdp_v4_0.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" @@ -1210,6 +1211,11 @@ static void gmc_v9_0_set_gfxhub_funcs(struct amdgpu_device *adev) adev->gfxhub.funcs = &gfxhub_v1_0_funcs; } +static void gmc_v9_0_set_hdp_ras_funcs(struct amdgpu_device *adev) +{ + adev->hdp.ras_funcs = &hdp_v4_0_ras_funcs; +} + static int gmc_v9_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -1230,6 +1236,7 @@ static int gmc_v9_0_early_init(void *handle) gmc_v9_0_set_mmhub_funcs(adev); gmc_v9_0_set_mmhub_ras_funcs(adev); gmc_v9_0_set_gfxhub_funcs(adev); + gmc_v9_0_set_hdp_ras_funcs(adev); adev->gmc.shared_aperture_start = 0x2000000000000000ULL; adev->gmc.shared_aperture_end = @@ -1255,7 +1262,7 @@ static int gmc_v9_0_late_init(void *handle) * writes, while disables HBM ECC for vega10. 
*/ if (!amdgpu_sriov_vf(adev) && (adev->asic_type == CHIP_VEGA10)) { - if (!(adev->ras_features & (1 << AMDGPU_RAS_BLOCK__UMC))) { + if (!(adev->ras_enabled & (1 << AMDGPU_RAS_BLOCK__UMC))) { if (adev->df.funcs->enable_ecc_force_par_wr_rmw) adev->df.funcs->enable_ecc_force_par_wr_rmw(adev, false); } @@ -1265,6 +1272,10 @@ static int gmc_v9_0_late_init(void *handle) adev->mmhub.ras_funcs->reset_ras_error_count) adev->mmhub.ras_funcs->reset_ras_error_count(adev); + if (adev->hdp.ras_funcs && + adev->hdp.ras_funcs->reset_ras_error_count) + adev->hdp.ras_funcs->reset_ras_error_count(adev); + r = amdgpu_gmc_ras_late_init(adev); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c index edbd35d293eb..74b90cc2bf48 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -59,12 +59,31 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); } +static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev, + void *ras_error_status) +{ + struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status; + + err_data->ue_count = 0; + err_data->ce_count = 0; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP)) + return; + + /* HDP SRAM errors are uncorrectable ones (i.e. fatal errors) */ + err_data->ue_count += RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT); +}; + static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev) { if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP)) return; - /*read back hdp ras counter to reset it to 0 */ - RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT); + + if (adev->asic_type >= CHIP_ALDEBARAN) + WREG32_SOC15(HDP, 0, mmHDP_EDC_CNT, 0); + else + /*read back hdp ras counter to reset it to 0 */ + RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT); } static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev, @@ -130,10 +149,16 @@ static void hdp_v4_0_init_registers(struct amdgpu_device *adev) WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); } +const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs = { + .ras_late_init = amdgpu_hdp_ras_late_init, + .ras_fini = amdgpu_hdp_ras_fini, + .query_ras_error_count = hdp_v4_0_query_ras_error_count, + .reset_ras_error_count = hdp_v4_0_reset_ras_error_count, +}; + const struct amdgpu_hdp_funcs hdp_v4_0_funcs = { .flush_hdp = hdp_v4_0_flush_hdp, .invalidate_hdp = hdp_v4_0_invalidate_hdp, - .reset_ras_error_count = hdp_v4_0_reset_ras_error_count, .update_clock_gating = hdp_v4_0_update_clock_gating, .get_clock_gating_state = hdp_v4_0_get_clockgating_state, .init_registers = hdp_v4_0_init_registers, diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h index d1e6399e8c46..dc3a1b81dd62 100644 --- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h @@ -27,5 +27,6 @@ #include "soc15_common.h" extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs; +extern const struct amdgpu_hdp_ras_funcs hdp_v4_0_ras_funcs; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c index 0103a5ab28e6..6264934b67ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c @@ -296,10 +296,12 @@ static void mmhub_v1_7_setup_vmid_config(struct amdgpu_device *adev) tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE, block_size); - /* Send no-retry XNACK on fault to suppress VM fault storm. 
*/ + /* On Aldebaran, XNACK can be enabled in the SQ per-process. + * Retry faults need to be enabled for that to work. + */ tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !adev->gmc.noretry); + 1); WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, regVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, @@ -1313,12 +1315,31 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev) } } +static void mmhub_v1_7_reset_ras_error_status(struct amdgpu_device *adev) +{ + int i; + uint32_t reg_value; + + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) + return; + + for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) { + reg_value = RREG32(SOC15_REG_ENTRY_OFFSET( + mmhub_v1_7_ea_err_status_regs[i])); + reg_value = REG_SET_FIELD(reg_value, MMEA0_ERR_STATUS, + CLEAR_ERROR_STATUS, 0x01); + WREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]), + reg_value); + } +} + const struct amdgpu_mmhub_ras_funcs mmhub_v1_7_ras_funcs = { .ras_late_init = amdgpu_mmhub_ras_late_init, .ras_fini = amdgpu_mmhub_ras_fini, .query_ras_error_count = mmhub_v1_7_query_ras_error_count, .reset_ras_error_count = mmhub_v1_7_reset_ras_error_count, .query_ras_error_status = mmhub_v1_7_query_ras_error_status, + .reset_ras_error_status = mmhub_v1_7_reset_ras_error_status, }; const struct amdgpu_mmhub_funcs mmhub_v1_7_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index a9899335d0b1..709ac576ac7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -569,9 +569,9 @@ static int mmhub_v2_3_set_clockgating(struct amdgpu_device *adev, return 0; mmhub_v2_3_update_medium_grain_clock_gating(adev, - state == AMD_CG_STATE_GATE ? true : false); + state == AMD_CG_STATE_GATE); mmhub_v2_3_update_medium_grain_light_sleep(adev, - state == AMD_CG_STATE_GATE ? 
true : false); + state == AMD_CG_STATE_GATE); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index d290ca0b06da..75d1f9b939b2 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -598,7 +598,7 @@ static void nv_pcie_gen3_enable(struct amdgpu_device *adev) static void nv_program_aspm(struct amdgpu_device *adev) { - if (amdgpu_aspm != 1) + if (!amdgpu_aspm) return; if (!(adev->flags & AMD_IS_APU) && @@ -1068,6 +1068,7 @@ static int nv_common_early_init(void *handle) case CHIP_SIENNA_CICHLID: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_MC_MGCG | AMD_CG_SUPPORT_VCN_MGCG | @@ -1091,6 +1092,7 @@ static int nv_common_early_init(void *handle) case CHIP_NAVY_FLOUNDER: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | @@ -1121,6 +1123,8 @@ static int nv_common_early_init(void *handle) AMD_CG_SUPPORT_MC_LS | AMD_CG_SUPPORT_GFX_FGCG | AMD_CG_SUPPORT_VCN_MGCG | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | AMD_CG_SUPPORT_JPEG_MGCG; adev->pg_flags = AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_VCN | @@ -1132,6 +1136,7 @@ static int nv_common_early_init(void *handle) case CHIP_DIMGREY_CAVEFISH: adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | AMD_CG_SUPPORT_GFX_3D_CGCG | AMD_CG_SUPPORT_VCN_MGCG | AMD_CG_SUPPORT_JPEG_MGCG | diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 96064c343163..f6d3180febc4 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -97,7 +97,6 @@ enum psp_gfx_cmd_id GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ GFX_CMD_ID_PROG_REG = 0x0000000B, /* program regs */ - GFX_CMD_ID_CLEAR_VF_FW = 0x0000000D, /* Clear VF FW, to be used on VF shutdown. 
*/ GFX_CMD_ID_GET_FW_ATTESTATION = 0x0000000F, /* Query GPUVA of the Fw Attestation DB */ /* IDs upto 0x1F are reserved for older programs (Raven, Vega 10/12/20) */ GFX_CMD_ID_LOAD_TOC = 0x00000020, /* Load TOC and obtain TMR size */ diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 5715be6770ec..d197185f7789 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -1109,6 +1109,8 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) if (adev->asic_type == CHIP_ARCTURUS && adev->sdma.instance[i].fw_version >= 14) WREG32_SDMA(i, mmSDMA0_PUB_DUMMY_REG2, enable); + /* Extend page fault timeout to avoid interrupt storm */ + WREG32_SDMA(i, mmSDMA0_UTCL1_TIMEOUT, 0x00800080); } } @@ -2227,7 +2229,7 @@ static int sdma_v4_0_print_iv_entry(struct amdgpu_device *adev, memset(&task_info, 0, sizeof(struct amdgpu_task_info)); amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); - dev_info(adev->dev, + dev_dbg_ratelimited(adev->dev, "[sdma%d] address:0x%016llx src_id:%u ring:%u vmid:%u " "pasid:%u, for process %s pid %d thread %s pid %d\n", instance, addr, entry->src_id, entry->ring_id, entry->vmid, @@ -2240,7 +2242,7 @@ static int sdma_v4_0_process_vm_hole_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, "MC or SEM address in VM hole\n"); + dev_dbg_ratelimited(adev->dev, "MC or SEM address in VM hole\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; } @@ -2249,7 +2251,7 @@ static int sdma_v4_0_process_doorbell_invalid_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n"); + dev_dbg_ratelimited(adev->dev, "SDMA received a doorbell from BIF with byte_enable !=0xff\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; } @@ -2258,7 +2260,7 @@ static int sdma_v4_0_process_pool_timeout_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, + dev_dbg_ratelimited(adev->dev, "Polling register/memory timeout executing POLL_REG/MEM with finite timer\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; @@ -2268,7 +2270,7 @@ static int sdma_v4_0_process_srbm_write_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - dev_err(adev->dev, + dev_dbg_ratelimited(adev->dev, "SDMA gets an Register Write SRBM_WRITE command in non-privilege command buffer\n"); sdma_v4_0_print_iv_entry(adev, entry); return 0; @@ -2597,27 +2599,18 @@ static const struct amdgpu_irq_src_funcs sdma_v4_0_srbm_write_irq_funcs = { static void sdma_v4_0_set_irq_funcs(struct amdgpu_device *adev) { + adev->sdma.trap_irq.num_types = adev->sdma.num_instances; + adev->sdma.ecc_irq.num_types = adev->sdma.num_instances; + /*For Arcturus and Aldebaran, add another 4 irq handler*/ switch (adev->sdma.num_instances) { - case 1: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE1; - break; case 5: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5; - break; case 8: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.vm_hole_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE5; - adev->sdma.doorbell_invalid_irq.num_types = AMDGPU_SDMA_IRQ_LAST; 
- adev->sdma.pool_timeout_irq.num_types = AMDGPU_SDMA_IRQ_LAST; - adev->sdma.srbm_write_irq.num_types = AMDGPU_SDMA_IRQ_LAST; + adev->sdma.vm_hole_irq.num_types = adev->sdma.num_instances; + adev->sdma.doorbell_invalid_irq.num_types = adev->sdma.num_instances; + adev->sdma.pool_timeout_irq.num_types = adev->sdma.num_instances; + adev->sdma.srbm_write_irq.num_types = adev->sdma.num_instances; break; - case 2: default: - adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; - adev->sdma.ecc_irq.num_types = AMDGPU_SDMA_IRQ_INSTANCE2; break; } adev->sdma.trap_irq.funcs = &sdma_v4_0_trap_irq_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index 920fc6d4a127..04c68a79eca4 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -405,18 +405,6 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, unsigned vmid = AMDGPU_JOB_GET_VMID(job); uint64_t csa_mc_addr = amdgpu_sdma_get_csa_mc_addr(ring, vmid); - /* Invalidate L2, because if we don't do it, we might get stale cache - * lines from previous IBs. - */ - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ)); - amdgpu_ring_write(ring, 0); - amdgpu_ring_write(ring, (SDMA_GCR_GL2_INV | - SDMA_GCR_GL2_WB | - SDMA_GCR_GLM_INV | - SDMA_GCR_GLM_WB) << 16); - amdgpu_ring_write(ring, 0xffffff80); - amdgpu_ring_write(ring, 0xffff); - /* An IB packet must end on a 8 DW boundary--the next dword * must be on a 8-dword boundary. Our IB packet below is 6 * dwords long, thus add x number of NOPs, such that, in @@ -438,6 +426,33 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring, } /** + * sdma_v5_0_ring_emit_mem_sync - flush the IB by graphics cache rinse + * + * @ring: amdgpu ring pointer + * @job: job to retrieve vmid from + * @ib: IB object to schedule + * + * flush the IB by graphics cache rinse. 
+ */ +static void sdma_v5_0_ring_emit_mem_sync(struct amdgpu_ring *ring) +{ + uint32_t gcr_cntl = + SDMA_GCR_GL2_INV | SDMA_GCR_GL2_WB | SDMA_GCR_GLM_INV | + SDMA_GCR_GL1_INV | SDMA_GCR_GLV_INV | SDMA_GCR_GLK_INV | + SDMA_GCR_GLI_INV(1); + + /* flush entire cache L0/L1/L2, this can be optimized by performance requirement */ + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_GCR_REQ)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD1_BASE_VA_31_7(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD2_GCR_CONTROL_15_0(gcr_cntl) | + SDMA_PKT_GCR_REQ_PAYLOAD2_BASE_VA_47_32(0)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD3_LIMIT_VA_31_7(0) | + SDMA_PKT_GCR_REQ_PAYLOAD3_GCR_CONTROL_18_16(gcr_cntl >> 16)); + amdgpu_ring_write(ring, SDMA_PKT_GCR_REQ_PAYLOAD4_LIMIT_VA_47_32(0) | + SDMA_PKT_GCR_REQ_PAYLOAD4_VMID(0)); +} + +/** * sdma_v5_0_ring_emit_hdp_flush - emit an hdp flush on the DMA ring * * @ring: amdgpu ring pointer @@ -1643,6 +1658,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = { 10 + 10 + 10, /* sdma_v5_0_ring_emit_fence x3 for user fence, vm fence */ .emit_ib_size = 5 + 7 + 6, /* sdma_v5_0_ring_emit_ib */ .emit_ib = sdma_v5_0_ring_emit_ib, + .emit_mem_sync = sdma_v5_0_ring_emit_mem_sync, .emit_fence = sdma_v5_0_ring_emit_fence, .emit_pipeline_sync = sdma_v5_0_ring_emit_pipeline_sync, .emit_vm_flush = sdma_v5_0_ring_emit_vm_flush, diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index b1ad9e52b234..7c4e0586e26d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -1556,6 +1556,10 @@ static void sdma_v5_2_update_medium_grain_clock_gating(struct amdgpu_device *ade int i; for (i = 0; i < adev->sdma.num_instances; i++) { + + if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH) + adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_MGCG; + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) { /* Enable sdma clock gating */ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CLK_CTRL)); @@ -1589,6 +1593,10 @@ static void sdma_v5_2_update_medium_grain_light_sleep(struct amdgpu_device *adev int i; for (i = 0; i < adev->sdma.num_instances; i++) { + + if (adev->sdma.instance[i].fw_version < 70 && adev->asic_type == CHIP_VANGOGH) + adev->cg_flags &= ~AMD_CG_SUPPORT_SDMA_LS; + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) { /* Enable sdma mem light sleep */ def = data = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_POWER_CNTL)); diff --git a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c index 079b094c48ad..3c47c94846d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c +++ b/drivers/gpu/drm/amd/amdgpu/smuio_v13_0.c @@ -89,6 +89,23 @@ static u32 smuio_v13_0_get_die_id(struct amdgpu_device *adev) } /** + * smuio_v13_0_get_socket_id - query socket id from FCH + * + * @adev: amdgpu device pointer + * + * Returns socket id + */ +static u32 smuio_v13_0_get_socket_id(struct amdgpu_device *adev) +{ + u32 data, socket_id; + + data = RREG32_SOC15(SMUIO, 0, regSMUIO_MCM_CONFIG); + socket_id = REG_GET_FIELD(data, SMUIO_MCM_CONFIG, SOCKET_ID); + + return socket_id; +} + +/** * smuio_v13_0_supports_host_gpu_xgmi - detect xgmi interface between cpu and gpu/s. 
* * @adev: amdgpu device pointer @@ -115,6 +132,7 @@ const struct amdgpu_smuio_funcs smuio_v13_0_funcs = { .get_rom_index_offset = smuio_v13_0_get_rom_index_offset, .get_rom_data_offset = smuio_v13_0_get_rom_data_offset, .get_die_id = smuio_v13_0_get_die_id, + .get_socket_id = smuio_v13_0_get_socket_id, .is_host_gpu_xgmi_supported = smuio_v13_0_is_host_gpu_xgmi_supported, .update_rom_clock_gating = smuio_v13_0_update_rom_clock_gating, .get_clock_gating_state = smuio_v13_0_get_clock_gating_state, diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8e1b9a40839f..4b660b2d1c22 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -655,7 +655,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) int ret = 0; /* avoid NBIF got stuck when do RAS recovery in BACO reset */ - if (ras && ras->supported) + if (ras && adev->ras_enabled) adev->nbio.funcs->enable_doorbell_interrupt(adev, false); ret = amdgpu_dpm_baco_reset(adev); @@ -663,7 +663,7 @@ static int soc15_asic_baco_reset(struct amdgpu_device *adev) return ret; /* re-enable doorbell interrupt after BACO exit */ - if (ras && ras->supported) + if (ras && adev->ras_enabled) adev->nbio.funcs->enable_doorbell_interrupt(adev, true); return 0; @@ -710,7 +710,8 @@ soc15_asic_reset_method(struct amdgpu_device *adev) * 1. PMFW version > 0x284300: all cases use baco * 2. PMFW version <= 0x284300: only sGPU w/o RAS use baco */ - if ((ras && ras->supported) && adev->pm.fw_version <= 0x283400) + if (ras && adev->ras_enabled && + adev->pm.fw_version <= 0x283400) baco_reset = false; break; case CHIP_ALDEBARAN: @@ -816,7 +817,7 @@ static void soc15_pcie_gen3_enable(struct amdgpu_device *adev) static void soc15_program_aspm(struct amdgpu_device *adev) { - if (amdgpu_aspm != 1) + if (!amdgpu_aspm) return; if (!(adev->flags & AMD_IS_APU) && @@ -1522,9 +1523,6 @@ static int soc15_common_late_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); - if (adev->hdp.funcs->reset_ras_error_count) - adev->hdp.funcs->reset_ras_error_count(adev); - if (adev->nbio.ras_funcs && adev->nbio.ras_funcs->ras_late_init) r = adev->nbio.ras_funcs->ras_late_init(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h index 745ed0fba1ed..0f214a398dd8 100644 --- a/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h +++ b/drivers/gpu/drm/amd/amdgpu/ta_ras_if.h @@ -105,6 +105,12 @@ struct ta_ras_trigger_error_input { uint64_t value; // method if error injection. i.e persistent, coherent etc. }; +struct ta_ras_init_flags +{ + uint8_t poison_mode_en; + uint8_t dgpu_mode; +}; + struct ta_ras_output_flags { uint8_t ras_init_success_flag; @@ -115,6 +121,7 @@ struct ta_ras_output_flags /* Common input structure for RAS callbacks */ /**********************************************************/ union ta_ras_cmd_input { + struct ta_ras_init_flags init_flags; struct ta_ras_enable_features_input enable_features; struct ta_ras_disable_features_input disable_features; struct ta_ras_trigger_error_input trigger_error; diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index ca8efa5c6978..2f17c8a57015 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -104,6 +104,7 @@ static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, tmp = RREG32(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 
1 : 0)); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1); /* enable_intr field is only valid in ring0 */ if (ih == &adev->irq.ih) tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 735ebbd1148f..3d21c0799037 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -1136,7 +1136,7 @@ static void vi_program_aspm(struct amdgpu_device *adev) bool bL1SS = false; bool bClkReqSupport = true; - if (amdgpu_aspm != 1) + if (!amdgpu_aspm) return; if (adev->flags & AMD_IS_APU || |