diff options
author | Dave Airlie <airlied@redhat.com> | 2024-04-30 14:42:54 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2024-04-30 14:43:00 +1000 |
commit | 4a56c0ed5aa0bcbe1f5f7d755fb1fe1ebf48ae9c (patch) | |
tree | ec7d1f08d654ef4b6e596c891eadb504df2f611e /drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | |
parent | 68b89e23c2282877b0d411e07a3ef90490d6fe30 (diff) | |
parent | b77bef36015c501f1e0f51db72c55e6dcd8bdd48 (diff) |
Merge tag 'amd-drm-next-6.10-2024-04-26' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.10-2024-04-26:
amdgpu:
- Misc code cleanups and refactors
- Support setting reset method at runtime
- Report OD status
- SMU 14.0.1 fixes
- SDMA 4.4.2 fixes
- VPE fixes
- MES fixes
- Update BO eviction priorities
- UMSCH fixes
- Reset fixes
- Freesync fixes
- GFXIP 9.4.3 fixes
- SDMA 5.2 fixes
- MES UAF fix
- RAS updates
- Devcoredump updates for dumping IP state
- DSC fixes
- JPEG fix
- Fix VRAM memory accounting
- VCN 5.0 fixes
- MES fixes
- UMC 12.0 updates
- Modify contiguous flags handling
- Initial support for mapping kernel queues via MES
amdkfd:
- Fix rescheduling of restore worker
- VRAM accounting for SVM migrations
- mGPU fix
- Enable SQ watchpoint for gfx10
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240426221245.1613332-1-alexander.deucher@amd.com
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 47 |
1 files changed, 47 insertions, 0 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 8d26989c75c8..c8980d5f6540 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -26,6 +26,9 @@ #include <linux/debugfs.h> #include <linux/list.h> +#include <linux/kfifo.h> +#include <linux/radix-tree.h> +#include <linux/siphash.h> #include "ta_ras_if.h" #include "amdgpu_ras_eeprom.h" #include "amdgpu_smuio.h" @@ -442,6 +445,37 @@ struct ras_query_context { u64 event_id; }; +typedef int (*pasid_notify)(struct amdgpu_device *adev, + uint16_t pasid, void *data); + +struct ras_poison_msg { + enum amdgpu_ras_block block; + uint16_t pasid; + uint32_t reset; + pasid_notify pasid_fn; + void *data; +}; + +struct ras_err_pages { + uint32_t count; + uint64_t *pfn; +}; + +struct ras_ecc_err { + u64 hash_index; + uint64_t status; + uint64_t ipid; + uint64_t addr; + struct ras_err_pages err_pages; +}; + +struct ras_ecc_log_info { + struct mutex lock; + siphash_key_t ecc_key; + struct radix_tree_root de_page_tree; + bool de_updated; +}; + struct amdgpu_ras { /* ras infrastructure */ /* for ras itself. */ @@ -500,6 +534,11 @@ struct amdgpu_ras { wait_queue_head_t page_retirement_wq; struct mutex page_retirement_lock; atomic_t page_retirement_req_cnt; + struct mutex page_rsv_lock; + DECLARE_KFIFO(poison_fifo, struct ras_poison_msg, 128); + struct ras_ecc_log_info umc_ecc_log; + struct delayed_work page_retirement_dwork; + /* Fatal error detected flag */ atomic_t fed; @@ -540,6 +579,7 @@ struct ras_err_data { unsigned long de_count; unsigned long err_addr_cnt; struct eeprom_table_record *err_addr; + unsigned long err_addr_len; u32 err_list_count; struct list_head err_node_list; }; @@ -909,4 +949,11 @@ bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id); u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); + +int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn); + +int amdgpu_ras_put_poison_req(struct amdgpu_device *adev, + enum amdgpu_ras_block block, uint16_t pasid, + pasid_notify pasid_fn, void *data, uint32_t reset); + #endif |