diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-11-01 06:28:35 -1000 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-11-01 06:28:35 -1000 |
commit | 7d461b291e65938f15f56fe58da2303b07578a76 (patch) | |
tree | 015dd7c2f1743dd70be52787dd9aff33822bc938 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | |
parent | 8bc9e6515183935fa0cccaf67455c439afe4982b (diff) | |
parent | 631808095a82e6b6f8410a95f8b12b8d0d38b161 (diff) |
Merge tag 'drm-next-2023-10-31-1' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"Highlights:
- AMD adds some more upcoming HW platforms
- Intel made Meteorlake stable and started adding Lunarlake
- nouveau has a bunch of display rework in prepartion for the NVIDIA
GSP firmware support
- msm adds a7xx support
- habanalabs has finished migration to accel subsystem
Detail summary:
kernel:
- add initial vmemdup-user-array
core:
- fix platform remove() to return void
- drm_file owner updated to reflect owner
- move size calcs to drm buddy allocator
- let GPUVM build as a module
- allow variable number of run-queues in scheduler
edid:
- handle bad h/v sync_end in EDIDs
panfrost:
- add Boris as maintainer
fbdev:
- use fb_ops helpers more
- only allow logo use from fbcon
- rename fb_pgproto to pgprot_framebuffer
- add HPD state to drm_connector_oob_hotplug_event
- convert to fbdev i/o mem helpers
i915:
- Enable meteorlake by default
- Early Xe2 LPD/Lunarlake display enablement
- Rework subplatforms into IP version checks
- GuC based TLB invalidation for Meteorlake
- Display rework for future Xe driver integration
- LNL FBC features
- LNL display feature capability reads
- update recommended fw versions for DG2+
- drop fastboot module parameter
- added deviceid for Arrowlake-S
- drop preproduction workarounds
- don't disable preemption for resets
- cleanup inlines in headers
- PXP firmware loading fix
- Fix sg list lengths
- DSC PPS state readout/verification
- Add more RPL P/U PCI IDs
- Add new DG2-G12 stepping
- DP enhanced framing support to state checker
- Improve shared link bandwidth management
- stop using GEM macros in display code
- refactor related code into display code
- locally enable W=1 warnings
- remove PSR watchdog timers on LNL
amdgpu:
- RAS/FRU EEPROM updatse
- IP discovery updatses
- GC 11.5 support
- DCN 3.5 support
- VPE 6.1 support
- NBIO 7.11 support
- DML2 support
- lots of IP updates
- use flexible arrays for bo list handling
- W=1 fixes
- Enable seamless boot in more cases
- Enable context type property for HDMI
- Rework GPUVM TLB flushing
- VCN IB start/size alignment fixes
amdkfd:
- GC 10/11 fixes
- GC 11.5 support
- use partial migration in GPU faults
radeon:
- W=1 Fixes
- fix some possible buffer overflow/NULL derefs
nouveau:
- update uapi for NO_PREFETCH
- scheduler/fence fixes
- rework suspend/resume for GSP-RM
- rework display in preparation for GSP-RM
habanalabs:
- uapi: expose tsc clock
- uapi: block access to eventfd through control device
- uapi: force dma-buf export to PAGE_SIZE alignments
- complete move to accel subsystem
- move firmware interface include files
- perform hard reset on PCIe AXI drain event
- optimise user interrupt handling
msm:
- DP: use existing helpers for DPCD
- DPU: interrupts reworked
- gpu: a7xx (a730/a740) support
- decouple msm_drv from kms for headless devices
mediatek:
- MT8188 dsi/dp/edp support
- DDP GAMMA - 12 bit LUT support
- connector dynamic selection capability
rockchip:
- rv1126 mipi-dsi/vop support
- add planar formats
ast:
- rename constants
panels:
- Mitsubishi AA084XE01
- JDI LPM102A188A
- LTK050H3148W-CTA6
ivpu:
- power management fixes
qaic:
- add detach slice bo api
komeda:
- add NV12 writeback
tegra:
- support NVSYNC/NHSYNC
- host1x suspend fixes
ili9882t:
- separate into own driver"
* tag 'drm-next-2023-10-31-1' of git://anongit.freedesktop.org/drm/drm: (1803 commits)
drm/amdgpu: Remove unused variables from amdgpu_show_fdinfo
drm/amdgpu: Remove duplicate fdinfo fields
drm/amd/amdgpu: avoid to disable gfxhub interrupt when driver is unloaded
drm/amdgpu: Add EXT_COHERENT support for APU and NUMA systems
drm/amdgpu: Retrieve CE count from ce_count_lo_chip in EccInfo table
drm/amdgpu: Identify data parity error corrected in replay mode
drm/amdgpu: Fix typo in IP discovery parsing
drm/amd/display: fix S/G display enablement
drm/amdxcp: fix amdxcp unloads incompletely
drm/amd/amdgpu: fix the GPU power print error in pm info
drm/amdgpu: Use pcie domain of xcc acpi objects
drm/amd: check num of link levels when update pcie param
drm/amdgpu: Add a read to GFX v9.4.3 ring test
drm/amd/pm: call smu_cmn_get_smc_version in is_mode1_reset_supported.
drm/amdgpu: get RAS poison status from DF v4_6_2
drm/amdgpu: Use discovery table's subrevision
drm/amd/display: 3.2.256
drm/amd/display: add interface to query SubVP status
drm/amd/display: Read before writing Backlight Mode Set Register
drm/amd/display: Disable SYMCLK32_SE RCO on DCN314
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 57 |
1 files changed, 46 insertions, 11 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index e036011137aa..1eccad4ce243 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -44,6 +44,7 @@ * changes to accumulate */ #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 +#define AMDGPU_RESERVE_MEM_LIMIT (3UL << 29) /* * Align VRAM availability to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB @@ -117,11 +118,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) return; si_meminfo(&si); - mem = si.freeram - si.freehigh; + mem = si.totalram - si.totalhigh; mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); - kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); + kfd_mem_limit.max_system_mem_limit = mem - (mem >> 6); + if (kfd_mem_limit.max_system_mem_limit < 2 * AMDGPU_RESERVE_MEM_LIMIT) + kfd_mem_limit.max_system_mem_limit >>= 1; + else + kfd_mem_limit.max_system_mem_limit -= AMDGPU_RESERVE_MEM_LIMIT; + kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT; pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), @@ -733,7 +739,7 @@ kfd_mem_dmaunmap_sg_bo(struct kgd_mem *mem, enum dma_data_direction dir; if (unlikely(!ttm->sg)) { - pr_err("SG Table of BO is UNEXPECTEDLY NULL"); + pr_debug("SG Table of BO is NULL"); return; } @@ -828,6 +834,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, uint64_t va = mem->va; struct kfd_mem_attachment *attachment[2] = {NULL, NULL}; struct amdgpu_bo *bo[2] = {NULL, NULL}; + struct amdgpu_bo_va *bo_va; bool same_hive = false; int i, ret; @@ -866,9 +873,10 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, if ((adev == bo_adev && !(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) || (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && reuse_dmamap(adev, bo_adev)) || - same_hive) { + (mem->domain == AMDGPU_GEM_DOMAIN_GTT && reuse_dmamap(adev, bo_adev)) || + same_hive) { /* Mappings on the local GPU, or VRAM mappings in the - * local hive, or userptr mapping can reuse dma map + * local hive, or userptr, or GTT mapping can reuse dma map * address space share the original BO */ attachment[i]->type = KFD_MEM_ATT_SHARED; @@ -914,7 +922,12 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, pr_debug("Unable to reserve BO during memory attach"); goto unwind; } - attachment[i]->bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + bo_va = amdgpu_vm_bo_find(vm, bo[i]); + if (!bo_va) + bo_va = amdgpu_vm_bo_add(adev, vm, bo[i]); + else + ++bo_va->ref_count; + attachment[i]->bo_va = bo_va; amdgpu_bo_unreserve(bo[i]); if (unlikely(!attachment[i]->bo_va)) { ret = -ENOMEM; @@ -938,7 +951,8 @@ unwind: continue; if (attachment[i]->bo_va) { amdgpu_bo_reserve(bo[i], true); - amdgpu_vm_bo_del(adev, attachment[i]->bo_va); + if (--attachment[i]->bo_va->ref_count == 0) + amdgpu_vm_bo_del(adev, attachment[i]->bo_va); amdgpu_bo_unreserve(bo[i]); list_del(&attachment[i]->list); } @@ -955,7 +969,8 @@ static void kfd_mem_detach(struct kfd_mem_attachment *attachment) pr_debug("\t remove VA 0x%llx in entry %p\n", attachment->va, attachment); - amdgpu_vm_bo_del(attachment->adev, attachment->bo_va); + if (--attachment->bo_va->ref_count == 0) + amdgpu_vm_bo_del(attachment->adev, attachment->bo_va); drm_gem_object_put(&bo->tbo.base); list_del(&attachment->list); kfree(attachment); @@ -1201,8 +1216,6 @@ static void unmap_bo_from_gpuvm(struct kgd_mem *mem, amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); amdgpu_sync_fence(sync, bo_va->last_pt_update); - - kfd_mem_dmaunmap_attachment(mem, entry); } static int update_gpuvm_pte(struct kgd_mem *mem, @@ -1257,6 +1270,7 @@ static int map_bo_to_gpuvm(struct kgd_mem *mem, update_gpuvm_pte_failed: unmap_bo_from_gpuvm(mem, entry, sync); + kfd_mem_dmaunmap_attachment(mem, entry); return ret; } @@ -1690,6 +1704,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT) alloc_flags |= AMDGPU_GEM_CREATE_COHERENT; + if (flags & KFD_IOC_ALLOC_MEM_FLAGS_EXT_COHERENT) + alloc_flags |= AMDGPU_GEM_CREATE_EXT_COHERENT; if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED) alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED; @@ -1860,8 +1876,10 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( mem->va + bo_size * (1 + mem->aql_queue)); /* Remove from VM internal data structures */ - list_for_each_entry_safe(entry, tmp, &mem->attachments, list) + list_for_each_entry_safe(entry, tmp, &mem->attachments, list) { + kfd_mem_dmaunmap_attachment(mem, entry); kfd_mem_detach(entry); + } ret = unreserve_bo_and_vms(&ctx, false, false); @@ -2035,6 +2053,23 @@ out: return ret; } +void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void *drm_priv) +{ + struct kfd_mem_attachment *entry; + struct amdgpu_vm *vm; + + vm = drm_priv_to_vm(drm_priv); + + mutex_lock(&mem->lock); + + list_for_each_entry(entry, &mem->attachments, list) { + if (entry->bo_va->base.vm == vm) + kfd_mem_dmaunmap_attachment(mem, entry); + } + + mutex_unlock(&mem->lock); +} + int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu( struct amdgpu_device *adev, struct kgd_mem *mem, void *drm_priv) { |