-rw-r--r--  drivers/gpu/drm/amd/amdgpu/Makefile               |   3
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c            |  62
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h            |   8
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c        |   4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c         |   2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c       |   4
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c  | 112
7 files changed, 175 insertions, 20 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 4536c8ad0e11..f24f11ac3e92 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -70,7 +70,8 @@ amdgpu-y += amdgpu_device.o amdgpu_doorbell_mgr.o amdgpu_kms.o \
 	amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o \
 	atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
 	atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
-	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_ib.o amdgpu_pll.o \
+	amdgpu_dma_buf.o amdgpu_vm.o amdgpu_vm_pt.o amdgpu_vm_tlb_fence.o \
+	amdgpu_ib.o amdgpu_pll.o \
 	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
 	amdgpu_gtt_mgr.o amdgpu_preempt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o \
 	amdgpu_atomfirmware.o amdgpu_vf_error.o amdgpu_sched.o \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 81fb3465e197..317d2d468a6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -886,6 +886,44 @@ static void amdgpu_vm_tlb_seq_cb(struct dma_fence *fence,
 }
 
 /**
+ * amdgpu_vm_tlb_flush - prepare TLB flush
+ *
+ * @params: parameters for update
+ * @fence: input fence to sync TLB flush with
+ * @tlb_cb: the callback structure
+ *
+ * Increments the tlb sequence to make sure that future CS execute a VM flush.
+ */
+static void
+amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
+		    struct dma_fence **fence,
+		    struct amdgpu_vm_tlb_seq_struct *tlb_cb)
+{
+	struct amdgpu_vm *vm = params->vm;
+
+	if (!fence || !*fence)
+		return;
+
+	tlb_cb->vm = vm;
+	if (!dma_fence_add_callback(*fence, &tlb_cb->cb,
+				    amdgpu_vm_tlb_seq_cb)) {
+		dma_fence_put(vm->last_tlb_flush);
+		vm->last_tlb_flush = dma_fence_get(*fence);
+	} else {
+		amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
+	}
+
+	/* Prepare a TLB flush fence to be attached to PTs */
+	if (!params->unlocked && vm->is_compute_context) {
+		amdgpu_vm_tlb_fence_create(params->adev, vm, fence);
+
+		/* Makes sure no PD/PT is freed before the flush */
+		dma_resv_add_fence(vm->root.bo->tbo.base.resv, *fence,
+				   DMA_RESV_USAGE_BOOKKEEP);
+	}
+}
+
+/**
  * amdgpu_vm_update_range - update a range in the vm page table
  *
  * @adev: amdgpu_device pointer to use for commands
@@ -916,8 +954,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			   struct ttm_resource *res, dma_addr_t *pages_addr,
 			   struct dma_fence **fence)
 {
-	struct amdgpu_vm_update_params params;
 	struct amdgpu_vm_tlb_seq_struct *tlb_cb;
+	struct amdgpu_vm_update_params params;
 	struct amdgpu_res_cursor cursor;
 	enum amdgpu_sync_mode sync_mode;
 	int r, idx;
@@ -927,8 +965,8 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	tlb_cb = kmalloc(sizeof(*tlb_cb), GFP_KERNEL);
 	if (!tlb_cb) {
-		r = -ENOMEM;
-		goto error_unlock;
+		drm_dev_exit(idx);
+		return -ENOMEM;
 	}
 
 	/* Vega20+XGMI where PTEs get inadvertently cached in L2 texture cache,
@@ -948,6 +986,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	params.immediate = immediate;
 	params.pages_addr = pages_addr;
 	params.unlocked = unlocked;
+	params.needs_flush = flush_tlb;
 	params.allow_override = allow_override;
 
 	/* Implicitly sync to command submissions in the same VM before
@@ -1031,24 +1070,16 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 
 	r = vm->update_funcs->commit(&params, fence);
+	if (r)
+		goto error_free;
 
-	if (flush_tlb || params.table_freed) {
-		tlb_cb->vm = vm;
-		if (fence && *fence &&
-		    !dma_fence_add_callback(*fence, &tlb_cb->cb,
-					    amdgpu_vm_tlb_seq_cb)) {
-			dma_fence_put(vm->last_tlb_flush);
-			vm->last_tlb_flush = dma_fence_get(*fence);
-		} else {
-			amdgpu_vm_tlb_seq_cb(NULL, &tlb_cb->cb);
-		}
+	if (params.needs_flush) {
+		amdgpu_vm_tlb_flush(&params, fence, tlb_cb);
 		tlb_cb = NULL;
 	}
 
 error_free:
 	kfree(tlb_cb);
-
-error_unlock:
 	amdgpu_vm_eviction_unlock(vm);
 	drm_dev_exit(idx);
 	return r;
@@ -2391,6 +2422,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
 	mutex_init(&vm->eviction_lock);
 	vm->evicting = false;
+	vm->tlb_fence_context = dma_fence_context_alloc(1);
 
 	r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
 				false, &root, xcp_id);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 8efa8422f4f7..b0a4fe683352 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -257,9 +257,9 @@ struct amdgpu_vm_update_params {
 	unsigned int num_dw_left;
 
 	/**
-	 * @table_freed: return true if page table is freed when updating
+	 * @needs_flush: true whenever we need to invalidate the TLB
 	 */
-	bool table_freed;
+	bool needs_flush;
 
 	/**
 	 * @allow_override: true for memory that is not uncached: allows MTYPE
@@ -342,6 +342,7 @@ struct amdgpu_vm {
 	atomic64_t		tlb_seq;
 	struct dma_fence	*last_tlb_flush;
 	atomic64_t		kfd_last_flushed_seq;
+	uint64_t		tlb_fence_context;
 
 	/* How many times we had to re-generate the page tables */
 	uint64_t		generation;
@@ -611,5 +612,8 @@ void amdgpu_vm_update_fault_cache(struct amdgpu_device *adev,
 				  uint64_t addr,
 				  uint32_t status,
 				  unsigned int vmhub);
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev,
+				struct amdgpu_vm *vm,
+				struct dma_fence **fence);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
index 6e31621452de..3895bd7d176a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
@@ -108,7 +108,9 @@ static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p,
 static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p,
 				struct dma_fence **fence)
 {
-	/* Flush HDP */
+	if (p->needs_flush)
+		atomic64_inc(&p->vm->tlb_seq);
+
 	mb();
 	amdgpu_device_flush_hdp(p->adev, NULL);
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 124389a6bf48..601df0ce8290 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -972,7 +972,7 @@ int amdgpu_vm_ptes_update(struct amdgpu_vm_update_params *params,
 			while (cursor.pfn < frag_start) {
 				/* Make sure previous mapping is freed */
 				if (cursor.entry->bo) {
-					params->table_freed = true;
+					params->needs_flush = true;
 					amdgpu_vm_pt_free_dfs(adev, params->vm,
 							      &cursor,
 							      params->unlocked);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 349416e176a1..66e8a016126b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -126,6 +126,10 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 
 	WARN_ON(ib->length_dw == 0);
 	amdgpu_ring_pad_ib(ring, ib);
+
+	if (p->needs_flush)
+		atomic64_inc(&p->vm->tlb_seq);
+
 	WARN_ON(ib->length_dw > p->num_dw_left);
 	f = amdgpu_job_submit(p->job);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
new file mode 100644
index 000000000000..51cddfa3f1e8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_tlb_fence.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Copyright 2023 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/workqueue.h>
+
+#include "amdgpu.h"
+#include "amdgpu_vm.h"
+#include "amdgpu_gmc.h"
+
+struct amdgpu_tlb_fence {
+	struct dma_fence	base;
+	struct amdgpu_device	*adev;
+	struct dma_fence	*dependency;
+	struct work_struct	work;
+	spinlock_t		lock;
+	uint16_t		pasid;
+
+};
+
+static const char *amdgpu_tlb_fence_get_driver_name(struct dma_fence *fence)
+{
+	return "amdgpu tlb fence";
+}
+
+static const char *amdgpu_tlb_fence_get_timeline_name(struct dma_fence *f)
+{
+	return "amdgpu tlb timeline";
+}
+
+static void amdgpu_tlb_fence_work(struct work_struct *work)
+{
+	struct amdgpu_tlb_fence *f = container_of(work, typeof(*f), work);
+	int r;
+
+	if (f->dependency) {
+		dma_fence_wait(f->dependency, false);
+		dma_fence_put(f->dependency);
+		f->dependency = NULL;
+	}
+
+	r = amdgpu_gmc_flush_gpu_tlb_pasid(f->adev, f->pasid, 2, true, 0);
+	if (r) {
+		dev_err(f->adev->dev, "TLB flush failed for PASID %d.\n",
+			f->pasid);
+		dma_fence_set_error(&f->base, r);
+	}
+
+	dma_fence_signal(&f->base);
+	dma_fence_put(&f->base);
+}
+
+static const struct dma_fence_ops amdgpu_tlb_fence_ops = {
+	.use_64bit_seqno = true,
+	.get_driver_name = amdgpu_tlb_fence_get_driver_name,
+	.get_timeline_name = amdgpu_tlb_fence_get_timeline_name
+};
+
+void amdgpu_vm_tlb_fence_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+				struct dma_fence **fence)
+{
+	struct amdgpu_tlb_fence *f;
+
+	f = kmalloc(sizeof(*f), GFP_KERNEL);
+	if (!f) {
+		/*
+		 * We can't fail since the PDEs and PTEs are already updated, so
+		 * just block for the dependency and execute the TLB flush
+		 */
+		if (*fence)
+			dma_fence_wait(*fence, false);
+
+		amdgpu_gmc_flush_gpu_tlb_pasid(adev, vm->pasid, 2, true, 0);
+		*fence = dma_fence_get_stub();
+		return;
+	}
+
+	f->adev = adev;
+	f->dependency = *fence;
+	f->pasid = vm->pasid;
+	INIT_WORK(&f->work, amdgpu_tlb_fence_work);
+	spin_lock_init(&f->lock);
+
+	dma_fence_init(&f->base, &amdgpu_tlb_fence_ops, &f->lock,
+		       vm->tlb_fence_context, atomic64_read(&vm->tlb_seq));
+
+	/* TODO: We probably need a separate wq here */
+	dma_fence_get(&f->base);
+	schedule_work(&f->work);
+
+	*fence = &f->base;
+}
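
Note on the dma_fence callback convention that amdgpu_vm_tlb_flush() above relies on: dma_fence_add_callback() returns 0 when the callback was queued to run at signaling time, and -ENOENT when the fence has already signaled, in which case the callback is never invoked and the caller must run the same path inline. A minimal sketch of that pattern follows (the demo_* names are hypothetical and not part of this patch):

#include <linux/dma-fence.h>

/* Illustrative only: mirrors the if/else in amdgpu_vm_tlb_flush(). */
static void demo_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	/*
	 * Runs once the fence signals, possibly from IRQ context;
	 * container_of(cb, ...) recovers the surrounding state, as
	 * amdgpu_vm_tlb_seq_cb() does with amdgpu_vm_tlb_seq_struct.
	 */
}

static void demo_attach_or_run(struct dma_fence *fence,
			       struct dma_fence_cb *cb)
{
	if (dma_fence_add_callback(fence, cb, demo_cb))
		demo_cb(NULL, cb);	/* already signaled: run inline */
}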