Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 252
1 file changed, 129 insertions, 123 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 59cac347baa3..dc379dc22c77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -45,22 +45,43 @@
 /**
  * DOC: GPUVM
  *
- * GPUVM is similar to the legacy gart on older asics, however
- * rather than there being a single global gart table
- * for the entire GPU, there are multiple VM page tables active
- * at any given time. The VM page tables can contain a mix
- * vram pages and system memory pages and system memory pages
+ * GPUVM is the MMU functionality provided on the GPU.
+ * GPUVM is similar to the legacy GART on older asics, however
+ * rather than there being a single global GART table
+ * for the entire GPU, there can be multiple GPUVM page tables active
+ * at any given time. The GPUVM page tables can contain a mix
+ * VRAM pages and system pages (both memory and MMIO) and system pages
  * can be mapped as snooped (cached system pages) or unsnooped
  * (uncached system pages).
- * Each VM has an ID associated with it and there is a page table
- * associated with each VMID. When executing a command buffer,
- * the kernel tells the ring what VMID to use for that command
+ *
+ * Each active GPUVM has an ID associated with it and there is a page table
+ * linked with each VMID. When executing a command buffer,
+ * the kernel tells the engine what VMID to use for that command
  * buffer. VMIDs are allocated dynamically as commands are submitted.
  * The userspace drivers maintain their own address space and the kernel
  * sets up their pages tables accordingly when they submit their
  * command buffers and a VMID is assigned.
- * Cayman/Trinity support up to 8 active VMs at any given time;
- * SI supports 16.
+ * The hardware supports up to 16 active GPUVMs at any given time.
+ *
+ * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
+ * on the ASIC family. GPUVM supports RWX attributes on each page as well
+ * as other features such as encryption and caching attributes.
+ *
+ * VMID 0 is special. It is the GPUVM used for the kernel driver. In
+ * addition to an aperture managed by a page table, VMID 0 also has
+ * several other apertures. There is an aperture for direct access to VRAM
+ * and there is a legacy AGP aperture which just forwards accesses directly
+ * to the matching system physical addresses (or IOVAs when an IOMMU is
+ * present). These apertures provide direct access to these memories without
+ * incurring the overhead of a page table. VMID 0 is used by the kernel
+ * driver for tasks like memory management.
+ *
+ * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
+ * For user applications, each application can have their own unique GPUVM
+ * address space. The application manages the address space and the kernel
+ * driver manages the GPUVM page tables for each process. If an GPU client
+ * accesses an invalid page, it will generate a GPU page fault, similar to
+ * accessing an invalid page on a CPU.
  */
 
 #define START(node) ((node)->start)
@@ -143,32 +164,6 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
         return 0;
 }
 
-/*
- * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
- * happens while holding this lock anywhere to prevent deadlocks when
- * an MMU notifier runs in reclaim-FS context.
- */
-static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
-{
-        mutex_lock(&vm->eviction_lock);
-        vm->saved_flags = memalloc_noreclaim_save();
-}
-
-static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
-{
-        if (mutex_trylock(&vm->eviction_lock)) {
-                vm->saved_flags = memalloc_noreclaim_save();
-                return 1;
-        }
-        return 0;
-}
-
-static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
-{
-        memalloc_noreclaim_restore(vm->saved_flags);
-        mutex_unlock(&vm->eviction_lock);
-}
-
 /**
  * amdgpu_vm_bo_evicted - vm_bo is evicted
  *
@@ -183,10 +178,12 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
         struct amdgpu_bo *bo = vm_bo->bo;
 
         vm_bo->moved = true;
+        spin_lock(&vm_bo->vm->status_lock);
         if (bo->tbo.type == ttm_bo_type_kernel)
                 list_move(&vm_bo->vm_status, &vm->evicted);
         else
                 list_move_tail(&vm_bo->vm_status, &vm->evicted);
+        spin_unlock(&vm_bo->vm->status_lock);
 }
 
 /**
  * amdgpu_vm_bo_moved - vm_bo is moved
@@ -198,7 +195,9 @@ static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo)
  */
 static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
 {
+        spin_lock(&vm_bo->vm->status_lock);
         list_move(&vm_bo->vm_status, &vm_bo->vm->moved);
+        spin_unlock(&vm_bo->vm->status_lock);
 }
 
 /**
@@ -211,7 +210,9 @@ static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo)
  */
 static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
 {
+        spin_lock(&vm_bo->vm->status_lock);
         list_move(&vm_bo->vm_status, &vm_bo->vm->idle);
+        spin_unlock(&vm_bo->vm->status_lock);
         vm_bo->moved = false;
 }
 
@@ -225,9 +226,9 @@ static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo)
  */
 static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
 {
-        spin_lock(&vm_bo->vm->invalidated_lock);
+        spin_lock(&vm_bo->vm->status_lock);
         list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated);
-        spin_unlock(&vm_bo->vm->invalidated_lock);
+        spin_unlock(&vm_bo->vm->status_lock);
 }
 
 /**
@@ -240,10 +241,13 @@ static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo)
  */
 static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
 {
-        if (vm_bo->bo->parent)
+        if (vm_bo->bo->parent) {
+                spin_lock(&vm_bo->vm->status_lock);
                 list_move(&vm_bo->vm_status, &vm_bo->vm->relocated);
-        else
+                spin_unlock(&vm_bo->vm->status_lock);
+        } else {
                 amdgpu_vm_bo_idle(vm_bo);
+        }
 }
 
 /**
@@ -256,9 +260,9 @@ static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo)
  */
 static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo)
 {
-        spin_lock(&vm_bo->vm->invalidated_lock);
+        spin_lock(&vm_bo->vm->status_lock);
         list_move(&vm_bo->vm_status, &vm_bo->vm->done);
-        spin_unlock(&vm_bo->vm->invalidated_lock);
+        spin_unlock(&vm_bo->vm->status_lock);
 }
 
 /**
@@ -363,12 +367,20 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                               int (*validate)(void *p, struct amdgpu_bo *bo),
                               void *param)
 {
-        struct amdgpu_vm_bo_base *bo_base, *tmp;
+        struct amdgpu_vm_bo_base *bo_base;
+        struct amdgpu_bo *shadow;
+        struct amdgpu_bo *bo;
         int r;
 
-        list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
-                struct amdgpu_bo *bo = bo_base->bo;
-                struct amdgpu_bo *shadow = amdgpu_bo_shadowed(bo);
+        spin_lock(&vm->status_lock);
+        while (!list_empty(&vm->evicted)) {
+                bo_base = list_first_entry(&vm->evicted,
+                                           struct amdgpu_vm_bo_base,
+                                           vm_status);
+                spin_unlock(&vm->status_lock);
+
+                bo = bo_base->bo;
+                shadow = amdgpu_bo_shadowed(bo);
 
                 r = validate(param, bo);
                 if (r)
@@ -385,7 +397,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                         vm->update_funcs->map_table(to_amdgpu_bo_vm(bo));
                         amdgpu_vm_bo_relocated(bo_base);
                 }
+                spin_lock(&vm->status_lock);
         }
+        spin_unlock(&vm->status_lock);
 
         amdgpu_vm_eviction_lock(vm);
         vm->evicting = false;
@@ -406,13 +420,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 {
+        bool empty;
         bool ret;
 
         amdgpu_vm_eviction_lock(vm);
         ret = !vm->evicting;
         amdgpu_vm_eviction_unlock(vm);
 
-        return ret && list_empty(&vm->evicted);
+        spin_lock(&vm->status_lock);
+        empty = list_empty(&vm->evicted);
+        spin_unlock(&vm->status_lock);
+
+        return ret && empty;
 }
 
 /**
@@ -465,25 +484,20 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
         struct amdgpu_device *adev = ring->adev;
         unsigned vmhub = ring->funcs->vmhub;
         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-        struct amdgpu_vmid *id;
-        bool gds_switch_needed;
-        bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
 
         if (job->vmid == 0)
                 return false;
-        id = &id_mgr->ids[job->vmid];
-        gds_switch_needed = ring->funcs->emit_gds_switch && (
-                id->gds_base != job->gds_base ||
-                id->gds_size != job->gds_size ||
-                id->gws_base != job->gws_base ||
-                id->gws_size != job->gws_size ||
-                id->oa_base != job->oa_base ||
-                id->oa_size != job->oa_size);
 
-        if (amdgpu_vmid_had_gpu_reset(adev, id))
+        if (job->vm_needs_flush || ring->has_compute_vm_bug)
+                return true;
+
+        if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+                return true;
+
+        if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
                 return true;
 
-        return vm_flush_needed || gds_switch_needed;
+        return false;
 }
 
 /**
@@ -505,27 +519,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
         unsigned vmhub = ring->funcs->vmhub;
         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
         struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
-        bool gds_switch_needed = ring->funcs->emit_gds_switch && (
-                id->gds_base != job->gds_base ||
-                id->gds_size != job->gds_size ||
-                id->gws_base != job->gws_base ||
-                id->gws_size != job->gws_size ||
-                id->oa_base != job->oa_base ||
-                id->oa_size != job->oa_size);
+        bool spm_update_needed = job->spm_update_needed;
+        bool gds_switch_needed = ring->funcs->emit_gds_switch &&
+                job->gds_switch_needed;
         bool vm_flush_needed = job->vm_needs_flush;
         struct dma_fence *fence = NULL;
         bool pasid_mapping_needed = false;
         unsigned patch_offset = 0;
-        bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));
         int r;
 
-        if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
-                adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
-
         if (amdgpu_vmid_had_gpu_reset(adev, id)) {
                 gds_switch_needed = true;
                 vm_flush_needed = true;
                 pasid_mapping_needed = true;
+                spm_update_needed = true;
         }
 
         mutex_lock(&id_mgr->lock);
@@ -543,6 +550,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
         if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
                 return 0;
 
+        amdgpu_ring_ib_begin(ring);
         if (ring->funcs->init_cond_exec)
                 patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -557,6 +565,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
         if (pasid_mapping_needed)
                 amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
+        if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+                adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
+
+        if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
+            gds_switch_needed) {
+                amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
+                                            job->gds_size, job->gws_base,
+                                            job->gws_size, job->oa_base,
+                                            job->oa_size);
+        }
+
         if (vm_flush_needed || pasid_mapping_needed) {
                 r = amdgpu_fence_emit(ring, &fence, NULL, 0);
                 if (r)
@@ -581,20 +600,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
         }
         dma_fence_put(fence);
 
-        if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
-            gds_switch_needed) {
-                id->gds_base = job->gds_base;
-                id->gds_size = job->gds_size;
-                id->gws_base = job->gws_base;
-                id->gws_size = job->gws_size;
-                id->oa_base = job->oa_base;
-                id->oa_size = job->oa_size;
-                amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
-                                            job->gds_size, job->gws_base,
-                                            job->gws_size, job->oa_base,
-                                            job->oa_size);
-        }
-
         if (ring->funcs->patch_cond_exec)
                 amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
@@ -603,6 +608,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
                 amdgpu_ring_emit_switch_buffer(ring);
                 amdgpu_ring_emit_switch_buffer(ring);
         }
+        amdgpu_ring_ib_end(ring);
         return 0;
 }
 
@@ -680,9 +686,14 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
         struct amdgpu_vm_update_params params;
         struct amdgpu_vm_bo_base *entry;
         bool flush_tlb_needed = false;
+        LIST_HEAD(relocated);
         int r, idx;
 
-        if (list_empty(&vm->relocated))
+        spin_lock(&vm->status_lock);
+        list_splice_init(&vm->relocated, &relocated);
+        spin_unlock(&vm->status_lock);
+
+        if (list_empty(&relocated))
                 return 0;
 
         if (!drm_dev_enter(adev_to_drm(adev), &idx))
@@ -697,7 +708,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
         if (r)
                 goto error;
 
-        list_for_each_entry(entry, &vm->relocated, vm_status) {
+        list_for_each_entry(entry, &relocated, vm_status) {
                 /* vm_flush_needed after updating moved PDEs */
                 flush_tlb_needed |= entry->moved;
 
@@ -713,9 +724,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
         if (flush_tlb_needed)
                 atomic64_inc(&vm->tlb_seq);
 
-        while (!list_empty(&vm->relocated)) {
-                entry = list_first_entry(&vm->relocated,
-                                         struct amdgpu_vm_bo_base,
+        while (!list_empty(&relocated)) {
+                entry = list_first_entry(&relocated, struct amdgpu_vm_bo_base,
                                          vm_status);
                 amdgpu_vm_bo_idle(entry);
         }
@@ -912,6 +922,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
 {
         struct amdgpu_bo_va *bo_va, *tmp;
 
+        spin_lock(&vm->status_lock);
         list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
                 if (!bo_va->base.bo)
                         continue;
@@ -936,7 +947,6 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
                 amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, gtt_mem,
                                      cpu_mem);
         }
-        spin_lock(&vm->invalidated_lock);
         list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
                 if (!bo_va->base.bo)
                         continue;
@@ -949,7 +959,7 @@ void amdgpu_vm_get_memory(struct amdgpu_vm *vm, uint64_t *vram_mem,
                 amdgpu_bo_get_memory(bo_va->base.bo, vram_mem, gtt_mem,
                                      cpu_mem);
         }
-        spin_unlock(&vm->invalidated_lock);
+        spin_unlock(&vm->status_lock);
 }
 
 /**
  * amdgpu_vm_bo_update - update all BO mappings in the vm page table
@@ -1278,24 +1288,29 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
                            struct amdgpu_vm *vm)
 {
-        struct amdgpu_bo_va *bo_va, *tmp;
+        struct amdgpu_bo_va *bo_va;
         struct dma_resv *resv;
         bool clear;
         int r;
 
-        list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) {
+        spin_lock(&vm->status_lock);
+        while (!list_empty(&vm->moved)) {
+                bo_va = list_first_entry(&vm->moved, struct amdgpu_bo_va,
+                                         base.vm_status);
+                spin_unlock(&vm->status_lock);
+
                 /* Per VM BOs never need to bo cleared in the page tables */
                 r = amdgpu_vm_bo_update(adev, bo_va, false);
                 if (r)
                         return r;
+                spin_lock(&vm->status_lock);
         }
 
-        spin_lock(&vm->invalidated_lock);
         while (!list_empty(&vm->invalidated)) {
                 bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va,
                         base.vm_status);
                 resv = bo_va->base.bo->tbo.base.resv;
-                spin_unlock(&vm->invalidated_lock);
+                spin_unlock(&vm->status_lock);
 
                 /* Try to reserve the BO to avoid clearing its ptes */
                 if (!amdgpu_vm_debug && dma_resv_trylock(resv))
@@ -1310,9 +1325,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 
                 if (!clear)
                         dma_resv_unlock(resv);
-                spin_lock(&vm->invalidated_lock);
+                spin_lock(&vm->status_lock);
         }
-        spin_unlock(&vm->invalidated_lock);
+        spin_unlock(&vm->status_lock);
 
         return 0;
 }
@@ -1387,7 +1402,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
 
         if (bo && bo->tbo.base.resv == vm->root.bo->tbo.base.resv &&
             !bo_va->base.moved) {
-                list_move(&bo_va->base.vm_status, &vm->moved);
+                amdgpu_vm_bo_moved(&bo_va->base);
         }
         trace_amdgpu_vm_bo_map(bo_va, mapping);
 }
@@ -1763,9 +1778,9 @@ void amdgpu_vm_bo_del(struct amdgpu_device *adev,
                 }
         }
 
-        spin_lock(&vm->invalidated_lock);
+        spin_lock(&vm->status_lock);
         list_del(&bo_va->base.vm_status);
-        spin_unlock(&vm->invalidated_lock);
+        spin_unlock(&vm->status_lock);
 
         list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
                 list_del(&mapping->list);
@@ -2019,9 +2034,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
         INIT_LIST_HEAD(&vm->moved);
         INIT_LIST_HEAD(&vm->idle);
         INIT_LIST_HEAD(&vm->invalidated);
-        spin_lock_init(&vm->invalidated_lock);
+        spin_lock_init(&vm->status_lock);
         INIT_LIST_HEAD(&vm->freed);
         INIT_LIST_HEAD(&vm->done);
+        INIT_LIST_HEAD(&vm->pt_freed);
+        INIT_WORK(&vm->pt_free_work, amdgpu_vm_pt_free_work);
 
         /* create scheduler entities for page table updates */
         r = drm_sched_entity_init(&vm->immediate, DRM_SCHED_PRIORITY_NORMAL,
@@ -2223,6 +2240,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 
         amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm);
 
+        flush_work(&vm->pt_free_work);
+
         root = amdgpu_bo_ref(vm->root.bo);
         amdgpu_bo_reserve(root, true);
         amdgpu_vm_set_pasid(adev, vm, 0);
@@ -2301,7 +2320,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
          */
 #ifdef CONFIG_X86_64
         if (amdgpu_vm_update_mode == -1) {
-                if (amdgpu_gmc_vram_full_visible(&adev->gmc))
+                /* For asic with VF MMIO access protection
+                 * avoid using CPU for VM table updates
+                 */
+                if (amdgpu_gmc_vram_full_visible(&adev->gmc) &&
+                    !amdgpu_sriov_vf_mmio_access_protection(adev))
                         adev->vm_manager.vm_update_mode =
                                 AMDGPU_VM_USE_CPU_FOR_COMPUTE;
                 else
@@ -2345,7 +2368,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
         union drm_amdgpu_vm *args = data;
         struct amdgpu_device *adev = drm_to_adev(dev);
         struct amdgpu_fpriv *fpriv = filp->driver_priv;
-        long timeout = msecs_to_jiffies(2000);
         int r;
 
         switch (args->in.op) {
@@ -2357,21 +2379,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                         return r;
                 break;
         case AMDGPU_VM_OP_UNRESERVE_VMID:
-                if (amdgpu_sriov_runtime(adev))
-                        timeout = 8 * timeout;
-
-                /* Wait vm idle to make sure the vmid set in SPM_VMID is
-                 * not referenced anymore.
-                 */
-                r = amdgpu_bo_reserve(fpriv->vm.root.bo, true);
-                if (r)
-                        return r;
-
-                r = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
-                if (r < 0)
-                        return r;
-
-                amdgpu_bo_unreserve(fpriv->vm.root.bo);
                 amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
                 break;
         default:
@@ -2484,8 +2491,7 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, u32 pasid,
                 /* Intentionally setting invalid PTE flag
                  * combination to force a no-retry-fault
                  */
-                flags = AMDGPU_PTE_EXECUTABLE | AMDGPU_PDE_PTE |
-                        AMDGPU_PTE_TF;
+                flags = AMDGPU_PTE_SNOOPED | AMDGPU_PTE_PRT;
                 value = 0;
         } else if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_NEVER) {
                 /* Redirect the access to the dummy page */
@@ -2548,6 +2554,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
         unsigned int total_done_objs = 0;
         unsigned int id = 0;
 
+        spin_lock(&vm->status_lock);
         seq_puts(m, "\tIdle BOs:\n");
         list_for_each_entry_safe(bo_va, tmp, &vm->idle, base.vm_status) {
                 if (!bo_va->base.bo)
@@ -2585,7 +2592,6 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
 
         id = 0;
         seq_puts(m, "\tInvalidated BOs:\n");
-        spin_lock(&vm->invalidated_lock);
         list_for_each_entry_safe(bo_va, tmp, &vm->invalidated, base.vm_status) {
                 if (!bo_va->base.bo)
                         continue;
@@ -2600,7 +2606,7 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m)
                         continue;
                 total_done += amdgpu_bo_print_info(id++, bo_va->base.bo, m);
         }
-        spin_unlock(&vm->invalidated_lock);
+        spin_unlock(&vm->status_lock);
 
         total_done_objs = id;
         seq_printf(m, "\tTotal idle size: %12lld\tobjs:\t%d\n", total_idle,
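
The recurring pattern in the hunks above is a locking conversion: the per-list invalidated_lock is replaced by a single vm->status_lock that protects every per-VM status list (evicted, moved, idle, invalidated, relocated, done), and loops that do heavy, possibly sleeping work per entry (amdgpu_vm_validate_pt_bos(), amdgpu_vm_handle_moved()) now pop the first entry under the lock, drop the lock for the work, and re-take it before testing the list again, so the spinlock is never held across validate() or amdgpu_vm_bo_update(). The sketch below is a minimal, hypothetical illustration of that pattern only, not driver code; demo_vm, demo_bo_state, demo_vm_init() and demo_vm_handle_moved() are invented names standing in for struct amdgpu_vm, struct amdgpu_vm_bo_base and amdgpu_vm_handle_moved().

/* Minimal sketch of the "pop under lock, work unlocked" pattern; all
 * names are hypothetical and the heavy work is only a placeholder.
 */
#include <linux/list.h>
#include <linux/spinlock.h>

struct demo_bo_state {
        struct list_head vm_status;     /* protected by demo_vm::status_lock */
};

struct demo_vm {
        spinlock_t status_lock;         /* one lock for all status lists */
        struct list_head moved;         /* entries waiting to be processed */
        struct list_head idle;          /* entries with nothing left to do */
};

static void demo_vm_init(struct demo_vm *vm)
{
        spin_lock_init(&vm->status_lock);
        INIT_LIST_HEAD(&vm->moved);
        INIT_LIST_HEAD(&vm->idle);
}

static void demo_vm_handle_moved(struct demo_vm *vm)
{
        struct demo_bo_state *entry;

        spin_lock(&vm->status_lock);
        while (!list_empty(&vm->moved)) {
                entry = list_first_entry(&vm->moved, struct demo_bo_state,
                                         vm_status);
                spin_unlock(&vm->status_lock);

                /* Heavy, possibly sleeping work happens here without the
                 * spinlock held; the entry is still linked on vm->moved,
                 * and concurrent list updates take status_lock themselves.
                 */

                spin_lock(&vm->status_lock);
                /* Move the entry to another list so the loop makes progress. */
                list_move_tail(&entry->vm_status, &vm->idle);
        }
        spin_unlock(&vm->status_lock);
}

amdgpu_vm_update_pdes() uses a second variant of the same idea: while holding status_lock it splices the whole relocated list onto a local list head with list_splice_init(), then walks the local copy without the lock, which avoids re-taking the lock for every entry.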
