diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
34 files changed, 1291 insertions, 258 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 25a95c95df14..ef9a3b6d7b62 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -25,7 +25,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o amdgpu_vf_error.o + amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index a23b8af95319..cbcb6a153aba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -732,10 +732,14 @@ struct amdgpu_ctx { struct amdgpu_device *adev; struct amdgpu_queue_mgr queue_mgr; unsigned reset_counter; + uint32_t vram_lost_counter; spinlock_t ring_lock; struct dma_fence **fences; struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; - bool preamble_presented; + bool preamble_presented; + enum amd_sched_priority init_priority; + enum amd_sched_priority override_priority; + struct mutex lock; }; struct amdgpu_ctx_mgr { @@ -752,13 +756,18 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, struct dma_fence *fence, uint64_t *seq); struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, uint64_t seq); +void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, + enum amd_sched_priority priority); int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); + void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); + /* * file private structure */ @@ -770,7 +779,6 @@ struct amdgpu_fpriv { struct mutex bo_list_lock; struct idr bo_list_handles; struct amdgpu_ctx_mgr ctx_mgr; - u32 vram_lost_counter; }; /* @@ -871,7 +879,7 @@ struct amdgpu_mec { struct amdgpu_kiq { u64 eop_gpu_addr; struct amdgpu_bo *eop_obj; - struct mutex ring_mutex; + spinlock_t ring_lock; struct amdgpu_ring ring; struct amdgpu_irq_src irq; }; @@ -1035,6 +1043,10 @@ struct amdgpu_gfx { bool in_suspend; /* NGG */ struct amdgpu_ngg ngg; + + /* pipe reservation */ + struct mutex pipe_reserve_mutex; + DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); }; int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, @@ -1113,6 +1125,7 @@ struct amdgpu_job { uint32_t gds_base, gds_size; uint32_t gws_base, gws_size; uint32_t oa_base, oa_size; + uint32_t vram_lost_counter; /* user fence handling */ uint64_t uf_addr; @@ -1138,7 +1151,7 @@ static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p, /* * Writeback */ -#define AMDGPU_MAX_WB 1024 /* Reserve at most 1024 WB slots for amdgpu-owned rings. */ +#define AMDGPU_MAX_WB 512 /* Reserve at most 512 WB slots for amdgpu-owned rings. */ struct amdgpu_wb { struct amdgpu_bo *wb_obj; @@ -1379,6 +1392,18 @@ struct amdgpu_atcs { }; /* + * Firmware VRAM reservation + */ +struct amdgpu_fw_vram_usage { + u64 start_offset; + u64 size; + struct amdgpu_bo *reserved_bo; + void *va; +}; + +int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev); + +/* * CGS */ struct cgs_device *amdgpu_cgs_create_device(struct amdgpu_device *adev); @@ -1582,6 +1607,8 @@ struct amdgpu_device { struct delayed_work late_init_work; struct amdgpu_virt virt; + /* firmware VRAM reservation */ + struct amdgpu_fw_vram_usage fw_vram_usage; /* link all shadow bo */ struct list_head shadow_list; @@ -1833,8 +1860,6 @@ static inline bool amdgpu_has_atpx(void) { return false; } extern const struct drm_ioctl_desc amdgpu_ioctls_kms[]; extern const int amdgpu_max_kms_ioctl; -bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, - struct amdgpu_fpriv *fpriv); int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags); void amdgpu_driver_unload_kms(struct drm_device *dev); void amdgpu_driver_lastclose_kms(struct drm_device *dev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index ce443586a0c7..f66d33e4baca 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -1807,6 +1807,8 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) uint16_t data_offset; int usage_bytes = 0; struct _ATOM_VRAM_USAGE_BY_FIRMWARE *firmware_usage; + u64 start_addr; + u64 size; if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) { firmware_usage = (struct _ATOM_VRAM_USAGE_BY_FIRMWARE *)(ctx->bios + data_offset); @@ -1815,7 +1817,21 @@ int amdgpu_atombios_allocate_fb_scratch(struct amdgpu_device *adev) le32_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware), le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb)); - usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024; + start_addr = firmware_usage->asFirmwareVramReserveInfo[0].ulStartAddrUsedByFirmware; + size = firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb; + + if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) == + (uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION << + ATOM_VRAM_OPERATION_FLAGS_SHIFT)) { + /* Firmware request VRAM reservation for SR-IOV */ + adev->fw_vram_usage.start_offset = (start_addr & + (~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10; + adev->fw_vram_usage.size = size << 10; + /* Use the default scratch size */ + usage_bytes = 0; + } else { + usage_bytes = le16_to_cpu(firmware_usage->asFirmwareVramReserveInfo[0].usFirmwareUseInKb) * 1024; + } } ctx->scratch_size_bytes = 0; if (usage_bytes == 0) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ab83dfcabb41..f7fceb63413c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -90,12 +90,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) goto free_chunk; } + mutex_lock(&p->ctx->lock); + /* get chunks */ chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, sizeof(uint64_t)*cs->in.num_chunks)) { ret = -EFAULT; - goto put_ctx; + goto free_chunk; } p->nchunks = cs->in.num_chunks; @@ -103,7 +105,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) GFP_KERNEL); if (!p->chunks) { ret = -ENOMEM; - goto put_ctx; + goto free_chunk; } for (i = 0; i < p->nchunks; i++) { @@ -170,6 +172,11 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) if (ret) goto free_all_kdata; + if (p->ctx->vram_lost_counter != p->job->vram_lost_counter) { + ret = -ECANCELED; + goto free_all_kdata; + } + if (p->uf_entry.robj) p->job->uf_addr = uf_offset; kfree(chunk_array); @@ -183,8 +190,6 @@ free_partial_kdata: kfree(p->chunks); p->chunks = NULL; p->nchunks = 0; -put_ctx: - amdgpu_ctx_put(p->ctx); free_chunk: kfree(chunk_array); @@ -705,7 +710,8 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) list_for_each_entry(e, &p->validated, tv.head) { struct reservation_object *resv = e->robj->tbo.resv; - r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp); + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, + amdgpu_bo_explicit_sync(e->robj)); if (r) return r; @@ -736,8 +742,10 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, dma_fence_put(parser->fence); - if (parser->ctx) + if (parser->ctx) { + mutex_unlock(&parser->ctx->lock); amdgpu_ctx_put(parser->ctx); + } if (parser->bo_list) amdgpu_bo_list_put(parser->bo_list); @@ -844,14 +852,58 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; struct amdgpu_ring *ring = p->job->ring; - int i, r; + int r; /* Only for UVD/VCE VM emulation */ - if (ring->funcs->parse_cs) { - for (i = 0; i < p->job->num_ibs; i++) { - r = amdgpu_ring_parse_cs(ring, p, i); + if (p->job->ring->funcs->parse_cs) { + unsigned i, j; + + for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { + struct drm_amdgpu_cs_chunk_ib *chunk_ib; + struct amdgpu_bo_va_mapping *m; + struct amdgpu_bo *aobj = NULL; + struct amdgpu_cs_chunk *chunk; + struct amdgpu_ib *ib; + uint64_t offset; + uint8_t *kptr; + + chunk = &p->chunks[i]; + ib = &p->job->ibs[j]; + chunk_ib = chunk->kdata; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + continue; + + r = amdgpu_cs_find_mapping(p, chunk_ib->va_start, + &aobj, &m); + if (r) { + DRM_ERROR("IB va_start is invalid\n"); + return r; + } + + if ((chunk_ib->va_start + chunk_ib->ib_bytes) > + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("IB va_start+ib_bytes is invalid\n"); + return -EINVAL; + } + + /* the IB should be reserved at this point */ + r = amdgpu_bo_kmap(aobj, (void **)&kptr); + if (r) { + return r; + } + + offset = m->start * AMDGPU_GPU_PAGE_SIZE; + kptr += chunk_ib->va_start - offset; + + memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); + amdgpu_bo_kunmap(aobj); + + r = amdgpu_ring_parse_cs(ring, p, j); if (r) return r; + + j++; } } @@ -918,54 +970,18 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, parser->job->ring = ring; - if (ring->funcs->parse_cs) { - struct amdgpu_bo_va_mapping *m; - struct amdgpu_bo *aobj = NULL; - uint64_t offset; - uint8_t *kptr; - - r = amdgpu_cs_find_mapping(parser, chunk_ib->va_start, - &aobj, &m); - if (r) { - DRM_ERROR("IB va_start is invalid\n"); - return r; - } - - if ((chunk_ib->va_start + chunk_ib->ib_bytes) > - (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("IB va_start+ib_bytes is invalid\n"); - return -EINVAL; - } - - /* the IB should be reserved at this point */ - r = amdgpu_bo_kmap(aobj, (void **)&kptr); - if (r) { - return r; - } - - offset = m->start * AMDGPU_GPU_PAGE_SIZE; - kptr += chunk_ib->va_start - offset; - - r = amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - - memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); - amdgpu_bo_kunmap(aobj); - } else { - r = amdgpu_ib_get(adev, vm, 0, ib); - if (r) { - DRM_ERROR("Failed to get ib !\n"); - return r; - } - + r = amdgpu_ib_get(adev, vm, + ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0, + ib); + if (r) { + DRM_ERROR("Failed to get ib !\n"); + return r; } ib->gpu_addr = chunk_ib->va_start; ib->length_dw = chunk_ib->ib_bytes / 4; ib->flags = chunk_ib->flags; + j++; } @@ -975,7 +991,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) return -EINVAL; - return 0; + return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx); } static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, @@ -1176,6 +1192,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->uf_sequence = seq; amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, + amd_sched_get_job_priority(&job->base)); trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); @@ -1189,7 +1207,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_cs *cs = data; struct amdgpu_cs_parser parser = {}; bool reserved_buffers = false; @@ -1197,8 +1214,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (!adev->accel_working) return -EBUSY; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; parser.adev = adev; parser.filp = filp; @@ -1209,6 +1224,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) goto out; } + r = amdgpu_cs_ib_fill(adev, &parser); + if (r) + goto out; + r = amdgpu_cs_parser_bos(&parser, data); if (r) { if (r == -ENOMEM) @@ -1219,9 +1238,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) } reserved_buffers = true; - r = amdgpu_cs_ib_fill(adev, &parser); - if (r) - goto out; r = amdgpu_cs_dependencies(adev, &parser); if (r) { @@ -1257,16 +1273,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, { union drm_amdgpu_wait_cs *wait = data; struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); struct amdgpu_ring *ring = NULL; struct amdgpu_ctx *ctx; struct dma_fence *fence; long r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id); if (ctx == NULL) return -EINVAL; @@ -1284,6 +1296,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, r = PTR_ERR(fence); else if (fence) { r = dma_fence_wait_timeout(fence, true, timeout); + if (r > 0 && fence->error) + r = fence->error; dma_fence_put(fence); } else r = 1; @@ -1335,16 +1349,12 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_fence_to_handle *info = data; struct dma_fence *fence; struct drm_syncobj *syncobj; struct sync_file *sync_file; int fd, r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence); if (IS_ERR(fence)) return PTR_ERR(fence); @@ -1425,6 +1435,9 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev, if (r == 0) break; + + if (fence->error) + return fence->error; } memset(wait, 0, sizeof(*wait)); @@ -1485,7 +1498,7 @@ out: wait->out.status = (r > 0); wait->out.first_signaled = first; /* set return value 0 to indicate success */ - r = 0; + r = array[first]->error; err_free_fence_array: for (i = 0; i < fence_count; i++) @@ -1506,15 +1519,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; union drm_amdgpu_wait_fences *wait = data; uint32_t fence_count = wait->in.fence_count; struct drm_amdgpu_fence *fences_user; struct drm_amdgpu_fence *fences; int r; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; /* Get the fences from userspace */ fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence), GFP_KERNEL); @@ -1572,14 +1582,14 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser, if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket) return -EINVAL; - r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); - if (unlikely(r)) - return r; - - if ((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS) - return 0; + if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) { + (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; + amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); + r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, + false); + if (r) + return r; + } - (*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains); - return ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, false, false); + return amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index 75c933b1a432..c184468e2b2b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -23,13 +23,41 @@ */ #include <drm/drmP.h> +#include <drm/drm_auth.h> #include "amdgpu.h" +#include "amdgpu_sched.h" -static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) +static int amdgpu_ctx_priority_permit(struct drm_file *filp, + enum amd_sched_priority priority) +{ + /* NORMAL and below are accessible by everyone */ + if (priority <= AMD_SCHED_PRIORITY_NORMAL) + return 0; + + if (capable(CAP_SYS_NICE)) + return 0; + + if (drm_is_current_master(filp)) + return 0; + + return -EACCES; +} + +static int amdgpu_ctx_init(struct amdgpu_device *adev, + enum amd_sched_priority priority, + struct drm_file *filp, + struct amdgpu_ctx *ctx) { unsigned i, j; int r; + if (priority < 0 || priority >= AMD_SCHED_PRIORITY_MAX) + return -EINVAL; + + r = amdgpu_ctx_priority_permit(filp, priority); + if (r) + return r; + memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; kref_init(&ctx->refcount); @@ -39,19 +67,24 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) if (!ctx->fences) return -ENOMEM; + mutex_init(&ctx->lock); + for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { ctx->rings[i].sequence = 1; ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; } ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); + ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter); + ctx->init_priority = priority; + ctx->override_priority = AMD_SCHED_PRIORITY_UNSET; /* create context entity for each ring */ for (i = 0; i < adev->num_rings; i++) { struct amdgpu_ring *ring = adev->rings[i]; struct amd_sched_rq *rq; - rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL]; + rq = &ring->sched.sched_rq[priority]; if (ring == &adev->gfx.kiq.ring) continue; @@ -96,10 +129,14 @@ static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx) &ctx->rings[i].entity); amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); + + mutex_destroy(&ctx->lock); } static int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, + struct drm_file *filp, + enum amd_sched_priority priority, uint32_t *id) { struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr; @@ -117,8 +154,9 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, kfree(ctx); return r; } + *id = (uint32_t)r; - r = amdgpu_ctx_init(adev, ctx); + r = amdgpu_ctx_init(adev, priority, filp, ctx); if (r) { idr_remove(&mgr->ctx_handles, *id); *id = 0; @@ -193,6 +231,7 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, { int r; uint32_t id; + enum amd_sched_priority priority; union drm_amdgpu_ctx *args = data; struct amdgpu_device *adev = dev->dev_private; @@ -200,10 +239,16 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, r = 0; id = args->in.ctx_id; + priority = amdgpu_to_sched_priority(args->in.priority); + + /* For backwards compatibility reasons, we need to accept + * ioctls with garbage in the priority field */ + if (priority == AMD_SCHED_PRIORITY_INVALID) + priority = AMD_SCHED_PRIORITY_NORMAL; switch (args->in.op) { case AMDGPU_CTX_OP_ALLOC_CTX: - r = amdgpu_ctx_alloc(adev, fpriv, &id); + r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id); args->out.alloc.ctx_id = id; break; case AMDGPU_CTX_OP_FREE_CTX: @@ -256,12 +301,8 @@ int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, idx = seq & (amdgpu_sched_jobs - 1); other = cring->fences[idx]; - if (other) { - signed long r; - r = dma_fence_wait_timeout(other, true, MAX_SCHEDULE_TIMEOUT); - if (r < 0) - return r; - } + if (other) + BUG_ON(!dma_fence_is_signaled(other)); dma_fence_get(fence); @@ -305,6 +346,51 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, return fence; } +void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, + enum amd_sched_priority priority) +{ + int i; + struct amdgpu_device *adev = ctx->adev; + struct amd_sched_rq *rq; + struct amd_sched_entity *entity; + struct amdgpu_ring *ring; + enum amd_sched_priority ctx_prio; + + ctx->override_priority = priority; + + ctx_prio = (ctx->override_priority == AMD_SCHED_PRIORITY_UNSET) ? + ctx->init_priority : ctx->override_priority; + + for (i = 0; i < adev->num_rings; i++) { + ring = adev->rings[i]; + entity = &ctx->rings[i].entity; + rq = &ring->sched.sched_rq[ctx_prio]; + + if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) + continue; + + amd_sched_entity_set_rq(entity, rq); + } +} + +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id) +{ + struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id]; + unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1); + struct dma_fence *other = cring->fences[idx]; + + if (other) { + signed long r; + r = dma_fence_wait_timeout(other, false, MAX_SCHEDULE_TIMEOUT); + if (r < 0) { + DRM_ERROR("Error (%ld) waiting for fence!\n", r); + return r; + } + } + + return 0; +} + void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) { mutex_init(&mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1949d8aedf49..0b9332e65a4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg, { uint32_t ret; - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { - BUG_ON(in_interrupt()); + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_virt_kiq_rreg(adev, reg); - } if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v, adev->last_mm_index = v; } - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) { - BUG_ON(in_interrupt()); + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) return amdgpu_virt_kiq_wreg(adev, reg, v); - } if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX)) writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); @@ -658,6 +654,81 @@ void amdgpu_gart_location(struct amdgpu_device *adev, struct amdgpu_mc *mc) } /* + * Firmware Reservation functions + */ +/** + * amdgpu_fw_reserve_vram_fini - free fw reserved vram + * + * @adev: amdgpu_device pointer + * + * free fw reserved vram if it has been reserved. + */ +void amdgpu_fw_reserve_vram_fini(struct amdgpu_device *adev) +{ + amdgpu_bo_free_kernel(&adev->fw_vram_usage.reserved_bo, + NULL, &adev->fw_vram_usage.va); +} + +/** + * amdgpu_fw_reserve_vram_init - create bo vram reservation from fw + * + * @adev: amdgpu_device pointer + * + * create bo vram reservation from fw. + */ +int amdgpu_fw_reserve_vram_init(struct amdgpu_device *adev) +{ + int r = 0; + u64 gpu_addr; + u64 vram_size = adev->mc.visible_vram_size; + + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + + if (adev->fw_vram_usage.size > 0 && + adev->fw_vram_usage.size <= vram_size) { + + r = amdgpu_bo_create(adev, adev->fw_vram_usage.size, + PAGE_SIZE, true, 0, + AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED | + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, NULL, NULL, 0, + &adev->fw_vram_usage.reserved_bo); + if (r) + goto error_create; + + r = amdgpu_bo_reserve(adev->fw_vram_usage.reserved_bo, false); + if (r) + goto error_reserve; + r = amdgpu_bo_pin_restricted(adev->fw_vram_usage.reserved_bo, + AMDGPU_GEM_DOMAIN_VRAM, + adev->fw_vram_usage.start_offset, + (adev->fw_vram_usage.start_offset + + adev->fw_vram_usage.size), &gpu_addr); + if (r) + goto error_pin; + r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo, + &adev->fw_vram_usage.va); + if (r) + goto error_kmap; + + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); + } + return r; + +error_kmap: + amdgpu_bo_unpin(adev->fw_vram_usage.reserved_bo); +error_pin: + amdgpu_bo_unreserve(adev->fw_vram_usage.reserved_bo); +error_reserve: + amdgpu_bo_unref(&adev->fw_vram_usage.reserved_bo); +error_create: + adev->fw_vram_usage.va = NULL; + adev->fw_vram_usage.reserved_bo = NULL; + return r; +} + + +/* * GPU helpers function. */ /** @@ -1604,7 +1675,6 @@ static int amdgpu_init(struct amdgpu_device *adev) return r; } adev->ip_blocks[i].status.sw = true; - /* need to do gmc hw init early so we can allocate gpu mem */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { r = amdgpu_vram_scratch_init(adev); @@ -1635,11 +1705,6 @@ static int amdgpu_init(struct amdgpu_device *adev) } } - mutex_lock(&adev->firmware.mutex); - if (amdgpu_ucode_init_bo(adev)) - adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; - mutex_unlock(&adev->firmware.mutex); - for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.sw) continue; @@ -1775,8 +1840,6 @@ static int amdgpu_fini(struct amdgpu_device *adev) adev->ip_blocks[i].status.hw = false; } - if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) - amdgpu_ucode_fini_bo(adev); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.sw) @@ -2019,6 +2082,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->vm_manager.vm_pte_num_rings = 0; adev->gart.gart_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); + bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); adev->smc_rreg = &amdgpu_invalid_rreg; adev->smc_wreg = &amdgpu_invalid_wreg; @@ -2047,6 +2111,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->pm.mutex); mutex_init(&adev->gfx.gpu_clock_mutex); mutex_init(&adev->srbm_mutex); + mutex_init(&adev->gfx.pipe_reserve_mutex); mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); @@ -2223,6 +2288,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) DRM_ERROR("ib ring test failed (%d).\n", r); + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_init_data_exchange(adev); + amdgpu_fbdev_init(adev); r = amdgpu_pm_sysfs_init(adev); @@ -2300,6 +2368,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev) /* evict vram memory */ amdgpu_bo_evict_vram(adev); amdgpu_ib_pool_fini(adev); + amdgpu_fw_reserve_vram_fini(adev); amdgpu_fence_driver_fini(adev); amdgpu_fbdev_fini(adev); r = amdgpu_fini(adev); @@ -2552,6 +2621,9 @@ static bool amdgpu_check_soft_reset(struct amdgpu_device *adev) int i; bool asic_hang = false; + if (amdgpu_sriov_vf(adev)) + return true; + for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.valid) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index ad02d3fbb44c..dd2f060d62a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -71,9 +71,11 @@ * - 3.19.0 - Add support for UVD MJPEG decode * - 3.20.0 - Add support for local BOs * - 3.21.0 - Add DRM_AMDGPU_FENCE_TO_HANDLE ioctl + * - 3.22.0 - Add DRM_AMDGPU_SCHED ioctl + * - 3.23.0 - Add query for VRAM lost counter */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 21 +#define KMS_DRIVER_MINOR 23 #define KMS_DRIVER_PATCHLEVEL 0 int amdgpu_vram_limit = 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 333bad749067..fb9f88ef6059 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -169,6 +169,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f) } /** + * amdgpu_fence_emit_polling - emit a fence on the requeste ring + * + * @ring: ring the fence is associated with + * @s: resulting sequence number + * + * Emits a fence command on the requested ring (all asics). + * Used For polling fence. + * Returns 0 on success, -ENOMEM on failure. + */ +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s) +{ + uint32_t seq; + + if (!s) + return -EINVAL; + + seq = ++ring->fence_drv.sync_seq; + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr, + seq, AMDGPU_FENCE_FLAG_INT); + + *s = seq; + + return 0; +} + +/** * amdgpu_fence_schedule_fallback - schedule fallback check * * @ring: pointer to struct amdgpu_ring @@ -282,6 +308,30 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring) } /** + * amdgpu_fence_wait_polling - busy wait for givn sequence number + * + * @ring: ring index the fence is associated with + * @wait_seq: sequence number to wait + * @timeout: the timeout for waiting in usecs + * + * Wait for all fences on the requested ring to signal (all asics). + * Returns left time if no timeout, 0 or minus if timeout. + */ +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, + uint32_t wait_seq, + signed long timeout) +{ + uint32_t seq; + + do { + seq = amdgpu_fence_read(ring); + udelay(5); + timeout -= 5; + } while ((int32_t)(wait_seq - seq) > 0 && timeout > 0); + + return timeout > 0 ? timeout : 0; +} +/** * amdgpu_fence_count_emitted - get the count of emitted fences * * @ring: ring the fence is associated with @@ -641,6 +691,19 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data) atomic_read(&ring->fence_drv.last_seq)); seq_printf(m, "Last emitted 0x%08x\n", ring->fence_drv.sync_seq); + + if (ring->funcs->type != AMDGPU_RING_TYPE_GFX) + continue; + + /* set in CP_VMID_PREEMPT and preemption occurred */ + seq_printf(m, "Last preempted 0x%08x\n", + le32_to_cpu(*(ring->fence_drv.cpu_addr + 2))); + /* set in CP_VMID_RESET and reset occurred */ + seq_printf(m, "Last reset 0x%08x\n", + le32_to_cpu(*(ring->fence_drv.cpu_addr + 4))); + /* Both preemption and reset occurred */ + seq_printf(m, "Last both 0x%08x\n", + le32_to_cpu(*(ring->fence_drv.cpu_addr + 6))); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index b0d45c8e6bb3..fb72edc4c026 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -212,7 +212,9 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, AMDGPU_GEM_CREATE_NO_CPU_ACCESS | AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_VRAM_CLEARED | - AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)) + AMDGPU_GEM_CREATE_VM_ALWAYS_VALID | + AMDGPU_GEM_CREATE_EXPLICIT_SYNC)) + return -EINVAL; /* reject invalid gem domains */ @@ -577,11 +579,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, args->operation); return -EINVAL; } - if ((args->operation == AMDGPU_VA_OP_MAP) || - (args->operation == AMDGPU_VA_OP_REPLACE)) { - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; - } INIT_LIST_HEAD(&list); INIT_LIST_HEAD(&duplicates); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 83435ccbad44..ef043361009f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -201,7 +201,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, struct amdgpu_kiq *kiq = &adev->gfx.kiq; int r = 0; - mutex_init(&kiq->ring_mutex); + spin_lock_init(&kiq->ring_lock); r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs); if (r) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c index 0d15eb7d31d7..33535d347734 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c @@ -169,7 +169,8 @@ static int amdgpu_gtt_mgr_new(struct ttm_mem_type_manager *man, int r; spin_lock(&mgr->lock); - if (atomic64_read(&mgr->available) < mem->num_pages) { + if ((&tbo->mem == mem || tbo->mem.mem_type != TTM_PL_TT) && + atomic64_read(&mgr->available) < mem->num_pages) { spin_unlock(&mgr->lock); return 0; } @@ -244,8 +245,9 @@ static void amdgpu_gtt_mgr_del(struct ttm_mem_type_manager *man, uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man) { struct amdgpu_gtt_mgr *mgr = man->priv; + s64 result = man->size - atomic64_read(&mgr->available); - return (u64)(man->size - atomic64_read(&mgr->available)) * PAGE_SIZE; + return (result > 0 ? result : 0) * PAGE_SIZE; } /** @@ -265,7 +267,7 @@ static void amdgpu_gtt_mgr_debug(struct ttm_mem_type_manager *man, drm_mm_print(&mgr->mm, printer); spin_unlock(&mgr->lock); - drm_printf(printer, "man size:%llu pages, gtt available:%llu pages, usage:%lluMB\n", + drm_printf(printer, "man size:%llu pages, gtt available:%lld pages, usage:%lluMB\n", man->size, (u64)atomic64_read(&mgr->available), amdgpu_gtt_mgr_usage(man) >> 20); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 4510627ae83e..0cfc68db575b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -65,6 +65,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->dep_sync); amdgpu_sync_create(&(*job)->sched_sync); + (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); return 0; } @@ -103,6 +104,7 @@ static void amdgpu_job_free_cb(struct amd_sched_job *s_job) { struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base); + amdgpu_ring_priority_put(job->ring, amd_sched_get_job_priority(s_job)); dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); amdgpu_sync_free(&job->dep_sync); @@ -139,6 +141,8 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring, job->fence_ctx = entity->fence_context; *f = dma_fence_get(&job->base.s_fence->finished); amdgpu_job_free_resources(job); + amdgpu_ring_priority_get(job->ring, + amd_sched_get_job_priority(&job->base)); amd_sched_entity_push_job(&job->base); return 0; @@ -177,8 +181,8 @@ static struct dma_fence *amdgpu_job_dependency(struct amd_sched_job *sched_job) static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) { struct dma_fence *fence = NULL; + struct amdgpu_device *adev; struct amdgpu_job *job; - struct amdgpu_fpriv *fpriv = NULL; int r; if (!sched_job) { @@ -186,23 +190,25 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job) return NULL; } job = to_amdgpu_job(sched_job); + adev = job->adev; BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL)); trace_amdgpu_sched_run_job(job); - if (job->vm) - fpriv = container_of(job->vm, struct amdgpu_fpriv, vm); /* skip ib schedule when vram is lost */ - if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv)) + if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter)) { + dma_fence_set_error(&job->base.s_fence->finished, -ECANCELED); DRM_ERROR("Skip scheduling IBs!\n"); - else { - r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence); + } else { + r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, + &fence); if (r) DRM_ERROR("Error scheduling IBs (%d)\n", r); } /* if gpu reset, hw fence will be replaced here */ dma_fence_put(job->fence); job->fence = dma_fence_get(fence); + amdgpu_job_free_resources(job); return fence; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index 51841259e23f..6f0b26dae3b0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -28,6 +28,7 @@ #include <drm/drmP.h> #include "amdgpu.h" #include <drm/amdgpu_drm.h> +#include "amdgpu_sched.h" #include "amdgpu_uvd.h" #include "amdgpu_vce.h" @@ -269,7 +270,6 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { struct amdgpu_device *adev = dev->dev_private; - struct amdgpu_fpriv *fpriv = filp->driver_priv; struct drm_amdgpu_info *info = data; struct amdgpu_mode_info *minfo = &adev->mode_info; void __user *out = (void __user *)(uintptr_t)info->return_pointer; @@ -282,8 +282,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file if (!info->return_size || !info->return_pointer) return -EINVAL; - if (amdgpu_kms_vram_lost(adev, fpriv)) - return -ENODEV; switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: @@ -765,6 +763,9 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file } return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; } + case AMDGPU_INFO_VRAM_LOST_COUNTER: + ui32 = atomic_read(&adev->vram_lost_counter); + return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; default: DRM_DEBUG_KMS("Invalid request %d\n", info->query); return -EINVAL; @@ -791,12 +792,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev) vga_switcheroo_process_delayed_switch(); } -bool amdgpu_kms_vram_lost(struct amdgpu_device *adev, - struct amdgpu_fpriv *fpriv) -{ - return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter); -} - /** * amdgpu_driver_open_kms - drm callback for open * @@ -853,7 +848,6 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) amdgpu_ctx_mgr_init(&fpriv->ctx_mgr); - fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter); file_priv->driver_priv = fpriv; out_suspend: @@ -1023,6 +1017,7 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = { DRM_IOCTL_DEF_DRV(AMDGPU_GEM_CREATE, amdgpu_gem_create_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_CTX, amdgpu_ctx_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_VM, amdgpu_vm_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(AMDGPU_SCHED, amdgpu_sched_ioctl, DRM_MASTER), DRM_IOCTL_DEF_DRV(AMDGPU_BO_LIST, amdgpu_bo_list_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(AMDGPU_FENCE_TO_HANDLE, amdgpu_cs_fence_to_handle_ioctl, DRM_AUTH|DRM_RENDER_ALLOW), /* KMS */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index 6982baeccd14..8b4ed8a98a18 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -40,9 +40,7 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo) { struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev); - struct amdgpu_bo *bo; - - bo = container_of(tbo, struct amdgpu_bo, tbo); + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo); amdgpu_bo_kunmap(bo); @@ -884,7 +882,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return; - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); amdgpu_vm_bo_invalidate(adev, abo, evict); amdgpu_bo_kunmap(abo); @@ -911,7 +909,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo) if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) return 0; - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); /* Remember that this BO was accessed by the CPU */ abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 39b6bf6fb051..428aae048f4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -94,6 +94,11 @@ struct amdgpu_bo { }; }; +static inline struct amdgpu_bo *ttm_to_amdgpu_bo(struct ttm_buffer_object *tbo) +{ + return container_of(tbo, struct amdgpu_bo, tbo); +} + /** * amdgpu_mem_type_to_domain - return domain corresponding to mem_type * @mem_type: ttm memory type @@ -188,6 +193,14 @@ static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) } } +/** + * amdgpu_bo_explicit_sync - return whether the bo is explicitly synced + */ +static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo) +{ + return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC; +} + int amdgpu_bo_create(struct amdgpu_device *adev, unsigned long size, int byte_align, bool kernel, u32 domain, u64 flags, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c index 3b42f407971d..5f5aa5fddc16 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c @@ -145,6 +145,8 @@ static int amdgpu_pp_hw_init(void *handle) int ret = 0; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + amdgpu_ucode_init_bo(adev); if (adev->powerplay.ip_funcs->hw_init) ret = adev->powerplay.ip_funcs->hw_init( @@ -162,6 +164,9 @@ static int amdgpu_pp_hw_fini(void *handle) ret = adev->powerplay.ip_funcs->hw_fini( adev->powerplay.pp_handle); + if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) + amdgpu_ucode_fini_bo(adev); + return ret; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index f1035a689d35..447d446b5015 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -411,6 +411,13 @@ static int psp_hw_init(void *handle) return 0; mutex_lock(&adev->firmware.mutex); + /* + * This sequence is just used on hw_init only once, no need on + * resume. + */ + ret = amdgpu_ucode_init_bo(adev); + if (ret) + goto failed; ret = psp_load_fw(adev); if (ret) { @@ -435,6 +442,8 @@ static int psp_hw_fini(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + amdgpu_ucode_fini_bo(adev); + psp_ring_destroy(psp, PSP_RING_TYPE__KM); amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 019932a7ea3a..e5ece1fae149 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -155,6 +155,75 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring) } /** + * amdgpu_ring_priority_put - restore a ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Release a request for executing at @priority + */ +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + int i; + + if (!ring->funcs->set_priority) + return; + + if (atomic_dec_return(&ring->num_jobs[priority]) > 0) + return; + + /* no need to restore if the job is already at the lowest priority */ + if (priority == AMD_SCHED_PRIORITY_NORMAL) + return; + + mutex_lock(&ring->priority_mutex); + /* something higher prio is executing, no need to decay */ + if (ring->priority > priority) + goto out_unlock; + + /* decay priority to the next level with a job available */ + for (i = priority; i >= AMD_SCHED_PRIORITY_MIN; i--) { + if (i == AMD_SCHED_PRIORITY_NORMAL + || atomic_read(&ring->num_jobs[i])) { + ring->priority = i; + ring->funcs->set_priority(ring, i); + break; + } + } + +out_unlock: + mutex_unlock(&ring->priority_mutex); +} + +/** + * amdgpu_ring_priority_get - change the ring's priority + * + * @ring: amdgpu_ring structure holding the information + * @priority: target priority + * + * Request a ring's priority to be raised to @priority (refcounted). + */ +void amdgpu_ring_priority_get(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + if (!ring->funcs->set_priority) + return; + + atomic_inc(&ring->num_jobs[priority]); + + mutex_lock(&ring->priority_mutex); + if (priority <= ring->priority) + goto out_unlock; + + ring->priority = priority; + ring->funcs->set_priority(ring, priority); + +out_unlock: + mutex_unlock(&ring->priority_mutex); +} + +/** * amdgpu_ring_init - init driver ring struct. * * @adev: amdgpu_device pointer @@ -169,7 +238,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned max_dw, struct amdgpu_irq_src *irq_src, unsigned irq_type) { - int r; + int r, i; int sched_hw_submission = amdgpu_sched_hw_submission; /* Set the hw submission limit higher for KIQ because @@ -247,9 +316,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, } ring->max_dw = max_dw; + ring->priority = AMD_SCHED_PRIORITY_NORMAL; + mutex_init(&ring->priority_mutex); INIT_LIST_HEAD(&ring->lru_list); amdgpu_ring_lru_touch(adev, ring); + for (i = 0; i < AMD_SCHED_PRIORITY_MAX; ++i) + atomic_set(&ring->num_jobs[i], 0); + if (amdgpu_debugfs_ring_init(adev, ring)) { DRM_ERROR("Failed to register debugfs file for rings !\n"); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index 491bd5512dcc..b18c2b96691f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -24,6 +24,7 @@ #ifndef __AMDGPU_RING_H__ #define __AMDGPU_RING_H__ +#include <drm/amdgpu_drm.h> #include "gpu_scheduler.h" /* max number of rings */ @@ -56,6 +57,7 @@ struct amdgpu_device; struct amdgpu_ring; struct amdgpu_ib; struct amdgpu_cs_parser; +struct amdgpu_job; /* * Fences. @@ -88,8 +90,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, void amdgpu_fence_driver_suspend(struct amdgpu_device *adev); void amdgpu_fence_driver_resume(struct amdgpu_device *adev); int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence); +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); void amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, + uint32_t wait_seq, + signed long timeout); unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); /* @@ -147,6 +153,9 @@ struct amdgpu_ring_funcs { void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); void (*emit_tmz)(struct amdgpu_ring *ring, bool start); + /* priority functions */ + void (*set_priority) (struct amdgpu_ring *ring, + enum amd_sched_priority priority); }; struct amdgpu_ring { @@ -187,6 +196,12 @@ struct amdgpu_ring { volatile u32 *cond_exe_cpu_addr; unsigned vm_inv_eng; bool has_compute_vm_bug; + + atomic_t num_jobs[AMD_SCHED_PRIORITY_MAX]; + struct mutex priority_mutex; + /* protected by priority_mutex */ + int priority; + #if defined(CONFIG_DEBUG_FS) struct dentry *ent; #endif @@ -197,6 +212,10 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); +void amdgpu_ring_priority_get(struct amdgpu_ring *ring, + enum amd_sched_priority priority); +void amdgpu_ring_priority_put(struct amdgpu_ring *ring, + enum amd_sched_priority priority); int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c new file mode 100644 index 000000000000..290cc3f9c433 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -0,0 +1,109 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Andres Rodriguez <andresx7@gmail.com> + */ + +#include <linux/fdtable.h> +#include <linux/pid.h> +#include <drm/amdgpu_drm.h> +#include "amdgpu.h" + +#include "amdgpu_vm.h" + +enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority) +{ + switch (amdgpu_priority) { + case AMDGPU_CTX_PRIORITY_VERY_HIGH: + return AMD_SCHED_PRIORITY_HIGH_HW; + case AMDGPU_CTX_PRIORITY_HIGH: + return AMD_SCHED_PRIORITY_HIGH_SW; + case AMDGPU_CTX_PRIORITY_NORMAL: + return AMD_SCHED_PRIORITY_NORMAL; + case AMDGPU_CTX_PRIORITY_LOW: + case AMDGPU_CTX_PRIORITY_VERY_LOW: + return AMD_SCHED_PRIORITY_LOW; + case AMDGPU_CTX_PRIORITY_UNSET: + return AMD_SCHED_PRIORITY_UNSET; + default: + WARN(1, "Invalid context priority %d\n", amdgpu_priority); + return AMD_SCHED_PRIORITY_INVALID; + } +} + +static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev, + int fd, + enum amd_sched_priority priority) +{ + struct file *filp = fcheck(fd); + struct drm_file *file; + struct pid *pid; + struct amdgpu_fpriv *fpriv; + struct amdgpu_ctx *ctx; + uint32_t id; + + if (!filp) + return -EINVAL; + + pid = get_pid(((struct drm_file *)filp->private_data)->pid); + + mutex_lock(&adev->ddev->filelist_mutex); + list_for_each_entry(file, &adev->ddev->filelist, lhead) { + if (file->pid != pid) + continue; + + fpriv = file->driver_priv; + idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id) + amdgpu_ctx_priority_override(ctx, priority); + } + mutex_unlock(&adev->ddev->filelist_mutex); + + put_pid(pid); + + return 0; +} + +int amdgpu_sched_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp) +{ + union drm_amdgpu_sched *args = data; + struct amdgpu_device *adev = dev->dev_private; + enum amd_sched_priority priority; + int r; + + priority = amdgpu_to_sched_priority(args->in.priority); + if (args->in.flags || priority == AMD_SCHED_PRIORITY_INVALID) + return -EINVAL; + + switch (args->in.op) { + case AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE: + r = amdgpu_sched_process_priority_override(adev, + args->in.fd, + priority); + break; + default: + DRM_ERROR("Invalid sched op specified: %d\n", args->in.op); + r = -EINVAL; + break; + } + + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h new file mode 100644 index 000000000000..b28c067d3822 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.h @@ -0,0 +1,34 @@ +/* + * Copyright 2017 Valve Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: Andres Rodriguez <andresx7@gmail.com> + */ + +#ifndef __AMDGPU_SCHED_H__ +#define __AMDGPU_SCHED_H__ + +#include <drm/drmP.h> + +enum amd_sched_priority amdgpu_to_sched_priority(int amdgpu_priority); +int amdgpu_sched_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +#endif // __AMDGPU_SCHED_H__ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index c586f44312f9..a4bf21f8f1c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -169,14 +169,14 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, * * @sync: sync object to add fences from reservation object to * @resv: reservation object with embedded fence - * @shared: true if we should only sync to the exclusive fence + * @explicit_sync: true if we should only sync to the exclusive fence * * Sync to the fence */ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - void *owner) + void *owner, bool explicit_sync) { struct reservation_object_list *flist; struct dma_fence *f; @@ -191,6 +191,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, f = reservation_object_get_excl(resv); r = amdgpu_sync_fence(adev, sync, f); + if (explicit_sync) + return r; + flist = reservation_object_get_list(resv); if (!flist || r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h index dc7687993317..70d7e3a279a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h @@ -45,7 +45,8 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, struct reservation_object *resv, - void *owner); + void *owner, + bool explicit_sync); struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, struct amdgpu_ring *ring); struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 15a28578d458..51eacefadea1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -44,6 +44,7 @@ #include <linux/debugfs.h> #include <linux/iommu.h> #include "amdgpu.h" +#include "amdgpu_object.h" #include "amdgpu_trace.h" #include "bif/bif_4_1_d.h" @@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, placement->num_busy_placement = 1; return; } - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { case TTM_PL_VRAM: if (adev->mman.buffer_funcs && @@ -257,7 +258,7 @@ gtt: static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp) { - struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); if (amdgpu_ttm_tt_get_usermm(bo->ttm)) return -EPERM; @@ -289,97 +290,177 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, return addr; } -static int amdgpu_move_blit(struct ttm_buffer_object *bo, - bool evict, bool no_wait_gpu, - struct ttm_mem_reg *new_mem, - struct ttm_mem_reg *old_mem) +/** + * amdgpu_find_mm_node - Helper function finds the drm_mm_node + * corresponding to @offset. It also modifies the offset to be + * within the drm_mm_node returned + */ +static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem, + unsigned long *offset) { - struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct drm_mm_node *mm_node = mem->mm_node; - struct drm_mm_node *old_mm, *new_mm; - uint64_t old_start, old_size, new_start, new_size; - unsigned long num_pages; - struct dma_fence *fence = NULL; - int r; + while (*offset >= (mm_node->size << PAGE_SHIFT)) { + *offset -= (mm_node->size << PAGE_SHIFT); + ++mm_node; + } + return mm_node; +} - BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0); +/** + * amdgpu_copy_ttm_mem_to_mem - Helper function for copy + * + * The function copies @size bytes from {src->mem + src->offset} to + * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a + * move and different for a BO to BO copy. + * + * @f: Returns the last fence if multiple jobs are submitted. + */ +int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, + struct amdgpu_copy_mem *src, + struct amdgpu_copy_mem *dst, + uint64_t size, + struct reservation_object *resv, + struct dma_fence **f) +{ + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + struct drm_mm_node *src_mm, *dst_mm; + uint64_t src_node_start, dst_node_start, src_node_size, + dst_node_size, src_page_offset, dst_page_offset; + struct dma_fence *fence = NULL; + int r = 0; + const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE * + AMDGPU_GPU_PAGE_SIZE); if (!ring->ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } - old_mm = old_mem->mm_node; - old_size = old_mm->size; - old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem); + src_mm = amdgpu_find_mm_node(src->mem, &src->offset); + src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) + + src->offset; + src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset; + src_page_offset = src_node_start & (PAGE_SIZE - 1); - new_mm = new_mem->mm_node; - new_size = new_mm->size; - new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem); + dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset); + dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) + + dst->offset; + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset; + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); - num_pages = new_mem->num_pages; mutex_lock(&adev->mman.gtt_window_lock); - while (num_pages) { - unsigned long cur_pages = min(min(old_size, new_size), - (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE); - uint64_t from = old_start, to = new_start; + + while (size) { + unsigned long cur_size; + uint64_t from = src_node_start, to = dst_node_start; struct dma_fence *next; - if (old_mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(old_mem)) { - r = amdgpu_map_buffer(bo, old_mem, cur_pages, - old_start, 0, ring, &from); + /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst + * begins at an offset, then adjust the size accordingly + */ + cur_size = min3(min(src_node_size, dst_node_size), size, + GTT_MAX_BYTES); + if (cur_size + src_page_offset > GTT_MAX_BYTES || + cur_size + dst_page_offset > GTT_MAX_BYTES) + cur_size -= max(src_page_offset, dst_page_offset); + + /* Map only what needs to be accessed. Map src to window 0 and + * dst to window 1 + */ + if (src->mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(src->mem)) { + r = amdgpu_map_buffer(src->bo, src->mem, + PFN_UP(cur_size + src_page_offset), + src_node_start, 0, ring, + &from); if (r) goto error; + /* Adjust the offset because amdgpu_map_buffer returns + * start of mapped page + */ + from += src_page_offset; } - if (new_mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_is_allocated(new_mem)) { - r = amdgpu_map_buffer(bo, new_mem, cur_pages, - new_start, 1, ring, &to); + if (dst->mem->mem_type == TTM_PL_TT && + !amdgpu_gtt_mgr_is_allocated(dst->mem)) { + r = amdgpu_map_buffer(dst->bo, dst->mem, + PFN_UP(cur_size + dst_page_offset), + dst_node_start, 1, ring, + &to); if (r) goto error; + to += dst_page_offset; } - r = amdgpu_copy_buffer(ring, from, to, - cur_pages * PAGE_SIZE, - bo->resv, &next, false, true); + r = amdgpu_copy_buffer(ring, from, to, cur_size, + resv, &next, false, true); if (r) goto error; dma_fence_put(fence); fence = next; - num_pages -= cur_pages; - if (!num_pages) + size -= cur_size; + if (!size) break; - old_size -= cur_pages; - if (!old_size) { - old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem); - old_size = old_mm->size; + src_node_size -= cur_size; + if (!src_node_size) { + src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm, + src->mem); + src_node_size = (src_mm->size << PAGE_SHIFT); } else { - old_start += cur_pages * PAGE_SIZE; + src_node_start += cur_size; + src_page_offset = src_node_start & (PAGE_SIZE - 1); } - - new_size -= cur_pages; - if (!new_size) { - new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem); - new_size = new_mm->size; + dst_node_size -= cur_size; + if (!dst_node_size) { + dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm, + dst->mem); + dst_node_size = (dst_mm->size << PAGE_SHIFT); } else { - new_start += cur_pages * PAGE_SIZE; + dst_node_start += cur_size; + dst_page_offset = dst_node_start & (PAGE_SIZE - 1); } } +error: mutex_unlock(&adev->mman.gtt_window_lock); + if (f) + *f = dma_fence_get(fence); + dma_fence_put(fence); + return r; +} + + +static int amdgpu_move_blit(struct ttm_buffer_object *bo, + bool evict, bool no_wait_gpu, + struct ttm_mem_reg *new_mem, + struct ttm_mem_reg *old_mem) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct amdgpu_copy_mem src, dst; + struct dma_fence *fence = NULL; + int r; + + src.bo = bo; + dst.bo = bo; + src.mem = old_mem; + dst.mem = new_mem; + src.offset = 0; + dst.offset = 0; + + r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst, + new_mem->num_pages << PAGE_SHIFT, + bo->resv, &fence); + if (r) + goto error; r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); dma_fence_put(fence); return r; error: - mutex_unlock(&adev->mman.gtt_window_lock); - if (fence) dma_fence_wait(fence, false); dma_fence_put(fence); @@ -484,7 +565,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, int r; /* Can't move a pinned BO */ - abo = container_of(bo, struct amdgpu_bo, tbo); + abo = ttm_to_amdgpu_bo(bo); if (WARN_ON_ONCE(abo->pin_count > 0)) return -EINVAL; @@ -582,13 +663,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long page_offset) { - struct drm_mm_node *mm = bo->mem.mm_node; - uint64_t size = mm->size; - uint64_t offset = page_offset; + struct drm_mm_node *mm; + unsigned long offset = (page_offset << PAGE_SHIFT); - page_offset = do_div(offset, size); - mm += offset; - return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset; + mm = amdgpu_find_mm_node(&bo->mem, &offset); + return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + + (offset >> PAGE_SHIFT); } /* @@ -1142,9 +1222,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, unsigned long offset, void *buf, int len, int write) { - struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo); + struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo); struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev); - struct drm_mm_node *nodes = abo->tbo.mem.mm_node; + struct drm_mm_node *nodes; uint32_t value = 0; int ret = 0; uint64_t pos; @@ -1153,10 +1233,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo, if (bo->mem.mem_type != TTM_PL_VRAM) return -EIO; - while (offset >= (nodes->size << PAGE_SHIFT)) { - offset -= nodes->size << PAGE_SHIFT; - ++nodes; - } + nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset); pos = (nodes->start << PAGE_SHIFT) + offset; while (len && pos < adev->mc.mc_vram_size) { @@ -1255,6 +1332,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) /* Change the size here instead of the init above so only lpfn is affected */ amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size); + /* + *The reserved vram for firmware must be pinned to the specified + *place on the VRAM, so reserve it early. + */ + r = amdgpu_fw_reserve_vram_init(adev); + if (r) { + return r; + } + r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, &adev->stolen_vga_memory, @@ -1479,7 +1565,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, job->vm_needs_flush = vm_needs_flush; if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + false); if (r) { DRM_ERROR("sync failed (%d).\n", r); goto error_free; @@ -1571,7 +1658,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo, if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, false); if (r) { DRM_ERROR("sync failed (%d).\n", r); goto error_free; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 7abae6867339..abd4084982a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -58,6 +58,12 @@ struct amdgpu_mman { struct amd_sched_entity entity; }; +struct amdgpu_copy_mem { + struct ttm_buffer_object *bo; + struct ttm_mem_reg *mem; + unsigned long offset; +}; + extern const struct ttm_mem_type_manager_func amdgpu_gtt_mgr_func; extern const struct ttm_mem_type_manager_func amdgpu_vram_mgr_func; @@ -72,6 +78,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, struct reservation_object *resv, struct dma_fence **fence, bool direct_submit, bool vm_needs_flush); +int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, + struct amdgpu_copy_mem *src, + struct amdgpu_copy_mem *dst, + uint64_t size, + struct reservation_object *resv, + struct dma_fence **f); int amdgpu_fill_buffer(struct amdgpu_bo *bo, uint64_t src_data, struct reservation_object *resv, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index ab05121b9272..e97f80f86005 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,7 +22,7 @@ */ #include "amdgpu.h" -#define MAX_KIQ_REG_WAIT 100000 +#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */ int amdgpu_allocate_static_csa(struct amdgpu_device *adev) { @@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev) uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) { signed long r; - uint32_t val; - struct dma_fence *f; + uint32_t val, seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); - mutex_lock(&kiq->ring_mutex); + spin_lock(&kiq->ring_lock); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_rreg(ring, reg); - amdgpu_fence_emit(ring, &f); + amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); - mutex_unlock(&kiq->ring_mutex); + spin_unlock(&kiq->ring_lock); - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); - dma_fence_put(f); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); if (r < 1) { - DRM_ERROR("wait for kiq fence error: %ld.\n", r); + DRM_ERROR("wait for kiq fence error: %ld\n", r); return ~0; } - val = adev->wb.wb[adev->virt.reg_val_offs]; return val; @@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) { signed long r; - struct dma_fence *f; + uint32_t seq; struct amdgpu_kiq *kiq = &adev->gfx.kiq; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_wreg); - mutex_lock(&kiq->ring_mutex); + spin_lock(&kiq->ring_lock); amdgpu_ring_alloc(ring, 32); amdgpu_ring_emit_wreg(ring, reg, v); - amdgpu_fence_emit(ring, &f); + amdgpu_fence_emit_polling(ring, &seq); amdgpu_ring_commit(ring); - mutex_unlock(&kiq->ring_mutex); + spin_unlock(&kiq->ring_lock); - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT)); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); if (r < 1) - DRM_ERROR("wait for kiq fence error: %ld.\n", r); - dma_fence_put(f); + DRM_ERROR("wait for kiq fence error: %ld\n", r); } /** @@ -274,3 +270,78 @@ void amdgpu_virt_free_mm_table(struct amdgpu_device *adev) (void *)&adev->virt.mm_table.cpu_addr); adev->virt.mm_table.gpu_addr = 0; } + + +int amdgpu_virt_fw_reserve_get_checksum(void *obj, + unsigned long obj_size, + unsigned int key, + unsigned int chksum) +{ + unsigned int ret = key; + unsigned long i = 0; + unsigned char *pos; + + pos = (char *)obj; + /* calculate checksum */ + for (i = 0; i < obj_size; ++i) + ret += *(pos + i); + /* minus the chksum itself */ + pos = (char *)&chksum; + for (i = 0; i < sizeof(chksum); ++i) + ret -= *(pos + i); + return ret; +} + +void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) +{ + uint32_t pf2vf_ver = 0; + uint32_t pf2vf_size = 0; + uint32_t checksum = 0; + uint32_t checkval; + char *str; + + adev->virt.fw_reserve.p_pf2vf = NULL; + adev->virt.fw_reserve.p_vf2pf = NULL; + + if (adev->fw_vram_usage.va != NULL) { + adev->virt.fw_reserve.p_pf2vf = + (struct amdgim_pf2vf_info_header *)( + adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET); + pf2vf_ver = adev->virt.fw_reserve.p_pf2vf->version; + AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size); + AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum); + + /* pf2vf message must be in 4K */ + if (pf2vf_size > 0 && pf2vf_size < 4096) { + checkval = amdgpu_virt_fw_reserve_get_checksum( + adev->virt.fw_reserve.p_pf2vf, pf2vf_size, + adev->virt.fw_reserve.checksum_key, checksum); + if (checkval == checksum) { + adev->virt.fw_reserve.p_vf2pf = + ((void *)adev->virt.fw_reserve.p_pf2vf + + pf2vf_size); + memset((void *)adev->virt.fw_reserve.p_vf2pf, 0, + sizeof(amdgim_vf2pf_info)); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.version, + AMDGPU_FW_VRAM_VF2PF_VER); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, header.size, + sizeof(amdgim_vf2pf_info)); + AMDGPU_FW_VRAM_VF2PF_READ(adev, driver_version, + &str); + if (THIS_MODULE->version != NULL) + strcpy(str, THIS_MODULE->version); + else + strcpy(str, "N/A"); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, driver_cert, + 0); + AMDGPU_FW_VRAM_VF2PF_WRITE(adev, checksum, + amdgpu_virt_fw_reserve_get_checksum( + adev->virt.fw_reserve.p_vf2pf, + pf2vf_size, + adev->virt.fw_reserve.checksum_key, 0)); + } + } + } +} + + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index e5fd0ff6b29d..b89d37fc406f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -58,6 +58,179 @@ struct amdgpu_virt_ops { void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3); }; +/* + * Firmware Reserve Frame buffer + */ +struct amdgpu_virt_fw_reserve { + struct amdgim_pf2vf_info_header *p_pf2vf; + struct amdgim_vf2pf_info_header *p_vf2pf; + unsigned int checksum_key; +}; +/* + * Defination between PF and VF + * Structures forcibly aligned to 4 to keep the same style as PF. + */ +#define AMDGIM_DATAEXCHANGE_OFFSET (64 * 1024) + +#define AMDGIM_GET_STRUCTURE_RESERVED_SIZE(total, u8, u16, u32, u64) \ + (total - (((u8)+3) / 4 + ((u16)+1) / 2 + (u32) + (u64)*2)) + +enum AMDGIM_FEATURE_FLAG { + /* GIM supports feature of Error log collecting */ + AMDGIM_FEATURE_ERROR_LOG_COLLECT = 0x1, + /* GIM supports feature of loading uCodes */ + AMDGIM_FEATURE_GIM_LOAD_UCODES = 0x2, +}; + +struct amdgim_pf2vf_info_header { + /* the total structure size in byte. */ + uint32_t size; + /* version of this structure, written by the GIM */ + uint32_t version; +} __aligned(4); +struct amdgim_pf2vf_info_v1 { + /* header contains size and version */ + struct amdgim_pf2vf_info_header header; + /* max_width * max_height */ + unsigned int uvd_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + unsigned int uvd_enc_max_bandwidth; + /* max_width * max_height */ + unsigned int vce_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + unsigned int vce_enc_max_bandwidth; + /* MEC FW position in kb from the start of visible frame buffer */ + unsigned int mecfw_kboffset; + /* The features flags of the GIM driver supports. */ + unsigned int feature_flags; + /* use private key from mailbox 2 to create chueksum */ + unsigned int checksum; +} __aligned(4); + +struct amdgim_pf2vf_info_v2 { + /* header contains size and version */ + struct amdgim_pf2vf_info_header header; + /* use private key from mailbox 2 to create chueksum */ + uint32_t checksum; + /* The features flags of the GIM driver supports. */ + uint32_t feature_flags; + /* max_width * max_height */ + uint32_t uvd_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + uint32_t uvd_enc_max_bandwidth; + /* max_width * max_height */ + uint32_t vce_enc_max_pixels_count; + /* 16x16 pixels/sec, codec independent */ + uint32_t vce_enc_max_bandwidth; + /* MEC FW position in kb from the start of VF visible frame buffer */ + uint64_t mecfw_kboffset; + /* MEC FW size in KB */ + uint32_t mecfw_ksize; + /* UVD FW position in kb from the start of VF visible frame buffer */ + uint64_t uvdfw_kboffset; + /* UVD FW size in KB */ + uint32_t uvdfw_ksize; + /* VCE FW position in kb from the start of VF visible frame buffer */ + uint64_t vcefw_kboffset; + /* VCE FW size in KB */ + uint32_t vcefw_ksize; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)]; +} __aligned(4); + + +struct amdgim_vf2pf_info_header { + /* the total structure size in byte. */ + uint32_t size; + /*version of this structure, written by the guest */ + uint32_t version; +} __aligned(4); + +struct amdgim_vf2pf_info_v1 { + /* header contains size and version */ + struct amdgim_vf2pf_info_header header; + /* driver version */ + char driver_version[64]; + /* driver certification, 1=WHQL, 0=None */ + unsigned int driver_cert; + /* guest OS type and version: need a define */ + unsigned int os_info; + /* in the unit of 1M */ + unsigned int fb_usage; + /* guest gfx engine usage percentage */ + unsigned int gfx_usage; + /* guest gfx engine health percentage */ + unsigned int gfx_health; + /* guest compute engine usage percentage */ + unsigned int compute_usage; + /* guest compute engine health percentage */ + unsigned int compute_health; + /* guest vce engine usage percentage. 0xffff means N/A. */ + unsigned int vce_enc_usage; + /* guest vce engine health percentage. 0xffff means N/A. */ + unsigned int vce_enc_health; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + unsigned int uvd_enc_usage; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + unsigned int uvd_enc_health; + unsigned int checksum; +} __aligned(4); + +struct amdgim_vf2pf_info_v2 { + /* header contains size and version */ + struct amdgim_vf2pf_info_header header; + uint32_t checksum; + /* driver version */ + uint8_t driver_version[64]; + /* driver certification, 1=WHQL, 0=None */ + uint32_t driver_cert; + /* guest OS type and version: need a define */ + uint32_t os_info; + /* in the unit of 1M */ + uint32_t fb_usage; + /* guest gfx engine usage percentage */ + uint32_t gfx_usage; + /* guest gfx engine health percentage */ + uint32_t gfx_health; + /* guest compute engine usage percentage */ + uint32_t compute_usage; + /* guest compute engine health percentage */ + uint32_t compute_health; + /* guest vce engine usage percentage. 0xffff means N/A. */ + uint32_t vce_enc_usage; + /* guest vce engine health percentage. 0xffff means N/A. */ + uint32_t vce_enc_health; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + uint32_t uvd_enc_usage; + /* guest uvd engine usage percentage. 0xffff means N/A. */ + uint32_t uvd_enc_health; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)]; +} __aligned(4); + +#define AMDGPU_FW_VRAM_VF2PF_VER 2 +typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ; + +#define AMDGPU_FW_VRAM_VF2PF_WRITE(adev, field, val) \ + do { \ + ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field = (val); \ + } while (0) + +#define AMDGPU_FW_VRAM_VF2PF_READ(adev, field, val) \ + do { \ + (*val) = ((amdgim_vf2pf_info *)adev->virt.fw_reserve.p_vf2pf)->field; \ + } while (0) + +#define AMDGPU_FW_VRAM_PF2VF_READ(adev, field, val) \ + do { \ + if (!adev->virt.fw_reserve.p_pf2vf) \ + *(val) = 0; \ + else { \ + if (adev->virt.fw_reserve.p_pf2vf->version == 1) \ + *(val) = ((struct amdgim_pf2vf_info_v1 *)adev->virt.fw_reserve.p_pf2vf)->field; \ + if (adev->virt.fw_reserve.p_pf2vf->version == 2) \ + *(val) = ((struct amdgim_pf2vf_info_v2 *)adev->virt.fw_reserve.p_pf2vf)->field; \ + } \ + } while (0) + /* GPU virtualization */ struct amdgpu_virt { uint32_t caps; @@ -72,6 +245,7 @@ struct amdgpu_virt { struct amdgpu_mm_table mm_table; const struct amdgpu_virt_ops *ops; struct amdgpu_vf_error_buffer vf_errors; + struct amdgpu_virt_fw_reserve fw_reserve; }; #define AMDGPU_CSA_SIZE (8 * 1024) @@ -114,5 +288,9 @@ int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); int amdgpu_sriov_gpu_reset(struct amdgpu_device *adev, struct amdgpu_job *job); int amdgpu_virt_alloc_mm_table(struct amdgpu_device *adev); void amdgpu_virt_free_mm_table(struct amdgpu_device *adev); +int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, + unsigned int key, + unsigned int chksum); +void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index fee0a32ac56f..010d14195a5e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -328,9 +328,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, AMDGPU_GEM_CREATE_SHADOW); if (vm->pte_support_ats) { - init_value = AMDGPU_PTE_SYSTEM; + init_value = AMDGPU_PTE_DEFAULT_ATC; if (level != adev->vm_manager.num_level - 1) init_value |= AMDGPU_PDE_PTE; + } /* walk over the address space and allocate the page tables */ @@ -1034,7 +1035,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, int r; amdgpu_sync_create(&sync); - amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner); + amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false); r = amdgpu_sync_wait(&sync, true); amdgpu_sync_free(&sync); @@ -1175,11 +1176,11 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev, amdgpu_ring_pad_ib(ring, params.ib); amdgpu_sync_resv(adev, &job->sync, parent->base.bo->tbo.resv, - AMDGPU_FENCE_OWNER_VM); + AMDGPU_FENCE_OWNER_VM, false); if (shadow) amdgpu_sync_resv(adev, &job->sync, shadow->tbo.resv, - AMDGPU_FENCE_OWNER_VM); + AMDGPU_FENCE_OWNER_VM, false); WARN_ON(params.ib->length_dw > ndw); r = amdgpu_job_submit(job, ring, &vm->entity, @@ -1643,7 +1644,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, goto error_free; r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, - owner); + owner, false); if (r) goto error_free; @@ -1698,6 +1699,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, struct drm_mm_node *nodes, struct dma_fence **fence) { + unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size; uint64_t pfn, start = mapping->start; int r; @@ -1732,6 +1734,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } do { + dma_addr_t *dma_addr = NULL; uint64_t max_entries; uint64_t addr, last; @@ -1745,15 +1748,32 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, } if (pages_addr) { + uint64_t count; + max_entries = min(max_entries, 16ull * 1024ull); - addr = 0; + for (count = 1; count < max_entries; ++count) { + uint64_t idx = pfn + count; + + if (pages_addr[idx] != + (pages_addr[idx - 1] + PAGE_SIZE)) + break; + } + + if (count < min_linear_pages) { + addr = pfn << PAGE_SHIFT; + dma_addr = pages_addr; + } else { + addr = pages_addr[pfn]; + max_entries = count; + } + } else if (flags & AMDGPU_PTE_VALID) { addr += adev->vm_manager.vram_base_offset; + addr += pfn << PAGE_SHIFT; } - addr += pfn << PAGE_SHIFT; last = min((uint64_t)mapping->last, start + max_entries - 1); - r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm, + r = amdgpu_vm_bo_update_mapping(adev, exclusive, dma_addr, vm, start, last, flags, addr, fence); if (r) @@ -2017,7 +2037,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, list_del(&mapping->list); if (vm->pte_support_ats) - init_pte_value = AMDGPU_PTE_SYSTEM; + init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, mapping->start, mapping->last, @@ -2629,7 +2649,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (adev->asic_type == CHIP_RAVEN) { vm->pte_support_ats = true; - init_pde_value = AMDGPU_PTE_SYSTEM | AMDGPU_PDE_PTE; + init_pde_value = AMDGPU_PTE_DEFAULT_ATC + | AMDGPU_PDE_PTE; + } } else vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2737,8 +2759,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt; + struct amdgpu_bo *root; u64 fault; - int i; + int i, r; /* Clear pending page faults from IH when the VM is destroyed */ while (kfifo_get(&vm->faults, &fault)) @@ -2773,7 +2796,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) amdgpu_vm_free_mapping(adev, vm, mapping, NULL); } - amdgpu_vm_free_levels(&vm->root); + root = amdgpu_bo_ref(vm->root.base.bo); + r = amdgpu_bo_reserve(root, true); + if (r) { + dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); + } else { + amdgpu_vm_free_levels(&vm->root); + amdgpu_bo_unreserve(root); + } + amdgpu_bo_unref(&root); dma_fence_put(vm->last_update); for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) amdgpu_vm_free_reserved_vmid(adev, vm, i); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index d68f39b4e5e7..aa914256b4bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -73,6 +73,16 @@ struct amdgpu_bo_list_entry; #define AMDGPU_PTE_MTYPE(a) ((uint64_t)a << 57) #define AMDGPU_PTE_MTYPE_MASK AMDGPU_PTE_MTYPE(3ULL) +/* For Raven */ +#define AMDGPU_MTYPE_CC 2 + +#define AMDGPU_PTE_DEFAULT_ATC (AMDGPU_PTE_SYSTEM \ + | AMDGPU_PTE_SNOOPED \ + | AMDGPU_PTE_EXECUTABLE \ + | AMDGPU_PTE_READABLE \ + | AMDGPU_PTE_WRITEABLE \ + | AMDGPU_PTE_MTYPE(AMDGPU_MTYPE_CC)) + /* How to programm VM fault handling */ #define AMDGPU_VM_FAULT_STOP_NEVER 0 #define AMDGPU_VM_FAULT_STOP_FIRST 1 diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 147e92b3a959..b8002ac3e536 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ +#include <linux/kernel.h> #include <linux/firmware.h> #include <drm/drmP.h> #include "amdgpu.h" @@ -3952,10 +3953,10 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.reg_list_format_size_bytes >> 2, unique_indices, &indices_count, - sizeof(unique_indices) / sizeof(int), + ARRAY_SIZE(unique_indices), indirect_start_offsets, &offset_count, - sizeof(indirect_start_offsets)/sizeof(int)); + ARRAY_SIZE(indirect_start_offsets)); /* save and restore list */ WREG32_FIELD(RLC_SRM_CNTL, AUTO_INCR_ADDR, 1); @@ -3977,14 +3978,14 @@ static int gfx_v8_0_init_save_restore_list(struct amdgpu_device *adev) /* starting offsets starts */ WREG32(mmRLC_GPM_SCRATCH_ADDR, adev->gfx.rlc.starting_offsets_start); - for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) + for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) WREG32(mmRLC_GPM_SCRATCH_DATA, indirect_start_offsets[i]); /* unique indices */ temp = mmRLC_SRM_INDEX_CNTL_ADDR_0; data = mmRLC_SRM_INDEX_CNTL_DATA_0; - for (i = 0; i < sizeof(unique_indices) / sizeof(int); i++) { + for (i = 0; i < ARRAY_SIZE(unique_indices); i++) { if (unique_indices[i] != 0) { WREG32(temp + i, unique_indices[i] & 0x3FFFF); WREG32(data + i, unique_indices[i] >> 20); @@ -6394,6 +6395,104 @@ static void gfx_v8_0_ring_set_wptr_compute(struct amdgpu_ring *ring) WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr)); } +static void gfx_v8_0_ring_set_pipe_percent(struct amdgpu_ring *ring, + bool acquire) +{ + struct amdgpu_device *adev = ring->adev; + int pipe_num, tmp, reg; + int pipe_percent = acquire ? SPI_WCL_PIPE_PERCENT_GFX__VALUE_MASK : 0x1; + + pipe_num = ring->me * adev->gfx.mec.num_pipe_per_mec + ring->pipe; + + /* first me only has 2 entries, GFX and HP3D */ + if (ring->me > 0) + pipe_num -= 2; + + reg = mmSPI_WCL_PIPE_PERCENT_GFX + pipe_num; + tmp = RREG32(reg); + tmp = REG_SET_FIELD(tmp, SPI_WCL_PIPE_PERCENT_GFX, VALUE, pipe_percent); + WREG32(reg, tmp); +} + +static void gfx_v8_0_pipe_reserve_resources(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + int i, pipe; + bool reserve; + struct amdgpu_ring *iring; + + mutex_lock(&adev->gfx.pipe_reserve_mutex); + pipe = amdgpu_gfx_queue_to_bit(adev, ring->me, ring->pipe, 0); + if (acquire) + set_bit(pipe, adev->gfx.pipe_reserve_bitmap); + else + clear_bit(pipe, adev->gfx.pipe_reserve_bitmap); + + if (!bitmap_weight(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)) { + /* Clear all reservations - everyone reacquires all resources */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) + gfx_v8_0_ring_set_pipe_percent(&adev->gfx.gfx_ring[i], + true); + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) + gfx_v8_0_ring_set_pipe_percent(&adev->gfx.compute_ring[i], + true); + } else { + /* Lower all pipes without a current reservation */ + for (i = 0; i < adev->gfx.num_gfx_rings; ++i) { + iring = &adev->gfx.gfx_ring[i]; + pipe = amdgpu_gfx_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v8_0_ring_set_pipe_percent(iring, reserve); + } + + for (i = 0; i < adev->gfx.num_compute_rings; ++i) { + iring = &adev->gfx.compute_ring[i]; + pipe = amdgpu_gfx_queue_to_bit(adev, + iring->me, + iring->pipe, + 0); + reserve = test_bit(pipe, adev->gfx.pipe_reserve_bitmap); + gfx_v8_0_ring_set_pipe_percent(iring, reserve); + } + } + + mutex_unlock(&adev->gfx.pipe_reserve_mutex); +} + +static void gfx_v8_0_hqd_set_priority(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + bool acquire) +{ + uint32_t pipe_priority = acquire ? 0x2 : 0x0; + uint32_t queue_priority = acquire ? 0xf : 0x0; + + mutex_lock(&adev->srbm_mutex); + vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + + WREG32(mmCP_HQD_PIPE_PRIORITY, pipe_priority); + WREG32(mmCP_HQD_QUEUE_PRIORITY, queue_priority); + + vi_srbm_select(adev, 0, 0, 0, 0); + mutex_unlock(&adev->srbm_mutex); +} +static void gfx_v8_0_ring_set_priority_compute(struct amdgpu_ring *ring, + enum amd_sched_priority priority) +{ + struct amdgpu_device *adev = ring->adev; + bool acquire = priority == AMD_SCHED_PRIORITY_HIGH_HW; + + if (ring->funcs->type != AMDGPU_RING_TYPE_COMPUTE) + return; + + gfx_v8_0_hqd_set_priority(adev, ring, acquire); + gfx_v8_0_pipe_reserve_resources(adev, ring, acquire); +} + static void gfx_v8_0_ring_emit_fence_compute(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags) @@ -6839,6 +6938,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, + .set_priority = gfx_v8_0_ring_set_priority_compute, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 99a5b3b92e8e..7f15bb2c5233 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -20,6 +20,7 @@ * OTHER DEALINGS IN THE SOFTWARE. * */ +#include <linux/kernel.h> #include <linux/firmware.h> #include <drm/drmP.h> #include "amdgpu.h" @@ -1730,10 +1731,10 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) adev->gfx.rlc.reg_list_format_size_bytes >> 2, unique_indirect_regs, &unique_indirect_reg_count, - sizeof(unique_indirect_regs)/sizeof(int), + ARRAY_SIZE(unique_indirect_regs), indirect_start_offsets, &indirect_start_offsets_count, - sizeof(indirect_start_offsets)/sizeof(int)); + ARRAY_SIZE(indirect_start_offsets)); /* enable auto inc in case it is disabled */ tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_CNTL)); @@ -1770,12 +1771,12 @@ static int gfx_v9_0_init_rlc_save_restore_list(struct amdgpu_device *adev) /* write the starting offsets to RLC scratch ram */ WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_ADDR), adev->gfx.rlc.starting_offsets_start); - for (i = 0; i < sizeof(indirect_start_offsets)/sizeof(int); i++) + for (i = 0; i < ARRAY_SIZE(indirect_start_offsets); i++) WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_SCRATCH_DATA), indirect_start_offsets[i]); /* load unique indirect regs*/ - for (i = 0; i < sizeof(unique_indirect_regs)/sizeof(int); i++) { + for (i = 0; i < ARRAY_SIZE(unique_indirect_regs); i++) { WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_ADDR_0) + i, unique_indirect_regs[i] & 0x3FFFF); WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_SRM_INDEX_CNTL_DATA_0) + i, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 2812d88a8bdd..b4906d2f30d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -183,6 +183,12 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev, pr_err("Doesn't get READY_TO_ACCESS_GPU from pf, give up\n"); return r; } + /* Retrieve checksum from mailbox2 */ + if (req == IDH_REQ_GPU_INIT_ACCESS) { + adev->virt.fw_reserve.checksum_key = + RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, + mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2)); + } } return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 1c006ba9d826..3ca9d114f630 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -279,10 +279,7 @@ static void soc15_init_golden_registers(struct amdgpu_device *adev) } static u32 soc15_get_xclk(struct amdgpu_device *adev) { - if (adev->asic_type == CHIP_VEGA10) - return adev->clock.spll.reference_freq/4; - else - return adev->clock.spll.reference_freq; + return adev->clock.spll.reference_freq; } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 60af7310a234..71299c67c517 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -268,8 +268,9 @@ err: * * Close up a stream for HW test or if userspace failed to do so */ -int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence) +static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, + uint32_t handle, + bool direct, struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; |