Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_render_state.c')
-rw-r--r--	drivers/gpu/drm/i915/i915_gem_render_state.c	232
1 file changed, 127 insertions, 105 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index f75bbd67a13a..5af19b0bf713 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,10 +28,19 @@
 #include "i915_drv.h"
 #include "intel_renderstate.h"
 
+struct intel_render_state {
+	const struct intel_renderstate_rodata *rodata;
+	struct i915_vma *vma;
+	u32 batch_offset;
+	u32 batch_size;
+	u32 aux_offset;
+	u32 aux_size;
+};
+
 static const struct intel_renderstate_rodata *
-render_state_get_rodata(const int gen)
+render_state_get_rodata(const struct intel_engine_cs *engine)
 {
-	switch (gen) {
+	switch (INTEL_GEN(engine->i915)) {
 	case 6:
 		return &gen6_null_state;
 	case 7:
@@ -45,35 +54,6 @@ render_state_get_rodata(const int gen)
 	return NULL;
 }
 
-static int render_state_init(struct render_state *so,
-			     struct drm_i915_private *dev_priv)
-{
-	int ret;
-
-	so->gen = INTEL_GEN(dev_priv);
-	so->rodata = render_state_get_rodata(so->gen);
-	if (so->rodata == NULL)
-		return 0;
-
-	if (so->rodata->batch_items * 4 > 4096)
-		return -EINVAL;
-
-	so->obj = i915_gem_object_create(&dev_priv->drm, 4096);
-	if (IS_ERR(so->obj))
-		return PTR_ERR(so->obj);
-
-	ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
-	if (ret)
-		goto free_gem;
-
-	so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
-	return 0;
-
-free_gem:
-	drm_gem_object_unreference(&so->obj->base);
-	return ret;
-}
-
 /*
  * Macro to add commands to auxiliary batch.
  * This macro only checks for page overflow before inserting the commands,
@@ -85,41 +65,37 @@ free_gem:
  */
 #define OUT_BATCH(batch, i, val)				\
 	do {							\
-		if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) {	\
-			ret = -ENOSPC;				\
-			goto err_out;				\
-		}						\
+		if ((i) >= PAGE_SIZE / sizeof(u32))		\
+			goto err;				\
 		(batch)[(i)++] = (val);				\
 	} while(0)
 
-static int render_state_setup(struct render_state *so)
+static int render_state_setup(struct intel_render_state *so,
+			      struct drm_i915_private *i915)
 {
-	struct drm_device *dev = so->obj->base.dev;
 	const struct intel_renderstate_rodata *rodata = so->rodata;
+	struct drm_i915_gem_object *obj = so->vma->obj;
 	unsigned int i = 0, reloc_index = 0;
-	struct page *page;
+	unsigned int needs_clflush;
 	u32 *d;
 	int ret;
 
-	ret = i915_gem_object_set_to_cpu_domain(so->obj, true);
+	ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
 	if (ret)
 		return ret;
 
-	page = i915_gem_object_get_dirty_page(so->obj, 0);
-	d = kmap(page);
+	d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0));
 
 	while (i < rodata->batch_items) {
 		u32 s = rodata->batch[i];
 
 		if (i * 4 == rodata->reloc[reloc_index]) {
-			u64 r = s + so->ggtt_offset;
+			u64 r = s + so->vma->node.start;
 			s = lower_32_bits(r);
-			if (so->gen >= 8) {
+			if (HAS_64BIT_RELOC(i915)) {
 				if (i + 1 >= rodata->batch_items ||
-				    rodata->batch[i + 1] != 0) {
-					ret = -EINVAL;
-					goto err_out;
-				}
+				    rodata->batch[i + 1] != 0)
+					goto err;
 
 				d[i++] = s;
 				s = upper_32_bits(r);
@@ -131,12 +107,20 @@ static int render_state_setup(struct render_state *so)
 		d[i++] = s;
 	}
 
+	if (rodata->reloc[reloc_index] != -1) {
+		DRM_ERROR("only %d relocs resolved\n", reloc_index);
+		goto err;
+	}
+
+	so->batch_offset = so->vma->node.start;
+	so->batch_size = rodata->batch_items * sizeof(u32);
+
 	while (i % CACHELINE_DWORDS)
 		OUT_BATCH(d, i, MI_NOOP);
 
-	so->aux_batch_offset = i * sizeof(u32);
+	so->aux_offset = i * sizeof(u32);
 
-	if (HAS_POOLED_EU(dev)) {
+	if (HAS_POOLED_EU(i915)) {
 		/*
 		 * We always program 3x6 pool config but depending upon which
 		 * subslice is disabled HW drops down to appropriate config
@@ -164,95 +148,133 @@ static int render_state_setup(struct render_state *so)
 	}
 
 	OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
-	so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
-
+	so->aux_size = i * sizeof(u32) - so->aux_offset;
+	so->aux_offset += so->batch_offset;
 	/*
	 * Since we are sending length, we need to strictly conform to
	 * all requirements. For Gen2 this must be a multiple of 8.
	 */
-	so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
-
-	kunmap(page);
-
-	ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
-	if (ret)
-		return ret;
-
-	if (rodata->reloc[reloc_index] != -1) {
-		DRM_ERROR("only %d relocs resolved\n", reloc_index);
-		return -EINVAL;
-	}
+	so->aux_size = ALIGN(so->aux_size, 8);
 
-	return 0;
+	if (needs_clflush)
+		drm_clflush_virt_range(d, i * sizeof(u32));
+	kunmap_atomic(d);
 
-err_out:
-	kunmap(page);
+	ret = i915_gem_object_set_to_gtt_domain(obj, false);
+out:
+	i915_gem_obj_finish_shmem_access(obj);
 	return ret;
+
+err:
+	kunmap_atomic(d);
+	ret = -EINVAL;
+	goto out;
 }
 
 #undef OUT_BATCH
 
-void i915_gem_render_state_fini(struct render_state *so)
-{
-	i915_gem_object_ggtt_unpin(so->obj);
-	drm_gem_object_unreference(&so->obj->base);
-}
-
-int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
-				  struct render_state *so)
+int i915_gem_render_state_init(struct intel_engine_cs *engine)
 {
+	struct intel_render_state *so;
+	const struct intel_renderstate_rodata *rodata;
+	struct drm_i915_gem_object *obj;
 	int ret;
 
-	if (WARN_ON(engine->id != RCS))
-		return -ENOENT;
-
-	ret = render_state_init(so, engine->i915);
-	if (ret)
-		return ret;
+	if (engine->id != RCS)
+		return 0;
 
-	if (so->rodata == NULL)
+	rodata = render_state_get_rodata(engine);
+	if (!rodata)
 		return 0;
 
-	ret = render_state_setup(so);
-	if (ret) {
-		i915_gem_render_state_fini(so);
-		return ret;
+	if (rodata->batch_items * 4 > 4096)
+		return -EINVAL;
+
+	so = kmalloc(sizeof(*so), GFP_KERNEL);
+	if (!so)
+		return -ENOMEM;
+
+	obj = i915_gem_object_create_internal(engine->i915, 4096);
+	if (IS_ERR(obj)) {
+		ret = PTR_ERR(obj);
+		goto err_free;
 	}
 
+	so->vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
+	if (IS_ERR(so->vma)) {
+		ret = PTR_ERR(so->vma);
+		goto err_obj;
+	}
+
+	so->rodata = rodata;
+	engine->render_state = so;
 	return 0;
+
+err_obj:
+	i915_gem_object_put(obj);
+err_free:
+	kfree(so);
+	return ret;
 }
 
-int i915_gem_render_state_init(struct drm_i915_gem_request *req)
+int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
 {
-	struct render_state so;
+	struct intel_render_state *so;
 	int ret;
 
-	ret = i915_gem_render_state_prepare(req->engine, &so);
-	if (ret)
-		return ret;
+	lockdep_assert_held(&req->i915->drm.struct_mutex);
 
-	if (so.rodata == NULL)
+	so = req->engine->render_state;
+	if (!so)
 		return 0;
 
-	ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset,
-					       so.rodata->batch_items * 4,
-					       I915_DISPATCH_SECURE);
+	/* Recreate the page after shrinking */
+	if (!so->vma->obj->mm.pages)
+		so->batch_offset = -1;
+
+	ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
 	if (ret)
-		goto out;
+		return ret;
 
-	if (so.aux_batch_size > 8) {
-		ret = req->engine->dispatch_execbuffer(req,
-						       (so.ggtt_offset +
-							so.aux_batch_offset),
-						       so.aux_batch_size,
-						       I915_DISPATCH_SECURE);
+	if (so->vma->node.start != so->batch_offset) {
+		ret = render_state_setup(so, req->i915);
 		if (ret)
-			goto out;
+			goto err_unpin;
 	}
 
-	i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
+	ret = req->engine->emit_bb_start(req,
+					 so->batch_offset, so->batch_size,
+					 I915_DISPATCH_SECURE);
+	if (ret)
+		goto err_unpin;
 
-out:
-	i915_gem_render_state_fini(&so);
+	if (so->aux_size > 8) {
+		ret = req->engine->emit_bb_start(req,
+						 so->aux_offset, so->aux_size,
+						 I915_DISPATCH_SECURE);
+		if (ret)
+			goto err_unpin;
+	}
+
+	i915_vma_move_to_active(so->vma, req, 0);
+err_unpin:
+	i915_vma_unpin(so->vma);
	return ret;
 }
+
+void i915_gem_render_state_fini(struct intel_engine_cs *engine)
+{
+	struct intel_render_state *so;
+	struct drm_i915_gem_object *obj;
+
+	so = fetch_and_zero(&engine->render_state);
+	if (!so)
+		return;
+
+	obj = so->vma->obj;
+
+	i915_vma_close(so->vma);
+	__i915_gem_object_release_unless_active(obj);
+
+	kfree(so);
+}
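
For context, the patch moves the golden render state from a per-request, stack-allocated struct render_state to a single intel_render_state cached on the engine, rewriting the batch only when the backing pages are lost or the vma moves. A minimal sketch of the resulting caller lifecycle, assuming the usual engine setup/teardown and request-emission points; the example_* wrappers are illustrative and not part of the patch:

	/* once, at engine setup: allocate and cache the null-state batch */
	static int example_engine_setup(struct intel_engine_cs *engine)
	{
		/* returns 0 for non-render engines and gens without a null state */
		return i915_gem_render_state_init(engine);
	}

	/* when emitting the first request on the render ring */
	static int example_emit_initial_request(struct drm_i915_gem_request *req)
	{
		/*
		 * Pins the cached vma, re-runs render_state_setup() only if the
		 * object was shrunk or rebound at a new address, then emits the
		 * main and auxiliary batches into the request.
		 */
		return i915_gem_render_state_emit(req);
	}

	/* once, at engine teardown */
	static void example_engine_cleanup(struct intel_engine_cs *engine)
	{
		i915_gem_render_state_fini(engine);
	}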
