summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/i915/i915_gem_render_state.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/i915/i915_gem_render_state.c')
-rw-r--r--drivers/gpu/drm/i915/i915_gem_render_state.c232
1 files changed, 127 insertions, 105 deletions
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index f75bbd67a13a..5af19b0bf713 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -28,10 +28,19 @@
#include "i915_drv.h"
#include "intel_renderstate.h"
+struct intel_render_state {
+ const struct intel_renderstate_rodata *rodata;
+ struct i915_vma *vma;
+ u32 batch_offset;
+ u32 batch_size;
+ u32 aux_offset;
+ u32 aux_size;
+};
+
static const struct intel_renderstate_rodata *
-render_state_get_rodata(const int gen)
+render_state_get_rodata(const struct intel_engine_cs *engine)
{
- switch (gen) {
+ switch (INTEL_GEN(engine->i915)) {
case 6:
return &gen6_null_state;
case 7:
@@ -45,35 +54,6 @@ render_state_get_rodata(const int gen)
return NULL;
}
-static int render_state_init(struct render_state *so,
- struct drm_i915_private *dev_priv)
-{
- int ret;
-
- so->gen = INTEL_GEN(dev_priv);
- so->rodata = render_state_get_rodata(so->gen);
- if (so->rodata == NULL)
- return 0;
-
- if (so->rodata->batch_items * 4 > 4096)
- return -EINVAL;
-
- so->obj = i915_gem_object_create(&dev_priv->drm, 4096);
- if (IS_ERR(so->obj))
- return PTR_ERR(so->obj);
-
- ret = i915_gem_obj_ggtt_pin(so->obj, 4096, 0);
- if (ret)
- goto free_gem;
-
- so->ggtt_offset = i915_gem_obj_ggtt_offset(so->obj);
- return 0;
-
-free_gem:
- drm_gem_object_unreference(&so->obj->base);
- return ret;
-}
-
/*
* Macro to add commands to auxiliary batch.
* This macro only checks for page overflow before inserting the commands,
@@ -85,41 +65,37 @@ free_gem:
*/
#define OUT_BATCH(batch, i, val) \
do { \
- if (WARN_ON((i) >= PAGE_SIZE / sizeof(u32))) { \
- ret = -ENOSPC; \
- goto err_out; \
- } \
+ if ((i) >= PAGE_SIZE / sizeof(u32)) \
+ goto err; \
(batch)[(i)++] = (val); \
} while(0)
-static int render_state_setup(struct render_state *so)
+static int render_state_setup(struct intel_render_state *so,
+ struct drm_i915_private *i915)
{
- struct drm_device *dev = so->obj->base.dev;
const struct intel_renderstate_rodata *rodata = so->rodata;
+ struct drm_i915_gem_object *obj = so->vma->obj;
unsigned int i = 0, reloc_index = 0;
- struct page *page;
+ unsigned int needs_clflush;
u32 *d;
int ret;
- ret = i915_gem_object_set_to_cpu_domain(so->obj, true);
+ ret = i915_gem_obj_prepare_shmem_write(obj, &needs_clflush);
if (ret)
return ret;
- page = i915_gem_object_get_dirty_page(so->obj, 0);
- d = kmap(page);
+ d = kmap_atomic(i915_gem_object_get_dirty_page(obj, 0));
while (i < rodata->batch_items) {
u32 s = rodata->batch[i];
if (i * 4 == rodata->reloc[reloc_index]) {
- u64 r = s + so->ggtt_offset;
+ u64 r = s + so->vma->node.start;
s = lower_32_bits(r);
- if (so->gen >= 8) {
+ if (HAS_64BIT_RELOC(i915)) {
if (i + 1 >= rodata->batch_items ||
- rodata->batch[i + 1] != 0) {
- ret = -EINVAL;
- goto err_out;
- }
+ rodata->batch[i + 1] != 0)
+ goto err;
d[i++] = s;
s = upper_32_bits(r);
@@ -131,12 +107,20 @@ static int render_state_setup(struct render_state *so)
d[i++] = s;
}
+ if (rodata->reloc[reloc_index] != -1) {
+ DRM_ERROR("only %d relocs resolved\n", reloc_index);
+ goto err;
+ }
+
+ so->batch_offset = so->vma->node.start;
+ so->batch_size = rodata->batch_items * sizeof(u32);
+
while (i % CACHELINE_DWORDS)
OUT_BATCH(d, i, MI_NOOP);
- so->aux_batch_offset = i * sizeof(u32);
+ so->aux_offset = i * sizeof(u32);
- if (HAS_POOLED_EU(dev)) {
+ if (HAS_POOLED_EU(i915)) {
/*
* We always program 3x6 pool config but depending upon which
* subslice is disabled HW drops down to appropriate config
@@ -164,95 +148,133 @@ static int render_state_setup(struct render_state *so)
}
OUT_BATCH(d, i, MI_BATCH_BUFFER_END);
- so->aux_batch_size = (i * sizeof(u32)) - so->aux_batch_offset;
-
+ so->aux_size = i * sizeof(u32) - so->aux_offset;
+ so->aux_offset += so->batch_offset;
/*
* Since we are sending length, we need to strictly conform to
* all requirements. For Gen2 this must be a multiple of 8.
*/
- so->aux_batch_size = ALIGN(so->aux_batch_size, 8);
-
- kunmap(page);
-
- ret = i915_gem_object_set_to_gtt_domain(so->obj, false);
- if (ret)
- return ret;
-
- if (rodata->reloc[reloc_index] != -1) {
- DRM_ERROR("only %d relocs resolved\n", reloc_index);
- return -EINVAL;
- }
+ so->aux_size = ALIGN(so->aux_size, 8);
- return 0;
+ if (needs_clflush)
+ drm_clflush_virt_range(d, i * sizeof(u32));
+ kunmap_atomic(d);
-err_out:
- kunmap(page);
+ ret = i915_gem_object_set_to_gtt_domain(obj, false);
+out:
+ i915_gem_obj_finish_shmem_access(obj);
return ret;
+
+err:
+ kunmap_atomic(d);
+ ret = -EINVAL;
+ goto out;
}
#undef OUT_BATCH
-void i915_gem_render_state_fini(struct render_state *so)
-{
- i915_gem_object_ggtt_unpin(so->obj);
- drm_gem_object_unreference(&so->obj->base);
-}
-
-int i915_gem_render_state_prepare(struct intel_engine_cs *engine,
- struct render_state *so)
+int i915_gem_render_state_init(struct intel_engine_cs *engine)
{
+ struct intel_render_state *so;
+ const struct intel_renderstate_rodata *rodata;
+ struct drm_i915_gem_object *obj;
int ret;
- if (WARN_ON(engine->id != RCS))
- return -ENOENT;
-
- ret = render_state_init(so, engine->i915);
- if (ret)
- return ret;
+ if (engine->id != RCS)
+ return 0;
- if (so->rodata == NULL)
+ rodata = render_state_get_rodata(engine);
+ if (!rodata)
return 0;
- ret = render_state_setup(so);
- if (ret) {
- i915_gem_render_state_fini(so);
- return ret;
+ if (rodata->batch_items * 4 > 4096)
+ return -EINVAL;
+
+ so = kmalloc(sizeof(*so), GFP_KERNEL);
+ if (!so)
+ return -ENOMEM;
+
+ obj = i915_gem_object_create_internal(engine->i915, 4096);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
+ goto err_free;
}
+ so->vma = i915_vma_create(obj, &engine->i915->ggtt.base, NULL);
+ if (IS_ERR(so->vma)) {
+ ret = PTR_ERR(so->vma);
+ goto err_obj;
+ }
+
+ so->rodata = rodata;
+ engine->render_state = so;
return 0;
+
+err_obj:
+ i915_gem_object_put(obj);
+err_free:
+ kfree(so);
+ return ret;
}
-int i915_gem_render_state_init(struct drm_i915_gem_request *req)
+int i915_gem_render_state_emit(struct drm_i915_gem_request *req)
{
- struct render_state so;
+ struct intel_render_state *so;
int ret;
- ret = i915_gem_render_state_prepare(req->engine, &so);
- if (ret)
- return ret;
+ lockdep_assert_held(&req->i915->drm.struct_mutex);
- if (so.rodata == NULL)
+ so = req->engine->render_state;
+ if (!so)
return 0;
- ret = req->engine->dispatch_execbuffer(req, so.ggtt_offset,
- so.rodata->batch_items * 4,
- I915_DISPATCH_SECURE);
+ /* Recreate the page after shrinking */
+ if (!so->vma->obj->mm.pages)
+ so->batch_offset = -1;
+
+ ret = i915_vma_pin(so->vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
if (ret)
- goto out;
+ return ret;
- if (so.aux_batch_size > 8) {
- ret = req->engine->dispatch_execbuffer(req,
- (so.ggtt_offset +
- so.aux_batch_offset),
- so.aux_batch_size,
- I915_DISPATCH_SECURE);
+ if (so->vma->node.start != so->batch_offset) {
+ ret = render_state_setup(so, req->i915);
if (ret)
- goto out;
+ goto err_unpin;
}
- i915_vma_move_to_active(i915_gem_obj_to_ggtt(so.obj), req);
+ ret = req->engine->emit_bb_start(req,
+ so->batch_offset, so->batch_size,
+ I915_DISPATCH_SECURE);
+ if (ret)
+ goto err_unpin;
-out:
- i915_gem_render_state_fini(&so);
+ if (so->aux_size > 8) {
+ ret = req->engine->emit_bb_start(req,
+ so->aux_offset, so->aux_size,
+ I915_DISPATCH_SECURE);
+ if (ret)
+ goto err_unpin;
+ }
+
+ i915_vma_move_to_active(so->vma, req, 0);
+err_unpin:
+ i915_vma_unpin(so->vma);
return ret;
}
+
+void i915_gem_render_state_fini(struct intel_engine_cs *engine)
+{
+ struct intel_render_state *so;
+ struct drm_i915_gem_object *obj;
+
+ so = fetch_and_zero(&engine->render_state);
+ if (!so)
+ return;
+
+ obj = so->vma->obj;
+
+ i915_vma_close(so->vma);
+ __i915_gem_object_release_unless_active(obj);
+
+ kfree(so);
+}