Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.c')
-rw-r--r--   drivers/gpu/drm/i915/intel_ringbuffer.c  |  408
1 file changed, 260 insertions, 148 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c index 3817a6f00d9e..6e6b8db996ef 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.c +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c @@ -81,7 +81,7 @@ bool intel_ring_stopped(struct intel_engine_cs *ring)  	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);  } -void __intel_ring_advance(struct intel_engine_cs *ring) +static void __intel_ring_advance(struct intel_engine_cs *ring)  {  	struct intel_ringbuffer *ringbuf = ring->buffer;  	ringbuf->tail &= ringbuf->size - 1; @@ -91,10 +91,11 @@ void __intel_ring_advance(struct intel_engine_cs *ring)  }  static int -gen2_render_ring_flush(struct intel_engine_cs *ring, +gen2_render_ring_flush(struct drm_i915_gem_request *req,  		       u32	invalidate_domains,  		       u32	flush_domains)  { +	struct intel_engine_cs *ring = req->ring;  	u32 cmd;  	int ret; @@ -105,7 +106,7 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,  	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)  		cmd |= MI_READ_FLUSH; -	ret = intel_ring_begin(ring, 2); +	ret = intel_ring_begin(req, 2);  	if (ret)  		return ret; @@ -117,10 +118,11 @@ gen2_render_ring_flush(struct intel_engine_cs *ring,  }  static int -gen4_render_ring_flush(struct intel_engine_cs *ring, +gen4_render_ring_flush(struct drm_i915_gem_request *req,  		       u32	invalidate_domains,  		       u32	flush_domains)  { +	struct intel_engine_cs *ring = req->ring;  	struct drm_device *dev = ring->dev;  	u32 cmd;  	int ret; @@ -163,7 +165,7 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,  	    (IS_G4X(dev) || IS_GEN5(dev)))  		cmd |= MI_INVALIDATE_ISP; -	ret = intel_ring_begin(ring, 2); +	ret = intel_ring_begin(req, 2);  	if (ret)  		return ret; @@ -212,13 +214,13 @@ gen4_render_ring_flush(struct intel_engine_cs *ring,   * really our business.  That leaves only stall at scoreboard.   
*/  static int -intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring) +intel_emit_post_sync_nonzero_flush(struct drm_i915_gem_request *req)  { +	struct intel_engine_cs *ring = req->ring;  	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;  	int ret; - -	ret = intel_ring_begin(ring, 6); +	ret = intel_ring_begin(req, 6);  	if (ret)  		return ret; @@ -231,7 +233,7 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)  	intel_ring_emit(ring, MI_NOOP);  	intel_ring_advance(ring); -	ret = intel_ring_begin(ring, 6); +	ret = intel_ring_begin(req, 6);  	if (ret)  		return ret; @@ -247,15 +249,16 @@ intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)  }  static int -gen6_render_ring_flush(struct intel_engine_cs *ring, -                         u32 invalidate_domains, u32 flush_domains) +gen6_render_ring_flush(struct drm_i915_gem_request *req, +		       u32 invalidate_domains, u32 flush_domains)  { +	struct intel_engine_cs *ring = req->ring;  	u32 flags = 0;  	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;  	int ret;  	/* Force SNB workarounds for PIPE_CONTROL flushes */ -	ret = intel_emit_post_sync_nonzero_flush(ring); +	ret = intel_emit_post_sync_nonzero_flush(req);  	if (ret)  		return ret; @@ -285,7 +288,7 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,  		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;  	} -	ret = intel_ring_begin(ring, 4); +	ret = intel_ring_begin(req, 4);  	if (ret)  		return ret; @@ -299,11 +302,12 @@ gen6_render_ring_flush(struct intel_engine_cs *ring,  }  static int -gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring) +gen7_render_ring_cs_stall_wa(struct drm_i915_gem_request *req)  { +	struct intel_engine_cs *ring = req->ring;  	int ret; -	ret = intel_ring_begin(ring, 4); +	ret = intel_ring_begin(req, 4);  	if (ret)  		return ret; @@ -318,9 +322,10 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)  }  static int -gen7_render_ring_flush(struct intel_engine_cs *ring, +gen7_render_ring_flush(struct drm_i915_gem_request *req,  		       u32 invalidate_domains, u32 flush_domains)  { +	struct intel_engine_cs *ring = req->ring;  	u32 flags = 0;  	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;  	int ret; @@ -362,10 +367,10 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,  		/* Workaround: we must issue a pipe_control with CS-stall bit  		 * set before a pipe_control command that has the state cache  		 * invalidate bit set. 
*/ -		gen7_render_ring_cs_stall_wa(ring); +		gen7_render_ring_cs_stall_wa(req);  	} -	ret = intel_ring_begin(ring, 4); +	ret = intel_ring_begin(req, 4);  	if (ret)  		return ret; @@ -379,12 +384,13 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,  }  static int -gen8_emit_pipe_control(struct intel_engine_cs *ring, +gen8_emit_pipe_control(struct drm_i915_gem_request *req,  		       u32 flags, u32 scratch_addr)  { +	struct intel_engine_cs *ring = req->ring;  	int ret; -	ret = intel_ring_begin(ring, 6); +	ret = intel_ring_begin(req, 6);  	if (ret)  		return ret; @@ -400,11 +406,11 @@ gen8_emit_pipe_control(struct intel_engine_cs *ring,  }  static int -gen8_render_ring_flush(struct intel_engine_cs *ring, +gen8_render_ring_flush(struct drm_i915_gem_request *req,  		       u32 invalidate_domains, u32 flush_domains)  {  	u32 flags = 0; -	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES; +	u32 scratch_addr = req->ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;  	int ret;  	flags |= PIPE_CONTROL_CS_STALL; @@ -424,7 +430,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,  		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;  		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */ -		ret = gen8_emit_pipe_control(ring, +		ret = gen8_emit_pipe_control(req,  					     PIPE_CONTROL_CS_STALL |  					     PIPE_CONTROL_STALL_AT_SCOREBOARD,  					     0); @@ -432,7 +438,7 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,  			return ret;  	} -	return gen8_emit_pipe_control(ring, flags, scratch_addr); +	return gen8_emit_pipe_control(req, flags, scratch_addr);  }  static void ring_write_tail(struct intel_engine_cs *ring, @@ -703,10 +709,10 @@ err:  	return ret;  } -static int intel_ring_workarounds_emit(struct intel_engine_cs *ring, -				       struct intel_context *ctx) +static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)  {  	int ret, i; +	struct intel_engine_cs *ring = req->ring;  	struct drm_device *dev = ring->dev;  	struct drm_i915_private *dev_priv = dev->dev_private;  	struct i915_workarounds *w = &dev_priv->workarounds; @@ -715,11 +721,11 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,  		return 0;  	ring->gpu_caches_dirty = true; -	ret = intel_ring_flush_all_caches(ring); +	ret = intel_ring_flush_all_caches(req);  	if (ret)  		return ret; -	ret = intel_ring_begin(ring, (w->count * 2 + 2)); +	ret = intel_ring_begin(req, (w->count * 2 + 2));  	if (ret)  		return ret; @@ -733,7 +739,7 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,  	intel_ring_advance(ring);  	ring->gpu_caches_dirty = true; -	ret = intel_ring_flush_all_caches(ring); +	ret = intel_ring_flush_all_caches(req);  	if (ret)  		return ret; @@ -742,16 +748,15 @@ static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,  	return 0;  } -static int intel_rcs_ctx_init(struct intel_engine_cs *ring, -			      struct intel_context *ctx) +static int intel_rcs_ctx_init(struct drm_i915_gem_request *req)  {  	int ret; -	ret = intel_ring_workarounds_emit(ring, ctx); +	ret = intel_ring_workarounds_emit(req);  	if (ret != 0)  		return ret; -	ret = i915_gem_render_state_init(ring); +	ret = i915_gem_render_state_init(req);  	if (ret)  		DRM_ERROR("init render state: %d\n", ret); @@ -775,11 +780,11 @@ static int wa_add(struct drm_i915_private *dev_priv,  	return 0;  } -#define WA_REG(addr, mask, val) { \ +#define WA_REG(addr, mask, val) do { \  		const int r = wa_add(dev_priv, (addr), (mask), (val)); \  		if (r) \  			return r; \ -	} +	} while (0)  #define 
WA_SET_BIT_MASKED(addr, mask) \  	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask)) @@ -800,6 +805,11 @@ static int bdw_init_workarounds(struct intel_engine_cs *ring)  	struct drm_device *dev = ring->dev;  	struct drm_i915_private *dev_priv = dev->dev_private; +	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + +	/* WaDisableAsyncFlipPerfMode:bdw */ +	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); +  	/* WaDisablePartialInstShootdown:bdw */  	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */  	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, @@ -861,6 +871,11 @@ static int chv_init_workarounds(struct intel_engine_cs *ring)  	struct drm_device *dev = ring->dev;  	struct drm_i915_private *dev_priv = dev->dev_private; +	WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + +	/* WaDisableAsyncFlipPerfMode:chv */ +	WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); +  	/* WaDisablePartialInstShootdown:chv */  	/* WaDisableThreadStallDopClockGating:chv */  	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, @@ -931,8 +946,11 @@ static int gen9_init_workarounds(struct intel_engine_cs *ring)  		/* WaSetDisablePixMaskCammingAndRhwoInCommonSliceChicken:skl,bxt */  		WA_SET_BIT_MASKED(GEN7_COMMON_SLICE_CHICKEN1,  				  GEN9_RHWO_OPTIMIZATION_DISABLE); -		WA_SET_BIT_MASKED(GEN9_SLICE_COMMON_ECO_CHICKEN0, -				  DISABLE_PIXEL_MASK_CAMMING); +		/* +		 * WA also requires GEN9_SLICE_COMMON_ECO_CHICKEN0[14:14] to be set +		 * but we do that in per ctx batchbuffer as there is an issue +		 * with this register not getting restored on ctx restore +		 */  	}  	if ((IS_SKYLAKE(dev) && INTEL_REVID(dev) >= SKL_REVID_C0) || @@ -1023,13 +1041,6 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)  		WA_SET_BIT_MASKED(HIZ_CHICKEN,  				  BDW_HIZ_POWER_COMPILER_CLOCK_GATING_DISABLE); -	if (INTEL_REVID(dev) == SKL_REVID_C0 || -	    INTEL_REVID(dev) == SKL_REVID_D0) -		/* WaBarrierPerformanceFixDisable:skl */ -		WA_SET_BIT_MASKED(HDC_CHICKEN0, -				  HDC_FENCE_DEST_SLM_DISABLE | -				  HDC_BARRIER_PERFORMANCE_DISABLE); -  	if (INTEL_REVID(dev) <= SKL_REVID_D0) {  		/*  		 *Use Force Non-Coherent whenever executing a 3D context. This @@ -1041,6 +1052,20 @@ static int skl_init_workarounds(struct intel_engine_cs *ring)  				  HDC_FORCE_NON_COHERENT);  	} +	if (INTEL_REVID(dev) == SKL_REVID_C0 || +	    INTEL_REVID(dev) == SKL_REVID_D0) +		/* WaBarrierPerformanceFixDisable:skl */ +		WA_SET_BIT_MASKED(HDC_CHICKEN0, +				  HDC_FENCE_DEST_SLM_DISABLE | +				  HDC_BARRIER_PERFORMANCE_DISABLE); + +	/* WaDisableSbeCacheDispatchPortSharing:skl */ +	if (INTEL_REVID(dev) <= SKL_REVID_F0) { +		WA_SET_BIT_MASKED( +			GEN7_HALF_SLICE_CHICKEN1, +			GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); +	} +  	return skl_tune_iz_hashing(ring);  } @@ -1105,9 +1130,9 @@ static int init_render_ring(struct intel_engine_cs *ring)  	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be  	 * programmed to '1' on all products.  	 
* -	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv +	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv  	 */ -	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 9) +	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)  		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));  	/* Required for the hardware to program scanline values for waiting */ @@ -1132,7 +1157,7 @@ static int init_render_ring(struct intel_engine_cs *ring)  			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));  	} -	if (INTEL_INFO(dev)->gen >= 6) +	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8)  		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));  	if (HAS_L3_DPF(dev)) @@ -1155,10 +1180,11 @@ static void render_ring_cleanup(struct intel_engine_cs *ring)  	intel_fini_pipe_control(ring);  } -static int gen8_rcs_signal(struct intel_engine_cs *signaller, +static int gen8_rcs_signal(struct drm_i915_gem_request *signaller_req,  			   unsigned int num_dwords)  {  #define MBOX_UPDATE_DWORDS 8 +	struct intel_engine_cs *signaller = signaller_req->ring;  	struct drm_device *dev = signaller->dev;  	struct drm_i915_private *dev_priv = dev->dev_private;  	struct intel_engine_cs *waiter; @@ -1168,7 +1194,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,  	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;  #undef MBOX_UPDATE_DWORDS -	ret = intel_ring_begin(signaller, num_dwords); +	ret = intel_ring_begin(signaller_req, num_dwords);  	if (ret)  		return ret; @@ -1178,8 +1204,7 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,  		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)  			continue; -		seqno = i915_gem_request_get_seqno( -					   signaller->outstanding_lazy_request); +		seqno = i915_gem_request_get_seqno(signaller_req);  		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));  		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |  					   PIPE_CONTROL_QW_WRITE | @@ -1196,10 +1221,11 @@ static int gen8_rcs_signal(struct intel_engine_cs *signaller,  	return 0;  } -static int gen8_xcs_signal(struct intel_engine_cs *signaller, +static int gen8_xcs_signal(struct drm_i915_gem_request *signaller_req,  			   unsigned int num_dwords)  {  #define MBOX_UPDATE_DWORDS 6 +	struct intel_engine_cs *signaller = signaller_req->ring;  	struct drm_device *dev = signaller->dev;  	struct drm_i915_private *dev_priv = dev->dev_private;  	struct intel_engine_cs *waiter; @@ -1209,7 +1235,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,  	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;  #undef MBOX_UPDATE_DWORDS -	ret = intel_ring_begin(signaller, num_dwords); +	ret = intel_ring_begin(signaller_req, num_dwords);  	if (ret)  		return ret; @@ -1219,8 +1245,7 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,  		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)  			continue; -		seqno = i915_gem_request_get_seqno( -					   signaller->outstanding_lazy_request); +		seqno = i915_gem_request_get_seqno(signaller_req);  		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |  					   MI_FLUSH_DW_OP_STOREDW);  		intel_ring_emit(signaller, lower_32_bits(gtt_offset) | @@ -1235,9 +1260,10 @@ static int gen8_xcs_signal(struct intel_engine_cs *signaller,  	return 0;  } -static int gen6_signal(struct intel_engine_cs *signaller, +static int gen6_signal(struct drm_i915_gem_request *signaller_req,  		       unsigned int num_dwords)  { +	struct intel_engine_cs *signaller = signaller_req->ring;  	struct drm_device *dev = signaller->dev;  	struct drm_i915_private 
*dev_priv = dev->dev_private;  	struct intel_engine_cs *useless; @@ -1248,15 +1274,14 @@ static int gen6_signal(struct intel_engine_cs *signaller,  	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);  #undef MBOX_UPDATE_DWORDS -	ret = intel_ring_begin(signaller, num_dwords); +	ret = intel_ring_begin(signaller_req, num_dwords);  	if (ret)  		return ret;  	for_each_ring(useless, dev_priv, i) {  		u32 mbox_reg = signaller->semaphore.mbox.signal[i];  		if (mbox_reg != GEN6_NOSYNC) { -			u32 seqno = i915_gem_request_get_seqno( -					   signaller->outstanding_lazy_request); +			u32 seqno = i915_gem_request_get_seqno(signaller_req);  			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));  			intel_ring_emit(signaller, mbox_reg);  			intel_ring_emit(signaller, seqno); @@ -1272,30 +1297,29 @@ static int gen6_signal(struct intel_engine_cs *signaller,  /**   * gen6_add_request - Update the semaphore mailbox registers - *  - * @ring - ring that is adding a request - * @seqno - return seqno stuck into the ring + * + * @request - request to write to the ring   *   * Update the mailbox registers in the *other* rings with the current seqno.   * This acts like a signal in the canonical semaphore.   */  static int -gen6_add_request(struct intel_engine_cs *ring) +gen6_add_request(struct drm_i915_gem_request *req)  { +	struct intel_engine_cs *ring = req->ring;  	int ret;  	if (ring->semaphore.signal) -		ret = ring->semaphore.signal(ring, 4); +		ret = ring->semaphore.signal(req, 4);  	else -		ret = intel_ring_begin(ring, 4); +		ret = intel_ring_begin(req, 4);  	if (ret)  		return ret;  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); -	intel_ring_emit(ring, -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request)); +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));  	intel_ring_emit(ring, MI_USER_INTERRUPT);  	__intel_ring_advance(ring); @@ -1318,14 +1342,15 @@ static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,   */  static int -gen8_ring_sync(struct intel_engine_cs *waiter, +gen8_ring_sync(struct drm_i915_gem_request *waiter_req,  	       struct intel_engine_cs *signaller,  	       u32 seqno)  { +	struct intel_engine_cs *waiter = waiter_req->ring;  	struct drm_i915_private *dev_priv = waiter->dev->dev_private;  	int ret; -	ret = intel_ring_begin(waiter, 4); +	ret = intel_ring_begin(waiter_req, 4);  	if (ret)  		return ret; @@ -1343,10 +1368,11 @@ gen8_ring_sync(struct intel_engine_cs *waiter,  }  static int -gen6_ring_sync(struct intel_engine_cs *waiter, +gen6_ring_sync(struct drm_i915_gem_request *waiter_req,  	       struct intel_engine_cs *signaller,  	       u32 seqno)  { +	struct intel_engine_cs *waiter = waiter_req->ring;  	u32 dw1 = MI_SEMAPHORE_MBOX |  		  MI_SEMAPHORE_COMPARE |  		  MI_SEMAPHORE_REGISTER; @@ -1361,7 +1387,7 @@ gen6_ring_sync(struct intel_engine_cs *waiter,  	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID); -	ret = intel_ring_begin(waiter, 4); +	ret = intel_ring_begin(waiter_req, 4);  	if (ret)  		return ret; @@ -1392,8 +1418,9 @@ do {									\  } while (0)  static int -pc_render_add_request(struct intel_engine_cs *ring) +pc_render_add_request(struct drm_i915_gem_request *req)  { +	struct intel_engine_cs *ring = req->ring;  	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;  	int ret; @@ -1405,7 +1432,7 @@ pc_render_add_request(struct intel_engine_cs *ring)  	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to  	 * memory before requesting 
an interrupt.  	 */ -	ret = intel_ring_begin(ring, 32); +	ret = intel_ring_begin(req, 32);  	if (ret)  		return ret; @@ -1413,8 +1440,7 @@ pc_render_add_request(struct intel_engine_cs *ring)  			PIPE_CONTROL_WRITE_FLUSH |  			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);  	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); -	intel_ring_emit(ring, -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request)); +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));  	intel_ring_emit(ring, 0);  	PIPE_CONTROL_FLUSH(ring, scratch_addr);  	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */ @@ -1433,8 +1459,7 @@ pc_render_add_request(struct intel_engine_cs *ring)  			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |  			PIPE_CONTROL_NOTIFY);  	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT); -	intel_ring_emit(ring, -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request)); +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));  	intel_ring_emit(ring, 0);  	__intel_ring_advance(ring); @@ -1585,13 +1610,14 @@ i8xx_ring_put_irq(struct intel_engine_cs *ring)  }  static int -bsd_ring_flush(struct intel_engine_cs *ring, +bsd_ring_flush(struct drm_i915_gem_request *req,  	       u32     invalidate_domains,  	       u32     flush_domains)  { +	struct intel_engine_cs *ring = req->ring;  	int ret; -	ret = intel_ring_begin(ring, 2); +	ret = intel_ring_begin(req, 2);  	if (ret)  		return ret; @@ -1602,18 +1628,18 @@ bsd_ring_flush(struct intel_engine_cs *ring,  }  static int -i9xx_add_request(struct intel_engine_cs *ring) +i9xx_add_request(struct drm_i915_gem_request *req)  { +	struct intel_engine_cs *ring = req->ring;  	int ret; -	ret = intel_ring_begin(ring, 4); +	ret = intel_ring_begin(req, 4);  	if (ret)  		return ret;  	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);  	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT); -	intel_ring_emit(ring, -		    i915_gem_request_get_seqno(ring->outstanding_lazy_request)); +	intel_ring_emit(ring, i915_gem_request_get_seqno(req));  	intel_ring_emit(ring, MI_USER_INTERRUPT);  	__intel_ring_advance(ring); @@ -1745,13 +1771,14 @@ gen8_ring_put_irq(struct intel_engine_cs *ring)  }  static int -i965_dispatch_execbuffer(struct intel_engine_cs *ring, +i965_dispatch_execbuffer(struct drm_i915_gem_request *req,  			 u64 offset, u32 length,  			 unsigned dispatch_flags)  { +	struct intel_engine_cs *ring = req->ring;  	int ret; -	ret = intel_ring_begin(ring, 2); +	ret = intel_ring_begin(req, 2);  	if (ret)  		return ret; @@ -1771,14 +1798,15 @@ i965_dispatch_execbuffer(struct intel_engine_cs *ring,  #define I830_TLB_ENTRIES (2)  #define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)  static int -i830_dispatch_execbuffer(struct intel_engine_cs *ring, +i830_dispatch_execbuffer(struct drm_i915_gem_request *req,  			 u64 offset, u32 len,  			 unsigned dispatch_flags)  { +	struct intel_engine_cs *ring = req->ring;  	u32 cs_offset = ring->scratch.gtt_offset;  	int ret; -	ret = intel_ring_begin(ring, 6); +	ret = intel_ring_begin(req, 6);  	if (ret)  		return ret; @@ -1795,7 +1823,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,  		if (len > I830_BATCH_LIMIT)  			return -ENOSPC; -		ret = intel_ring_begin(ring, 6 + 2); +		ret = intel_ring_begin(req, 6 + 2);  		if (ret)  			return ret; @@ -1818,7 +1846,7 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,  		offset = cs_offset;  	} -	ret = intel_ring_begin(ring, 4); +	ret = intel_ring_begin(req, 4);  	if (ret)  		return 
ret; @@ -1833,13 +1861,14 @@ i830_dispatch_execbuffer(struct intel_engine_cs *ring,  }  static int -i915_dispatch_execbuffer(struct intel_engine_cs *ring, +i915_dispatch_execbuffer(struct drm_i915_gem_request *req,  			 u64 offset, u32 len,  			 unsigned dispatch_flags)  { +	struct intel_engine_cs *ring = req->ring;  	int ret; -	ret = intel_ring_begin(ring, 2); +	ret = intel_ring_begin(req, 2);  	if (ret)  		return ret; @@ -2082,7 +2111,6 @@ void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)  	intel_unpin_ringbuffer_obj(ringbuf);  	intel_destroy_ringbuffer_obj(ringbuf); -	i915_gem_request_assign(&ring->outstanding_lazy_request, NULL);  	if (ring->cleanup)  		ring->cleanup(ring); @@ -2106,6 +2134,9 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)  	if (intel_ring_space(ringbuf) >= n)  		return 0; +	/* The whole point of reserving space is to not wait! */ +	WARN_ON(ringbuf->reserved_in_use); +  	list_for_each_entry(request, &ring->request_list, list) {  		space = __intel_ring_space(request->postfix, ringbuf->tail,  					   ringbuf->size); @@ -2124,18 +2155,11 @@ static int ring_wait_for_space(struct intel_engine_cs *ring, int n)  	return 0;  } -static int intel_wrap_ring_buffer(struct intel_engine_cs *ring) +static void __wrap_ring_buffer(struct intel_ringbuffer *ringbuf)  {  	uint32_t __iomem *virt; -	struct intel_ringbuffer *ringbuf = ring->buffer;  	int rem = ringbuf->size - ringbuf->tail; -	if (ringbuf->space < rem) { -		int ret = ring_wait_for_space(ring, rem); -		if (ret) -			return ret; -	} -  	virt = ringbuf->virtual_start + ringbuf->tail;  	rem /= 4;  	while (rem--) @@ -2143,21 +2167,11 @@ static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)  	ringbuf->tail = 0;  	intel_ring_update_space(ringbuf); - -	return 0;  }  int intel_ring_idle(struct intel_engine_cs *ring)  {  	struct drm_i915_gem_request *req; -	int ret; - -	/* We need to add any requests required to flush the objects and ring */ -	if (ring->outstanding_lazy_request) { -		ret = i915_add_request(ring); -		if (ret) -			return ret; -	}  	/* Wait upon the last request to be completed */  	if (list_empty(&ring->request_list)) @@ -2180,33 +2194,126 @@ int intel_ring_alloc_request_extras(struct drm_i915_gem_request *request)  	return 0;  } -static int __intel_ring_prepare(struct intel_engine_cs *ring, -				int bytes) +int intel_ring_reserve_space(struct drm_i915_gem_request *request) +{ +	/* +	 * The first call merely notes the reserve request and is common for +	 * all back ends. The subsequent localised _begin() call actually +	 * ensures that the reservation is available. Without the begin, if +	 * the request creator immediately submitted the request without +	 * adding any commands to it then there might not actually be +	 * sufficient room for the submission commands. 
+	 */ +	intel_ring_reserved_space_reserve(request->ringbuf, MIN_SPACE_FOR_ADD_REQUEST); + +	return intel_ring_begin(request, 0); +} + +void intel_ring_reserved_space_reserve(struct intel_ringbuffer *ringbuf, int size) +{ +	WARN_ON(ringbuf->reserved_size); +	WARN_ON(ringbuf->reserved_in_use); + +	ringbuf->reserved_size = size; +} + +void intel_ring_reserved_space_cancel(struct intel_ringbuffer *ringbuf) +{ +	WARN_ON(ringbuf->reserved_in_use); + +	ringbuf->reserved_size   = 0; +	ringbuf->reserved_in_use = false; +} + +void intel_ring_reserved_space_use(struct intel_ringbuffer *ringbuf) +{ +	WARN_ON(ringbuf->reserved_in_use); + +	ringbuf->reserved_in_use = true; +	ringbuf->reserved_tail   = ringbuf->tail; +} + +void intel_ring_reserved_space_end(struct intel_ringbuffer *ringbuf) +{ +	WARN_ON(!ringbuf->reserved_in_use); +	if (ringbuf->tail > ringbuf->reserved_tail) { +		WARN(ringbuf->tail > ringbuf->reserved_tail + ringbuf->reserved_size, +		     "request reserved size too small: %d vs %d!\n", +		     ringbuf->tail - ringbuf->reserved_tail, ringbuf->reserved_size); +	} else { +		/* +		 * The ring was wrapped while the reserved space was in use. +		 * That means that some unknown amount of the ring tail was +		 * no-op filled and skipped. Thus simply adding the ring size +		 * to the tail and doing the above space check will not work. +		 * Rather than attempt to track how much tail was skipped, +		 * it is much simpler to say that also skipping the sanity +		 * check every once in a while is not a big issue. +		 */ +	} + +	ringbuf->reserved_size   = 0; +	ringbuf->reserved_in_use = false; +} + +static int __intel_ring_prepare(struct intel_engine_cs *ring, int bytes)  {  	struct intel_ringbuffer *ringbuf = ring->buffer; -	int ret; +	int remain_usable = ringbuf->effective_size - ringbuf->tail; +	int remain_actual = ringbuf->size - ringbuf->tail; +	int ret, total_bytes, wait_bytes = 0; +	bool need_wrap = false; -	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) { -		ret = intel_wrap_ring_buffer(ring); -		if (unlikely(ret)) -			return ret; +	if (ringbuf->reserved_in_use) +		total_bytes = bytes; +	else +		total_bytes = bytes + ringbuf->reserved_size; + +	if (unlikely(bytes > remain_usable)) { +		/* +		 * Not enough space for the basic request. So need to flush +		 * out the remainder and then wait for base + reserved. +		 */ +		wait_bytes = remain_actual + total_bytes; +		need_wrap = true; +	} else { +		if (unlikely(total_bytes > remain_usable)) { +			/* +			 * The base request will fit but the reserved space +			 * falls off the end. So only need to to wait for the +			 * reserved size after flushing out the remainder. +			 */ +			wait_bytes = remain_actual + ringbuf->reserved_size; +			need_wrap = true; +		} else if (total_bytes > ringbuf->space) { +			/* No wrapping required, just waiting. 
*/ +			wait_bytes = total_bytes; +		}  	} -	if (unlikely(ringbuf->space < bytes)) { -		ret = ring_wait_for_space(ring, bytes); +	if (wait_bytes) { +		ret = ring_wait_for_space(ring, wait_bytes);  		if (unlikely(ret))  			return ret; + +		if (need_wrap) +			__wrap_ring_buffer(ringbuf);  	}  	return 0;  } -int intel_ring_begin(struct intel_engine_cs *ring, +int intel_ring_begin(struct drm_i915_gem_request *req,  		     int num_dwords)  { -	struct drm_i915_private *dev_priv = ring->dev->dev_private; +	struct intel_engine_cs *ring; +	struct drm_i915_private *dev_priv;  	int ret; +	WARN_ON(req == NULL); +	ring = req->ring; +	dev_priv = ring->dev->dev_private; +  	ret = i915_gem_check_wedge(&dev_priv->gpu_error,  				   dev_priv->mm.interruptible);  	if (ret) @@ -2216,18 +2323,14 @@ int intel_ring_begin(struct intel_engine_cs *ring,  	if (ret)  		return ret; -	/* Preallocate the olr before touching the ring */ -	ret = i915_gem_request_alloc(ring, ring->default_context); -	if (ret) -		return ret; -  	ring->buffer->space -= num_dwords * sizeof(uint32_t);  	return 0;  }  /* Align the ring tail to a cacheline boundary */ -int intel_ring_cacheline_align(struct intel_engine_cs *ring) +int intel_ring_cacheline_align(struct drm_i915_gem_request *req)  { +	struct intel_engine_cs *ring = req->ring;  	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);  	int ret; @@ -2235,7 +2338,7 @@ int intel_ring_cacheline_align(struct intel_engine_cs *ring)  		return 0;  	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords; -	ret = intel_ring_begin(ring, num_dwords); +	ret = intel_ring_begin(req, num_dwords);  	if (ret)  		return ret; @@ -2252,8 +2355,6 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)  	struct drm_device *dev = ring->dev;  	struct drm_i915_private *dev_priv = dev->dev_private; -	BUG_ON(ring->outstanding_lazy_request); -  	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {  		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);  		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0); @@ -2298,13 +2399,14 @@ static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,  		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));  } -static int gen6_bsd_ring_flush(struct intel_engine_cs *ring, +static int gen6_bsd_ring_flush(struct drm_i915_gem_request *req,  			       u32 invalidate, u32 flush)  { +	struct intel_engine_cs *ring = req->ring;  	uint32_t cmd;  	int ret; -	ret = intel_ring_begin(ring, 4); +	ret = intel_ring_begin(req, 4);  	if (ret)  		return ret; @@ -2342,20 +2444,23 @@ static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,  }  static int -gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +gen8_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,  			      u64 offset, u32 len,  			      unsigned dispatch_flags)  { +	struct intel_engine_cs *ring = req->ring;  	bool ppgtt = USES_PPGTT(ring->dev) &&  			!(dispatch_flags & I915_DISPATCH_SECURE);  	int ret; -	ret = intel_ring_begin(ring, 4); +	ret = intel_ring_begin(req, 4);  	if (ret)  		return ret;  	/* FIXME(BDW): Address space and security selectors. */ -	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8)); +	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8) | +			(dispatch_flags & I915_DISPATCH_RS ? 
+			 MI_BATCH_RESOURCE_STREAMER : 0));  	intel_ring_emit(ring, lower_32_bits(offset));  	intel_ring_emit(ring, upper_32_bits(offset));  	intel_ring_emit(ring, MI_NOOP); @@ -2365,20 +2470,23 @@ gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,  }  static int -hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +hsw_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,  			     u64 offset, u32 len,  			     unsigned dispatch_flags)  { +	struct intel_engine_cs *ring = req->ring;  	int ret; -	ret = intel_ring_begin(ring, 2); +	ret = intel_ring_begin(req, 2);  	if (ret)  		return ret;  	intel_ring_emit(ring,  			MI_BATCH_BUFFER_START |  			(dispatch_flags & I915_DISPATCH_SECURE ? -			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW)); +			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW) | +			(dispatch_flags & I915_DISPATCH_RS ? +			 MI_BATCH_RESOURCE_STREAMER : 0));  	/* bit0-7 is the length on GEN6+ */  	intel_ring_emit(ring, offset);  	intel_ring_advance(ring); @@ -2387,13 +2495,14 @@ hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,  }  static int -gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring, +gen6_ring_dispatch_execbuffer(struct drm_i915_gem_request *req,  			      u64 offset, u32 len,  			      unsigned dispatch_flags)  { +	struct intel_engine_cs *ring = req->ring;  	int ret; -	ret = intel_ring_begin(ring, 2); +	ret = intel_ring_begin(req, 2);  	if (ret)  		return ret; @@ -2410,14 +2519,15 @@ gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,  /* Blitter support (SandyBridge+) */ -static int gen6_ring_flush(struct intel_engine_cs *ring, +static int gen6_ring_flush(struct drm_i915_gem_request *req,  			   u32 invalidate, u32 flush)  { +	struct intel_engine_cs *ring = req->ring;  	struct drm_device *dev = ring->dev;  	uint32_t cmd;  	int ret; -	ret = intel_ring_begin(ring, 4); +	ret = intel_ring_begin(req, 4);  	if (ret)  		return ret; @@ -2818,26 +2928,28 @@ int intel_init_vebox_ring_buffer(struct drm_device *dev)  }  int -intel_ring_flush_all_caches(struct intel_engine_cs *ring) +intel_ring_flush_all_caches(struct drm_i915_gem_request *req)  { +	struct intel_engine_cs *ring = req->ring;  	int ret;  	if (!ring->gpu_caches_dirty)  		return 0; -	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS); +	ret = ring->flush(req, 0, I915_GEM_GPU_DOMAINS);  	if (ret)  		return ret; -	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS); +	trace_i915_gem_ring_flush(req, 0, I915_GEM_GPU_DOMAINS);  	ring->gpu_caches_dirty = false;  	return 0;  }  int -intel_ring_invalidate_all_caches(struct intel_engine_cs *ring) +intel_ring_invalidate_all_caches(struct drm_i915_gem_request *req)  { +	struct intel_engine_cs *ring = req->ring;  	uint32_t flush_domains;  	int ret; @@ -2845,11 +2957,11 @@ intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)  	if (ring->gpu_caches_dirty)  		flush_domains = I915_GEM_GPU_DOMAINS; -	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); +	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, flush_domains);  	if (ret)  		return ret; -	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains); +	trace_i915_gem_ring_flush(req, I915_GEM_GPU_DOMAINS, flush_domains);  	ring->gpu_caches_dirty = false;  	return 0;  | 
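
The bulk of this diff threads the request into every ring-emission path: the flush, add_request, semaphore signal/sync and dispatch vfuncs now take a struct drm_i915_gem_request * and derive the engine from req->ring, intel_ring_begin() checks space against the request's ring, and the seqno written into the ring comes from i915_gem_request_get_seqno(req) rather than the removed ring->outstanding_lazy_request. A minimal sketch of the converted pattern, modelled on i9xx_add_request() as changed above (the name example_emit_breadcrumb is illustrative only, not a function in this patch):

static int
example_emit_breadcrumb(struct drm_i915_gem_request *req)
{
	/* The engine is always recovered from the request. */
	struct intel_engine_cs *ring = req->ring;
	int ret;

	/* Ring space is requested against the request, not the engine. */
	ret = intel_ring_begin(req, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	/* The seqno comes straight from the request; no lazy request lookup. */
	intel_ring_emit(ring, i915_gem_request_get_seqno(req));
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	intel_ring_advance(ring);

	return 0;
}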

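The new reservation helpers near the end of the diff guarantee that the commands needed to close out a request can always be written: intel_ring_reserve_space() notes MIN_SPACE_FOR_ADD_REQUEST up front, __intel_ring_prepare() folds reserved_size into its wait/wrap calculation, and the _use()/_end() pair marks the window in which the reserved bytes are actually consumed. A hedged sketch of the intended ordering, inferred from those helpers and the comments added with them; the real call sites live in the request-construction code outside this file, and example_build_request() is an illustrative name only:

static int
example_build_request(struct drm_i915_gem_request *req)
{
	struct intel_ringbuffer *ringbuf = req->ringbuf;
	int ret;

	/* 1. At request creation: record the reservation and, via the
	 *    zero-dword intel_ring_begin() inside the helper, make sure the
	 *    space can actually be made available before any commands are
	 *    accepted. */
	ret = intel_ring_reserve_space(req);
	if (ret)
		return ret;

	/* 2. The request owner then emits whatever it likes; while the
	 *    reservation is outstanding, every intel_ring_begin(req, n)
	 *    waits for n dwords plus the reserved size, so the closing
	 *    commands cannot be squeezed out of the ring. */

	/* 3. When writing the closing (add-request) commands, switch to the
	 *    reserved bytes so __intel_ring_prepare() stops double-counting
	 *    them ... */
	intel_ring_reserved_space_use(ringbuf);

	/* ... emit the breadcrumb commands here ... */

	/* 4. ... and check afterwards that the reservation was large enough
	 *    (intel_ring_reserved_space_end() WARNs if the tail advanced
	 *    past it). A request abandoned before this point would call
	 *    intel_ring_reserved_space_cancel() instead. */
	intel_ring_reserved_space_end(ringbuf);

	return 0;
}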