author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-02-01 17:48:47 -0800
---|---|---
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-02-01 17:48:47 -0800
commit | 4bf772b14675411a69b3c807f73006de0fe4b649 |
tree | b841e3ba0e3429695589cb0ab73871fa12f42c38 /drivers/gpu/drm/i915/intel_ringbuffer.h |
parent | 3879ae653a3e98380fe2daf653338830b7ca0097 |
parent | 24b8ef699e8221d2b7f813adaab13eec053e1507 |
Merge tag 'drm-for-v4.16' of git://people.freedesktop.org/~airlied/linux
Pull drm updates from Dave Airlie:
"This seems to have been a comparatively quieter merge window, I assume
due to holidays etc. The "biggest" change is AMD header cleanups, which
merge/remove a bunch of them. The AMD gpu scheduler is now being made generic
with the etnaviv driver wanting to reuse the code, hopefully other drivers
can go in the same direction.
Otherwise it's the usual lots of stuff in i915/amdgpu, not so much stuff
elsewhere.
Core:
- Add .last_close and .output_poll_changed helpers to reduce driver footprints
- Fix plane clipping
- Improved debug printing support
- Add panel orientation property
- Update EDID-derived properties when the EDID is set
- Reduction in fbdev driver footprint
- Move amdgpu scheduler into core for other drivers to use.
i915:
- Selftest and IGT improvements
- Fast boot prep work on IPS, pipe config
- HW workarounds for Cannonlake, Geminilake
- Cannonlake clock and HDMI2.0 fixes
- GPU cache invalidation and context switch improvements
- Display planes cleanup
- New PMU interface for perf queries
- New firmware support for KBL/SKL
- Geminilake HW workaround for performance
- Coffeelake stolen memory improvements
- GPU reset robustness work
- Cannonlake horizontal plane flipping
- GVT work
amdgpu/radeon:
- RV and Vega header file cleanups (lots of lines gone!)
- TTM operation context support
- 48-bit GPUVM support for Vega/RV
- ECC support for Vega
- Resizeable BAR support
- Multi-display sync support
- Enable swapout for reserved BOs during allocation
- S3 fixes on Raven
- GPU reset cleanup and fixes
- 2+1 level GPU page table
amdkfd:
- GFX7/8 SDMA user queues support
- Hardware scheduling for multiple processes
- dGPU prep work
rcar:
- Add R8A7743/5 support
- System suspend/resume support
sun4i:
- Multi-plane support for YUV formats
- A83T and LVDS support
msm:
- Devfreq support for GPU
tegra:
- Prep work for adding Tegra186 support
- Tegra186 HDMI support
- HDMI2.0 and zpos support by using generic helpers
tilcdc:
- Misc fixes
omapdrm:
- Support memory bandwidth limits
- DSI command mode panel cleanups
- DMM error handling
exynos:
- Drop the old IPP subdriver
etnaviv:
- Occlusion query fixes
- Job handling fixes
- Prep work for hooking in gpu scheduler
armada:
- Move closer to atomic modesetting
- Allow disabling primary plane if overlay is full screen
imx:
- Format modifier support
- Add tile prefetch to PRE
- Runtime PM support for PRG
ast:
- fix LUT loading"
* tag 'drm-for-v4.16' of git://people.freedesktop.org/~airlied/linux: (1471 commits)
drm/ast: Load lut in crtc_commit
drm: Check for lessee in DROP_MASTER ioctl
drm: fix gpu scheduler link order
drm/amd/display: Demote error print to debug print when ATOM impl missing
dma-buf: fix reservation_object_wait_timeout_rcu once more v2
drm/amdgpu: Avoid leaking PM domain on driver unbind (v2)
drm/amd/amdgpu: Add Polaris version check
drm/amdgpu: Reenable manual GPU reset from sysfs
drm/amdgpu: disable MMHUB power gating on raven
drm/ttm: Don't unreserve swapped BOs that were previously reserved
drm/ttm: Don't add swapped BOs to swap-LRU list
drm/amdgpu: only check for ECC on Vega10
drm/amd/powerplay: Fix smu_table_entry.handle type
drm/ttm: add VADDR_FLAG_UPDATED_COUNT to correctly update dma_page global count
drm: Fix PANEL_ORIENTATION_QUIRKS breaking the Kconfig DRM menuconfig
drm/radeon: fill in rb backend map on evergreen/ni.
drm/amdgpu/gfx9: fix ngg enablement to clear gds reserved memory (v2)
drm/ttm: only free pages rather than update global memory count together
drm/amdgpu: fix CPU based VM updates
drm/amdgpu: fix typo in amdgpu_vce_validate_bo
...
Diffstat (limited to 'drivers/gpu/drm/i915/intel_ringbuffer.h')
-rw-r--r-- | drivers/gpu/drm/i915/intel_ringbuffer.h | 263
1 file changed, 235 insertions, 28 deletions
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 2863d5a65187..c5ff203e42d6 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -6,6 +6,7 @@
 #include "i915_gem_batch_pool.h"
 #include "i915_gem_request.h"
 #include "i915_gem_timeline.h"
+#include "i915_pmu.h"
 #include "i915_selftest.h"
 
 struct drm_printer;
@@ -47,16 +48,6 @@ struct intel_hw_status_page {
 /* seqno size is actually only a uint32, but since we plan to use MI_FLUSH_DW to
  * do the writes, and that must have qw aligned offsets, simply pretend it's 8b.
  */
-#define gen8_semaphore_seqno_size sizeof(uint64_t)
-#define GEN8_SEMAPHORE_OFFSET(__from, __to) \
-	(((__from) * I915_NUM_ENGINES + (__to)) * gen8_semaphore_seqno_size)
-#define GEN8_SIGNAL_OFFSET(__ring, to) \
-	(dev_priv->semaphore->node.start + \
-	 GEN8_SEMAPHORE_OFFSET((__ring)->id, (to)))
-#define GEN8_WAIT_OFFSET(__ring, from) \
-	(dev_priv->semaphore->node.start + \
-	 GEN8_SEMAPHORE_OFFSET(from, (__ring)->id))
-
 enum intel_engine_hangcheck_action {
 	ENGINE_IDLE = 0,
 	ENGINE_WAIT,
@@ -166,7 +157,6 @@ struct i915_ctx_workarounds {
 };
 
 struct drm_i915_gem_request;
-struct intel_render_state;
 
 /*
  * Engine IDs definitions.
@@ -195,9 +185,9 @@ struct i915_priolist {
  */
 struct intel_engine_execlists {
 	/**
-	 * @irq_tasklet: softirq tasklet for bottom handler
+	 * @tasklet: softirq tasklet for bottom handler
 	 */
-	struct tasklet_struct irq_tasklet;
+	struct tasklet_struct tasklet;
 
 	/**
 	 * @default_priolist: priority list for I915_PRIORITY_NORMAL
@@ -210,6 +200,11 @@
 	bool no_priolist;
 
 	/**
+	 * @elsp: the ExecList Submission Port register
+	 */
+	u32 __iomem *elsp;
+
+	/**
 	 * @port: execlist port states
 	 *
 	 * For each hardware ELSP (ExecList Submission Port) we keep
@@ -253,6 +248,7 @@
 	unsigned int active;
 #define EXECLISTS_ACTIVE_USER 0
 #define EXECLISTS_ACTIVE_PREEMPT 1
+#define EXECLISTS_ACTIVE_HWACK 2
 
 	/**
	 * @port_mask: number of execlist ports - 1
@@ -290,11 +286,14 @@
 struct intel_engine_cs {
 	struct drm_i915_private *i915;
 	char name[INTEL_ENGINE_CS_MAX_NAME];
+
 	enum intel_engine_id id;
-	unsigned int uabi_id;
 	unsigned int hw_id;
 	unsigned int guc_id;
 
+	u8 uabi_id;
+	u8 uabi_class;
+
 	u8 class;
 	u8 instance;
 	u32 context_size;
@@ -304,7 +303,7 @@
 	struct intel_ring *buffer;
 	struct intel_timeline *timeline;
 
-	struct intel_render_state *render_state;
+	struct drm_i915_gem_object *default_state;
 
 	atomic_t irq_count;
 	unsigned long irq_posted;
@@ -340,12 +339,49 @@
 		struct timer_list hangcheck; /* detect missed interrupts */
 
 		unsigned int hangcheck_interrupts;
+		unsigned int irq_enabled;
 
 		bool irq_armed : 1;
-		bool irq_enabled : 1;
 		I915_SELFTEST_DECLARE(bool mock : 1);
 	} breadcrumbs;
 
+	struct {
+		/**
+		 * @enable: Bitmask of enable sample events on this engine.
+		 *
+		 * Bits correspond to sample event types, for instance
+		 * I915_SAMPLE_QUEUED is bit 0 etc.
+		 */
+		u32 enable;
+		/**
+		 * @enable_count: Reference count for the enabled samplers.
+		 *
+		 * Index number corresponds to the bit number from @enable.
+		 */
+		unsigned int enable_count[I915_PMU_SAMPLE_BITS];
+		/**
+		 * @sample: Counter values for sampling events.
+		 *
+		 * Our internal timer stores the current counters in this field.
+		 */
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_SEMA + 1)
+		struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
+		/**
+		 * @busy_stats: Has enablement of engine stats tracking been
+		 * requested.
+		 */
+		bool busy_stats;
+		/**
+		 * @disable_busy_stats: Work item for busy stats disabling.
+		 *
+		 * Same as with @enable_busy_stats action, with the difference
+		 * that we delay it in case there are rapid enable-disable
+		 * actions, which can happen during tool startup (like perf
+		 * stat).
+		 */
+		struct delayed_work disable_busy_stats;
+	} pmu;
+
 	/*
 	 * A pool of objects to use as shadow copies of client batch buffers
 	 * when the command parser is enabled. Prevents the client from
@@ -366,6 +402,9 @@
 	void		(*reset_hw)(struct intel_engine_cs *engine,
 				    struct drm_i915_gem_request *req);
 
+	void		(*park)(struct intel_engine_cs *engine);
+	void		(*unpark)(struct intel_engine_cs *engine);
+
 	void		(*set_default_submission)(struct intel_engine_cs *engine);
 
 	struct intel_ring *(*context_pin)(struct intel_engine_cs *engine,
@@ -462,18 +501,15 @@
 	 *  ie. transpose of f(x, y)
 	 */
 	struct {
-		union {
#define GEN6_SEMAPHORE_LAST	VECS_HW
#define GEN6_NUM_SEMAPHORES	(GEN6_SEMAPHORE_LAST + 1)
#define GEN6_SEMAPHORES_MASK	GENMASK(GEN6_SEMAPHORE_LAST, 0)
-			struct {
-				/* our mbox written by others */
-				u32		wait[GEN6_NUM_SEMAPHORES];
-				/* mboxes this ring signals to */
-				i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
-			} mbox;
-			u64		signal_ggtt[I915_NUM_ENGINES];
-		};
+		struct {
+			/* our mbox written by others */
+			u32		wait[GEN6_NUM_SEMAPHORES];
+			/* mboxes this ring signals to */
+			i915_reg_t	signal[GEN6_NUM_SEMAPHORES];
+		} mbox;
 
 		/* AKA wait() */
 		int	(*sync_to)(struct drm_i915_gem_request *req,
@@ -501,13 +537,16 @@
 	 * stream (ring).
 	 */
 	struct i915_gem_context *legacy_active_context;
+	struct i915_hw_ppgtt *legacy_active_ppgtt;
 
 	/* status_notifier: list of callbacks for context-switch changes */
 	struct atomic_notifier_head context_status_notifier;
 
 	struct intel_engine_hangcheck hangcheck;
 
-	bool needs_cmd_parser;
+#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0)
+#define I915_ENGINE_SUPPORTS_STATS   BIT(1)
+	unsigned int flags;
 
 	/*
 	 * Table of commands the command parser needs to know about
@@ -532,8 +571,50 @@
 	 * certain bits to encode the command length in the header).
 	 */
 	u32 (*get_cmd_length_mask)(u32 cmd_header);
+
+	struct {
+		/**
+		 * @lock: Lock protecting the below fields.
+		 */
+		spinlock_t lock;
+		/**
+		 * @enabled: Reference count indicating number of listeners.
+		 */
+		unsigned int enabled;
+		/**
+		 * @active: Number of contexts currently scheduled in.
+		 */
+		unsigned int active;
+		/**
+		 * @enabled_at: Timestamp when busy stats were enabled.
+		 */
+		ktime_t enabled_at;
+		/**
+		 * @start: Timestamp of the last idle to active transition.
+		 *
+		 * Idle is defined as active == 0, active is active > 0.
+		 */
+		ktime_t start;
+		/**
+		 * @total: Total time this engine was busy.
+		 *
+		 * Accumulated time not counting the most recent block in cases
+		 * where engine is currently busy (active > 0).
+		 */
+		ktime_t total;
+	} stats;
 };
 
+static inline bool intel_engine_needs_cmd_parser(struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER;
+}
+
+static inline bool intel_engine_supports_stats(struct intel_engine_cs *engine)
+{
+	return engine->flags & I915_ENGINE_SUPPORTS_STATS;
+}
+
 static inline void
 execlists_set_active(struct intel_engine_execlists *execlists,
 		     unsigned int bit)
@@ -555,6 +636,12 @@ execlists_is_active(const struct intel_engine_execlists *execlists,
 	return test_bit(bit, (unsigned long *)&execlists->active);
 }
 
+void
+execlists_cancel_port_requests(struct intel_engine_execlists * const execlists);
+
+void
+execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists);
+
 static inline unsigned int
 execlists_num_ports(const struct intel_engine_execlists * const execlists)
 {
@@ -624,6 +711,8 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
  */
#define I915_GEM_HWS_INDEX		0x30
#define I915_GEM_HWS_INDEX_ADDR (I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
+#define I915_GEM_HWS_PREEMPT_INDEX	0x32
+#define I915_GEM_HWS_PREEMPT_ADDR (I915_GEM_HWS_PREEMPT_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
#define I915_GEM_HWS_SCRATCH_INDEX	0x40
#define I915_GEM_HWS_SCRATCH_ADDR (I915_GEM_HWS_SCRATCH_INDEX << MI_STORE_DWORD_INDEX_SHIFT)
 
@@ -648,6 +737,7 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv);
 
 int __must_check intel_ring_cacheline_align(struct drm_i915_gem_request *req);
 
+int intel_ring_wait_for_space(struct intel_ring *ring, unsigned int bytes);
 u32 __must_check *intel_ring_begin(struct drm_i915_gem_request *req,
 				   unsigned int n);
 
@@ -776,6 +866,11 @@ static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
 	return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
 }
 
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+	return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
+}
+
 /* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
 int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
 
@@ -846,6 +941,9 @@ unsigned int intel_engine_wakeup(struct intel_engine_cs *engine);
#define ENGINE_WAKEUP_WAITER BIT(0)
#define ENGINE_WAKEUP_ASLEEP BIT(1)
 
+void intel_engine_pin_breadcrumbs_irq(struct intel_engine_cs *engine);
+void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine);
+
 void __intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine);
 
@@ -864,14 +962,123 @@ static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset)
 	return batch + 6;
 }
 
+static inline u32 *
+gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset)
+{
+	/* We're using qword write, offset should be aligned to 8 bytes. */
+	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+	/* w/a for post sync ops following a GPGPU operation we
+	 * need a prior CS_STALL, which is emitted by the flush
+	 * following the batch.
+	 */
+	*cs++ = GFX_OP_PIPE_CONTROL(6);
+	*cs++ = PIPE_CONTROL_GLOBAL_GTT_IVB | PIPE_CONTROL_CS_STALL |
+		PIPE_CONTROL_QW_WRITE;
+	*cs++ = gtt_offset;
+	*cs++ = 0;
+	*cs++ = value;
+	/* We're thrashing one dword of HWS. */
+	*cs++ = 0;
+
+	return cs;
+}
+
+static inline u32 *
+gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset)
+{
+	/* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */
+	GEM_BUG_ON(gtt_offset & (1 << 5));
+	/* Offset should be aligned to 8 bytes for both (QW/DW) write types */
+	GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8));
+
+	*cs++ = (MI_FLUSH_DW + 1) | MI_FLUSH_DW_OP_STOREDW;
+	*cs++ = gtt_offset | MI_FLUSH_DW_USE_GTT;
+	*cs++ = 0;
+	*cs++ = value;
+
+	return cs;
+}
+
 bool intel_engine_is_idle(struct intel_engine_cs *engine);
 bool intel_engines_are_idle(struct drm_i915_private *dev_priv);
 
-void intel_engines_mark_idle(struct drm_i915_private *i915);
+bool intel_engine_has_kernel_context(const struct intel_engine_cs *engine);
+
+void intel_engines_park(struct drm_i915_private *i915);
+void intel_engines_unpark(struct drm_i915_private *i915);
+
 void intel_engines_reset_default_submission(struct drm_i915_private *i915);
+unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915);
 
 bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
 
-void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p);
+__printf(3, 4)
+void intel_engine_dump(struct intel_engine_cs *engine,
+		       struct drm_printer *m,
+		       const char *header, ...);
+
+struct intel_engine_cs *
+intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
+
+static inline void intel_engine_context_in(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	if (READ_ONCE(engine->stats.enabled) == 0)
+		return;
+
+	spin_lock_irqsave(&engine->stats.lock, flags);
+
+	if (engine->stats.enabled > 0) {
+		if (engine->stats.active++ == 0)
+			engine->stats.start = ktime_get();
+		GEM_BUG_ON(engine->stats.active == 0);
+	}
+
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+static inline void intel_engine_context_out(struct intel_engine_cs *engine)
+{
+	unsigned long flags;
+
+	if (READ_ONCE(engine->stats.enabled) == 0)
+		return;
+
+	spin_lock_irqsave(&engine->stats.lock, flags);
+
+	if (engine->stats.enabled > 0) {
+		ktime_t last;
+
+		if (engine->stats.active && --engine->stats.active == 0) {
+			/*
+			 * Decrement the active context count and in case GPU
+			 * is now idle add up to the running total.
+			 */
+			last = ktime_sub(ktime_get(), engine->stats.start);
+
+			engine->stats.total = ktime_add(engine->stats.total,
+							last);
+		} else if (engine->stats.active == 0) {
+			/*
+			 * After turning on engine stats, context out might be
+			 * the first event in which case we account from the
+			 * time stats gathering was turned on.
+			 */
+			last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+
+			engine->stats.total = ktime_add(engine->stats.total,
+							last);
+		}
+	}
+
+	spin_unlock_irqrestore(&engine->stats.lock, flags);
+}
+
+int intel_enable_engine_stats(struct intel_engine_cs *engine);
+void intel_disable_engine_stats(struct intel_engine_cs *engine);
+
+ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine);
+
 #endif /* _INTEL_RINGBUFFER_H_ */
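One small but reusable idiom in this diff: the lone "bool needs_cmd_parser" is
replaced by an "unsigned int flags" word with BIT()-encoded capabilities and
inline predicates (intel_engine_needs_cmd_parser(), intel_engine_supports_stats()),
so further per-engine capabilities can be added without growing the struct.
Below is a minimal stand-alone sketch of the same pattern; the engine/engine_*
names are hypothetical stand-ins for illustration, not the kernel's types:

#include <stdbool.h>
#include <stdio.h>

#define BIT(n) (1U << (n))

/* Same two capability bits the patch defines on struct intel_engine_cs. */
#define ENGINE_NEEDS_CMD_PARSER BIT(0)
#define ENGINE_SUPPORTS_STATS   BIT(1)

struct engine {
	unsigned int flags;	/* capability bits, set once at init */
};

/* Predicates mirroring intel_engine_needs_cmd_parser()/_supports_stats(). */
static inline bool engine_needs_cmd_parser(const struct engine *e)
{
	return e->flags & ENGINE_NEEDS_CMD_PARSER;
}

static inline bool engine_supports_stats(const struct engine *e)
{
	return e->flags & ENGINE_SUPPORTS_STATS;
}

int main(void)
{
	struct engine e = { .flags = ENGINE_SUPPORTS_STATS };

	printf("cmd parser: %d, stats: %d\n",
	       engine_needs_cmd_parser(&e), engine_supports_stats(&e));
	return 0;
}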
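The more substantial addition is the engine busy-stats accumulator: in the new
stats block, "active" counts contexts currently scheduled in, "start" stamps
the last idle-to-active transition, and "total" collects completed busy blocks,
with intel_engine_context_in()/intel_engine_context_out() doing the updates.
The header only declares intel_engine_get_busy_time(), so the reader below is a
hypothetical user-space model rather than the kernel implementation: get_busy_ns()
adds the still-open block when the engine is busy, which is the behaviour the
@total/@start kernel-doc implies. The model is single-threaded and drops the
locking; the kernel version serializes the same updates with stats.lock and
uses ktime_get().

#include <assert.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

struct engine_stats {
	unsigned int active;	/* contexts currently scheduled in */
	struct timespec start;	/* last idle -> active transition */
	long long total_ns;	/* accumulated completed busy time */
};

static long long elapsed_ns(const struct timespec *since)
{
	struct timespec now;

	clock_gettime(CLOCK_MONOTONIC, &now);
	return (now.tv_sec - since->tv_sec) * 1000000000LL +
	       (now.tv_nsec - since->tv_nsec);
}

/* Mirrors intel_engine_context_in(): open a busy block on the 0 -> 1 edge. */
static void context_in(struct engine_stats *st)
{
	if (st->active++ == 0)
		clock_gettime(CLOCK_MONOTONIC, &st->start);
}

/* Mirrors intel_engine_context_out(): fold the block into total on 1 -> 0. */
static void context_out(struct engine_stats *st)
{
	assert(st->active > 0);
	if (--st->active == 0)
		st->total_ns += elapsed_ns(&st->start);
}

/* Hypothetical reader: completed blocks plus the in-flight one, if any. */
static long long get_busy_ns(const struct engine_stats *st)
{
	long long busy = st->total_ns;

	if (st->active)
		busy += elapsed_ns(&st->start);
	return busy;
}

int main(void)
{
	struct engine_stats st = { 0 };

	context_in(&st);	/* first context: engine goes busy */
	context_in(&st);	/* second context: still one busy block */
	usleep(10 * 1000);
	context_out(&st);
	printf("mid-block busy: %lld ns\n", get_busy_ns(&st));
	context_out(&st);	/* engine idle: block folded into total */
	printf("total busy:     %lld ns\n", st.total_ns);
	return 0;
}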