diff options
author | Dave Airlie <airlied@redhat.com> | 2024-10-18 13:53:34 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2024-10-18 13:53:41 +1000 |
commit | 83f000784844cb9d4669ef1a3366479db3197b33 (patch) | |
tree | 8bbbb2f28ce3788f8f1770e8f49e2bf360219114 | |
parent | 49ff3e79a7f49e269b0278e75d35d6421f5538a2 (diff) | |
parent | ffafd12696d1a4c8eeb7386d798d75e1fafb4e01 (diff) |
Merge tag 'drm-xe-fixes-2024-10-17' of https://gitlab.freedesktop.org/drm/xe/kernel into drm-fixes
Driver Changes:
- New workaround to Xe2 (Aradhya)
- Fix unbalanced rpm put (Matthew Auld)
- Remove fragile lock optimization (Matthew Brost)
- Fix job release, delegating it to the drm scheduler (Matthew Brost)
- Fix timestamp bit width for Xe2 (Lucas)
- Fix external BO's dma-resv usag (Matthew Brost)
- Fix returning success for timeout in wait_token (Nirmoy)
- Initialize fence to avoid it being detected as signaled (Matthew Auld)
- Improve cache flush for BMG (Matthew Auld)
- Don't allow hflip for tile4 framebuffer on Xe2 (Juha-Pekka)
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Lucas De Marchi <lucas.demarchi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/jkldrex5733ldxrla75b4ayvhujjhw2kccmasl5rotoufoacj4@pkvlrrv4orc7
-rw-r--r-- | drivers/gpu/drm/i915/display/intel_fb.c | 13 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/display/intel_fb.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/i915/display/skl_universal_plane.c | 11 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/regs/xe_gt_regs.h | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_device.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_exec.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_gpu_scheduler.h | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_gt.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 29 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_guc_submit.c | 7 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_query.c | 6 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_sync.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_vm.c | 8 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_wa.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/xe/xe_wait_user_fence.c | 3 |
16 files changed, 63 insertions, 44 deletions
diff --git a/drivers/gpu/drm/i915/display/intel_fb.c b/drivers/gpu/drm/i915/display/intel_fb.c index 5be7bb43e2e0..35557d98d7a7 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.c +++ b/drivers/gpu/drm/i915/display/intel_fb.c @@ -438,6 +438,19 @@ bool intel_fb_needs_64k_phys(u64 modifier) INTEL_PLANE_CAP_NEED64K_PHYS); } +/** + * intel_fb_is_tile4_modifier: Check if a modifier is a tile4 modifier type + * @modifier: Modifier to check + * + * Returns: + * Returns %true if @modifier is a tile4 modifier. + */ +bool intel_fb_is_tile4_modifier(u64 modifier) +{ + return plane_caps_contain_any(lookup_modifier(modifier)->plane_caps, + INTEL_PLANE_CAP_TILING_4); +} + static bool check_modifier_display_ver_range(const struct intel_modifier_desc *md, u8 display_ver_from, u8 display_ver_until) { diff --git a/drivers/gpu/drm/i915/display/intel_fb.h b/drivers/gpu/drm/i915/display/intel_fb.h index 10de437e8ef8..827be3f7934c 100644 --- a/drivers/gpu/drm/i915/display/intel_fb.h +++ b/drivers/gpu/drm/i915/display/intel_fb.h @@ -35,6 +35,7 @@ bool intel_fb_is_ccs_modifier(u64 modifier); bool intel_fb_is_rc_ccs_cc_modifier(u64 modifier); bool intel_fb_is_mc_ccs_modifier(u64 modifier); bool intel_fb_needs_64k_phys(u64 modifier); +bool intel_fb_is_tile4_modifier(u64 modifier); bool intel_fb_is_ccs_aux_plane(const struct drm_framebuffer *fb, int color_plane); int intel_fb_rc_ccs_cc_plane(const struct drm_framebuffer *fb); diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c index 17d4c880ecc4..c8720d31d101 100644 --- a/drivers/gpu/drm/i915/display/skl_universal_plane.c +++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c @@ -1591,6 +1591,17 @@ static int skl_plane_check_fb(const struct intel_crtc_state *crtc_state, return -EINVAL; } + /* + * Display20 onward tile4 hflip is not supported + */ + if (rotation & DRM_MODE_REFLECT_X && + intel_fb_is_tile4_modifier(fb->modifier) && + DISPLAY_VER(dev_priv) >= 20) { + drm_dbg_kms(&dev_priv->drm, + "horizontal flip is not supported with tile4 surface formats\n"); + return -EINVAL; + } + if (drm_rotation_90_or_270(rotation)) { if (!intel_fb_supports_90_270_rotation(to_intel_framebuffer(fb))) { drm_dbg_kms(&dev_priv->drm, diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h index ac9c437e103d..00ad34ed73a5 100644 --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h @@ -393,9 +393,6 @@ #define XE2_GLOBAL_INVAL XE_REG(0xb404) -#define SCRATCH1LPFC XE_REG(0xb474) -#define EN_L3_RW_CCS_CACHE_FLUSH REG_BIT(0) - #define XE2LPM_L3SQCREG2 XE_REG_MCR(0xb604) #define XE2LPM_L3SQCREG3 XE_REG_MCR(0xb608) diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c index 5a63d135ba96..0a9ffc19e92f 100644 --- a/drivers/gpu/drm/xe/xe_device.c +++ b/drivers/gpu/drm/xe/xe_device.c @@ -980,13 +980,13 @@ void xe_device_declare_wedged(struct xe_device *xe) return; } + xe_pm_runtime_get_noresume(xe); + if (drmm_add_action_or_reset(&xe->drm, xe_device_wedged_fini, xe)) { drm_err(&xe->drm, "Failed to register xe_device_wedged_fini clean-up. Although device is wedged.\n"); return; } - xe_pm_runtime_get_noresume(xe); - if (!atomic_xchg(&xe->wedged.flag, 1)) { xe->needs_flr_on_fini = true; drm_err(&xe->drm, diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c index 7b38485817dc..f23ac1e2ed88 100644 --- a/drivers/gpu/drm/xe/xe_exec.c +++ b/drivers/gpu/drm/xe/xe_exec.c @@ -41,11 +41,6 @@ * user knows an exec writes to a BO and reads from the BO in the next exec, it * is the user's responsibility to pass in / out fence between the two execs). * - * Implicit dependencies for external BOs are handled by using the dma-buf - * implicit dependency uAPI (TODO: add link). To make this works each exec must - * install the job's fence into the DMA_RESV_USAGE_WRITE slot of every external - * BO mapped in the VM. - * * We do not allow a user to trigger a bind at exec time rather we have a VM * bind IOCTL which uses the same in / out fence interface as exec. In that * sense, a VM bind is basically the same operation as an exec from the user @@ -59,8 +54,8 @@ * behind any pending kernel operations on any external BOs in VM or any BOs * private to the VM. This is accomplished by the rebinds waiting on BOs * DMA_RESV_USAGE_KERNEL slot (kernel ops) and kernel ops waiting on all BOs - * slots (inflight execs are in the DMA_RESV_USAGE_BOOKING for private BOs and - * in DMA_RESV_USAGE_WRITE for external BOs). + * slots (inflight execs are in the DMA_RESV_USAGE_BOOKKEEP for private BOs and + * for external BOs). * * Rebinds / dma-resv usage applies to non-compute mode VMs only as for compute * mode VMs we use preempt fences and a rebind worker (TODO: add link). @@ -304,7 +299,8 @@ retry: xe_sched_job_arm(job); if (!xe_vm_in_lr_mode(vm)) drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, &job->drm.s_fence->finished, - DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_WRITE); + DMA_RESV_USAGE_BOOKKEEP, + DMA_RESV_USAGE_BOOKKEEP); for (i = 0; i < num_syncs; i++) { xe_sync_entry_signal(&syncs[i], &job->drm.s_fence->finished); diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h index 5ad5629a6c60..64b2ae6839db 100644 --- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h +++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h @@ -63,7 +63,9 @@ xe_sched_invalidate_job(struct xe_sched_job *job, int threshold) static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched, struct xe_sched_job *job) { + spin_lock(&sched->base.job_list_lock); list_add(&job->drm.list, &sched->base.pending_list); + spin_unlock(&sched->base.job_list_lock); } static inline diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c index ea65cf59372c..d5fd6a089b7c 100644 --- a/drivers/gpu/drm/xe/xe_gt.c +++ b/drivers/gpu/drm/xe/xe_gt.c @@ -108,7 +108,6 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt) return; if (!xe_gt_is_media_type(gt)) { - xe_mmio_write32(gt, SCRATCH1LPFC, EN_L3_RW_CCS_CACHE_FLUSH); reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL); reg |= CG_DIS_CNTLBUS; xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg); diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c index cca9cf536f76..bbb9e411d21f 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c @@ -37,6 +37,15 @@ static long tlb_timeout_jiffies(struct xe_gt *gt) return hw_tlb_timeout + 2 * delay; } +static void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) +{ + if (WARN_ON_ONCE(!fence->gt)) + return; + + xe_pm_runtime_put(gt_to_xe(fence->gt)); + fence->gt = NULL; /* fini() should be called once */ +} + static void __invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence) { @@ -204,7 +213,7 @@ static int send_tlb_invalidation(struct xe_guc *guc, tlb_timeout_jiffies(gt)); } spin_unlock_irq(>->tlb_invalidation.pending_lock); - } else if (ret < 0) { + } else { __invalidation_fence_signal(xe, fence); } if (!ret) { @@ -267,10 +276,8 @@ int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt) xe_gt_tlb_invalidation_fence_init(gt, &fence, true); ret = xe_gt_tlb_invalidation_guc(gt, &fence); - if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence); + if (ret) return ret; - } xe_gt_tlb_invalidation_fence_wait(&fence); } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) { @@ -496,7 +503,8 @@ static const struct dma_fence_ops invalidation_fence_ops = { * @stack: fence is stack variable * * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini - * must be called if fence is not signaled. + * will be automatically called when fence is signalled (all fences must signal), + * even on error. */ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, @@ -516,14 +524,3 @@ void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, dma_fence_get(&fence->base); fence->gt = gt; } - -/** - * xe_gt_tlb_invalidation_fence_fini - Finalize TLB invalidation fence - * @fence: TLB invalidation fence to finalize - * - * Drop PM ref which fence took durinig init. - */ -void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence) -{ - xe_pm_runtime_put(gt_to_xe(fence->gt)); -} diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h index a84065fa324c..f430d5797af7 100644 --- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h +++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h @@ -28,7 +28,6 @@ int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len); void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt, struct xe_gt_tlb_invalidation_fence *fence, bool stack); -void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence); static inline void xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence) diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c index 63495007f336..8a9254e5af6e 100644 --- a/drivers/gpu/drm/xe/xe_guc_submit.c +++ b/drivers/gpu/drm/xe/xe_guc_submit.c @@ -1030,10 +1030,13 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job) /* * TDR has fired before free job worker. Common if exec queue - * immediately closed after last fence signaled. + * immediately closed after last fence signaled. Add back to pending + * list so job can be freed and kick scheduler ensuring free job is not + * lost. */ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) { - guc_exec_queue_free_job(drm_job); + xe_sched_add_pending_job(sched, job); + xe_sched_submission_start(sched); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 28d9bb3b825d..848da8e68c7a 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -161,7 +161,11 @@ query_engine_cycles(struct xe_device *xe, cpu_clock); xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL); - resp.width = 36; + + if (GRAPHICS_VER(xe) >= 20) + resp.width = 64; + else + resp.width = 36; /* Only write to the output fields of user query */ if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp)) diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c index bb3c2a830362..c6cf227ead40 100644 --- a/drivers/gpu/drm/xe/xe_sync.c +++ b/drivers/gpu/drm/xe/xe_sync.c @@ -58,7 +58,7 @@ static struct xe_user_fence *user_fence_create(struct xe_device *xe, u64 addr, if (!access_ok(ptr, sizeof(*ptr))) return ERR_PTR(-EFAULT); - ufence = kmalloc(sizeof(*ufence), GFP_KERNEL); + ufence = kzalloc(sizeof(*ufence), GFP_KERNEL); if (!ufence) return ERR_PTR(-ENOMEM); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index ce9dca4d4e87..c99380271de6 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -3199,10 +3199,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) ret = xe_gt_tlb_invalidation_vma(tile->primary_gt, &fence[fence_id], vma); - if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); + if (ret) goto wait; - } ++fence_id; if (!tile->media_gt) @@ -3214,10 +3212,8 @@ int xe_vm_invalidate_vma(struct xe_vma *vma) ret = xe_gt_tlb_invalidation_vma(tile->media_gt, &fence[fence_id], vma); - if (ret < 0) { - xe_gt_tlb_invalidation_fence_fini(&fence[fence_id]); + if (ret) goto wait; - } ++fence_id; } } diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c index d424992514a4..353936a0f877 100644 --- a/drivers/gpu/drm/xe/xe_wa.c +++ b/drivers/gpu/drm/xe/xe_wa.c @@ -710,6 +710,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = { DIS_PARTIAL_AUTOSTRIP | DIS_AUTOSTRIP)) }, + { XE_RTP_NAME("15016589081"), + XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)), + XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX)) + }, /* Xe2_HPG */ { XE_RTP_NAME("15010599737"), diff --git a/drivers/gpu/drm/xe/xe_wait_user_fence.c b/drivers/gpu/drm/xe/xe_wait_user_fence.c index d46fa8374980..f5deb81eba01 100644 --- a/drivers/gpu/drm/xe/xe_wait_user_fence.c +++ b/drivers/gpu/drm/xe/xe_wait_user_fence.c @@ -169,9 +169,6 @@ int xe_wait_user_fence_ioctl(struct drm_device *dev, void *data, args->timeout = 0; } - if (!timeout && !(err < 0)) - err = -ETIME; - if (q) xe_exec_queue_put(q); |