diff options
author | Thomas Zimmermann <tzimmermann@suse.de> | 2023-01-31 14:18:33 +0100 |
---|---|---|
committer | Thomas Zimmermann <tzimmermann@suse.de> | 2023-01-31 14:18:33 +0100 |
commit | df5bf3b942a8d344bd9cbbe6ac31c9a2ea1557a4 (patch) | |
tree | bfbcbe56b9f4f8b1e44242b80800a68b2ae5b2d6 /drivers/gpu/drm/amd/amdgpu | |
parent | 532a38292c7213aa6d950e6a1b86659d08b5aa67 (diff) | |
parent | aebd8f0c6f8280ba35bc989f4a9ea47469d3589a (diff) |
Merge drm/drm-next into drm-misc-next
Backmerging to get v6.2-rc6.
Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 51 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 12 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/imu_v11_0.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/soc21.c | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c | 62 |
8 files changed, 101 insertions, 37 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 8b7a09b392ac..0f4cb41078c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -63,6 +63,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, amdgpu_ctx_put(p->ctx); return -ECANCELED; } + + amdgpu_sync_create(&p->sync); return 0; } @@ -454,18 +456,6 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p, } r = amdgpu_sync_fence(&p->sync, fence); - if (r) - goto error; - - /* - * When we have an explicit dependency it might be necessary to insert a - * pipeline sync to make sure that all caches etc are flushed and the - * next job actually sees the results from the previous one. - */ - if (fence->context == p->gang_leader->base.entity->fence_context) - r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); - -error: dma_fence_put(fence); return r; } @@ -1190,10 +1180,19 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; + struct drm_gpu_scheduler *sched; struct amdgpu_bo_list_entry *e; + struct dma_fence *fence; unsigned int i; int r; + r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); + if (r) { + if (r != -ERESTARTSYS) + DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); + return r; + } + list_for_each_entry(e, &p->validated, tv.head) { struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); struct dma_resv *resv = bo->tbo.base.resv; @@ -1213,10 +1212,24 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) return r; } - r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]); - if (r && r != -ERESTARTSYS) - DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n"); - return r; + sched = p->gang_leader->base.entity->rq->sched; + while ((fence = amdgpu_sync_get_fence(&p->sync))) { + struct drm_sched_fence *s_fence = to_drm_sched_fence(fence); + + /* + * When we have an dependency it might be necessary to insert a + * pipeline sync to make sure that all caches etc are flushed and the + * next job actually sees the results from the previous one + * before we start executing on the same scheduler ring. + */ + if (!s_fence || s_fence->sched != sched) + continue; + + r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence); + if (r) + return r; + } + return 0; } static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p) @@ -1256,9 +1269,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, continue; fence = &p->jobs[i]->base.s_fence->scheduled; + dma_fence_get(fence); r = drm_sched_job_add_dependency(&leader->base, fence); - if (r) + if (r) { + dma_fence_put(fence); goto error_cleanup; + } } if (p->gang_size > 1) { @@ -1346,6 +1362,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser) { unsigned i; + amdgpu_sync_free(&parser->sync); for (i = 0; i < parser->num_post_deps; i++) { drm_syncobj_put(parser->post_deps[i].syncobj); kfree(parser->post_deps[i].chain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 1257745fb202..5bee3ff62344 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3038,6 +3038,18 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA)) continue; + /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot. + * These are in TMR, hence are expected to be reused by PSP-TOS to reload + * from this location and RLC Autoload automatically also gets loaded + * from here based on PMFW -> PSP message during re-init sequence. + * Therefore, the psp suspend & resume should be skipped to avoid destroy + * the TMR and reload FWs again for IMU enabled APU ASICs. + */ + if (amdgpu_in_reset(adev) && + (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs && + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) + continue; + /* XXX handle errors */ r = adev->ip_blocks[i].version->funcs->suspend(adev); /* XXX handle errors */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index 44c57f4a84c4..32fe05c810c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -549,8 +549,8 @@ struct amdgpu_mst_connector { struct drm_dp_mst_topology_mgr mst_mgr; struct amdgpu_dm_dp_aux dm_dp_aux; - struct drm_dp_mst_port *port; - struct amdgpu_connector *mst_port; + struct drm_dp_mst_port *mst_output_port; + struct amdgpu_connector *mst_root; bool is_mst_connector; struct amdgpu_encoder *mst_encoder; }; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c index bac7976975bd..dcd8c066bc1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c @@ -391,8 +391,10 @@ int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job) dma_fence_get(f); r = drm_sched_job_add_dependency(&job->base, f); - if (r) + if (r) { + dma_fence_put(f); return r; + } } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c index ed0d368149aa..4ab90c7852c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c @@ -35,6 +35,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin"); static int imu_v11_0_init_microcode(struct amdgpu_device *adev) { diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c index bfa305079bfc..62cdd2113135 100644 --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c @@ -40,6 +40,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin"); MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin"); +MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin"); static int mes_v11_0_hw_fini(void *handle); static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev); @@ -196,7 +198,6 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes, mes_add_queue_pkt.trap_handler_addr = input->tba_addr; mes_add_queue_pkt.tma_addr = input->tma_addr; mes_add_queue_pkt.is_kfd_process = input->is_kfd_process; - mes_add_queue_pkt.trap_en = 1; /* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */ mes_add_queue_pkt.is_aql_queue = input->is_aql_queue; diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c index 9c4a29d50f1c..e03cf7f766c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc21.c +++ b/drivers/gpu/drm/amd/amdgpu/soc21.c @@ -52,6 +52,7 @@ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_ { {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)}, {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)}, + {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)}, }; static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] = diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c index efb22d0975b3..22a41766a8c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c @@ -1710,7 +1710,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job, create = ptr + addr + offset - start; - /* H246, HEVC and VP9 can run on any instance */ + /* H264, HEVC and VP9 can run on any instance */ if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11) continue; @@ -1724,7 +1724,29 @@ out: return r; } -#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) +#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002) +#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003) + +#define RADEON_VCN_ENGINE_INFO (0x30000001) +#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16 + +#define RENCODE_ENCODE_STANDARD_AV1 2 +#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003 +#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64 + +/* return the offset in ib if id is found, -1 otherwise + * to speed up the searching we only search upto max_offset + */ +static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset) +{ + int i; + + for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) { + if (ib->ptr[i + 1] == id) + return i; + } + return -1; +} static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_job *job, @@ -1734,27 +1756,35 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, struct amdgpu_vcn_decode_buffer *decode_buffer; uint64_t addr; uint32_t val; + int idx; /* The first instance can decode anything */ if (!ring->me) return 0; - /* unified queue ib header has 8 double words. */ - if (ib->length_dw < 8) - return 0; - - val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE - if (val != RADEON_VCN_ENGINE_TYPE_DECODE) - return 0; - - decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10]; - - if (!(decode_buffer->valid_buf_flag & 0x1)) + /* RADEON_VCN_ENGINE_INFO is at the top of ib block */ + idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO, + RADEON_VCN_ENGINE_INFO_MAX_OFFSET); + if (idx < 0) /* engine info is missing */ return 0; - addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | - decode_buffer->msg_buffer_address_lo; - return vcn_v4_0_dec_msg(p, job, addr); + val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */ + if (val == RADEON_VCN_ENGINE_TYPE_DECODE) { + decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6]; + + if (!(decode_buffer->valid_buf_flag & 0x1)) + return 0; + + addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 | + decode_buffer->msg_buffer_address_lo; + return vcn_v4_0_dec_msg(p, job, addr); + } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) { + idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT, + RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET); + if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1) + return vcn_v4_0_limit_sched(p, job); + } + return 0; } static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = { |