summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
author Thomas Zimmermann <tzimmermann@suse.de> 2023-01-31 14:18:33 +0100
committer Thomas Zimmermann <tzimmermann@suse.de> 2023-01-31 14:18:33 +0100
commit df5bf3b942a8d344bd9cbbe6ac31c9a2ea1557a4 (patch)
tree bfbcbe56b9f4f8b1e44242b80800a68b2ae5b2d6 /drivers/gpu/drm/amd/amdgpu
parent 532a38292c7213aa6d950e6a1b86659d08b5aa67 (diff)
parent aebd8f0c6f8280ba35bc989f4a9ea47469d3589a (diff)
Merge drm/drm-next into drm-misc-next
Backmerging to get v6.2-rc6. Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 51
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 12
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c 4
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/imu_v11_0.c 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/mes_v11_0.c 3
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/soc21.c 1
-rw-r--r-- drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c 62
8 files changed, 101 insertions, 37 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8b7a09b392ac..0f4cb41078c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -63,6 +63,8 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p,
amdgpu_ctx_put(p->ctx);
return -ECANCELED;
}
+
+ amdgpu_sync_create(&p->sync);
return 0;
}
@@ -454,18 +456,6 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
}
r = amdgpu_sync_fence(&p->sync, fence);
- if (r)
- goto error;
-
- /*
- * When we have an explicit dependency it might be necessary to insert a
- * pipeline sync to make sure that all caches etc are flushed and the
- * next job actually sees the results from the previous one.
- */
- if (fence->context == p->gang_leader->base.entity->fence_context)
- r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
-
-error:
dma_fence_put(fence);
return r;
}
@@ -1190,10 +1180,19 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct drm_gpu_scheduler *sched;
struct amdgpu_bo_list_entry *e;
+ struct dma_fence *fence;
unsigned int i;
int r;
+ r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
+ if (r) {
+ if (r != -ERESTARTSYS)
+ DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
+ return r;
+ }
+
list_for_each_entry(e, &p->validated, tv.head) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
struct dma_resv *resv = bo->tbo.base.resv;
@@ -1213,10 +1212,24 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
return r;
}
- r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
- if (r && r != -ERESTARTSYS)
- DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
- return r;
+ sched = p->gang_leader->base.entity->rq->sched;
+ while ((fence = amdgpu_sync_get_fence(&p->sync))) {
+ struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
+
+ /*
+ * When we have a dependency it might be necessary to insert a
+ * pipeline sync to make sure that all caches etc are flushed and the
+ * next job actually sees the results from the previous one
+ * before we start executing on the same scheduler ring.
+ */
+ if (!s_fence || s_fence->sched != sched)
+ continue;
+
+ r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
+ if (r)
+ return r;
+ }
+ return 0;
}
static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
@@ -1256,9 +1269,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
continue;
fence = &p->jobs[i]->base.s_fence->scheduled;
+ dma_fence_get(fence);
r = drm_sched_job_add_dependency(&leader->base, fence);
- if (r)
+ if (r) {
+ dma_fence_put(fence);
goto error_cleanup;
+ }
}
if (p->gang_size > 1) {
@@ -1346,6 +1362,7 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser)
{
unsigned i;
+ amdgpu_sync_free(&parser->sync);
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
kfree(parser->post_deps[i].chain);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1257745fb202..5bee3ff62344 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -3038,6 +3038,18 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
(adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SDMA))
continue;
+ /* Once swPSP provides the IMU, RLC FW binaries to TOS during cold-boot.
+ * These are in TMR, hence are expected to be reused by PSP-TOS to reload
+ * from this location and RLC Autoload automatically also gets loaded
+ * from here based on PMFW -> PSP message during re-init sequence.
+ * Therefore, the psp suspend & resume should be skipped to avoid destroying
+ * the TMR and reloading FWs again for IMU enabled APU ASICs.
+ */
+ if (amdgpu_in_reset(adev) &&
+ (adev->flags & AMD_IS_APU) && adev->gfx.imu.funcs &&
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 44c57f4a84c4..32fe05c810c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -549,8 +549,8 @@ struct amdgpu_mst_connector {
struct drm_dp_mst_topology_mgr mst_mgr;
struct amdgpu_dm_dp_aux dm_dp_aux;
- struct drm_dp_mst_port *port;
- struct amdgpu_connector *mst_port;
+ struct drm_dp_mst_port *mst_output_port;
+ struct amdgpu_connector *mst_root;
bool is_mst_connector;
struct amdgpu_encoder *mst_encoder;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index bac7976975bd..dcd8c066bc1f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -391,8 +391,10 @@ int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job)
dma_fence_get(f);
r = drm_sched_job_add_dependency(&job->base, f);
- if (r)
+ if (r) {
+ dma_fence_put(f);
return r;
+ }
}
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index ed0d368149aa..4ab90c7852c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -35,6 +35,7 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_0_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_1_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_imu.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_imu.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_imu.bin");
static int imu_v11_0_init_microcode(struct amdgpu_device *adev)
{
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index bfa305079bfc..62cdd2113135 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -40,6 +40,8 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_2_mes1.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes.bin");
MODULE_FIRMWARE("amdgpu/gc_11_0_3_mes1.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mes1.bin");
static int mes_v11_0_hw_fini(void *handle);
static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev);
@@ -196,7 +198,6 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.tma_addr = input->tma_addr;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
- mes_add_queue_pkt.trap_en = 1;
/* For KFD, gds_size is re-used for queue size (needed in MES for AQL queues) */
mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 9c4a29d50f1c..e03cf7f766c5 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -52,6 +52,7 @@ static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_
{
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 2304, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 2304, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_encode_array_vcn1[] =
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index efb22d0975b3..22a41766a8c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -1710,7 +1710,7 @@ static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
create = ptr + addr + offset - start;
- /* H246, HEVC and VP9 can run on any instance */
+ /* H264, HEVC and VP9 can run on any instance */
if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
continue;
@@ -1724,7 +1724,29 @@ out:
return r;
}
-#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
+#define RADEON_VCN_ENGINE_TYPE_ENCODE (0x00000002)
+#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
+
+#define RADEON_VCN_ENGINE_INFO (0x30000001)
+#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET 16
+
+#define RENCODE_ENCODE_STANDARD_AV1 2
+#define RENCODE_IB_PARAM_SESSION_INIT 0x00000003
+#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET 64
+
+/* return the offset in ib if id is found, -1 otherwise
+ * to speed up the searching we only search up to max_offset
+ */
+static int vcn_v4_0_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
+{
+ int i;
+
+ for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i += ib->ptr[i]/4) {
+ if (ib->ptr[i + 1] == id)
+ return i;
+ }
+ return -1;
+}
static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
@@ -1734,27 +1756,35 @@ static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_vcn_decode_buffer *decode_buffer;
uint64_t addr;
uint32_t val;
+ int idx;
/* The first instance can decode anything */
if (!ring->me)
return 0;
- /* unified queue ib header has 8 double words. */
- if (ib->length_dw < 8)
- return 0;
-
- val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE
- if (val != RADEON_VCN_ENGINE_TYPE_DECODE)
- return 0;
-
- decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
-
- if (!(decode_buffer->valid_buf_flag & 0x1))
+ /* RADEON_VCN_ENGINE_INFO is at the top of ib block */
+ idx = vcn_v4_0_enc_find_ib_param(ib, RADEON_VCN_ENGINE_INFO,
+ RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
+ if (idx < 0) /* engine info is missing */
return 0;
- addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
- decode_buffer->msg_buffer_address_lo;
- return vcn_v4_0_dec_msg(p, job, addr);
+ val = amdgpu_ib_get_value(ib, idx + 2); /* RADEON_VCN_ENGINE_TYPE */
+ if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
+ decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[idx + 6];
+
+ if (!(decode_buffer->valid_buf_flag & 0x1))
+ return 0;
+
+ addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
+ decode_buffer->msg_buffer_address_lo;
+ return vcn_v4_0_dec_msg(p, job, addr);
+ } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
+ idx = vcn_v4_0_enc_find_ib_param(ib, RENCODE_IB_PARAM_SESSION_INIT,
+ RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
+ if (idx >= 0 && ib->ptr[idx + 2] == RENCODE_ENCODE_STANDARD_AV1)
+ return vcn_v4_0_limit_sched(p, job);
+ }
+ return 0;
}
static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {