From cfa2104fbcb87ab0abbdaba608087df1e24fe195 Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Tue, 4 Aug 2015 10:50:47 +0800 Subject: drm/amdgpu: add feature version for SDMA ucode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jammy Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index d7895885fe0c..01bd5c903f9b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -542,6 +542,7 @@ static int sdma_v2_4_load_microcode(struct amdgpu_device *adev) amdgpu_ucode_print_sdma_hdr(&hdr->header); fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); fw_data = (const __le32 *) (adev->sdma[i].fw->data + -- cgit v1.2.3-70-g09d2 From 595fd013f795daeed0c7ddda02d8e0c51d8ce76c Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Tue, 4 Aug 2015 11:44:19 +0800 Subject: drm/amdgpu: set fw_version and feature_version for smu fw loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The fw_version and feature_verion should be set correctly when the firmwares are loaded by SMU on Tonga/Carrzio/Iceland Signed-off-by: Jammy Zhou Reviewed-by: Christian König --- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 37 ++++++++++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 7 ++++--- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 7 ++++--- 3 files changed, 30 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 0ac38ee298d1..f5a42ab1f65c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -587,6 +587,7 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) int err; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; + const struct gfx_firmware_header_v1_0 *cp_hdr; DRM_DEBUG("\n"); @@ -611,6 +612,9 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.pfp_fw); if (err) goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + adev->gfx.pfp_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.pfp_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_me.bin", chip_name); err = request_firmware(&adev->gfx.me_fw, fw_name, adev->dev); @@ -619,6 +623,9 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.me_fw); if (err) goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + adev->gfx.me_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.me_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ce.bin", chip_name); err = request_firmware(&adev->gfx.ce_fw, fw_name, adev->dev); @@ -627,12 +634,18 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.ce_fw); if (err) goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; err = amdgpu_ucode_validate(adev->gfx.rlc_fw); + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.rlc_fw->data; + adev->gfx.rlc_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.rlc_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec.bin", chip_name); err = request_firmware(&adev->gfx.mec_fw, fw_name, adev->dev); @@ -641,6 +654,9 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.mec_fw); if (err) goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->gfx.mec_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); + adev->gfx.mec_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_mec2.bin", chip_name); err = request_firmware(&adev->gfx.mec2_fw, fw_name, adev->dev); @@ -648,6 +664,12 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->gfx.mec2_fw); if (err) goto out; + cp_hdr = (const struct gfx_firmware_header_v1_0 *) + adev->gfx.mec2_fw->data; + adev->gfx.mec2_fw_version = le32_to_cpu( + cp_hdr->header.ucode_version); + adev->gfx.mec2_feature_version = le32_to_cpu( + cp_hdr->ucode_feature_version); } else { err = 0; adev->gfx.mec2_fw = NULL; @@ -2272,9 +2294,6 @@ static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; amdgpu_ucode_print_rlc_hdr(&hdr->header); - adev->gfx.rlc_fw_version = le32_to_cpu(hdr->header.ucode_version); - adev->gfx.rlc_feature_version = le32_to_cpu( - hdr->ucode_feature_version); fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); @@ -2360,12 +2379,6 @@ static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); amdgpu_ucode_print_gfx_hdr(&me_hdr->header); - adev->gfx.pfp_fw_version = le32_to_cpu(pfp_hdr->header.ucode_version); - adev->gfx.ce_fw_version = le32_to_cpu(ce_hdr->header.ucode_version); - adev->gfx.me_fw_version = le32_to_cpu(me_hdr->header.ucode_version); - adev->gfx.me_feature_version = le32_to_cpu(me_hdr->ucode_feature_version); - adev->gfx.ce_feature_version = le32_to_cpu(ce_hdr->ucode_feature_version); - adev->gfx.pfp_feature_version = le32_to_cpu(pfp_hdr->ucode_feature_version); gfx_v8_0_cp_gfx_enable(adev, false); @@ -2621,9 +2634,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); - adev->gfx.mec_fw_version = le32_to_cpu(mec_hdr->header.ucode_version); - adev->gfx.mec_feature_version = le32_to_cpu( - mec_hdr->ucode_feature_version); fw_data = (const __le32 *) (adev->gfx.mec_fw->data + @@ -2642,9 +2652,6 @@ static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); - adev->gfx.mec2_fw_version = le32_to_cpu(mec2_hdr->header.ucode_version); - adev->gfx.mec2_feature_version = le32_to_cpu( - mec2_hdr->ucode_feature_version); fw_data = (const __le32 *) (adev->gfx.mec2_fw->data + diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 01bd5c903f9b..a988dfb1d394 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -121,6 +121,7 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) int err, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; + const struct sdma_firmware_header_v1_0 *hdr; DRM_DEBUG("\n"); @@ -142,6 +143,9 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->sdma[i].fw); if (err) goto out; + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; + adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); if (adev->firmware.smu_load) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; @@ -541,9 +545,6 @@ static int sdma_v2_4_load_microcode(struct amdgpu_device *adev) hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; amdgpu_ucode_print_sdma_hdr(&hdr->header); fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); - adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); - fw_data = (const __le32 *) (adev->sdma[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index cf9bc2e126fc..2b86569b18d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -159,6 +159,7 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) int err, i; struct amdgpu_firmware_info *info = NULL; const struct common_firmware_header *header = NULL; + const struct sdma_firmware_header_v1_0 *hdr; DRM_DEBUG("\n"); @@ -183,6 +184,9 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) err = amdgpu_ucode_validate(adev->sdma[i].fw); if (err) goto out; + hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; + adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); + adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); if (adev->firmware.smu_load) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; @@ -630,9 +634,6 @@ static int sdma_v3_0_load_microcode(struct amdgpu_device *adev) hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; amdgpu_ucode_print_sdma_hdr(&hdr->header); fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); - adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); - fw_data = (const __le32 *) (adev->sdma[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); -- cgit v1.2.3-70-g09d2 From 4274f5d45cf11f88d7380702a7147b70553ddd6e Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 21 Jul 2015 16:04:39 +0800 Subject: drm/amdgpu: prepare job before push to sw queue for pte ring user mode will still use pte ring as a normal ring. if the prepare job generates another command(update pte) on its ring in scheduler, then will kill scheduler which is going to waiting later job but pending running job. Signed-off-by: Chunming Zhou Acked-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + 5 files changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 127867c2fc37..79e81f397e60 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -902,6 +902,7 @@ struct amdgpu_ring { struct amdgpu_ctx *current_ctx; enum amdgpu_ring_type type; char name[16]; + bool is_pte_ring; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 5f2403898b06..9ff4d2756a6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -909,7 +909,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) amdgpu_cs_parser_get_ring(adev, parser); parser->uf.sequence = atomic64_inc_return( &parser->ctx->rings[ring->idx].c_entity.last_queued_v_seq); - if ((parser->bo_list && parser->bo_list->has_userptr)) { + if (ring->is_pte_ring || (parser->bo_list && parser->bo_list->has_userptr)) { r = amdgpu_cs_parser_prepare_job(parser); if (r) goto out; diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 15df46c93f0a..dd3da7bb11c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1404,5 +1404,6 @@ static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring; + adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index a988dfb1d394..8b7e2438b6d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1415,5 +1415,6 @@ static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring; + adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; } } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 8f4aac23b317..4b5d769bdb4f 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1540,5 +1540,6 @@ static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) if (adev->vm_manager.vm_pte_funcs == NULL) { adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; adev->vm_manager.vm_pte_funcs_ring = &adev->sdma[0].ring; + adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true; } } -- cgit v1.2.3-70-g09d2 From 0011fdaa4dab19bf545a28c0d4d164bba4745d29 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 1 Jun 2015 15:33:20 +0800 Subject: drm/amdgpu: use gpu scheduler for sdma ib test Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 25 +++++++++++-------------- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 26 ++++++++++++-------------- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 25 +++++++++++-------------- 3 files changed, 34 insertions(+), 42 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index dd3da7bb11c1..6e8642b70445 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -629,12 +629,10 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { - amdgpu_wb_free(adev, index); DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); - return r; + goto err0; } ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); @@ -643,20 +641,15 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[3] = 1; ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5; + r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, + AMDGPU_FENCE_OWNER_UNDEFINED); + if (r) + goto err1; - r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED); - if (r) { - amdgpu_ib_free(adev, &ib); - amdgpu_wb_free(adev, index); - DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r); - return r; - } r = amdgpu_fence_wait(ib.fence, false); if (r) { - amdgpu_ib_free(adev, &ib); - amdgpu_wb_free(adev, index); DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); - return r; + goto err1; } for (i = 0; i < adev->usec_timeout; i++) { tmp = le32_to_cpu(adev->wb.wb[index]); @@ -666,12 +659,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { DRM_INFO("ib test on ring %d succeeded in %u usecs\n", - ib.fence->ring->idx, i); + ring->idx, i); + goto err1; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); r = -EINVAL; } + +err1: amdgpu_ib_free(adev, &ib); +err0: amdgpu_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 8b7e2438b6d2..5511a191e591 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -688,12 +688,10 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { - amdgpu_wb_free(adev, index); DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); - return r; + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -707,19 +705,15 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED); - if (r) { - amdgpu_ib_free(adev, &ib); - amdgpu_wb_free(adev, index); - DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r); - return r; - } + r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, + AMDGPU_FENCE_OWNER_UNDEFINED); + if (r) + goto err1; + r = amdgpu_fence_wait(ib.fence, false); if (r) { - amdgpu_ib_free(adev, &ib); - amdgpu_wb_free(adev, index); DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); - return r; + goto err1; } for (i = 0; i < adev->usec_timeout; i++) { tmp = le32_to_cpu(adev->wb.wb[index]); @@ -729,12 +723,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { DRM_INFO("ib test on ring %d succeeded in %u usecs\n", - ib.fence->ring->idx, i); + ring->idx, i); + goto err1; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); r = -EINVAL; } + +err1: amdgpu_ib_free(adev, &ib); +err0: amdgpu_wb_free(adev, index); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 4b5d769bdb4f..679ea9c779ee 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -809,12 +809,10 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); - r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { - amdgpu_wb_free(adev, index); DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); - return r; + goto err0; } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | @@ -828,19 +826,15 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP); ib.length_dw = 8; - r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED); - if (r) { - amdgpu_ib_free(adev, &ib); - amdgpu_wb_free(adev, index); - DRM_ERROR("amdgpu: failed to schedule ib (%d).\n", r); - return r; - } + r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, + AMDGPU_FENCE_OWNER_UNDEFINED); + if (r) + goto err1; + r = amdgpu_fence_wait(ib.fence, false); if (r) { - amdgpu_ib_free(adev, &ib); - amdgpu_wb_free(adev, index); DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); - return r; + goto err1; } for (i = 0; i < adev->usec_timeout; i++) { tmp = le32_to_cpu(adev->wb.wb[index]); @@ -850,12 +844,15 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) } if (i < adev->usec_timeout) { DRM_INFO("ib test on ring %d succeeded in %u usecs\n", - ib.fence->ring->idx, i); + ring->idx, i); + goto err1; } else { DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); r = -EINVAL; } +err1: amdgpu_ib_free(adev, &ib); +err0: amdgpu_wb_free(adev, index); return r; } -- cgit v1.2.3-70-g09d2 From ed88a0ee7ff53c292350fc5d38eae161c2372d51 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 3 Aug 2015 13:22:35 +0800 Subject: drm/amdgpu: use kernel fence for sdma ib test Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 6e8642b70445..115b7706e025 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -646,7 +646,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) if (r) goto err1; - r = amdgpu_fence_wait(ib.fence, false); + r = fence_wait(&ib.fence->base, false); if (r) { DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 5511a191e591..2b7ce9393929 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -710,7 +710,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) if (r) goto err1; - r = amdgpu_fence_wait(ib.fence, false); + r = fence_wait(&ib.fence->base, false); if (r) { DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 679ea9c779ee..cba45e62ae4b 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -831,7 +831,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) if (r) goto err1; - r = amdgpu_fence_wait(ib.fence, false); + r = fence_wait(&ib.fence->base, false); if (r) { DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); goto err1; -- cgit v1.2.3-70-g09d2 From 1763552ee8a7f39a1788d24e27b50d4dee383520 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Mon, 3 Aug 2015 11:43:19 +0800 Subject: drm/amdgpu: add kernel fence in ib_submit_kernel_helper every sbumission should be able to get a fence. Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig Reviewed-by: Jammy Zhou --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++- drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 8 ++++++-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 8 +++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 12 ++++++++---- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 6 ++++-- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 6 ++++-- 9 files changed, 41 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 371ff0845989..e1f093c1f011 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -872,7 +872,8 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, struct amdgpu_ib *ibs, unsigned num_ibs, int (*free_job)(struct amdgpu_cs_parser *), - void *owner); + void *owner, + struct fence **fence); struct amdgpu_ring { struct amdgpu_device *adev; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index 161c83ad9349..23a17ec239c0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -107,7 +107,8 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, struct amdgpu_ib *ibs, unsigned num_ibs, int (*free_job)(struct amdgpu_cs_parser *), - void *owner) + void *owner, + struct fence **f) { int r = 0; if (amdgpu_enable_scheduler) { @@ -135,5 +136,8 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, WARN(true, "emit timeout\n"); } else r = amdgpu_ib_schedule(adev, 1, ibs, owner); - return r; + if (r) + return r; + *f = &ibs[num_ibs - 1].fence->base; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 9b2730599134..f114c6b49b9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -825,6 +825,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct ww_acquire_ctx ticket; struct list_head head; struct amdgpu_ib *ib = NULL; + struct fence *f = NULL; struct amdgpu_device *adev = ring->adev; uint64_t addr; int i, r; @@ -869,14 +870,15 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, &amdgpu_uvd_free_job, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + &f); if (r) goto err2; - ttm_eu_fence_buffer_objects(&ticket, &head, &ib->fence->base); + ttm_eu_fence_buffer_objects(&ticket, &head, f); if (fence) - *fence = fence_get(&ib->fence->base); + *fence = fence_get(f); amdgpu_bo_unref(&bo); if (amdgpu_enable_scheduler) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 94c40ca5ba74..38660eac67d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -362,6 +362,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, { const unsigned ib_size_dw = 1024; struct amdgpu_ib *ib = NULL; + struct fence *f = NULL; struct amdgpu_device *adev = ring->adev; uint64_t dummy; int i, r; @@ -408,11 +409,12 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, &amdgpu_vce_free_job, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + &f); if (r) goto err; if (fence) - *fence = fence_get(&ib->fence->base); + *fence = fence_get(f); if (amdgpu_enable_scheduler) return 0; err: @@ -436,6 +438,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, { const unsigned ib_size_dw = 1024; struct amdgpu_ib *ib = NULL; + struct fence *f = NULL; struct amdgpu_device *adev = ring->adev; uint64_t dummy; int i, r; @@ -472,11 +475,12 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, ib->ptr[i] = 0x0; r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, &amdgpu_vce_free_job, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + &f); if (r) goto err; if (fence) - *fence = fence_get(&ib->fence->base); + *fence = fence_get(f); if (amdgpu_enable_scheduler) return 0; err: diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 115b7706e025..c3ed5b22d732 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -614,6 +614,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; + struct fence *f = NULL; unsigned i; unsigned index; int r; @@ -642,11 +643,12 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) ib.ptr[4] = 0xDEADBEEF; ib.length_dw = 5; r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + &f); if (r) goto err1; - r = fence_wait(&ib.fence->base, false); + r = fence_wait(f, false); if (r) { DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 521d811996f9..ee1c47f9a2b6 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2648,6 +2648,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; + struct fence *f = NULL; uint32_t scratch; uint32_t tmp = 0; unsigned i; @@ -2670,11 +2671,12 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) ib.length_dw = 3; r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + &f); if (r) goto err2; - r = fence_wait(&ib.fence->base, false); + r = fence_wait(f, false); if (r) { DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); goto err2; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 59eae93804c3..a865d96b67af 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -610,6 +610,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; + struct fence *f = NULL; uint32_t scratch; uint32_t tmp = 0; unsigned i; @@ -632,11 +633,12 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) ib.length_dw = 3; r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + &f); if (r) goto err2; - r = fence_wait(&ib.fence->base, false); + r = fence_wait(f, false); if (r) { DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); goto err2; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2b7ce9393929..6de7dc88d53c 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -673,6 +673,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; + struct fence *f = NULL; unsigned i; unsigned index; int r; @@ -706,11 +707,12 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) ib.length_dw = 8; r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + &f); if (r) goto err1; - r = fence_wait(&ib.fence->base, false); + r = fence_wait(f, false); if (r) { DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); goto err1; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index cba45e62ae4b..963a991fea00 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -794,6 +794,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib ib; + struct fence *f = NULL; unsigned i; unsigned index; int r; @@ -827,11 +828,12 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) ib.length_dw = 8; r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL, - AMDGPU_FENCE_OWNER_UNDEFINED); + AMDGPU_FENCE_OWNER_UNDEFINED, + &f); if (r) goto err1; - r = fence_wait(&ib.fence->base, false); + r = fence_wait(f, false); if (r) { DRM_ERROR("amdgpu: fence wait failed (%d).\n", r); goto err1; -- cgit v1.2.3-70-g09d2 From 281b42230175608dec0cd8dab9908250e7aa36a9 Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Wed, 12 Aug 2015 12:58:31 +0800 Subject: drm/amdgpu: add reference for **fence fix fence is released when pass to **fence sometimes. add reference for it. Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 1 + drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 4 ++-- drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +++ drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 1 + 11 files changed, 15 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index f428288d8363..8796938216d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -136,6 +136,7 @@ static void amdgpu_job_work_func(struct work_struct *work) sched_job->free_job(sched_job); mutex_unlock(&sched_job->job_lock); /* after processing job, free memory */ + fence_put(&sched_job->s_fence->base); kfree(sched_job); } struct amdgpu_cs_parser *amdgpu_cs_parser_create(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c index d2e5f3b90a3c..a86e38158afa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c @@ -133,13 +133,13 @@ int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev, return r; } ibs[num_ibs - 1].sequence = sched_job->s_fence->v_seq; - *f = &sched_job->s_fence->base; + *f = fence_get(&sched_job->s_fence->base); mutex_unlock(&sched_job->job_lock); } else { r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner); if (r) return r; - *f = &ibs[num_ibs - 1].fence->base; + *f = fence_get(&ibs[num_ibs - 1].fence->base); } return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e7336a95fe59..68369cf1e318 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -877,7 +877,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, if (fence) *fence = fence_get(f); amdgpu_bo_unref(&bo); - + fence_put(f); if (amdgpu_enable_scheduler) return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 38660eac67d6..33ee6ae28f37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -415,6 +415,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, goto err; if (fence) *fence = fence_get(f); + fence_put(f); if (amdgpu_enable_scheduler) return 0; err: @@ -481,6 +482,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, goto err; if (fence) *fence = fence_get(f); + fence_put(f); if (amdgpu_enable_scheduler) return 0; err: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b3f5d0484980..de882b0db350 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -366,6 +366,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, &fence); if (!r) amdgpu_bo_fence(bo, fence, true); + fence_put(fence); if (amdgpu_enable_scheduler) { amdgpu_bo_unreserve(bo); return 0; @@ -495,6 +496,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev, if (r) goto error_free; amdgpu_bo_fence(pd, fence, true); + fence_put(fence); } if (!amdgpu_enable_scheduler || ib->length_dw == 0) { @@ -812,6 +814,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, fence_put(*fence); *fence = fence_get(f); } + fence_put(f); if (!amdgpu_enable_scheduler) { amdgpu_ib_free(adev, ib); kfree(ib); diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c3ed5b22d732..2b4242b39b0a 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -669,6 +669,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) } err1: + fence_put(f); amdgpu_ib_free(adev, &ib); err0: amdgpu_wb_free(adev, index); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index ee1c47f9a2b6..9b0cab413677 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2698,6 +2698,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) } err2: + fence_put(f); amdgpu_ib_free(adev, &ib); err1: amdgpu_gfx_scratch_free(adev, scratch); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index a865d96b67af..4b68e6306f40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -659,6 +659,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) r = -EINVAL; } err2: + fence_put(f); amdgpu_ib_free(adev, &ib); err1: amdgpu_gfx_scratch_free(adev, scratch); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 6de7dc88d53c..9de8104eddeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -733,6 +733,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) } err1: + fence_put(f); amdgpu_ib_free(adev, &ib); err0: amdgpu_wb_free(adev, index); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 963a991fea00..029f3455f9f9 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -853,6 +853,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) r = -EINVAL; } err1: + fence_put(f); amdgpu_ib_free(adev, &ib); err0: amdgpu_wb_free(adev, index); diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c index 39577f6c0241..5017c71ba700 100644 --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c @@ -313,6 +313,7 @@ int amd_sched_push_job(struct amd_gpu_scheduler *sched, kfree(job); return -EINVAL; } + fence_get(&(*fence)->base); job->s_fence = *fence; while (kfifo_in_spinlocked(&c_entity->job_queue, &job, sizeof(void *), &c_entity->queue_lock) != sizeof(void *)) { -- cgit v1.2.3-70-g09d2 From b203dd95949ec8d5e30d53446408eb9a4d1bdc62 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 18 Aug 2015 18:23:16 +0200 Subject: drm/amdgpu: fix zeroing all IB fields manually v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The problem now is that we don't necessarily call amdgpu_ib_get() in some error paths and so work with uninitialized data. Better require that the memory is already zeroed. v2: better commit message Signed-off-by: Christian König Reviewed-by: Chunming Zhou (v1) Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 17 ----------------- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + 7 files changed, 6 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 2cf7f52a5650..a592df574939 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -260,7 +260,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) } - p->ibs = kmalloc_array(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); + p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL); if (!p->ibs) r = -ENOMEM; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 5104e64e9ad8..1c237f5e3365 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -73,29 +73,12 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm, if (!vm) ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo); - else - ib->gpu_addr = 0; - - } else { - ib->sa_bo = NULL; - ib->ptr = NULL; - ib->gpu_addr = 0; } amdgpu_sync_create(&ib->sync); ib->ring = ring; - ib->fence = NULL; - ib->user = NULL; ib->vm = vm; - ib->ctx = NULL; - ib->gds_base = 0; - ib->gds_size = 0; - ib->gws_base = 0; - ib->gws_size = 0; - ib->oa_base = 0; - ib->oa_size = 0; - ib->flags = 0; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 2b4242b39b0a..af526557ac66 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -630,6 +630,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 9b0cab413677..fab7b236f37f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -2660,6 +2660,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 4b68e6306f40..818edb37fa9c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -622,6 +622,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring) return r; } WREG32(scratch, 0xCAFEDEAD); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 9de8104eddeb..2b0e89e05b1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -689,6 +689,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 029f3455f9f9..6f1df03ebc70 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -810,6 +810,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring) gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); + memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(ring, NULL, 256, &ib); if (r) { DRM_ERROR("amdgpu: failed to get ib (%d).\n", r); -- cgit v1.2.3-70-g09d2 From c7ae72c01be10f539f385f624713f8ba0aa11a8f Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Tue, 25 Aug 2015 17:23:45 +0800 Subject: drm/amdgpu: use IB for copy buffer of eviction This aids handling buffers moves with the scheduler. Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 8 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c | 8 +-- drivers/gpu/drm/amd/amdgpu/amdgpu_test.c | 12 ++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 77 ++++++++++++++------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 3 +- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 16 +++--- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 18 +++---- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 18 +++---- 8 files changed, 81 insertions(+), 79 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index c9160430b5ac..aa2dcf578dd6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -247,7 +247,7 @@ struct amdgpu_buffer_funcs { unsigned copy_num_dw; /* used for buffer migration */ - void (*emit_copy_buffer)(struct amdgpu_ring *ring, + void (*emit_copy_buffer)(struct amdgpu_ib *ib, /* src addr in bytes */ uint64_t src_offset, /* dst addr in bytes */ @@ -518,7 +518,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, - struct amdgpu_fence **fence); + struct fence **fence); int amdgpu_mmap(struct file *filp, struct vm_area_struct *vma); struct amdgpu_bo_list_entry { @@ -2247,7 +2247,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s)) #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s)) -#define amdgpu_emit_copy_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((r), (s), (d), (b)) +#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) #define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b)) #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev)) #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) @@ -2379,7 +2379,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev, uint64_t addr); void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va); - +int amdgpu_vm_free_job(struct amdgpu_job *job); /* * functions used by amdgpu_encoder.c */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c index 759482e4300d..98d59ee640ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_benchmark.c @@ -33,7 +33,7 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, { unsigned long start_jiffies; unsigned long end_jiffies; - struct amdgpu_fence *fence = NULL; + struct fence *fence = NULL; int i, r; start_jiffies = jiffies; @@ -42,17 +42,17 @@ static int amdgpu_benchmark_do_move(struct amdgpu_device *adev, unsigned size, r = amdgpu_copy_buffer(ring, saddr, daddr, size, NULL, &fence); if (r) goto exit_do_move; - r = fence_wait(&fence->base, false); + r = fence_wait(fence, false); if (r) goto exit_do_move; - amdgpu_fence_unref(&fence); + fence_put(fence); } end_jiffies = jiffies; r = jiffies_to_msecs(end_jiffies - start_jiffies); exit_do_move: if (fence) - amdgpu_fence_unref(&fence); + fence_put(fence); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c index 962dd5552137..f80b1a43be8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c @@ -77,7 +77,7 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) void *gtt_map, *vram_map; void **gtt_start, **gtt_end; void **vram_start, **vram_end; - struct amdgpu_fence *fence = NULL; + struct fence *fence = NULL; r = amdgpu_bo_create(adev, size, PAGE_SIZE, true, AMDGPU_GEM_DOMAIN_GTT, 0, NULL, gtt_obj + i); @@ -116,13 +116,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - r = fence_wait(&fence->base, false); + r = fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for GTT->VRAM fence %d\n", i); goto out_lclean_unpin; } - amdgpu_fence_unref(&fence); + fence_put(fence); r = amdgpu_bo_kmap(vram_obj, &vram_map); if (r) { @@ -161,13 +161,13 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev) goto out_lclean_unpin; } - r = fence_wait(&fence->base, false); + r = fence_wait(fence, false); if (r) { DRM_ERROR("Failed to wait for VRAM->GTT fence %d\n", i); goto out_lclean_unpin; } - amdgpu_fence_unref(&fence); + fence_put(fence); r = amdgpu_bo_kmap(gtt_obj[i], >t_map); if (r) { @@ -214,7 +214,7 @@ out_lclean: amdgpu_bo_unref(>t_obj[i]); } if (fence) - amdgpu_fence_unref(&fence); + fence_put(fence); break; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 4cb81320b045..399143541d8a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -228,7 +228,7 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, struct amdgpu_device *adev; struct amdgpu_ring *ring; uint64_t old_start, new_start; - struct amdgpu_fence *fence; + struct fence *fence; int r; adev = amdgpu_get_adev(bo->bdev); @@ -269,9 +269,9 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, new_mem->num_pages * PAGE_SIZE, /* bytes */ bo->resv, &fence); /* FIXME: handle copy error */ - r = ttm_bo_move_accel_cleanup(bo, &fence->base, + r = ttm_bo_move_accel_cleanup(bo, fence, evict, no_wait_gpu, new_mem); - amdgpu_fence_unref(&fence); + fence_put(fence); return r; } @@ -987,52 +987,48 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t dst_offset, uint32_t byte_count, struct reservation_object *resv, - struct amdgpu_fence **fence) + struct fence **fence) { struct amdgpu_device *adev = ring->adev; - struct amdgpu_sync sync; uint32_t max_bytes; unsigned num_loops, num_dw; + struct amdgpu_ib *ib; unsigned i; int r; - /* sync other rings */ - amdgpu_sync_create(&sync); - if (resv) { - r = amdgpu_sync_resv(adev, &sync, resv, false); - if (r) { - DRM_ERROR("sync failed (%d).\n", r); - amdgpu_sync_free(adev, &sync, NULL); - return r; - } - } - max_bytes = adev->mman.buffer_funcs->copy_max_bytes; num_loops = DIV_ROUND_UP(byte_count, max_bytes); num_dw = num_loops * adev->mman.buffer_funcs->copy_num_dw; - /* for fence and sync */ - num_dw += 64 + AMDGPU_NUM_SYNCS * 8; + /* for IB padding */ + while (num_dw & 0x7) + num_dw++; + + ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL); + if (!ib) + return -ENOMEM; - r = amdgpu_sync_wait(&sync); + r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib); if (r) { - DRM_ERROR("sync wait failed (%d).\n", r); - amdgpu_sync_free(adev, &sync, NULL); + kfree(ib); return r; } - r = amdgpu_ring_lock(ring, num_dw); - if (r) { - DRM_ERROR("ring lock failed (%d).\n", r); - amdgpu_sync_free(adev, &sync, NULL); - return r; + ib->length_dw = 0; + + if (resv) { + r = amdgpu_sync_resv(adev, &ib->sync, resv, + AMDGPU_FENCE_OWNER_UNDEFINED); + if (r) { + DRM_ERROR("sync failed (%d).\n", r); + goto error_free; + } } - amdgpu_sync_rings(&sync, ring); for (i = 0; i < num_loops; i++) { uint32_t cur_size_in_bytes = min(byte_count, max_bytes); - amdgpu_emit_copy_buffer(adev, ring, src_offset, dst_offset, + amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset, cur_size_in_bytes); src_offset += cur_size_in_bytes; @@ -1040,17 +1036,24 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, byte_count -= cur_size_in_bytes; } - r = amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_MOVE, fence); - if (r) { - amdgpu_ring_unlock_undo(ring); - amdgpu_sync_free(adev, &sync, NULL); - return r; - } - - amdgpu_ring_unlock_commit(ring); - amdgpu_sync_free(adev, &sync, &(*fence)->base); + amdgpu_vm_pad_ib(adev, ib); + WARN_ON(ib->length_dw > num_dw); + r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1, + &amdgpu_vm_free_job, + AMDGPU_FENCE_OWNER_MOVE, + fence); + if (r) + goto error_free; + if (!amdgpu_enable_scheduler) { + amdgpu_ib_free(adev, ib); + kfree(ib); + } return 0; +error_free: + amdgpu_ib_free(adev, ib); + kfree(ib); + return r; } #if defined(CONFIG_DEBUG_FS) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 677266b90c74..83b7ce6f5f72 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -316,8 +316,7 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev, } } -static int amdgpu_vm_free_job( - struct amdgpu_job *sched_job) +int amdgpu_vm_free_job(struct amdgpu_job *sched_job) { int i; for (i = 0; i < sched_job->num_ibs; i++) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index af526557ac66..3920c1e346f8 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1339,18 +1339,18 @@ static void cik_sdma_set_irq_funcs(struct amdgpu_device *adev) * Used by the amdgpu ttm implementation to move pages if * registered as the asic copy callback. */ -static void cik_sdma_emit_copy_buffer(struct amdgpu_ring *ring, +static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0)); - amdgpu_ring_write(ring, byte_count); - amdgpu_ring_write(ring, 0); /* src/dst endian swap */ - amdgpu_ring_write(ring, lower_32_bits(src_offset)); - amdgpu_ring_write(ring, upper_32_bits(src_offset)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_COPY, SDMA_COPY_SUB_OPCODE_LINEAR, 0); + ib->ptr[ib->length_dw++] = byte_count; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2b0e89e05b1d..715e02d3bfba 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1350,19 +1350,19 @@ static void sdma_v2_4_set_irq_funcs(struct amdgpu_device *adev) * Used by the amdgpu ttm implementation to move pages if * registered as the asic copy callback. */ -static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ring *ring, +static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); - amdgpu_ring_write(ring, byte_count); - amdgpu_ring_write(ring, 0); /* src/dst endian swap */ - amdgpu_ring_write(ring, lower_32_bits(src_offset)); - amdgpu_ring_write(ring, upper_32_bits(src_offset)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = byte_count; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 6f1df03ebc70..67128c8e78b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1474,19 +1474,19 @@ static void sdma_v3_0_set_irq_funcs(struct amdgpu_device *adev) * Used by the amdgpu ttm implementation to move pages if * registered as the asic copy callback. */ -static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ring *ring, +static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, uint64_t src_offset, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | - SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR)); - amdgpu_ring_write(ring, byte_count); - amdgpu_ring_write(ring, 0); /* src/dst endian swap */ - amdgpu_ring_write(ring, lower_32_bits(src_offset)); - amdgpu_ring_write(ring, upper_32_bits(src_offset)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_COPY) | + SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR); + ib->ptr[ib->length_dw++] = byte_count; + ib->ptr[ib->length_dw++] = 0; /* src/dst endian swap */ + ib->ptr[ib->length_dw++] = lower_32_bits(src_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(src_offset); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); } /** -- cgit v1.2.3-70-g09d2 From 6e7a3840745c950c37d37cbb0af2e753a765d4ec Mon Sep 17 00:00:00 2001 From: Chunming Zhou Date: Thu, 27 Aug 2015 13:46:09 +0800 Subject: drm/amdgpu: use IB for fill_buffer instead of direct command Signed-off-by: Chunming Zhou Reviewed-by: Christian K?nig Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++-- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 12 ++++++------ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 12 ++++++------ 4 files changed, 20 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9fefcd9c1af1..c6812f2e198e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -262,7 +262,7 @@ struct amdgpu_buffer_funcs { unsigned fill_num_dw; /* used for buffer clearing */ - void (*emit_fill_buffer)(struct amdgpu_ring *ring, + void (*emit_fill_buffer)(struct amdgpu_ib *ib, /* value to write to memory */ uint32_t src_data, /* dst addr in bytes */ @@ -2248,7 +2248,7 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v) #define amdgpu_display_stop_mc_access(adev, s) (adev)->mode_info.funcs->stop_mc_access((adev), (s)) #define amdgpu_display_resume_mc_access(adev, s) (adev)->mode_info.funcs->resume_mc_access((adev), (s)) #define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) -#define amdgpu_emit_fill_buffer(adev, r, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((r), (s), (d), (b)) +#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) #define amdgpu_dpm_get_temperature(adev) (adev)->pm.funcs->get_temperature((adev)) #define amdgpu_dpm_pre_set_power_state(adev) (adev)->pm.funcs->pre_set_power_state((adev)) #define amdgpu_dpm_set_power_state(adev) (adev)->pm.funcs->set_power_state((adev)) diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 3920c1e346f8..c1e782952bd7 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1363,16 +1363,16 @@ static void cik_sdma_emit_copy_buffer(struct amdgpu_ib *ib, * * Fill GPU buffers using the DMA engine (CIK). */ -static void cik_sdma_emit_fill_buffer(struct amdgpu_ring *ring, +static void cik_sdma_emit_fill_buffer(struct amdgpu_ib *ib, uint32_t src_data, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); - amdgpu_ring_write(ring, src_data); - amdgpu_ring_write(ring, byte_count); + ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_CONSTANT_FILL, 0, 0); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count; } static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 715e02d3bfba..2f3948c09081 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1375,16 +1375,16 @@ static void sdma_v2_4_emit_copy_buffer(struct amdgpu_ib *ib, * * Fill GPU buffers using the DMA engine (VI). */ -static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ring *ring, +static void sdma_v2_4_emit_fill_buffer(struct amdgpu_ib *ib, uint32_t src_data, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); - amdgpu_ring_write(ring, src_data); - amdgpu_ring_write(ring, byte_count); + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count; } static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 67128c8e78b8..a9d9607e8d91 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1499,16 +1499,16 @@ static void sdma_v3_0_emit_copy_buffer(struct amdgpu_ib *ib, * * Fill GPU buffers using the DMA engine (VI). */ -static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ring *ring, +static void sdma_v3_0_emit_fill_buffer(struct amdgpu_ib *ib, uint32_t src_data, uint64_t dst_offset, uint32_t byte_count) { - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL)); - amdgpu_ring_write(ring, lower_32_bits(dst_offset)); - amdgpu_ring_write(ring, upper_32_bits(dst_offset)); - amdgpu_ring_write(ring, src_data); - amdgpu_ring_write(ring, byte_count); + ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_CONST_FILL); + ib->ptr[ib->length_dw++] = lower_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset); + ib->ptr[ib->length_dw++] = src_data; + ib->ptr[ib->length_dw++] = byte_count; } static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = { -- cgit v1.2.3-70-g09d2 From 18111de0dfc38c582c4348af3bda5d3331d35012 Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Mon, 31 Aug 2015 14:06:39 +0800 Subject: drm/amdgpu: add burst_nop flag for sdma The burst NOP is supported for SDMA when feature_version is >= 20. Signed-off-by: Jammy Zhou Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 + drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 ++ 4 files changed, 7 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index b66938dbb5cf..34812eccd7d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1708,6 +1708,7 @@ struct amdgpu_sdma { uint32_t feature_version; struct amdgpu_ring ring; + bool burst_nop; }; /* diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index c1e782952bd7..cc909c9cee63 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -501,6 +501,8 @@ static int cik_sdma_load_microcode(struct amdgpu_device *adev) fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); + if (adev->sdma[i].feature_version >= 20) + adev->sdma[i].burst_nop = true; fw_data = (const __le32 *) (adev->sdma[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2f3948c09081..2457bf3e3a1e 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -146,6 +146,8 @@ static int sdma_v2_4_init_microcode(struct amdgpu_device *adev) hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); + if (adev->sdma[i].feature_version >= 20) + adev->sdma[i].burst_nop = true; if (adev->firmware.smu_load) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index a9d9607e8d91..70b2f03c5712 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -218,6 +218,8 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma[i].fw->data; adev->sdma[i].fw_version = le32_to_cpu(hdr->header.ucode_version); adev->sdma[i].feature_version = le32_to_cpu(hdr->ucode_feature_version); + if (adev->sdma[i].feature_version >= 20) + adev->sdma[i].burst_nop = true; if (adev->firmware.smu_load) { info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; -- cgit v1.2.3-70-g09d2 From edff0e2826412be38f0c7977cbf89262141aad87 Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Tue, 1 Sep 2015 13:04:08 +0800 Subject: drm/amdgpu: add insert_nop ring func and default implementation The insert_nop function is added to amdgpu_ring_funcs structure as well as the default implementation Signed-off-by: Jammy Zhou Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 +++ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 24 +++++++++++++++++++++--- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 1 + drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 2 ++ drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 1 + drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c | 1 + drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vce_v2_0.c | 1 + drivers/gpu/drm/amd/amdgpu/vce_v3_0.c | 1 + 12 files changed, 36 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 057e7ef5c0d0..668939a14206 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -343,6 +343,8 @@ struct amdgpu_ring_funcs { int (*test_ring)(struct amdgpu_ring *ring); int (*test_ib)(struct amdgpu_ring *ring); bool (*is_lockup)(struct amdgpu_ring *ring); + /* insert NOP packets */ + void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count); }; /* @@ -1217,6 +1219,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev); void amdgpu_ring_free_size(struct amdgpu_ring *ring); int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw); +void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_commit(struct amdgpu_ring *ring); void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring); void amdgpu_ring_undo(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 7d442c51063e..9bec91484c24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -131,6 +131,21 @@ int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw) return 0; } +/** amdgpu_ring_insert_nop - insert NOP packets + * + * @ring: amdgpu_ring structure holding ring information + * @count: the number of NOP packets to insert + * + * This is the generic insert_nop function for rings except SDMA + */ +void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + int i; + + for (i = 0; i < count; i++) + amdgpu_ring_write(ring, ring->nop); +} + /** * amdgpu_ring_commit - tell the GPU to execute the new * commands on the ring buffer @@ -143,10 +158,13 @@ int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw) */ void amdgpu_ring_commit(struct amdgpu_ring *ring) { + uint32_t count; + /* We pad to match fetch size */ - while (ring->wptr & ring->align_mask) { - amdgpu_ring_write(ring, ring->nop); - } + count = ring->align_mask + 1 - (ring->wptr & ring->align_mask); + count %= ring->align_mask + 1; + ring->funcs->insert_nop(ring, count); + mb(); amdgpu_ring_set_wptr(ring); } diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index cc909c9cee63..f0661b269a63 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1305,6 +1305,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .test_ring = cik_sdma_ring_test_ring, .test_ib = cik_sdma_ring_test_ib, .is_lockup = cik_sdma_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index fab7b236f37f..517a68f82ec3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -5598,6 +5598,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .test_ring = gfx_v7_0_ring_test_ring, .test_ib = gfx_v7_0_ring_test_ib, .is_lockup = gfx_v7_0_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { @@ -5614,6 +5615,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { .test_ring = gfx_v7_0_ring_test_ring, .test_ib = gfx_v7_0_ring_test_ib, .is_lockup = gfx_v7_0_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 818edb37fa9c..956a35822f86 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -4378,6 +4378,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .is_lockup = gfx_v8_0_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { @@ -4394,6 +4395,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .test_ring = gfx_v8_0_ring_test_ring, .test_ib = gfx_v8_0_ring_test_ib, .is_lockup = gfx_v8_0_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 2457bf3e3a1e..1b913bce2599 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -1316,6 +1316,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .test_ring = sdma_v2_4_ring_test_ring, .test_ib = sdma_v2_4_ring_test_ib, .is_lockup = sdma_v2_4_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 70b2f03c5712..a7550a8f5d84 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -1440,6 +1440,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { .test_ring = sdma_v3_0_ring_test_ring, .test_ib = sdma_v3_0_ring_test_ib, .is_lockup = sdma_v3_0_ring_is_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 9ac383bc6c1f..5fac5da694f0 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -886,6 +886,7 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = { .test_ring = uvd_v4_2_ring_test_ring, .test_ib = uvd_v4_2_ring_test_ib, .is_lockup = amdgpu_ring_test_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index de4b3f57902d..2d5c59c318af 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -825,6 +825,7 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = { .test_ring = uvd_v5_0_ring_test_ring, .test_ib = uvd_v5_0_ring_test_ib, .is_lockup = amdgpu_ring_test_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 66c975870e97..d9f553fce531 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -805,6 +805,7 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = { .test_ring = uvd_v6_0_ring_test_ring, .test_ib = uvd_v6_0_ring_test_ib, .is_lockup = amdgpu_ring_test_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 303d961d57bd..cd16df543f64 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -643,6 +643,7 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = { .test_ring = amdgpu_vce_ring_test_ring, .test_ib = amdgpu_vce_ring_test_ib, .is_lockup = amdgpu_ring_test_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index 4349658081ff..5642b8eb92ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -608,6 +608,7 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = { .test_ring = amdgpu_vce_ring_test_ring, .test_ib = amdgpu_vce_ring_test_ib, .is_lockup = amdgpu_ring_test_lockup, + .insert_nop = amdgpu_ring_insert_nop, }; static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev) -- cgit v1.2.3-70-g09d2 From ac01db3dd5a0c7916e804a52752b780c5c18b98d Mon Sep 17 00:00:00 2001 From: Jammy Zhou Date: Tue, 1 Sep 2015 13:13:54 +0800 Subject: drm/amdgpu: implement burst NOP for SDMA Customize the insert_nop func for SDMA rings, and use burst NOP for ring/IB submissions in other places as well Signed-off-by: Jammy Zhou Reviewed-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 34 +++++++++++++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 34 +++++++++++++++++++++++++++++----- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 33 ++++++++++++++++++++++++++++----- 3 files changed, 86 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index f0661b269a63..9ea9de457da3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -188,6 +188,19 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], (ring->wptr << 2) & 0x3fffc); } +static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->nop | + SDMA_NOP_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->nop); +} + /** * cik_sdma_ring_emit_ib - Schedule an IB on the DMA engine * @@ -213,8 +226,8 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* IB packet must end on a 8 DW boundary */ - while ((ring->wptr & 7) != 4) - amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0)); + cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8); + amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_INDIRECT_BUFFER, 0, extra_bits)); amdgpu_ring_write(ring, ib->gpu_addr & 0xffffffe0); /* base must be 32 byte aligned */ amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr) & 0xffffffff); @@ -817,8 +830,19 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, */ static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib) { - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring); + u32 pad_count; + int i; + + pad_count = (8 - (ib->length_dw & 0x7)) % 8; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0) | + SDMA_NOP_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0); } /** @@ -1305,7 +1329,7 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = { .test_ring = cik_sdma_ring_test_ring, .test_ib = cik_sdma_ring_test_ib, .is_lockup = cik_sdma_ring_is_lockup, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = cik_sdma_ring_insert_nop, }; static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 1b913bce2599..14e87234171a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -220,6 +220,19 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[me], ring->wptr << 2); } +static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->nop | + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->nop); +} + /** * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine * @@ -247,8 +260,8 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* IB packet must end on a 8 DW boundary */ - while ((ring->wptr & 7) != 2) - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP)); + sdma_v2_4_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); + amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); /* base must be 32 byte aligned */ @@ -881,8 +894,19 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, */ static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib) { - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring); + u32 pad_count; + int i; + + pad_count = (8 - (ib->length_dw & 0x7)) % 8; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); } /** @@ -1316,7 +1340,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = { .test_ring = sdma_v2_4_ring_test_ring, .test_ib = sdma_v2_4_ring_test_ib, .is_lockup = sdma_v2_4_ring_is_lockup, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = sdma_v2_4_ring_insert_nop, }; static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index a7550a8f5d84..9bfe92df15f7 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -306,6 +306,19 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) } } +static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) +{ + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ring); + int i; + + for (i = 0; i < count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + amdgpu_ring_write(ring, ring->nop | + SDMA_PKT_NOP_HEADER_COUNT(count - 1)); + else + amdgpu_ring_write(ring, ring->nop); +} + /** * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine * @@ -332,8 +345,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, amdgpu_ring_write(ring, next_rptr); /* IB packet must end on a 8 DW boundary */ - while ((ring->wptr & 7) != 2) - amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_NOP)); + sdma_v3_0_ring_insert_nop(ring, (10 - (ring->wptr & 7)) % 8); amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) | SDMA_PKT_INDIRECT_HEADER_VMID(vmid)); @@ -1001,8 +1013,19 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, */ static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib) { - while (ib->length_dw & 0x7) - ib->ptr[ib->length_dw++] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP); + struct amdgpu_sdma *sdma = amdgpu_get_sdma_instance(ib->ring); + u32 pad_count; + int i; + + pad_count = (8 - (ib->length_dw & 0x7)) % 8; + for (i = 0; i < pad_count; i++) + if (sdma && sdma->burst_nop && (i == 0)) + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP) | + SDMA_PKT_NOP_HEADER_COUNT(pad_count - 1); + else + ib->ptr[ib->length_dw++] = + SDMA_PKT_HEADER_OP(SDMA_OP_NOP); } /** @@ -1440,7 +1463,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = { .test_ring = sdma_v3_0_ring_test_ring, .test_ib = sdma_v3_0_ring_test_ib, .is_lockup = sdma_v3_0_ring_is_lockup, - .insert_nop = amdgpu_ring_insert_nop, + .insert_nop = sdma_v3_0_ring_insert_nop, }; static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev) -- cgit v1.2.3-70-g09d2