diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 748 |
1 files changed, 509 insertions, 239 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 7a8c9172d30a..fd0bfe140ee0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -54,6 +54,11 @@ MODULE_FIRMWARE("amdgpu/raven2_sdma.bin"); #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L +#define WREG32_SDMA(instance, offset, value) \ + WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value) +#define RREG32_SDMA(instance, offset) \ + RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset))) + static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -367,16 +372,11 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { - u32 lowbit, highbit; - - lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; - - DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", - ring->me, highbit, lowbit); - wptr = highbit; + wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI); wptr = wptr << 32; - wptr |= lowbit; + wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR); + DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", + ring->me, wptr); } return wptr >> 2; @@ -417,14 +417,67 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) lower_32_bits(ring->wptr << 2), ring->me, upper_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR, + lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI, + upper_32_bits(ring->wptr << 2)); + } +} + +/** + * sdma_v4_0_page_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (VEGA10+). + */ +static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64 wptr; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + } else { + wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI); + wptr = wptr << 32; + wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR); + } + + return wptr >> 2; +} + +/** + * sdma_v4_0_ring_set_wptr - commit the write pointer + * + * @ring: amdgpu ring pointer + * + * Write the wptr back to the hardware (VEGA10+). + */ +static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs]; + + /* XXX check if swapping is necessary on BE */ + WRITE_ONCE(*wb, (ring->wptr << 2)); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + uint64_t wptr = ring->wptr << 2; + + WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR, + lower_32_bits(wptr)); + WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI, + upper_32_bits(wptr)); } } static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -444,9 +497,12 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VEGA10). */ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + /* IB packet must end on a 8 DW boundary */ sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); @@ -568,16 +624,16 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); + ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); } - sdma0->ready = false; - sdma1->ready = false; + sdma0->sched.ready = false; + sdma1->sched.ready = false; } /** @@ -593,6 +649,39 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) } /** + * sdma_v4_0_page_stop - stop the page async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the page async dma ring buffers (VEGA10). + */ +static void sdma_v4_0_page_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page; + struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page; + u32 rb_cntl, ib_cntl; + int i; + + if ((adev->mman.buffer_funcs_ring == sdma0) || + (adev->mman.buffer_funcs_ring == sdma1)) + amdgpu_ttm_set_buffer_funcs_status(adev, false); + + for (i = 0; i < adev->sdma.num_instances; i++) { + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, + RB_ENABLE, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); + ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, + IB_ENABLE, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); + } + + sdma0->sched.ready = false; + sdma1->sched.ready = false; +} + +/** * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch * * @adev: amdgpu_device pointer @@ -630,18 +719,15 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); + f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, enable ? 1 : 0); if (enable && amdgpu_sdma_phase_quantum) { - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), - phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), - phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), - phase_quantum); + WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum); + WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum); + WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum); } - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl); } } @@ -662,156 +748,215 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) if (enable == false) { sdma_v4_0_gfx_stop(adev); sdma_v4_0_rlc_stop(adev); + if (adev->sdma.has_page_queue) + sdma_v4_0_page_stop(adev); } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); + WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl); } } /** + * sdma_v4_0_rb_cntl - get parameters for rb_cntl + */ +static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) +{ + /* Set ring buffer size in dwords */ + uint32_t rb_bufsz = order_base_2(ring->ring_size / 4); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); +#ifdef __BIG_ENDIAN + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); +#endif + return rb_cntl; +} + +/** * sdma_v4_0_gfx_resume - setup and start the async dma engines * * @adev: amdgpu_device pointer + * @i: instance to resume * * Set up the gfx DMA ring buffers and enable them (VEGA10). * Returns 0 for success, error for failure. */ -static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) +static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) { - struct amdgpu_ring *ring; + struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; u32 rb_cntl, ib_cntl, wptr_poll_cntl; - u32 rb_bufsz; u32 wb_offset; u32 doorbell; u32 doorbell_offset; - u32 temp; u64 wptr_gpu_addr; - int i, r; - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; - wb_offset = (ring->rptr_offs * 4); + wb_offset = (ring->rptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); + rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); + rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl); + WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); - /* Set ring buffer size in dwords */ - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); -#ifdef __BIG_ENDIAN - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, - RPTR_WRITEBACK_SWAP_ENABLE, 1); -#endif - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0); + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0); - /* Initialize the ring buffer's read and write pointers */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); + /* set the wb address whether it's enabled or not */ + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI, + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO, + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); - /* set the wb address whether it's enabled or not */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), - upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), - lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8); + WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + ring->wptr = 0; - ring->wptr = 0; + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1); - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); - - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - } + doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL); + doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET); - doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); - doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); - - if (ring->use_doorbell) { - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, + ring->use_doorbell); + doorbell_offset = REG_SET_FIELD(doorbell_offset, + SDMA0_GFX_DOORBELL_OFFSET, OFFSET, ring->doorbell_index); - } else { - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); - } - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index); + WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell); + WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset); + adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index); + + sdma_v4_0_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0); + + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl); + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); + + ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); - if (amdgpu_sriov_vf(adev)) - sdma_v4_0_ring_set_wptr(ring); + ring->sched.ready = true; +} - /* set minor_ptr_update to 0 after wptr programed */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); +/** + * sdma_v4_0_page_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * @i: instance to resume + * + * Set up the page DMA ring buffers and enable them (VEGA10). + * Returns 0 for success, error for failure. + */ +static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) +{ + struct amdgpu_ring *ring = &adev->sdma.instance[i].page; + u32 rb_cntl, ib_cntl, wptr_poll_cntl; + u32 wb_offset; + u32 doorbell; + u32 doorbell_offset; + u64 wptr_gpu_addr; - /* set utc l1 enable flag always to 1 */ - temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); + wb_offset = (ring->rptr_offs * 4); - if (!amdgpu_sriov_vf(adev)) { - /* unhalt engine */ - temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); - } + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); + rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); - /* setup the wptr shadow polling */ - wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), - lower_32_bits(wptr_gpu_addr)); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), - upper_32_bits(wptr_gpu_addr)); - wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); - if (amdgpu_sriov_vf(adev)) - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); - else - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0); - /* enable DMA RB */ - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + /* set the wb address whether it's enabled or not */ + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI, + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); -#ifdef __BIG_ENDIAN - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); -#endif - /* enable DMA IBs */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, + RPTR_WRITEBACK_ENABLE, 1); - ring->ready = true; + WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40); - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ - sdma_v4_0_ctx_switch_enable(adev, true); - sdma_v4_0_enable(adev, true); - } + ring->wptr = 0; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - return r; - } + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1); - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); + doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL); + doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET); - } + doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE, + ring->use_doorbell); + doorbell_offset = REG_SET_FIELD(doorbell_offset, + SDMA0_PAGE_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell); + WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset); + + /* paging queue doorbell range is setup at sdma_v4_0_gfx_resume */ + sdma_v4_0_page_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0); + + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_PAGE_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl); + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); + + ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); - return 0; + ring->sched.ready = true; } static void @@ -922,12 +1067,14 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); + WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0); for (j = 0; j < fw_size; j++) - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32_SDMA(i, mmSDMA0_UCODE_DATA, + le32_to_cpup(fw_data++)); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); + WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, + adev->sdma.instance[i].fw_version); } return 0; @@ -943,33 +1090,78 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) */ static int sdma_v4_0_start(struct amdgpu_device *adev) { - int r = 0; + struct amdgpu_ring *ring; + int i, r; if (amdgpu_sriov_vf(adev)) { sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); + } else { - /* set RB registers */ - r = sdma_v4_0_gfx_resume(adev); - return r; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + r = sdma_v4_0_load_microcode(adev); + if (r) + return r; + } + + /* unhalt the MEs */ + sdma_v4_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v4_0_ctx_switch_enable(adev, true); } - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - r = sdma_v4_0_load_microcode(adev); + /* start the gfx rings and rlc compute queues */ + for (i = 0; i < adev->sdma.num_instances; i++) { + uint32_t temp; + + WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0); + sdma_v4_0_gfx_resume(adev, i); + if (adev->sdma.has_page_queue) + sdma_v4_0_page_resume(adev, i); + + /* set utc l1 enable flag always to 1 */ + temp = RREG32_SDMA(i, mmSDMA0_CNTL); + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_CNTL, temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); + WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp); + } + } + + if (amdgpu_sriov_vf(adev)) { + sdma_v4_0_ctx_switch_enable(adev, true); + sdma_v4_0_enable(adev, true); + } else { + r = sdma_v4_0_rlc_resume(adev); if (r) return r; } - /* unhalt the MEs */ - sdma_v4_0_enable(adev, true); - /* enable sdma ring preemption */ - sdma_v4_0_ctx_switch_enable(adev, true); + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; + + r = amdgpu_ring_test_helper(ring); + if (r) + return r; - /* start the gfx rings and rlc compute queues */ - r = sdma_v4_0_gfx_resume(adev); - if (r) - return r; - r = sdma_v4_0_rlc_resume(adev); + if (adev->sdma.has_page_queue) { + struct amdgpu_ring *page = &adev->sdma.instance[i].page; + + r = amdgpu_ring_test_helper(page); + if (r) + return r; + + if (adev->mman.buffer_funcs_ring == page) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } + + if (adev->mman.buffer_funcs_ring == ring) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } return r; } @@ -993,21 +1185,16 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); @@ -1024,15 +1211,11 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -1055,20 +1238,16 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); @@ -1087,21 +1266,17 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } + err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); @@ -1206,7 +1381,7 @@ static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib, */ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; @@ -1272,15 +1447,45 @@ static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10); } +static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev) +{ + uint fw_version = adev->sdma.instance[0].fw_version; + + switch (adev->asic_type) { + case CHIP_VEGA10: + return fw_version >= 430; + case CHIP_VEGA12: + /*return fw_version >= 31;*/ + return false; + case CHIP_VEGA20: + return fw_version >= 123; + default: + return false; + } +} + static int sdma_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; if (adev->asic_type == CHIP_RAVEN) adev->sdma.num_instances = 1; else adev->sdma.num_instances = 2; + r = sdma_v4_0_init_microcode(adev); + if (r) { + DRM_ERROR("Failed to load sdma firmware!\n"); + return r; + } + + /* TODO: Page queue breaks driver reload under SRIOV */ + if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev))) + adev->sdma.has_page_queue = false; + else if (sdma_v4_0_fw_support_paging_queue(adev)) + adev->sdma.has_page_queue = true; + sdma_v4_0_set_ring_funcs(adev); sdma_v4_0_set_buffer_funcs(adev); sdma_v4_0_set_vm_pte_funcs(adev); @@ -1289,7 +1494,6 @@ static int sdma_v4_0_early_init(void *handle) return 0; } - static int sdma_v4_0_sw_init(void *handle) { struct amdgpu_ring *ring; @@ -1308,12 +1512,6 @@ static int sdma_v4_0_sw_init(void *handle) if (r) return r; - r = sdma_v4_0_init_microcode(adev); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); - return r; - } - for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1322,15 +1520,10 @@ static int sdma_v4_0_sw_init(void *handle) DRM_INFO("use_doorbell being set to: [%s]\n", ring->use_doorbell?"true":"false"); - if (adev->asic_type == CHIP_VEGA10) - ring->doorbell_index = (i == 0) ? - (AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset - : (AMDGPU_VEGA10_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset - else - ring->doorbell_index = (i == 0) ? - (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset - : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset - + /* doorbell size is 2 dwords, get DWORD offset */ + ring->doorbell_index = (i == 0) ? + (adev->doorbell_index.sdma_engine0 << 1) + : (adev->doorbell_index.sdma_engine1 << 1); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, @@ -1340,6 +1533,29 @@ static int sdma_v4_0_sw_init(void *handle) AMDGPU_SDMA_IRQ_TRAP1); if (r) return r; + + if (adev->sdma.has_page_queue) { + ring = &adev->sdma.instance[i].page; + ring->ring_obj = NULL; + ring->use_doorbell = true; + + /* paging queue use same doorbell index/routing as gfx queue + * with 0x400 (4096 dwords) offset on second doorbell page + */ + ring->doorbell_index = (i == 0) ? + (adev->doorbell_index.sdma_engine0 << 1) + : (adev->doorbell_index.sdma_engine1 << 1); + ring->doorbell_index += 0x400; + + sprintf(ring->name, "page%d", i); + r = amdgpu_ring_init(adev, ring, 1024, + &adev->sdma.trap_irq, + (i == 0) ? + AMDGPU_SDMA_IRQ_TRAP0 : + AMDGPU_SDMA_IRQ_TRAP1); + if (r) + return r; + } } return r; @@ -1350,8 +1566,11 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); + if (adev->sdma.has_page_queue) + amdgpu_ring_fini(&adev->sdma.instance[i].page); + } for (i = 0; i < adev->sdma.num_instances; i++) { release_firmware(adev->sdma.instance[i].fw); @@ -1414,7 +1633,7 @@ static bool sdma_v4_0_is_idle(void *handle) u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { - u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); + u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG); if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) return false; @@ -1430,8 +1649,8 @@ static int sdma_v4_0_wait_for_idle(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); - sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); + sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG); + sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG); if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) return 0; @@ -1452,16 +1671,13 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { + unsigned int instance = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 0 : 1; u32 sdma_cntl; - u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? - sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) : - sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL); - - sdma_cntl = RREG32(reg_offset); + sdma_cntl = RREG32_SDMA(instance, mmSDMA0_CNTL); sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(reg_offset, sdma_cntl); + WREG32_SDMA(instance, mmSDMA0_CNTL, sdma_cntl); return 0; } @@ -1470,39 +1686,34 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t instance; + DRM_DEBUG("IH: SDMA trap\n"); switch (entry->client_id) { case SOC15_IH_CLIENTID_SDMA0: - switch (entry->ring_id) { - case 0: - amdgpu_fence_process(&adev->sdma.instance[0].ring); - break; - case 1: - /* XXX compute */ - break; - case 2: - /* XXX compute */ - break; - case 3: - /* XXX page queue*/ - break; - } + instance = 0; break; case SOC15_IH_CLIENTID_SDMA1: - switch (entry->ring_id) { - case 0: - amdgpu_fence_process(&adev->sdma.instance[1].ring); - break; - case 1: - /* XXX compute */ - break; - case 2: - /* XXX compute */ - break; - case 3: - /* XXX page queue*/ - break; - } + instance = 1; + break; + default: + return 0; + } + + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[instance].ring); + break; + case 1: + if (adev->asic_type == CHIP_VEGA20) + amdgpu_fence_process(&adev->sdma.instance[instance].page); + break; + case 2: + /* XXX compute */ + break; + case 3: + if (adev->asic_type != CHIP_VEGA20) + amdgpu_fence_process(&adev->sdma.instance[instance].page); break; } return 0; @@ -1512,12 +1723,29 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + int instance; + DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_SDMA0: + instance = 0; + break; + case SOC15_IH_CLIENTID_SDMA1: + instance = 1; + break; + default: + return 0; + } + + switch (entry->ring_id) { + case 0: + drm_sched_fault(&adev->sdma.instance[instance].ring.sched); + break; + } return 0; } - static void sdma_v4_0_update_medium_grain_clock_gating( struct amdgpu_device *adev, bool enable) @@ -1730,6 +1958,38 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; +static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_page_ring_get_wptr, + .set_wptr = sdma_v4_0_page_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) { int i; @@ -1737,6 +1997,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; adev->sdma.instance[i].ring.me = i; + if (adev->sdma.has_page_queue) { + adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; + adev->sdma.instance[i].page.me = i; + } } } @@ -1818,7 +2082,10 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = { static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) { adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; + if (adev->sdma.has_page_queue) + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page; + else + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { @@ -1836,7 +2103,10 @@ static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; for (i = 0; i < adev->sdma.num_instances; i++) { - sched = &adev->sdma.instance[i].ring.sched; + if (adev->sdma.has_page_queue) + sched = &adev->sdma.instance[i].page.sched; + else + sched = &adev->sdma.instance[i].ring.sched; adev->vm_manager.vm_pte_rqs[i] = &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } |