Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 332
1 file changed, 268 insertions(+), 64 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index e00b46180d2e..1916ec84dd71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -31,6 +31,8 @@
 /* delay 0.1 second to enable gfx off feature */
 #define GFX_OFF_DELAY_ENABLE         msecs_to_jiffies(100)
 
+#define GFX_OFF_NO_DELAY 0
+
 /*
  * GPU GFX IP block helpers function.
  */
@@ -48,7 +50,7 @@ int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
 	return bit;
 }
 
-void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
+void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
 				 int *mec, int *pipe, int *queue)
 {
 	*queue = bit % adev->gfx.mec.num_queue_per_pipe;
@@ -192,42 +194,44 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
 	return adev->gfx.mec.num_mec > 1;
 }
 
-void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
+bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
+					       struct amdgpu_ring *ring)
 {
-	int i, queue, pipe, mec;
-	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+	/* Policy: use 1st queue as high priority compute queue if we
+	 * have more than one compute queue.
+	 */
+	if (adev->gfx.num_compute_rings > 1 &&
+	    ring == &adev->gfx.compute_ring[0])
+		return true;
 
-	/* policy for amdgpu compute queue ownership */
-	for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
-		queue = i % adev->gfx.mec.num_queue_per_pipe;
-		pipe = (i / adev->gfx.mec.num_queue_per_pipe)
-			% adev->gfx.mec.num_pipe_per_mec;
-		mec = (i / adev->gfx.mec.num_queue_per_pipe)
-			/ adev->gfx.mec.num_pipe_per_mec;
-
-		/* we've run out of HW */
-		if (mec >= adev->gfx.mec.num_mec)
-			break;
+	return false;
+}
 
-		if (multipipe_policy) {
-			/* policy: amdgpu owns the first two queues of the first MEC */
-			if (mec == 0 && queue < 2)
-				set_bit(i, adev->gfx.mec.queue_bitmap);
-		} else {
-			/* policy: amdgpu owns all queues in the first pipe */
-			if (mec == 0 && pipe == 0)
-				set_bit(i, adev->gfx.mec.queue_bitmap);
+void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
+{
+	int i, queue, pipe;
+	bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+	int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+				     adev->gfx.mec.num_queue_per_pipe,
+				     adev->gfx.num_compute_rings);
+
+	if (multipipe_policy) {
+		/* policy: make queues evenly cross all pipes on MEC1 only */
+		for (i = 0; i < max_queues_per_mec; i++) {
+			pipe = i % adev->gfx.mec.num_pipe_per_mec;
+			queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+				adev->gfx.mec.num_queue_per_pipe;
+
+			set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+				adev->gfx.mec.queue_bitmap);
 		}
+	} else {
+		/* policy: amdgpu owns all queues in the given pipe */
+		for (i = 0; i < max_queues_per_mec; ++i)
+			set_bit(i, adev->gfx.mec.queue_bitmap);
 	}
 
-	/* update the number of active compute rings */
-	adev->gfx.num_compute_rings =
-		bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-
-	/* If you hit this case and edited the policy, you probably just
-	 * need to increase AMDGPU_MAX_COMPUTE_RINGS */
-	if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
-		adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+	dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
 }
 
 void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
@@ -266,7 +270,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
 		if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
 			continue;
 
-		amdgpu_gfx_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
+		amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
 
 		/*
 		 * 1. Using pipes 2/3 from MEC 2 seems cause problems.
@@ -296,10 +300,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 
 	spin_lock_init(&kiq->ring_lock);
 
-	r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs);
-	if (r)
-		return r;
-
 	ring->adev = NULL;
 	ring->ring_obj = NULL;
 	ring->use_doorbell = true;
@@ -310,9 +310,10 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 		return r;
 
 	ring->eop_gpu_addr = kiq->eop_gpu_addr;
+	ring->no_scheduler = true;
 	sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
-	r = amdgpu_ring_init(adev, ring, 1024,
-			     irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
+	r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
+			     AMDGPU_RING_PRIO_DEFAULT, NULL);
 	if (r)
 		dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
 
@@ -321,7 +322,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
 
 void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
 {
-	amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
 	amdgpu_ring_fini(ring);
 }
 
@@ -464,20 +464,38 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
 {
 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
 	struct amdgpu_ring *kiq_ring = &kiq->ring;
-	int i;
+	int i, r;
 
 	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
 		return -EINVAL;
 
+	spin_lock(&adev->gfx.kiq.ring_lock);
 	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
-					adev->gfx.num_compute_rings))
+					adev->gfx.num_compute_rings)) {
+		spin_unlock(&adev->gfx.kiq.ring_lock);
 		return -ENOMEM;
+	}
 
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
 					   RESET_QUEUES, 0, 0);
+	r = amdgpu_ring_test_helper(kiq_ring);
+	spin_unlock(&adev->gfx.kiq.ring_lock);
+
+	return r;
+}
+
+int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
+					int queue_bit)
+{
+	int mec, pipe, queue;
+	int set_resource_bit = 0;
+
+	amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
 
-	return amdgpu_ring_test_ring(kiq_ring);
+	set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
+
+	return set_resource_bit;
 }
 
 int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
@@ -502,17 +520,18 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
 			break;
 		}
 
-		queue_mask |= (1ull << i);
+		queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
 	}
 
 	DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
 						kiq_ring->queue);
-
+	spin_lock(&adev->gfx.kiq.ring_lock);
 	r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
 					adev->gfx.num_compute_rings +
 					kiq->pmf->set_resources_size);
 	if (r) {
 		DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+		spin_unlock(&adev->gfx.kiq.ring_lock);
 		return r;
 	}
 
@@ -521,6 +540,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
 		kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
 
 	r = amdgpu_ring_test_helper(kiq_ring);
+	spin_unlock(&adev->gfx.kiq.ring_lock);
 	if (r)
 		DRM_ERROR("KCQ enable failed\n");
 
@@ -540,30 +560,66 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
 void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 {
-	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
-		return;
+	unsigned long delay = GFX_OFF_DELAY_ENABLE;
 
-	if (!is_support_sw_smu(adev) &&
-	    (!adev->powerplay.pp_funcs ||
-	     !adev->powerplay.pp_funcs->set_powergating_by_smu))
+	if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
 		return;
 
-
 	mutex_lock(&adev->gfx.gfx_off_mutex);
 
-	if (!enable)
-		adev->gfx.gfx_off_req_count++;
-	else if (adev->gfx.gfx_off_req_count > 0)
+	if (enable) {
+		/* If the count is already 0, it means there's an imbalance bug somewhere.
+		 * Note that the bug may be in a different caller than the one which triggers the
+		 * WARN_ON_ONCE.
+		 */
+		if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+			goto unlock;
+
 		adev->gfx.gfx_off_req_count--;
 
-	if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
-		schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
-	} else if (!enable && adev->gfx.gfx_off_state) {
-		if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
-			adev->gfx.gfx_off_state = false;
+		if (adev->gfx.gfx_off_req_count == 0 &&
+		    !adev->gfx.gfx_off_state) {
+			/* If going to s2idle, no need to wait */
+			if (adev->in_s0ix)
+				delay = GFX_OFF_NO_DELAY;
+			schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+					      delay);
+		}
+	} else {
+		if (adev->gfx.gfx_off_req_count == 0) {
+			cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+			if (adev->gfx.gfx_off_state &&
+			    !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
+				adev->gfx.gfx_off_state = false;
+
+				if (adev->gfx.funcs->init_spm_golden) {
+					dev_dbg(adev->dev,
+						"GFXOFF is disabled, re-init SPM golden settings\n");
+					amdgpu_gfx_init_spm_golden(adev);
+				}
+			}
+		}
+
+		adev->gfx.gfx_off_req_count++;
 	}
 
+unlock:
+	mutex_unlock(&adev->gfx.gfx_off_mutex);
+}
+
+int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
+{
+
+	int r = 0;
+
+	mutex_lock(&adev->gfx.gfx_off_mutex);
+
+	r = smu_get_status_gfxoff(adev, value);
+
 	mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+	return r;
 }
 
 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
@@ -571,7 +627,6 @@
 	int r;
 	struct ras_fs_if fs_info = {
 		.sysfs_name = "gfx_err_count",
-		.debugfs_name = "gfx_err_inject",
 	};
 	struct ras_ih_if ih_info = {
 		.cb = amdgpu_gfx_process_ras_data_cb,
 	};
@@ -584,16 +639,17 @@
 		adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
 		adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
 		adev->gfx.ras_if->sub_block_index = 0;
-		strcpy(adev->gfx.ras_if->name, "gfx");
 	}
 	fs_info.head = ih_info.head = *adev->gfx.ras_if;
-
 	r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
 				 &fs_info, &ih_info);
 	if (r)
 		goto free;
 
 	if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
+		if (!amdgpu_persistent_edc_harvesting_supported(adev))
+			amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+
 		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
 		if (r)
 			goto late_fini;
@@ -639,9 +695,10 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
 	 */
 	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
 		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
-		if (adev->gfx.funcs->query_ras_error_count)
-			adev->gfx.funcs->query_ras_error_count(adev, err_data);
-		amdgpu_ras_reset_gpu(adev, 0);
+		if (adev->gfx.ras_funcs &&
+		    adev->gfx.ras_funcs->query_ras_error_count)
+			adev->gfx.ras_funcs->query_ras_error_count(adev, err_data);
+		amdgpu_ras_reset_gpu(adev);
 	}
 	return AMDGPU_RAS_SUCCESS;
 }
@@ -664,3 +721,150 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
 	amdgpu_ras_interrupt_dispatch(adev, &ih_data);
 	return 0;
 }
+
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+	signed long r, cnt = 0;
+	unsigned long flags;
+	uint32_t seq, reg_val_offs = 0, value = 0;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+	struct amdgpu_ring *ring = &kiq->ring;
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return 0;
+
+	BUG_ON(!ring->funcs->emit_rreg);
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+	if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+		pr_err("critical bug! too many kiq readers\n");
+		goto failed_unlock;
+	}
+	amdgpu_ring_alloc(ring, 32);
+	amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
+	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+	if (r)
+		goto failed_undo;
+
+	amdgpu_ring_commit(ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+	/* don't wait anymore for gpu reset case because this way may
+	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+	 * never return if we keep waiting in virt_kiq_rreg, which cause
+	 * gpu_recover() hang there.
+	 *
+	 * also don't wait anymore for IRQ context
+	 * */
+	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+		goto failed_kiq_read;
+
+	might_sleep();
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+	}
+
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq_read;
+
+	mb();
+	value = adev->wb.wb[reg_val_offs];
+	amdgpu_device_wb_free(adev, reg_val_offs);
+	return value;
+
+failed_undo:
+	amdgpu_ring_undo(ring);
+failed_unlock:
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_read:
+	if (reg_val_offs)
+		amdgpu_device_wb_free(adev, reg_val_offs);
+	dev_err(adev->dev, "failed to read reg:%x\n", reg);
+	return ~0;
+}
+
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+	signed long r, cnt = 0;
+	unsigned long flags;
+	uint32_t seq;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+	struct amdgpu_ring *ring = &kiq->ring;
+
+	BUG_ON(!ring->funcs->emit_wreg);
+
+	if (amdgpu_device_skip_hw_access(adev))
+		return;
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+	amdgpu_ring_alloc(ring, 32);
+	amdgpu_ring_emit_wreg(ring, reg, v);
+	r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+	if (r)
+		goto failed_undo;
+
+	amdgpu_ring_commit(ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+	/* don't wait anymore for gpu reset case because this way may
+	 * block gpu_recover() routine forever, e.g. this virt_kiq_rreg
+	 * is triggered in TTM and ttm_bo_lock_delayed_workqueue() will
+	 * never return if we keep waiting in virt_kiq_rreg, which cause
+	 * gpu_recover() hang there.
+	 *
+	 * also don't wait anymore for IRQ context
	 * */
+	if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+		goto failed_kiq_write;
+
+	might_sleep();
+	while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+
+		msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+		r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+	}
+
+	if (cnt > MAX_KIQ_REG_TRY)
+		goto failed_kiq_write;
+
+	return;
+
+failed_undo:
+	amdgpu_ring_undo(ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_write:
+	dev_err(adev->dev, "failed to write reg:%x\n", reg);
+}
+
+int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
+{
+	if (amdgpu_num_kcq == -1) {
+		return 8;
+	} else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+		dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
+		return 8;
+	}
+	return amdgpu_num_kcq;
+}
+
+/* amdgpu_gfx_state_change_set - Handle gfx power state change set
+ * @adev: amdgpu_device pointer
+ * @state: gfx power state(1 -sGpuChangeState_D0Entry and 2 -sGpuChangeState_D3Entry)
+ *
+ */
+
+void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state)
+{
+	mutex_lock(&adev->pm.mutex);
+	if (adev->powerplay.pp_funcs &&
+	    adev->powerplay.pp_funcs->gfx_state_change_set)
+		((adev)->powerplay.pp_funcs->gfx_state_change_set(
+			(adev)->powerplay.pp_handle, state));
+	mutex_unlock(&adev->pm.mutex);
+}
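The queue-mask arithmetic introduced above by amdgpu_queue_mask_bit_to_set_resource_bit() can be checked in isolation. The sketch below is a stand-alone user-space model, not driver code: it assumes the 4-pipes-per-MEC, 8-queues-per-pipe layout implied by the hard-coded mec * 4 * 8 + pipe * 8 + queue factors in the patch, and the helper names (bit_to_mec_queue, bit_to_set_resource_bit) are illustrative mirrors of the driver functions rather than real amdgpu API.

/* Standalone sketch (not driver code): decompose a linear compute-queue
 * bitmap bit into mec/pipe/queue and remap it to the bit position the KIQ
 * SET_RESOURCES packet expects, following the arithmetic added in this patch.
 * Assumes 1 MEC, 4 pipes per MEC, 8 queues per pipe.
 */
#include <stdint.h>
#include <stdio.h>

#define NUM_PIPE_PER_MEC   4
#define NUM_QUEUE_PER_PIPE 8

/* Mirror of amdgpu_queue_mask_bit_to_mec_queue(): split a linear bit. */
static void bit_to_mec_queue(int bit, int *mec, int *pipe, int *queue)
{
	*queue = bit % NUM_QUEUE_PER_PIPE;
	*pipe = (bit / NUM_QUEUE_PER_PIPE) % NUM_PIPE_PER_MEC;
	*mec = (bit / NUM_QUEUE_PER_PIPE) / NUM_PIPE_PER_MEC;
}

/* Mirror of amdgpu_queue_mask_bit_to_set_resource_bit(). */
static int bit_to_set_resource_bit(int bit)
{
	int mec, pipe, queue;

	bit_to_mec_queue(bit, &mec, &pipe, &queue);
	return mec * 4 * 8 + pipe * 8 + queue;
}

int main(void)
{
	uint64_t queue_mask = 0;
	int i;

	/* Queues spread evenly across pipes, as in the multipipe acquire
	 * policy above: with 8 kernel compute queues this picks queue 0
	 * and queue 1 on each of the 4 pipes of MEC 0.
	 */
	for (i = 0; i < 8; i++) {
		int pipe = i % NUM_PIPE_PER_MEC;
		int queue = (i / NUM_PIPE_PER_MEC) % NUM_QUEUE_PER_PIPE;
		int bit = pipe * NUM_QUEUE_PER_PIPE + queue;
		int res = bit_to_set_resource_bit(bit);

		queue_mask |= 1ull << res;
		printf("ring %d -> pipe %d queue %d -> resource bit %d\n",
		       i, pipe, queue, res);
	}
	printf("SET_RESOURCES queue mask: 0x%016llx\n",
	       (unsigned long long)queue_mask);
	return 0;
}

With these assumed values the resource bit equals the queue-bitmap bit (the printed mask is 0x0000000003030303), so the indirection only changes the mask on parts whose num_queue_per_pipe or num_pipe_per_mec differ from the 8 and 4 that the SET_RESOURCES layout hard-codes; the helper keeps amdgpu_gfx_enable_kcq() correct in either case.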