Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c  332
1 file changed, 268 insertions(+), 64 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index e00b46180d2e..1916ec84dd71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -31,6 +31,8 @@
/* delay 0.1 second to enable gfx off feature */
#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100)
+#define GFX_OFF_NO_DELAY 0
+
/*
* GPU GFX IP block helpers function.
*/
@@ -48,7 +50,7 @@ int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
return bit;
}
-void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
+void amdgpu_queue_mask_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue)
{
*queue = bit % adev->gfx.mec.num_queue_per_pipe;
@@ -192,42 +194,44 @@ static bool amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
return adev->gfx.mec.num_mec > 1;
}
-void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
+bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
{
- int i, queue, pipe, mec;
- bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+ /* Policy: use 1st queue as high priority compute queue if we
+ * have more than one compute queue.
+ */
+ if (adev->gfx.num_compute_rings > 1 &&
+ ring == &adev->gfx.compute_ring[0])
+ return true;
- /* policy for amdgpu compute queue ownership */
- for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
- queue = i % adev->gfx.mec.num_queue_per_pipe;
- pipe = (i / adev->gfx.mec.num_queue_per_pipe)
- % adev->gfx.mec.num_pipe_per_mec;
- mec = (i / adev->gfx.mec.num_queue_per_pipe)
- / adev->gfx.mec.num_pipe_per_mec;
-
- /* we've run out of HW */
- if (mec >= adev->gfx.mec.num_mec)
- break;
+ return false;
+}
- if (multipipe_policy) {
- /* policy: amdgpu owns the first two queues of the first MEC */
- if (mec == 0 && queue < 2)
- set_bit(i, adev->gfx.mec.queue_bitmap);
- } else {
- /* policy: amdgpu owns all queues in the first pipe */
- if (mec == 0 && pipe == 0)
- set_bit(i, adev->gfx.mec.queue_bitmap);
+void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
+{
+ int i, queue, pipe;
+ bool multipipe_policy = amdgpu_gfx_is_multipipe_capable(adev);
+ int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec *
+ adev->gfx.mec.num_queue_per_pipe,
+ adev->gfx.num_compute_rings);
+
+ if (multipipe_policy) {
+ /* policy: make queues evenly cross all pipes on MEC1 only */
+ for (i = 0; i < max_queues_per_mec; i++) {
+ pipe = i % adev->gfx.mec.num_pipe_per_mec;
+ queue = (i / adev->gfx.mec.num_pipe_per_mec) %
+ adev->gfx.mec.num_queue_per_pipe;
+
+ set_bit(pipe * adev->gfx.mec.num_queue_per_pipe + queue,
+ adev->gfx.mec.queue_bitmap);
}
+ } else {
+ /* policy: amdgpu owns all queues in the given pipe */
+ for (i = 0; i < max_queues_per_mec; ++i)
+ set_bit(i, adev->gfx.mec.queue_bitmap);
}
- /* update the number of active compute rings */
- adev->gfx.num_compute_rings =
- bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
-
- /* If you hit this case and edited the policy, you probably just
- * need to increase AMDGPU_MAX_COMPUTE_RINGS */
- if (WARN_ON(adev->gfx.num_compute_rings > AMDGPU_MAX_COMPUTE_RINGS))
- adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
+ dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES));
}
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
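
For reference, the multipipe branch above spreads the kernel's compute queues evenly across the pipes of MEC1 before any pipe is given a second queue. A small standalone sketch (not part of the diff) of the resulting bitmap layout, using assumed topology values of 4 pipes per MEC, 8 queues per pipe and 8 kernel compute rings:

    /* Illustration only: mirrors the pipe/queue arithmetic in
     * amdgpu_gfx_compute_queue_acquire() with assumed topology values. */
    #include <stdio.h>

    int main(void)
    {
            const int num_pipe_per_mec = 4, num_queue_per_pipe = 8;
            const int num_compute_rings = 8;        /* assumed ring count */
            int i;

            for (i = 0; i < num_compute_rings; i++) {
                    int pipe = i % num_pipe_per_mec;
                    int queue = (i / num_pipe_per_mec) % num_queue_per_pipe;

                    /* Sets bitmap bits 0, 8, 16, 24, 1, 9, 17, 25: one queue
                     * on each pipe of MEC1 before any pipe gets a second. */
                    printf("iteration %d -> pipe %d queue %d (bitmap bit %d)\n",
                           i, pipe, queue, pipe * num_queue_per_pipe + queue);
            }
            return 0;
    }
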
@@ -266,7 +270,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
continue;
- amdgpu_gfx_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
+ amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
/*
* 1. Using pipes 2/3 from MEC 2 seems to cause problems.
@@ -296,10 +300,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
spin_lock_init(&kiq->ring_lock);
- r = amdgpu_device_wb_get(adev, &adev->virt.reg_val_offs);
- if (r)
- return r;
-
ring->adev = NULL;
ring->ring_obj = NULL;
ring->use_doorbell = true;
@@ -310,9 +310,10 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
return r;
ring->eop_gpu_addr = kiq->eop_gpu_addr;
+ ring->no_scheduler = true;
sprintf(ring->name, "kiq_%d.%d.%d", ring->me, ring->pipe, ring->queue);
- r = amdgpu_ring_init(adev, ring, 1024,
- irq, AMDGPU_CP_KIQ_IRQ_DRIVER0);
+ r = amdgpu_ring_init(adev, ring, 1024, irq, AMDGPU_CP_KIQ_IRQ_DRIVER0,
+ AMDGPU_RING_PRIO_DEFAULT, NULL);
if (r)
dev_warn(adev->dev, "(%d) failed to init kiq ring\n", r);
@@ -321,7 +322,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring)
{
- amdgpu_device_wb_free(ring->adev, ring->adev->virt.reg_val_offs);
amdgpu_ring_fini(ring);
}
@@ -464,20 +464,38 @@ int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &kiq->ring;
- int i;
+ int i, r;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
+ spin_lock(&adev->gfx.kiq.ring_lock);
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
- adev->gfx.num_compute_rings))
+ adev->gfx.num_compute_rings)) {
+ spin_unlock(&adev->gfx.kiq.ring_lock);
return -ENOMEM;
+ }
for (i = 0; i < adev->gfx.num_compute_rings; i++)
kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
RESET_QUEUES, 0, 0);
+ r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
+
+ return r;
+}
+
+int amdgpu_queue_mask_bit_to_set_resource_bit(struct amdgpu_device *adev,
+ int queue_bit)
+{
+ int mec, pipe, queue;
+ int set_resource_bit = 0;
+
+ amdgpu_queue_mask_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
- return amdgpu_ring_test_ring(kiq_ring);
+ set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;
+
+ return set_resource_bit;
}
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
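
The arithmetic in amdgpu_queue_mask_bit_to_set_resource_bit() converts a bit position in the driver's MEC queue bitmap (packed as num_queue_per_pipe queues per pipe) into a fixed 4-pipe-by-8-queue per-MEC layout, which appears to be what the KIQ set_resources queue mask expects; amdgpu_gfx_enable_kcq() below now builds queue_mask through it instead of using the loop index directly. A standalone sketch (not part of the diff) with an assumed topology of 4 pipes per MEC and 2 queues per pipe, chosen so the two layouts differ:

    #include <stdio.h>

    int main(void)
    {
            /* Assumed, illustrative topology: 4 pipes per MEC, 2 queues per pipe. */
            const int num_queue_per_pipe = 2, num_pipe_per_mec = 4;
            int queue_bit;

            for (queue_bit = 0; queue_bit < 8; queue_bit++) {
                    /* Same decomposition as amdgpu_queue_mask_bit_to_mec_queue(). */
                    int queue = queue_bit % num_queue_per_pipe;
                    int pipe = (queue_bit / num_queue_per_pipe) % num_pipe_per_mec;
                    int mec = (queue_bit / num_queue_per_pipe) / num_pipe_per_mec;
                    /* Same arithmetic as amdgpu_queue_mask_bit_to_set_resource_bit(). */
                    int set_resource_bit = mec * 4 * 8 + pipe * 8 + queue;

                    printf("driver bit %d -> mec %d pipe %d queue %d -> mask bit %d\n",
                           queue_bit, mec, pipe, queue, set_resource_bit);
            }
            return 0;
    }

With these assumed values, driver bits 0..7 map to mask bits 0, 1, 8, 9, 16, 17, 24, 25, i.e. each pipe's queues land in their own 8-bit slot of the firmware-visible mask.
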
@@ -502,17 +520,18 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
break;
}
- queue_mask |= (1ull << i);
+ queue_mask |= (1ull << amdgpu_queue_mask_bit_to_set_resource_bit(adev, i));
}
DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
kiq_ring->queue);
-
+ spin_lock(&adev->gfx.kiq.ring_lock);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
adev->gfx.num_compute_rings +
kiq->pmf->set_resources_size);
if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
return r;
}
@@ -521,6 +540,7 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
r = amdgpu_ring_test_helper(kiq_ring);
+ spin_unlock(&adev->gfx.kiq.ring_lock);
if (r)
DRM_ERROR("KCQ enable failed\n");
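
Both the disable path above and this enable path now hold kiq->ring_lock across the whole allocate/emit/test sequence, so concurrent callers cannot interleave packets on the shared KIQ ring. A hedged sketch of that pattern (the helper below is illustrative only and does not exist in the file):

    static int example_kiq_submit_locked(struct amdgpu_device *adev,
                                         unsigned int ndw,
                                         void (*emit)(struct amdgpu_ring *kiq_ring))
    {
            struct amdgpu_kiq *kiq = &adev->gfx.kiq;
            struct amdgpu_ring *kiq_ring = &kiq->ring;
            int r;

            spin_lock(&kiq->ring_lock);
            r = amdgpu_ring_alloc(kiq_ring, ndw);
            if (r) {
                    spin_unlock(&kiq->ring_lock);
                    return r;
            }

            emit(kiq_ring);                         /* e.g. map/unmap queue packets */
            r = amdgpu_ring_test_helper(kiq_ring);  /* kicks the ring, checks KIQ responds */
            spin_unlock(&kiq->ring_lock);

            return r;
    }
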
@@ -540,30 +560,66 @@ int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
{
- if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
- return;
+ unsigned long delay = GFX_OFF_DELAY_ENABLE;
- if (!is_support_sw_smu(adev) &&
- (!adev->powerplay.pp_funcs ||
- !adev->powerplay.pp_funcs->set_powergating_by_smu))
+ if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
return;
-
mutex_lock(&adev->gfx.gfx_off_mutex);
- if (!enable)
- adev->gfx.gfx_off_req_count++;
- else if (adev->gfx.gfx_off_req_count > 0)
+ if (enable) {
+ /* If the count is already 0, it means there's an imbalance bug somewhere.
+ * Note that the bug may be in a different caller than the one which triggers the
+ * WARN_ON_ONCE.
+ */
+ if (WARN_ON_ONCE(adev->gfx.gfx_off_req_count == 0))
+ goto unlock;
+
adev->gfx.gfx_off_req_count--;
- if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) {
- schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE);
- } else if (!enable && adev->gfx.gfx_off_state) {
- if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false))
- adev->gfx.gfx_off_state = false;
+ if (adev->gfx.gfx_off_req_count == 0 &&
+ !adev->gfx.gfx_off_state) {
+ /* If going to s2idle, no need to wait */
+ if (adev->in_s0ix)
+ delay = GFX_OFF_NO_DELAY;
+ schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+ delay);
+ }
+ } else {
+ if (adev->gfx.gfx_off_req_count == 0) {
+ cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work);
+
+ if (adev->gfx.gfx_off_state &&
+ !amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) {
+ adev->gfx.gfx_off_state = false;
+
+ if (adev->gfx.funcs->init_spm_golden) {
+ dev_dbg(adev->dev,
+ "GFXOFF is disabled, re-init SPM golden settings\n");
+ amdgpu_gfx_init_spm_golden(adev);
+ }
+ }
+ }
+
+ adev->gfx.gfx_off_req_count++;
}
+unlock:
+ mutex_unlock(&adev->gfx.gfx_off_mutex);
+}
+
+int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
+{
+
+ int r = 0;
+
+ mutex_lock(&adev->gfx.gfx_off_mutex);
+
+ r = smu_get_status_gfxoff(adev, value);
+
mutex_unlock(&adev->gfx.gfx_off_mutex);
+
+ return r;
}
int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
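
The reworked amdgpu_gfx_off_ctrl() above is a plain reference count: every disable (enable == false) must be balanced by exactly one enable, and the WARN_ON_ONCE fires if an enable arrives while the count is already zero. A hedged usage sketch (the caller below is hypothetical):

    static uint32_t example_read_gfx_reg(struct amdgpu_device *adev, uint32_t reg)
    {
            uint32_t val;

            amdgpu_gfx_off_ctrl(adev, false);       /* gfx_off_req_count++, GFX kept on */
            val = RREG32(reg);                      /* safe to touch GFX registers here */
            amdgpu_gfx_off_ctrl(adev, true);        /* gfx_off_req_count--; when it hits 0
                                                     * the delayed GFXOFF work is re-armed */
            return val;
    }
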
@@ -571,7 +627,6 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
int r;
struct ras_fs_if fs_info = {
.sysfs_name = "gfx_err_count",
- .debugfs_name = "gfx_err_inject",
};
struct ras_ih_if ih_info = {
.cb = amdgpu_gfx_process_ras_data_cb,
@@ -584,16 +639,17 @@ int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev)
adev->gfx.ras_if->block = AMDGPU_RAS_BLOCK__GFX;
adev->gfx.ras_if->type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
adev->gfx.ras_if->sub_block_index = 0;
- strcpy(adev->gfx.ras_if->name, "gfx");
}
fs_info.head = ih_info.head = *adev->gfx.ras_if;
-
r = amdgpu_ras_late_init(adev, adev->gfx.ras_if,
&fs_info, &ih_info);
if (r)
goto free;
if (amdgpu_ras_is_supported(adev, adev->gfx.ras_if->block)) {
+ if (!amdgpu_persistent_edc_harvesting_supported(adev))
+ amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+
r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
if (r)
goto late_fini;
@@ -639,9 +695,10 @@ int amdgpu_gfx_process_ras_data_cb(struct amdgpu_device *adev,
*/
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) {
kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
- if (adev->gfx.funcs->query_ras_error_count)
- adev->gfx.funcs->query_ras_error_count(adev, err_data);
- amdgpu_ras_reset_gpu(adev, 0);
+ if (adev->gfx.ras_funcs &&
+ adev->gfx.ras_funcs->query_ras_error_count)
+ adev->gfx.ras_funcs->query_ras_error_count(adev, err_data);
+ amdgpu_ras_reset_gpu(adev);
}
return AMDGPU_RAS_SUCCESS;
}
@@ -664,3 +721,150 @@ int amdgpu_gfx_cp_ecc_error_irq(struct amdgpu_device *adev,
amdgpu_ras_interrupt_dispatch(adev, &ih_data);
return 0;
}
+
+uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq, reg_val_offs = 0, value = 0;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return 0;
+
+ BUG_ON(!ring->funcs->emit_rreg);
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ if (amdgpu_device_wb_get(adev, &reg_val_offs)) {
+ pr_err("critical bug! too many kiq readers\n");
+ goto failed_unlock;
+ }
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_rreg(ring, reg, reg_val_offs);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* Don't wait any longer in the GPU reset case, because doing so may
+ * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
+ * path is triggered from TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which makes
+ * gpu_recover() hang there.
+ *
+ * Also don't wait any longer when called from IRQ context.
+ */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_read;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_read;
+
+ mb();
+ value = adev->wb.wb[reg_val_offs];
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ return value;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+failed_unlock:
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_read:
+ if (reg_val_offs)
+ amdgpu_device_wb_free(adev, reg_val_offs);
+ dev_err(adev->dev, "failed to read reg:%x\n", reg);
+ return ~0;
+}
+
+void amdgpu_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
+{
+ signed long r, cnt = 0;
+ unsigned long flags;
+ uint32_t seq;
+ struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+ struct amdgpu_ring *ring = &kiq->ring;
+
+ BUG_ON(!ring->funcs->emit_wreg);
+
+ if (amdgpu_device_skip_hw_access(adev))
+ return;
+
+ spin_lock_irqsave(&kiq->ring_lock, flags);
+ amdgpu_ring_alloc(ring, 32);
+ amdgpu_ring_emit_wreg(ring, reg, v);
+ r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
+ if (r)
+ goto failed_undo;
+
+ amdgpu_ring_commit(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+
+ /* Don't wait any longer in the GPU reset case, because doing so may
+ * block the gpu_recover() routine forever; e.g. this virt_kiq_rreg
+ * path is triggered from TTM and ttm_bo_lock_delayed_workqueue() will
+ * never return if we keep waiting in virt_kiq_rreg, which makes
+ * gpu_recover() hang there.
+ *
+ * Also don't wait any longer when called from IRQ context.
+ */
+ if (r < 1 && (amdgpu_in_reset(adev) || in_interrupt()))
+ goto failed_kiq_write;
+
+ might_sleep();
+ while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) {
+
+ msleep(MAX_KIQ_REG_BAILOUT_INTERVAL);
+ r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
+ }
+
+ if (cnt > MAX_KIQ_REG_TRY)
+ goto failed_kiq_write;
+
+ return;
+
+failed_undo:
+ amdgpu_ring_undo(ring);
+ spin_unlock_irqrestore(&kiq->ring_lock, flags);
+failed_kiq_write:
+ dev_err(adev->dev, "failed to write reg:%x\n", reg);
+}
+
+int amdgpu_gfx_get_num_kcq(struct amdgpu_device *adev)
+{
+ if (amdgpu_num_kcq == -1) {
+ return 8;
+ } else if (amdgpu_num_kcq > 8 || amdgpu_num_kcq < 0) {
+ dev_warn(adev->dev, "set kernel compute queue number to 8 due to invalid parameter provided by user\n");
+ return 8;
+ }
+ return amdgpu_num_kcq;
+}
+
+/**
+ * amdgpu_gfx_state_change_set - Handle gfx power state change set
+ * @adev: amdgpu_device pointer
+ * @state: gfx power state (1 - sGpuChangeState_D0Entry, 2 - sGpuChangeState_D3Entry)
+ */
+
+void amdgpu_gfx_state_change_set(struct amdgpu_device *adev, enum gfx_change_state state)
+{
+ mutex_lock(&adev->pm.mutex);
+ if (adev->powerplay.pp_funcs &&
+ adev->powerplay.pp_funcs->gfx_state_change_set)
+ ((adev)->powerplay.pp_funcs->gfx_state_change_set(
+ (adev)->powerplay.pp_handle, state));
+ mutex_unlock(&adev->pm.mutex);
+}
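
The amdgpu_kiq_rreg()/amdgpu_kiq_wreg() helpers added above let register access go through the KIQ ring when direct MMIO access is not available, retrying the fence poll up to MAX_KIQ_REG_TRY times unless a GPU reset is in progress or the caller is in IRQ context. A hedged sketch of how a caller might pick between the two paths (the wrapper is hypothetical; in the driver the equivalent decision is made in the generic register-access helpers):

    static uint32_t example_rreg(struct amdgpu_device *adev, uint32_t reg)
    {
            /* Route through the KIQ ring when running as an SR-IOV VF in
             * runtime mode; otherwise read the register directly via MMIO. */
            if (amdgpu_sriov_vf(adev) && amdgpu_sriov_runtime(adev))
                    return amdgpu_kiq_rreg(adev, reg);

            return RREG32_NO_KIQ(reg);
    }
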