diff options
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 21 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 4 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 133 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c | 67 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 | ||||
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_device.c | 10 |
7 files changed, 141 insertions, 99 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 76438f197de1..069b259f384c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -209,12 +209,12 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) int max_queues_per_mec = min(adev->gfx.mec.num_pipe_per_mec * adev->gfx.mec.num_queue_per_pipe, adev->gfx.num_compute_rings); - int num_xcd = (adev->gfx.num_xcd > 1) ? adev->gfx.num_xcd : 1; + int num_xcc = adev->gfx.xcc_mask ? NUM_XCC(adev->gfx.xcc_mask) : 1; if (multipipe_policy) { /* policy: make queues evenly cross all pipes on MEC1 only * for multiple xcc, just use the original policy for simplicity */ - for (j = 0; j < num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { for (i = 0; i < max_queues_per_mec; i++) { pipe = i % adev->gfx.mec.num_pipe_per_mec; queue = (i / adev->gfx.mec.num_pipe_per_mec) % @@ -226,13 +226,13 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev) } } else { /* policy: amdgpu owns all queues in the given pipe */ - for (j = 0; j < num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { for (i = 0; i < max_queues_per_mec; ++i) set_bit(i, adev->gfx.mec_bitmap[j].queue_bitmap); } } - for (j = 0; j < num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { dev_dbg(adev->dev, "mec queue bitmap weight=%d\n", bitmap_weight(adev->gfx.mec_bitmap[j].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES)); } @@ -1207,23 +1207,24 @@ static ssize_t amdgpu_gfx_set_compute_partition(struct device *dev, struct drm_device *ddev = dev_get_drvdata(dev); struct amdgpu_device *adev = drm_to_adev(ddev); enum amdgpu_gfx_partition mode; - int ret; + int ret = 0, num_xcc; - if (adev->gfx.num_xcd % 2 != 0) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + if (num_xcc % 2 != 0) return -EINVAL; if (!strncasecmp("SPX", buf, strlen("SPX"))) { mode = AMDGPU_SPX_PARTITION_MODE; } else if (!strncasecmp("DPX", buf, strlen("DPX"))) { - if (adev->gfx.num_xcd != 4 || adev->gfx.num_xcd != 8) + if (num_xcc != 4 || num_xcc != 8) return -EINVAL; mode = AMDGPU_DPX_PARTITION_MODE; } else if (!strncasecmp("TPX", buf, strlen("TPX"))) { - if (adev->gfx.num_xcd != 6) + if (num_xcc != 6) return -EINVAL; mode = AMDGPU_TPX_PARTITION_MODE; } else if (!strncasecmp("QPX", buf, strlen("QPX"))) { - if (adev->gfx.num_xcd != 8) + if (num_xcc != 8) return -EINVAL; mode = AMDGPU_QPX_PARTITION_MODE; } else if (!strncasecmp("CPX", buf, strlen("CPX"))) { @@ -1253,7 +1254,7 @@ static ssize_t amdgpu_gfx_get_available_compute_partition(struct device *dev, char *supported_partition; /* TBD */ - switch (adev->gfx.num_xcd) { + switch (NUM_XCC(adev->gfx.xcc_mask)) { case 8: supported_partition = "SPX, DPX, QPX, CPX"; break; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 8df36527aee9..93f9875154db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -64,6 +64,8 @@ enum amdgpu_gfx_partition { AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, }; +#define NUM_XCC(x) hweight16(x) + struct amdgpu_mec { struct amdgpu_bo *hpd_eop_obj; u64 hpd_eop_gpu_addr; @@ -396,7 +398,7 @@ struct amdgpu_gfx { bool cp_gfx_shadow; /* for gfx11 */ enum amdgpu_gfx_partition partition_mode; - uint32_t num_xcd; + uint16_t xcc_mask; uint32_t num_xcc_per_xcp; struct mutex partition_mutex; }; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 91814dc083c9..da69177dc76f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4536,7 +4536,7 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.num_gfx_rings = 0; else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; - adev->gfx.num_xcd = 1; + adev->gfx.xcc_mask = 1; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); gfx_v9_0_set_kiq_pm4_funcs(adev); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c index 52185b1d5d31..c776fc5884de 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c @@ -177,16 +177,19 @@ static const struct kiq_pm4_funcs gfx_v9_4_3_kiq_pm4_funcs = { static void gfx_v9_4_3_set_kiq_pm4_funcs(struct amdgpu_device *adev) { - int i; - for (i = 0; i < adev->gfx.num_xcd; i++) + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) adev->gfx.kiq[i].pmf = &gfx_v9_4_3_kiq_pm4_funcs; } static void gfx_v9_4_3_init_golden_registers(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 2; i < adev->gfx.num_xcd; i++) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 2; i < num_xcc; i++) WREG32_SOC15(GC, i, regGRBM_MCM_ADDR, 0x4); } @@ -499,7 +502,7 @@ static void gfx_v9_4_3_mec_fini(struct amdgpu_device *adev) static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) { - int r, i; + int r, i, num_xcc; u32 *hpd; const __le32 *fw_data; unsigned fw_size; @@ -508,7 +511,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev) const struct gfx_firmware_header_v1_0 *mec_hdr; - for (i = 0; i < adev->gfx.num_xcd; i++) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) bitmap_zero(adev->gfx.mec_bitmap[i].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); @@ -683,23 +687,24 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, enum amdgpu_gfx_partition mode) { u32 tmp = 0; - int num_xcc_per_partition, i; + int num_xcc_per_partition, i, num_xcc; if (mode == adev->gfx.partition_mode) return mode; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (mode) { case AMDGPU_SPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd; + num_xcc_per_partition = num_xcc; break; case AMDGPU_DPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd / 2; + num_xcc_per_partition = num_xcc / 2; break; case AMDGPU_TPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd / 3; + num_xcc_per_partition = num_xcc / 3; break; case AMDGPU_QPX_PARTITION_MODE: - num_xcc_per_partition = adev->gfx.num_xcd / 4; + num_xcc_per_partition = num_xcc / 4; break; case AMDGPU_CPX_PARTITION_MODE: num_xcc_per_partition = 1; @@ -712,7 +717,7 @@ static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev, * Stop user queues and threads, and make sure GPU is empty of work. */ - for (i = 0; i < adev->gfx.num_xcd; i++) { + for (i = 0; i < num_xcc; i++) { tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP, num_xcc_per_partition); tmp = REG_SET_FIELD(tmp, CP_HYP_XCP_CTL, VIRTUAL_XCC_ID, @@ -836,7 +841,7 @@ static int gfx_v9_4_3_compute_ring_init(struct amdgpu_device *adev, int ring_id, static int gfx_v9_4_3_sw_init(void *handle) { - int i, j, k, r, ring_id, xcc_id; + int i, j, k, r, ring_id, xcc_id, num_xcc; struct amdgpu_kiq *kiq; struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -844,6 +849,8 @@ static int gfx_v9_4_3_sw_init(void *handle) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); if (r) @@ -877,8 +884,7 @@ static int gfx_v9_4_3_sw_init(void *handle) /* set up the compute queues - allocate horizontally across pipes */ ring_id = 0; - for (xcc_id = 0; xcc_id < adev->gfx.num_xcd; xcc_id++) { - + for (xcc_id = 0; xcc_id < num_xcc; xcc_id++) { for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; @@ -930,14 +936,14 @@ static int gfx_v9_4_3_sw_init(void *handle) static int gfx_v9_4_3_sw_fini(void *handle) { - int i; + int i, num_xcc; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - for (i = 0; i < adev->gfx.num_compute_rings * - adev->gfx.num_xcd; i++) + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < adev->gfx.num_compute_rings * num_xcc; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - for (i = 0; i < adev->gfx.num_xcd; i++) { + for (i = 0; i < num_xcc; i++) { amdgpu_gfx_mqd_sw_fini(adev, i); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[i].ring); amdgpu_gfx_kiq_fini(adev, i); @@ -1050,9 +1056,10 @@ static void gfx_v9_4_3_init_gds_vmid(struct amdgpu_device *adev, int xcc_id) static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) { u32 tmp; - int i, j; + int i, j, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, GRBM_CNTL, READ_TIMEOUT, 0xff); gfx_v9_4_3_setup_rb(adev, i); } @@ -1064,7 +1071,7 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { - for (j = 0; j < adev->gfx.num_xcd; j++) { + for (j = 0; j < num_xcc; j++) { soc15_grbm_select(adev, 0, 0, 0, i, j); /* CP and shaders */ if (i == 0) { @@ -1092,7 +1099,7 @@ static void gfx_v9_4_3_constants_init(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); - for (i = 0; i < adev->gfx.num_xcd; i++) { + for (i = 0; i < num_xcc; i++) { gfx_v9_4_3_init_compute_vmid(adev, i); gfx_v9_4_3_init_gds_vmid(adev, i); } @@ -1150,8 +1157,10 @@ static void gfx_v9_4_3_disable_gpa_mode(struct amdgpu_device *adev, int xcc_id) static void gfx_v9_4_3_program_xcc_id(struct amdgpu_device *adev, int xcc_id) { uint32_t tmp = 0; + int num_xcc; - switch (adev->gfx.num_xcd) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + switch (num_xcc) { /* directly config VIRTUAL_XCC_ID to 0 for 1-XCC */ case 1: WREG32_SOC15(GC, xcc_id, regCP_HYP_XCP_CTL, 0x8); @@ -1288,9 +1297,10 @@ static void gfx_v9_4_3_enable_gui_idle_interrupt(struct amdgpu_device *adev, static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 0); gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); gfx_v9_4_3_wait_for_rlc_serdes(adev, i); @@ -1299,9 +1309,10 @@ static void gfx_v9_4_3_rlc_stop(struct amdgpu_device *adev) static void gfx_v9_4_3_rlc_reset(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1); udelay(50); WREG32_FIELD15_PREREG(GC, i, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0); @@ -1314,9 +1325,10 @@ static void gfx_v9_4_3_rlc_start(struct amdgpu_device *adev) #ifdef AMDGPU_RLC_DEBUG_RETRY u32 rlc_ucode_ver; #endif - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_FIELD15_PREREG(GC, i, RLC_CNTL, RLC_ENABLE_F32, 1); udelay(50); @@ -1377,11 +1389,12 @@ static int gfx_v9_4_3_rlc_load_microcode(struct amdgpu_device *adev, int xcc_id) static int gfx_v9_4_3_rlc_resume(struct amdgpu_device *adev) { - int r, i; + int r, i, num_xcc; adev->gfx.rlc.funcs->stop(adev); - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* disable CG */ WREG32_SOC15(GC, i, regRLC_CGCG_CGLS_CTRL, 0); @@ -1954,10 +1967,11 @@ done: static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev) { - int r, i, j; + int r, i, j, num_xcc; struct amdgpu_ring *ring; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { gfx_v9_4_3_enable_gui_idle_interrupt(adev, false, i); if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { @@ -2021,12 +2035,13 @@ static int gfx_v9_4_3_hw_init(void *handle) static int gfx_v9_4_3_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; + int i, num_xcc; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (amdgpu_gfx_disable_kcq(adev, i)) DRM_ERROR("XCD %d KCQ disable failed\n", i); @@ -2069,9 +2084,10 @@ static int gfx_v9_4_3_resume(void *handle) static bool gfx_v9_4_3_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (REG_GET_FIELD(RREG32_SOC15(GC, i, regGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) return false; @@ -2183,30 +2199,30 @@ static void gfx_v9_4_3_ring_emit_gds_switch(struct amdgpu_ring *ring, static int gfx_v9_4_3_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int num_xcc; - /* hardcode in emulation phase */ - adev->gfx.num_xcd = 1; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); adev->gfx.partition_mode = amdgpu_user_partt_mode; /* calculate the num_xcc_in_xcp for the partition mode*/ switch (amdgpu_user_partt_mode) { case AMDGPU_SPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd; + adev->gfx.num_xcc_per_xcp = num_xcc; break; case AMDGPU_DPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 2; + adev->gfx.num_xcc_per_xcp = num_xcc / 2; break; case AMDGPU_TPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 3; + adev->gfx.num_xcc_per_xcp = num_xcc / 3; break; case AMDGPU_QPX_PARTITION_MODE: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd / 4; + adev->gfx.num_xcc_per_xcp = num_xcc / 4; break; case AMDGPU_CPX_PARTITION_MODE: adev->gfx.num_xcc_per_xcp = 1; break; default: - adev->gfx.num_xcc_per_xcp = adev->gfx.num_xcd; + adev->gfx.num_xcc_per_xcp = num_xcc; break; } @@ -2404,14 +2420,15 @@ static int gfx_v9_4_3_set_clockgating_state(void *handle, enum amd_clockgating_state state) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; + int i, num_xcc; if (amdgpu_sriov_vf(adev)) return 0; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 3): - for (i = 0; i < adev->gfx.num_xcd; i++) + for (i = 0; i < num_xcc; i++) gfx_v9_4_3_update_gfx_clock_gating(adev, state == AMD_CG_STATE_GATE, i); break; @@ -2739,12 +2756,13 @@ static int gfx_v9_4_3_set_priv_reg_fault_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - int i; + int i, num_xcc; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - for (i = 0; i < adev->gfx.num_xcd; i++) + for (i = 0; i < num_xcc; i++) WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, PRIV_REG_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); @@ -2761,12 +2779,13 @@ static int gfx_v9_4_3_set_priv_inst_fault_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - int i; + int i, num_xcc; + num_xcc = NUM_XCC(adev->gfx.xcc_mask); switch (state) { case AMDGPU_IRQ_STATE_DISABLE: case AMDGPU_IRQ_STATE_ENABLE: - for (i = 0; i < adev->gfx.num_xcd; i++) + for (i = 0; i < num_xcc; i++) WREG32_FIELD15_PREREG(GC, i, CP_INT_CNTL_RING0, PRIV_INSTR_INT_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); @@ -2783,8 +2802,10 @@ static int gfx_v9_4_3_set_eop_interrupt_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { - int i; - for (i = 0; i < adev->gfx.num_xcd; i++) { + int i, num_xcc; + + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { switch (type) { case AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP: gfx_v9_4_3_set_compute_eop_interrupt_state(adev, 1, 0, state, i); @@ -2842,6 +2863,7 @@ static int gfx_v9_4_3_eop_irq(struct amdgpu_device *adev, /* Per-queue interrupt is supported for MEC starting from VI. * The interrupt can only be enabled/disabled per pipe instead of per queue. */ + if ((ring->me == me_id) && (ring->pipe == pipe_id) && (ring->queue == queue_id)) amdgpu_fence_process(ring); } @@ -3056,9 +3078,10 @@ static const struct amdgpu_ring_funcs gfx_v9_4_3_ring_funcs_kiq = { static void gfx_v9_4_3_set_ring_funcs(struct amdgpu_device *adev) { - int i, j; + int i, j, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { adev->gfx.kiq[i].ring.funcs = &gfx_v9_4_3_ring_funcs_kiq; for (j = 0; j < adev->gfx.num_compute_rings; j++) diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c index 1bb17d95f720..e35365ab3f1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_2.c @@ -43,9 +43,10 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, uint64_t page_table_base) { struct amdgpu_vmhub *hub; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; WREG32_SOC15_OFFSET(GC, i, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, @@ -56,13 +57,14 @@ static void gfxhub_v1_2_setup_vm_pt_regs(struct amdgpu_device *adev, regVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, hub->ctx_addr_distance * vmid, upper_32_bits(page_table_base)); + } } static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev) { uint64_t pt_base; - int i; + int i, num_xcc; if (adev->gmc.pdb0_bo) pt_base = amdgpu_gmc_pd_addr(adev->gmc.pdb0_bo); @@ -74,7 +76,8 @@ static void gfxhub_v1_2_init_gart_aperture_regs(struct amdgpu_device *adev) /* If use GART for FB translation, vmid0 page table covers both * vram and system memory (gart) */ - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (adev->gmc.pdb0_bo) { WREG32_SOC15(GC, i, regVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, @@ -111,9 +114,10 @@ static void gfxhub_v1_2_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* Program the AGP BAR */ WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_BASE, 0); WREG32_SOC15_RLC(GC, i, regMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); @@ -177,9 +181,10 @@ static void gfxhub_v1_2_init_system_aperture_regs(struct amdgpu_device *adev) static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev) { uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* Setup TLB control */ tmp = RREG32_SOC15(GC, i, regMC_VM_MX_L1_TLB_CNTL); @@ -202,9 +207,10 @@ static void gfxhub_v1_2_init_tlb_regs(struct amdgpu_device *adev) static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) { uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { /* Setup L2 cache */ tmp = RREG32_SOC15(GC, i, regVM_L2_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_CNTL, ENABLE_L2_CACHE, 1); @@ -249,9 +255,10 @@ static void gfxhub_v1_2_init_cache_regs(struct amdgpu_device *adev) static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev) { uint32_t tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { tmp = RREG32_SOC15(GC, i, regVM_CONTEXT0_CNTL); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, ENABLE_CONTEXT, 1); tmp = REG_SET_FIELD(tmp, VM_CONTEXT0_CNTL, PAGE_TABLE_DEPTH, @@ -266,9 +273,10 @@ static void gfxhub_v1_2_enable_system_domain(struct amdgpu_device *adev) static void gfxhub_v1_2_disable_identity_aperture(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { WREG32_SOC15(GC, i, regVM_L2_CONTEXT1_IDENTITY_APERTURE_LOW_ADDR_LO32, 0XFFFFFFFF); @@ -295,7 +303,7 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) struct amdgpu_vmhub *hub; unsigned num_level, block_size; uint32_t tmp; - int i, j; + int i, j, num_xcc; num_level = adev->vm_manager.num_level; block_size = adev->vm_manager.block_size; @@ -304,7 +312,8 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) else block_size -= 9; - for (j = 0; j < adev->gfx.num_xcd; j++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (j = 0; j < num_xcc; j++) { hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; for (i = 0; i <= 14; i++) { tmp = RREG32_SOC15_OFFSET(GC, j, regVM_CONTEXT1_CNTL, i); @@ -362,10 +371,12 @@ static void gfxhub_v1_2_setup_vmid_config(struct amdgpu_device *adev) static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; - unsigned i, j; + unsigned i, j, num_xcc; - for (j = 0; j < adev->gfx.num_xcd; j++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (j = 0; j < num_xcc; j++) { hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; + for (i = 0 ; i < 18; ++i) { WREG32_SOC15_OFFSET(GC, j, regVM_INVALIDATE_ENG0_ADDR_RANGE_LO32, i * hub->eng_addr_distance, 0xffffffff); @@ -377,9 +388,10 @@ static void gfxhub_v1_2_program_invalidation(struct amdgpu_device *adev) static int gfxhub_v1_2_gart_enable(struct amdgpu_device *adev) { - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { if (amdgpu_sriov_vf(adev)) { /* * MC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are @@ -413,9 +425,10 @@ static void gfxhub_v1_2_gart_disable(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; u32 tmp; - u32 i, j; + u32 i, j, num_xcc; - for (j = 0; j < adev->gfx.num_xcd; j++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (j = 0; j < num_xcc; j++) { hub = &adev->vmhub[AMDGPU_GFXHUB(j)]; /* Disable all tables */ for (i = 0; i < 16; i++) @@ -449,9 +462,10 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, bool value) { u32 tmp; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { tmp = RREG32_SOC15(GC, i, regVM_L2_PROTECTION_FAULT_CNTL); tmp = REG_SET_FIELD(tmp, VM_L2_PROTECTION_FAULT_CNTL, RANGE_PROTECTION_FAULT_ENABLE_DEFAULT, value); @@ -490,9 +504,10 @@ static void gfxhub_v1_2_set_fault_enable_default(struct amdgpu_device *adev, static void gfxhub_v1_2_init(struct amdgpu_device *adev) { struct amdgpu_vmhub *hub; - int i; + int i, num_xcc; - for (i = 0; i < adev->gfx.num_xcd; i++) { + num_xcc = NUM_XCC(adev->gfx.xcc_mask); + for (i = 0; i < num_xcc; i++) { hub = &adev->vmhub[AMDGPU_GFXHUB(i)]; hub->ctx0_ptb_addr_lo32 = diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 4b2c4ecd7253..2c322a25bf1c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -1733,7 +1733,8 @@ static int gmc_v9_0_sw_init(void *handle) adev->gmc.translate_further = adev->vm_manager.num_level > 1; break; case IP_VERSION(9, 4, 3): - bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), adev->gfx.num_xcd); + bitmap_set(adev->vmhubs_mask, AMDGPU_GFXHUB(0), + NUM_XCC(adev->gfx.xcc_mask)); bitmap_set(adev->vmhubs_mask, AMDGPU_MMHUB0(0), adev->num_aid); amdgpu_vm_adjust_size(adev, 256 * 1024, 9, 3, 48); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c index 829e32433faf..df96c4c508a0 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c @@ -592,6 +592,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, struct kfd_node *node; uint32_t first_vmid_kfd, last_vmid_kfd, vmid_num_kfd; unsigned int max_proc_per_quantum; + int num_xcd; kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev, KGD_ENGINE_MEC1); @@ -601,16 +602,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, KGD_ENGINE_SDMA1); kfd->shared_resources = *gpu_resources; - if (kfd->adev->gfx.num_xcd == 0 || kfd->adev->gfx.num_xcd == 1 || - kfd->adev->gfx.num_xcc_per_xcp == 0) + num_xcd = NUM_XCC(kfd->adev->gfx.xcc_mask); + if (num_xcd == 0 || num_xcd == 1 || kfd->adev->gfx.num_xcc_per_xcp == 0) kfd->num_nodes = 1; else - kfd->num_nodes = - kfd->adev->gfx.num_xcd/kfd->adev->gfx.num_xcc_per_xcp; + kfd->num_nodes = num_xcd / kfd->adev->gfx.num_xcc_per_xcp; if (kfd->num_nodes == 0) { dev_err(kfd_device, "KFD num nodes cannot be 0, GC inst: %d, num_xcc_in_node: %d\n", - kfd->adev->gfx.num_xcd, kfd->adev->gfx.num_xcc_per_xcp); + num_xcd, kfd->adev->gfx.num_xcc_per_xcp); goto out; } |