Diffstat (limited to 'drivers/gpu/drm')
228 files changed, 18878 insertions, 12802 deletions
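The central refactor running through this series is the new HDP IP block: flush/invalidate handling moves out of the per-ASIC and nbio callbacks (asic_funcs->flush_hdp, nbio.funcs->hdp_flush) into a dedicated amdgpu_hdp block (hdp_v4_0.c / hdp_v5_0.c), and the amdgpu_asic_flush_hdp()/amdgpu_asic_invalidate_hdp() macros fall back to adev->hdp.funcs when the ASIC-level hook is absent. Below is a minimal, self-contained C sketch of that dispatch-with-fallback pattern. Only struct amdgpu_hdp_funcs and the macro shape mirror the diff; the reduced stub types, the *_stub names and the printf are illustrative assumptions, not code from the patch.

/*
 * Sketch of the HDP dispatch pattern introduced by this series:
 * prefer the legacy per-ASIC hook, otherwise route through the new
 * amdgpu_hdp IP block. Types are stripped down for illustration.
 */
#include <stdio.h>
#include <stddef.h>

struct amdgpu_device;
struct amdgpu_ring;

struct amdgpu_hdp_funcs {
	void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
	void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
};

struct amdgpu_hdp {
	const struct amdgpu_hdp_funcs *funcs;
};

struct amdgpu_asic_funcs {
	/* legacy per-ASIC hook; NULL once an ASIC is converted to the HDP block */
	void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
};

struct amdgpu_device {
	const struct amdgpu_asic_funcs *asic_funcs;
	struct amdgpu_hdp hdp;
};

/* mirrors the reworked amdgpu_asic_flush_hdp(): use the ASIC hook if present,
 * otherwise dispatch through adev->hdp.funcs */
#define amdgpu_asic_flush_hdp(adev, r) \
	((adev)->asic_funcs->flush_hdp ? \
	 (adev)->asic_funcs->flush_hdp((adev), (r)) : \
	 (adev)->hdp.funcs->flush_hdp((adev), (r)))

/* stand-in for hdp_v4_0_flush_hdp(), which writes the HDP mem-flush remap */
static void hdp_flush_stub(struct amdgpu_device *adev, struct amdgpu_ring *ring)
{
	(void)adev; (void)ring;
	printf("flush routed through the HDP IP block\n");
}

static const struct amdgpu_hdp_funcs hdp_funcs_stub = {
	.flush_hdp = hdp_flush_stub,
};

static const struct amdgpu_asic_funcs asic_funcs_stub = {
	.flush_hdp = NULL,	/* converted ASIC: no per-ASIC flush hook anymore */
};

int main(void)
{
	struct amdgpu_device adev = {
		.asic_funcs = &asic_funcs_stub,
		.hdp = { .funcs = &hdp_funcs_stub },
	};

	amdgpu_asic_flush_hdp(&adev, NULL);	/* falls back to adev.hdp.funcs */
	return 0;
}

The same fallback shape is what lets gmc_v9_0/gmc_v10_0 and gfx_v10_0 in the hunks below call adev->hdp.funcs->flush_hdp()/init_registers() directly instead of open-coding HDP register writes per ASIC.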
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 6bf6cfaea3f1..e74cd443063a 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -71,7 +71,7 @@ amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o navi14_reg_init.o \ arct_reg_init.o navi12_reg_init.o mxgpu_nv.o sienna_cichlid_reg_init.o vangogh_reg_init.o \ - nbio_v7_2.o dimgrey_cavefish_reg_init.o + nbio_v7_2.o dimgrey_cavefish_reg_init.o hdp_v4_0.o hdp_v5_0.o # add DF block amdgpu-y += \ @@ -97,6 +97,7 @@ amdgpu-y += \ tonga_ih.o \ cz_ih.o \ vega10_ih.o \ + vega20_ih.o \ navi10_ih.o # add PSP block diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index f77443cd9c17..f4ff8ddb52d4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -88,6 +88,7 @@ #include "amdgpu_gfx.h" #include "amdgpu_sdma.h" #include "amdgpu_nbio.h" +#include "amdgpu_hdp.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" #include "amdgpu_csa.h" @@ -106,6 +107,7 @@ #include "amdgpu_gfxhub.h" #include "amdgpu_df.h" #include "amdgpu_smuio.h" +#include "amdgpu_hdp.h" #define MAX_GPU_INSTANCE 16 @@ -607,7 +609,6 @@ struct amdgpu_asic_funcs { /* invalidate hdp read cache */ void (*invalidate_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); - void (*reset_hdp_ras_error_count)(struct amdgpu_device *adev); /* check if the asic needs a full reset of if soft reset will work */ bool (*need_full_reset)(struct amdgpu_device *adev); /* initialize doorbell layout for specific asic*/ @@ -920,6 +921,9 @@ struct amdgpu_device { /* nbio */ struct amdgpu_nbio nbio; + /* hdp */ + struct amdgpu_hdp hdp; + /* smuio */ struct amdgpu_smuio smuio; @@ -1201,8 +1205,10 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l)) #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v))) #define amdgpu_asic_get_config_memsize(adev) (adev)->asic_funcs->get_config_memsize((adev)) -#define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) -#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) +#define amdgpu_asic_flush_hdp(adev, r) \ + ((adev)->asic_funcs->flush_hdp ? (adev)->asic_funcs->flush_hdp((adev), (r)) : (adev)->hdp.funcs->flush_hdp((adev), (r))) +#define amdgpu_asic_invalidate_hdp(adev, r) \ + ((adev)->asic_funcs->invalidate_hdp ? 
(adev)->asic_funcs->invalidate_hdp((adev), (r)) : (adev)->hdp.funcs->invalidate_hdp((adev), (r))) #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) #define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) #define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c index 4763bab7a4d0..62aa1a6f64ed 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c @@ -23,7 +23,6 @@ #include "amdgpu_amdkfd.h" #include "gc/gc_10_1_0_offset.h" #include "gc/gc_10_1_0_sh_mask.h" -#include "navi10_enum.h" #include "athub/athub_2_0_0_offset.h" #include "athub/athub_2_0_0_sh_mask.h" #include "oss/osssys_5_0_0_offset.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c index 50016bf9c427..fad3b91f74f5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c @@ -24,7 +24,6 @@ #include "amdgpu_amdkfd.h" #include "gc/gc_10_3_0_offset.h" #include "gc/gc_10_3_0_sh_mask.h" -#include "navi10_enum.h" #include "oss/osssys_5_0_0_offset.h" #include "oss/osssys_5_0_0_sh_mask.h" #include "soc15_common.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c index 6333cada1e09..efdf639f6593 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c @@ -155,7 +155,7 @@ static bool amdgpu_read_bios_from_rom(struct amdgpu_device *adev) u8 header[AMD_VBIOS_SIGNATURE_END+1] = {0}; int len; - if (!adev->asic_funcs->read_bios_from_rom) + if (!adev->asic_funcs || !adev->asic_funcs->read_bios_from_rom) return false; /* validate VBIOS signature */ @@ -348,7 +348,8 @@ static bool amdgpu_read_disabled_bios(struct amdgpu_device *adev) if (adev->flags & AMD_IS_APU) return igp_read_bios_from_vram(adev); else - return amdgpu_asic_read_disabled_bios(adev); + return (!adev->asic_funcs || !adev->asic_funcs->read_disabled_bios) ? 
+ false : amdgpu_asic_read_disabled_bios(adev); } #ifdef CONFIG_ACPI diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 8f451e809127..377b32691881 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2548,11 +2548,11 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (adev->gmc.xgmi.num_physical_nodes > 1) amdgpu_xgmi_remove_device(adev); - amdgpu_amdkfd_device_fini(adev); - amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + amdgpu_amdkfd_device_fini(adev); + /* need to disable SMC first */ for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) @@ -3034,7 +3034,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type) #endif default: if (amdgpu_dc > 0) - DRM_INFO("Display Core has been requested via kernel parameter " + DRM_INFO_ONCE("Display Core has been requested via kernel parameter " "but isn't supported by ASIC, ignoring\n"); return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c index 06dbb612a53a..47e0b48dc26f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c @@ -40,6 +40,7 @@ #include <linux/dma-buf.h> #include <linux/dma-fence-array.h> #include <linux/pci-p2pdma.h> +#include <linux/pm_runtime.h> /** * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation @@ -151,9 +152,13 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, if (attach->dev->driver == adev->dev->driver) return 0; + r = pm_runtime_get_sync(adev_to_drm(adev)->dev); + if (r < 0) + goto out; + r = amdgpu_bo_reserve(bo, false); if (unlikely(r != 0)) - return r; + goto out; /* * We only create shared fences for internal use, but importers @@ -165,11 +170,15 @@ static int amdgpu_dma_buf_attach(struct dma_buf *dmabuf, */ r = __dma_resv_make_exclusive(bo->tbo.base.resv); if (r) - return r; + goto out; bo->prime_shared_count++; amdgpu_bo_unreserve(bo); return 0; + +out: + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); + return r; } /** @@ -189,6 +198,9 @@ static void amdgpu_dma_buf_detach(struct dma_buf *dmabuf, if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count) bo->prime_shared_count--; + + pm_runtime_mark_last_busy(adev_to_drm(adev)->dev); + pm_runtime_put_autosuspend(adev_to_drm(adev)->dev); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h new file mode 100644 index 000000000000..43caf9f8cc11 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hdp.h @@ -0,0 +1,40 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_HDP_H__ +#define __AMDGPU_HDP_H__ + +struct amdgpu_hdp_funcs { + void (*flush_hdp)(struct amdgpu_device *adev, struct amdgpu_ring *ring); + void (*invalidate_hdp)(struct amdgpu_device *adev, + struct amdgpu_ring *ring); + void (*reset_ras_error_count)(struct amdgpu_device *adev); + void (*update_clock_gating)(struct amdgpu_device *adev, bool enable); + void (*get_clock_gating_state)(struct amdgpu_device *adev, u32 *flags); + void (*init_registers)(struct amdgpu_device *adev); +}; + +struct amdgpu_hdp { + const struct amdgpu_hdp_funcs *funcs; +}; + +#endif /* __AMDGPU_HDP_H__ */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index dcd9b4a8e20b..725a9c73d51f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -205,3 +205,46 @@ restart_ih: return IRQ_HANDLED; } +/** + * amdgpu_ih_decode_iv_helper - decode an interrupt vector + * + * @adev: amdgpu_device pointer + * + * Decodes the interrupt vector at the current rptr + * position and also advance the position for for Vega10 + * and later GPUs. + */ +void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + struct amdgpu_iv_entry *entry) +{ + /* wptr/rptr are in bytes! */ + u32 ring_index = ih->rptr >> 2; + uint32_t dw[8]; + + dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); + dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); + dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); + dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); + dw[4] = le32_to_cpu(ih->ring[ring_index + 4]); + dw[5] = le32_to_cpu(ih->ring[ring_index + 5]); + dw[6] = le32_to_cpu(ih->ring[ring_index + 6]); + dw[7] = le32_to_cpu(ih->ring[ring_index + 7]); + + entry->client_id = dw[0] & 0xff; + entry->src_id = (dw[0] >> 8) & 0xff; + entry->ring_id = (dw[0] >> 16) & 0xff; + entry->vmid = (dw[0] >> 24) & 0xf; + entry->vmid_src = (dw[0] >> 31); + entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); + entry->timestamp_src = dw[2] >> 31; + entry->pasid = dw[3] & 0xffff; + entry->pasid_src = dw[3] >> 31; + entry->src_data[0] = dw[4]; + entry->src_data[1] = dw[5]; + entry->src_data[2] = dw[6]; + entry->src_data[3] = dw[7]; + + /* wptr/rptr are in bytes! 
*/ + ih->rptr += 32; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 3c9cfe7eecff..6ed4a85fc7c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -30,6 +30,18 @@ struct amdgpu_device; struct amdgpu_iv_entry; +struct amdgpu_ih_regs { + uint32_t ih_rb_base; + uint32_t ih_rb_base_hi; + uint32_t ih_rb_cntl; + uint32_t ih_rb_wptr; + uint32_t ih_rb_rptr; + uint32_t ih_doorbell_rptr; + uint32_t ih_rb_wptr_addr_lo; + uint32_t ih_rb_wptr_addr_hi; + uint32_t psp_reg_id; +}; + /* * R6xx+ IH ring */ @@ -53,6 +65,7 @@ struct amdgpu_ih_ring { bool enabled; unsigned rptr; atomic_t lock; + struct amdgpu_ih_regs ih_regs; }; /* provided by the ih block */ @@ -75,5 +88,7 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, unsigned int num_dw); int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); - +void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + struct amdgpu_iv_entry *entry); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index bea57e8e793f..afbbec82a289 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -444,7 +444,8 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, } else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { DRM_DEBUG("Invalid src_id in IV: %d\n", src_id); - } else if (adev->irq.virq[src_id]) { + } else if ((client_id == AMDGPU_IRQ_CLIENTID_LEGACY) && + adev->irq.virq[src_id]) { generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id)); } else if (!adev->irq.client[client_id].sources) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h index e62cc0e1a5ad..4ba0024aedf1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_nbio.h @@ -57,7 +57,6 @@ struct amdgpu_nbio_funcs { u32 (*get_pcie_port_data_offset)(struct amdgpu_device *adev); u32 (*get_rev_id)(struct amdgpu_device *adev); void (*mc_access_enable)(struct amdgpu_device *adev, bool enable); - void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring); u32 (*get_memsize)(struct amdgpu_device *adev); void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance, bool use_doorbell, int doorbell_index, int doorbell_size); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 523d22db094b..c2d9d072b6fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -249,7 +249,7 @@ psp_cmd_submit_buf(struct psp_context *psp, { int ret; int index; - int timeout = 2000; + int timeout = 20000; bool ras_intr = false; bool skip_unsupport = false; @@ -282,7 +282,7 @@ psp_cmd_submit_buf(struct psp_context *psp, ras_intr = amdgpu_ras_intr_triggered(); if (ras_intr) break; - msleep(1); + usleep_range(10, 100); amdgpu_asic_invalidate_hdp(psp->adev, NULL); } @@ -563,7 +563,7 @@ static int psp_asd_load(struct psp_context *psp) * add workaround to bypass it for sriov now. 
* TODO: add version check to make it common */ - if (amdgpu_sriov_vf(psp->adev) || !psp->asd_fw) + if (amdgpu_sriov_vf(psp->adev) || !psp->asd_ucode_size) return 0; cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); @@ -1315,8 +1315,12 @@ static int psp_hdcp_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->hdcp_context.hdcp_initialized) - return 0; + if (!psp->hdcp_context.hdcp_initialized) { + if (psp->hdcp_context.hdcp_shared_buf) + goto out; + else + return 0; + } ret = psp_hdcp_unload(psp); if (ret) @@ -1324,6 +1328,7 @@ static int psp_hdcp_terminate(struct psp_context *psp) psp->hdcp_context.hdcp_initialized = false; +out: /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->hdcp_context.hdcp_shared_bo, &psp->hdcp_context.hdcp_shared_mc_addr, @@ -1462,8 +1467,12 @@ static int psp_dtm_terminate(struct psp_context *psp) if (amdgpu_sriov_vf(psp->adev)) return 0; - if (!psp->dtm_context.dtm_initialized) - return 0; + if (!psp->dtm_context.dtm_initialized) { + if (psp->dtm_context.dtm_shared_buf) + goto out; + else + return 0; + } ret = psp_dtm_unload(psp); if (ret) @@ -1471,6 +1480,7 @@ static int psp_dtm_terminate(struct psp_context *psp) psp->dtm_context.dtm_initialized = false; +out: /* free hdcp shared memory */ amdgpu_bo_free_kernel(&psp->dtm_context.dtm_shared_bo, &psp->dtm_context.dtm_shared_mc_addr, @@ -2589,11 +2599,10 @@ static int parse_ta_bin_descriptor(struct psp_context *psp, switch (desc->fw_type) { case TA_FW_TYPE_PSP_ASD: - psp->asd_fw_version = le32_to_cpu(desc->fw_version); + psp->asd_fw_version = le32_to_cpu(desc->fw_version); psp->asd_feature_version = le32_to_cpu(desc->fw_version); - psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); + psp->asd_ucode_size = le32_to_cpu(desc->size_bytes); psp->asd_start_addr = ucode_start_addr; - psp->asd_fw = psp->ta_fw; break; case TA_FW_TYPE_PSP_XGMI: psp->ta_xgmi_ucode_version = le32_to_cpu(desc->fw_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index c136bd449744..82e952696d24 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -1518,7 +1518,7 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data; int i = 0; - int ret = 0; + int ret = 0, status; if (!con || !con->eh_data || !bps || !count) return -EINVAL; @@ -1543,12 +1543,12 @@ static int amdgpu_ras_badpages_read(struct amdgpu_device *adev, .size = AMDGPU_GPU_PAGE_SIZE, .flags = AMDGPU_RAS_RETIRE_PAGE_RESERVED, }; - ret = amdgpu_vram_mgr_query_page_status( + status = amdgpu_vram_mgr_query_page_status( ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM), data->bps[i].retired_page); - if (ret == -EBUSY) + if (status == -EBUSY) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_PENDING; - else if (ret == -ENOENT) + else if (status == -ENOENT) (*bps)[i].flags = AMDGPU_RAS_RETIRE_PAGE_FAULT; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c index 1dd040166c63..19d9aa76cfbf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c @@ -30,6 +30,7 @@ #define EEPROM_I2C_TARGET_ADDR_VEGA20 0xA0 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS 0xA8 #define EEPROM_I2C_TARGET_ADDR_ARCTURUS_D342 0xA0 +#define EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID 0xA0 /* * The 2 macros bellow represent the actual size in bytes that @@ -62,7 +63,8 @@ static 
bool __is_ras_eeprom_supported(struct amdgpu_device *adev) { if ((adev->asic_type == CHIP_VEGA20) || - (adev->asic_type == CHIP_ARCTURUS)) + (adev->asic_type == CHIP_ARCTURUS) || + (adev->asic_type == CHIP_SIENNA_CICHLID)) return true; return false; @@ -100,6 +102,10 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev, case CHIP_ARCTURUS: return __get_eeprom_i2c_addr_arct(adev, i2c_addr); + case CHIP_SIENNA_CICHLID: + *i2c_addr = EEPROM_I2C_TARGET_ADDR_SIENNA_CICHLID; + break; + default: return false; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index f80716179968..792d20261846 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -21,7 +21,7 @@ * */ -#if !defined(_AMDGPU_TRACE_H) || defined(TRACE_HEADER_MULTI_READ) +#if !defined(_AMDGPU_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) #define _AMDGPU_TRACE_H_ #include <linux/stringify.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index 8b989670ed66..e2ed4689118a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -1170,7 +1170,7 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle, int r, i; r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_GTT, &bo, NULL, (void **)&msg); if (r) return r; @@ -1202,7 +1202,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, int r, i; r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_GEM_DOMAIN_GTT, &bo, NULL, (void **)&msg); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c index 921a69abda55..5b90efd6f6d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_0.c @@ -27,7 +27,6 @@ #include "athub/athub_2_0_0_offset.h" #include "athub/athub_2_0_0_sh_mask.h" #include "athub/athub_2_0_0_default.h" -#include "navi10_enum.h" #include "soc15_common.h" diff --git a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c index 66c183ddd43e..7b1b18350bf9 100644 --- a/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c +++ b/drivers/gpu/drm/amd/amdgpu/athub_v2_1.c @@ -26,7 +26,6 @@ #include "athub/athub_2_1_0_offset.h" #include "athub/athub_2_1_0_sh_mask.h" -#include "navi10_enum.h" #include "soc15_common.h" diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index da37f8a900af..307c01301c87 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -194,19 +194,30 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 16). Hopefully - * this should allow us to catchup. - */ - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); - ih->rptr = (wptr + 16) & ih->ptr_mask; - tmp = RREG32(mmIH_RB_CNTL); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32(mmIH_RB_CNTL, tmp); - } + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. 
*/ + wptr = RREG32(mmIH_RB_WPTR); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 16). Hopefully + * this should allow us to catchup. + */ + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; + tmp = RREG32(mmIH_RB_CNTL); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + + +out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index ffcc64ec6473..9810af712cc0 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -294,7 +294,7 @@ static int dce_virtual_get_modes(struct drm_connector *connector) static const struct mode_size { int w; int h; - } common_modes[21] = { + } common_modes[] = { { 640, 480}, { 720, 480}, { 800, 600}, @@ -312,13 +312,14 @@ static int dce_virtual_get_modes(struct drm_connector *connector) {1600, 1200}, {1920, 1080}, {1920, 1200}, + {2560, 1440}, {4096, 3112}, {3656, 2664}, {3840, 2160}, {4096, 2160}, }; - for (i = 0; i < 21; i++) { + for (i = 0; i < ARRAY_SIZE(common_modes); i++) { mode = drm_cvt_mode(dev, common_modes[i].w, common_modes[i].h, 60, false, false, false); drm_mode_probed_add(connector, mode); } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index ba1086784525..10aae0abcffb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -38,7 +38,6 @@ #include "smuio/smuio_11_0_0_offset.h" #include "smuio/smuio_11_0_0_sh_mask.h" #include "navi10_enum.h" -#include "hdp/hdp_5_0_0_offset.h" #include "ivsrcid/gfx/irqsrcs_gfx_10_1.h" #include "soc15.h" @@ -5691,7 +5690,7 @@ static int gfx_v10_0_cp_gfx_load_pfp_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->hdp.funcs->flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_PFP_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_PFP_IC_BASE_CNTL, VMID, 0); @@ -5769,7 +5768,7 @@ static int gfx_v10_0_cp_gfx_load_ce_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->hdp.funcs->flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CE_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CE_IC_BASE_CNTL, VMID, 0); @@ -5846,7 +5845,7 @@ static int gfx_v10_0_cp_gfx_load_me_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->hdp.funcs->flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_ME_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_ME_IC_BASE_CNTL, VMID, 0); @@ -6215,7 +6214,7 @@ static int gfx_v10_0_cp_compute_load_microcode(struct amdgpu_device *adev) } if (amdgpu_emu_mode == 1) - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->hdp.funcs->flush_hdp(adev, NULL); tmp = RREG32_SOC15(GC, 0, mmCP_CPC_IC_BASE_CNTL); tmp = REG_SET_FIELD(tmp, CP_CPC_IC_BASE_CNTL, CACHE_POLICY, 0); diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 5f4805e4d04a..a896e3d0fcf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -38,7 +38,6 @@ #include "gc/gc_9_0_sh_mask.h" #include "vega10_enum.h" -#include 
"hdp/hdp_4_0_offset.h" #include "soc15_common.h" #include "clearstate_gfx9.h" diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c index 5648c48be77f..3b7c6c31fce1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c @@ -27,8 +27,6 @@ #include "gmc_v10_0.h" #include "umc_v8_7.h" -#include "hdp/hdp_5_0_0_offset.h" -#include "hdp/hdp_5_0_0_sh_mask.h" #include "athub/athub_2_0_0_sh_mask.h" #include "athub/athub_2_0_0_offset.h" #include "dcn/dcn_2_0_0_offset.h" @@ -312,7 +310,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid, int r; /* flush hdp cache */ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->hdp.funcs->flush_hdp(adev, NULL); /* For SRIOV run time, driver shouldn't access the register through MMIO * Directly use kiq to do the vm invalidation instead @@ -995,7 +993,6 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) { int r; bool value; - u32 tmp; if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); @@ -1014,15 +1011,10 @@ static int gmc_v10_0_gart_enable(struct amdgpu_device *adev) if (r) return r; - tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL); - tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; - WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp); - - tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); - WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); + adev->hdp.funcs->init_registers(adev); /* Flush HDP after it is initialized */ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->hdp.funcs->flush_hdp(adev, NULL); value = (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) ? false : true; diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index e22268f9dba7..aedef9017c4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -31,8 +31,6 @@ #include "amdgpu_atomfirmware.h" #include "amdgpu_gem.h" -#include "hdp/hdp_4_0_offset.h" -#include "hdp/hdp_4_0_sh_mask.h" #include "gc/gc_9_0_sh_mask.h" #include "dce/dce_12_0_offset.h" #include "dce/dce_12_0_sh_mask.h" @@ -283,20 +281,6 @@ static const char *mmhub_client_ids_arcturus[][2] = { [224+15][1] = "SDMA7", }; -static const u32 golden_settings_vega10_hdp[] = -{ - 0xf64, 0x0fffffff, 0x00000000, - 0xf65, 0x0fffffff, 0x00000000, - 0xf66, 0x0fffffff, 0x00000000, - 0xf67, 0x0fffffff, 0x00000000, - 0xf68, 0x0fffffff, 0x00000000, - 0xf6a, 0x0fffffff, 0x00000000, - 0xf6b, 0x0fffffff, 0x00000000, - 0xf6c, 0x0fffffff, 0x00000000, - 0xf6d, 0x0fffffff, 0x00000000, - 0xf6e, 0x0fffffff, 0x00000000, -}; - static const struct soc15_reg_golden golden_settings_mmhub_1_0_0[] = { SOC15_REG_GOLDEN_VALUE(MMHUB, 0, mmDAGB1_WRCLI2, 0x00000007, 0xfe5fe0fa), @@ -1571,7 +1555,6 @@ static int gmc_v9_0_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; bool value; int r, i; - u32 tmp; /* The sequence of these two function calls matters.*/ gmc_v9_0_init_golden_registers(adev); @@ -1583,31 +1566,13 @@ static int gmc_v9_0_hw_init(void *handle) WREG32_FIELD15(DCE, 0, VGA_RENDER_CONTROL, VGA_VSTATUS_CNTL, 0); } - amdgpu_device_program_register_sequence(adev, - golden_settings_vega10_hdp, - ARRAY_SIZE(golden_settings_vega10_hdp)); - if (adev->mmhub.funcs->update_power_gating) adev->mmhub.funcs->update_power_gating(adev, true); - switch (adev->asic_type) { - case CHIP_ARCTURUS: - WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1); - break; - default: - break; - } - - WREG32_FIELD15(HDP, 0, 
HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); - - tmp = RREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL); - WREG32_SOC15(HDP, 0, mmHDP_HOST_PATH_CNTL, tmp); - - WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); - WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); + adev->hdp.funcs->init_registers(adev); /* After HDP is initialized, flush HDP.*/ - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->hdp.funcs->flush_hdp(adev, NULL); if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS) value = false; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c new file mode 100644 index 000000000000..e46621fed5b9 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c @@ -0,0 +1,137 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "hdp_v4_0.h" +#include "amdgpu_ras.h" + +#include "hdp/hdp_4_0_offset.h" +#include "hdp/hdp_4_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +/* for Vega20 register name change */ +#define mmHDP_MEM_POWER_CTRL 0x00d4 +#define HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK 0x00000001L +#define HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK 0x00000002L +#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK 0x00010000L +#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L +#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0 + +static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + else + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); +} + +static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + else + amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( + HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); +} + +static void hdp_v4_0_reset_ras_error_count(struct amdgpu_device *adev) +{ + if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP)) + return; + /*read back hdp ras counter to reset it to 0 */ + RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT); +} + +static void hdp_v4_0_update_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + if (adev->asic_type == CHIP_VEGA10 || + adev->asic_type == CHIP_VEGA12 || + adev->asic_type == CHIP_RAVEN) { + def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK; + else + data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK; + + if (def != data) + WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS), data); + } else { + def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK | + HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK | + HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK | + HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK; + else + data &= ~(HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK | + HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK | + HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK | + HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK); + + if (def != data) + WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL), data); + } +} + +static void hdp_v4_0_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_HDP_LS */ + data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); + if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; +} + +static void hdp_v4_0_init_registers(struct amdgpu_device *adev) +{ + switch (adev->asic_type) { + case CHIP_ARCTURUS: + WREG32_FIELD15(HDP, 0, HDP_MMHUB_CNTL, HDP_MMHUB_GCC, 1); + break; + default: + break; + } + + WREG32_FIELD15(HDP, 0, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1); + + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE, (adev->gmc.vram_start >> 8)); + WREG32_SOC15(HDP, 0, mmHDP_NONSURFACE_BASE_HI, (adev->gmc.vram_start >> 40)); +} + +const struct amdgpu_hdp_funcs hdp_v4_0_funcs = { + .flush_hdp = hdp_v4_0_flush_hdp, + .invalidate_hdp = hdp_v4_0_invalidate_hdp, + .reset_ras_error_count 
= hdp_v4_0_reset_ras_error_count, + .update_clock_gating = hdp_v4_0_update_clock_gating, + .get_clock_gating_state = hdp_v4_0_get_clockgating_state, + .init_registers = hdp_v4_0_init_registers, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h new file mode 100644 index 000000000000..d1e6399e8c46 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __HDP_V4_0_H__ +#define __HDP_V4_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_hdp_funcs hdp_v4_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c new file mode 100644 index 000000000000..7a15e669b68d --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c @@ -0,0 +1,212 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "hdp_v5_0.h" + +#include "hdp/hdp_5_0_0_offset.h" +#include "hdp/hdp_5_0_0_sh_mask.h" +#include <uapi/linux/kfd_ioctl.h> + +static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); + else + amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); +} + +static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) { + WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); + } else { + amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( + HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); + } +} + +static void hdp_v5_0_update_mem_power_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl, hdp_clk_cntl1; + uint32_t hdp_mem_pwr_cntl; + + if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD))) + return; + + hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); + hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL); + + /* Before doing clock/power mode switch, + * forced on IPH & RC clock */ + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + IPH_MEM_CLK_SOFT_OVERRIDE, 1); + hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, + RC_MEM_CLK_SOFT_OVERRIDE, 1); + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); + + /* HDP 5.0 doesn't support dynamic power mode switch, + * disable clock and power gating before any changing */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_SD_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, 0); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_SD_EN, 0); + WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* only one clock gating mode (LS/DS/SD) can be enabled */ + if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_LS_EN, enable); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_LS_EN, enable); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_DS_EN, enable); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, enable); + } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_SD_EN, enable); + /* RC should not use shut down mode, fallback to ds */ + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, + HDP_MEM_POWER_CTRL, + RC_MEM_POWER_DS_EN, enable); + } + + /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to 
+ * be set for SRAM LS/DS/SD */ + if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | + AMD_CG_SUPPORT_HDP_SD)) { + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + IPH_MEM_POWER_CTRL_EN, 1); + hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, + RC_MEM_POWER_CTRL_EN, 1); + } + + WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); + + /* restore IPH & RC clock override after clock/power mode changing */ + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1); +} + +static void hdp_v5_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + uint32_t hdp_clk_cntl; + + if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) + return; + + hdp_clk_cntl = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); + + if (enable) { + hdp_clk_cntl &= + ~(uint32_t) + (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK); + } else { + hdp_clk_cntl |= HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK; + } + + WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); +} + +static void hdp_v5_0_update_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + hdp_v5_0_update_mem_power_gating(adev, enable); + hdp_v5_0_update_medium_grain_clock_gating(adev, enable); +} + +static void hdp_v5_0_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + uint32_t tmp; + + /* AMD_CG_SUPPORT_HDP_MGCG */ + tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); + if (!(tmp & (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | + HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK))) + *flags |= AMD_CG_SUPPORT_HDP_MGCG; + + /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ + tmp = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL); + if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_LS; + else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_DS_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_DS; + else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_SD_EN_MASK) + *flags |= AMD_CG_SUPPORT_HDP_SD; +} + +static void hdp_v5_0_init_registers(struct amdgpu_device *adev) +{ + u32 tmp; + + tmp = RREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL); + tmp |= HDP_MISC_CNTL__FLUSH_INVALIDATE_CACHE_MASK; + WREG32_SOC15(HDP, 0, mmHDP_MISC_CNTL, tmp); +} + +const struct amdgpu_hdp_funcs hdp_v5_0_funcs = { + .flush_hdp = hdp_v5_0_flush_hdp, + .invalidate_hdp = hdp_v5_0_invalidate_hdp, + .update_clock_gating = hdp_v5_0_update_clock_gating, + .get_clock_gating_state = hdp_v5_0_get_clockgating_state, + .init_registers = hdp_v5_0_init_registers, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.h b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.h new file mode 100644 index 000000000000..2d5ec2b419f3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.h @@ -0,0 +1,31 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __HDP_V5_0_H__ +#define __HDP_V5_0_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_hdp_funcs hdp_v5_0_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 37d8b6ca4dab..cc957471f31e 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -194,19 +194,29 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 16). Hopefully - * this should allow us to catchup. - */ - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); - ih->rptr = (wptr + 16) & ih->ptr_mask; - tmp = RREG32(mmIH_RB_CNTL); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32(mmIH_RB_CNTL, tmp); - } + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32(mmIH_RB_WPTR); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 16). Hopefully + * this should allow us to catchup. + */ + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; + tmp = RREG32(mmIH_RB_CNTL); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + + +out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c index b72c8e4ca36b..07104a1de308 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c @@ -310,7 +310,7 @@ static void mmhub_v2_3_setup_vmid_config(struct amdgpu_device *adev) /* Send no-retry XNACK on fault to suppress VM fault storm. 
*/ tmp = REG_SET_FIELD(tmp, MMVM_CONTEXT1_CNTL, RETRY_PERMISSION_OR_INVALID_PAGE_FAULT, - !amdgpu_noretry); + !adev->gmc.noretry); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_CNTL, i * hub->ctx_distance, tmp); WREG32_SOC15_OFFSET(MMHUB, 0, mmMMVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32, diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c index 7ba229e43799..f4e4040bbd25 100644 --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c @@ -40,6 +40,53 @@ static void navi10_ih_set_interrupt_funcs(struct amdgpu_device *adev); /** + * navi10_ih_init_register_offset - Initialize register offset for ih rings + * + * @adev: amdgpu_device pointer + * + * Initialize register offset ih rings (NAVI10). + */ +static void navi10_ih_init_register_offset(struct amdgpu_device *adev) +{ + struct amdgpu_ih_regs *ih_regs; + + if (adev->irq.ih.ring_size) { + ih_regs = &adev->irq.ih.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR); + ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO); + ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL; + } + + if (adev->irq.ih1.ring_size) { + ih_regs = &adev->irq.ih1.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING1); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING1); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1; + } + + if (adev->irq.ih2.ring_size) { + ih_regs = &adev->irq.ih2.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING2); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING2); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING2; + } +} + +/** * force_update_wptr_for_self_int - Force update the wptr for self interrupt * * @adev: amdgpu_device pointer @@ -82,133 +129,66 @@ force_update_wptr_for_self_int(struct amdgpu_device *adev, } /** - * navi10_ih_enable_interrupts - Enable the interrupt ring buffer + * navi10_ih_toggle_ring_interrupts - toggle the interrupt ring buffer * * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointet + * @enable: true - enable the interrupts, false - disable the interrupts * - * Enable the interrupt ring buffer (NAVI10). 
+ * Toggle the interrupt ring buffer (NAVI10) */ -static void navi10_ih_enable_interrupts(struct amdgpu_device *adev) +static int navi10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + bool enable) { - u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); - - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); - if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { - DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); - } + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; - adev->irq.ih.enabled = true; + ih_regs = &ih->ih_regs; - if (adev->irq.ih1.ring_size) { - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, - RB_ENABLE, 1); - if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); - } - adev->irq.ih1.enabled = true; - } + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + /* enable_intr field is only valid in ring0 */ + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); + WREG32(ih_regs->ih_rb_cntl, tmp); - if (adev->irq.ih2.ring_size) { - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, - RB_ENABLE, 1); - if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); - } - adev->irq.ih2.enabled = true; + if (enable) { + ih->enabled = true; + } else { + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_rptr, 0); + WREG32(ih_regs->ih_rb_wptr, 0); + ih->enabled = false; + ih->rptr = 0; } - if (adev->irq.ih_soft.ring_size) - adev->irq.ih_soft.enabled = true; + return 0; } /** - * navi10_ih_disable_interrupts - Disable the interrupt ring buffer + * navi10_ih_toggle_interrupts - Toggle all the available interrupt ring buffers * * @adev: amdgpu_device pointer + * @enable: enable or disable interrupt ring buffers * - * Disable the interrupt ring buffer (NAVI10). + * Toggle all the available interrupt ring buffers (NAVI10). 
*/ -static void navi10_ih_disable_interrupts(struct amdgpu_device *adev) +static int navi10_ih_toggle_interrupts(struct amdgpu_device *adev, bool enable) { - u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); - - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); - if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { - DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); - } - - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); - adev->irq.ih.enabled = false; - adev->irq.ih.rptr = 0; - - if (adev->irq.ih1.ring_size) { - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, - RB_ENABLE, 0); - if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); - } - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); - adev->irq.ih1.enabled = false; - adev->irq.ih1.rptr = 0; - } + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2}; + int i; + int r; - if (adev->irq.ih2.ring_size) { - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, - RB_ENABLE, 0); - if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + r = navi10_ih_toggle_ring_interrupts(adev, ih[i], enable); + if (r) + return r; } - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); - adev->irq.ih2.enabled = false; - adev->irq.ih2.rptr = 0; } + return 0; } static uint32_t navi10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl) @@ -253,22 +233,49 @@ static uint32_t navi10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih) return ih_doorbell_rtpr; } -static void navi10_ih_reroute_ih(struct amdgpu_device *adev) +/** + * navi10_ih_enable_ring - enable an ih ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * + * Enable an ih ring buffer (NAVI10) + */ +static int navi10_ih_enable_ring(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) { + struct amdgpu_ih_regs *ih_regs; uint32_t tmp; - /* Reroute to IH ring 1 for VMC */ - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12); - tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp); - - /* Reroute IH ring 1 for UMC */ - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x1B); - tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp); + ih_regs = &ih->ih_regs; + + /* Ring Buffer base. 
[39:8] of 40-bit address of the beginning of the ring buffer*/ + WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8); + WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff); + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = navi10_ih_rb_cntl(ih, tmp); + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); + if (ih == &adev->irq.ih1) { + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); + } + WREG32(ih_regs->ih_rb_cntl, tmp); + + if (ih == &adev->irq.ih) { + /* set the ih ring 0 writeback address whether it's enabled or not */ + WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr)); + WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF); + } + + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_wptr, 0); + WREG32(ih_regs->ih_rb_rptr, 0); + + WREG32(ih_regs->ih_doorbell_rptr, navi10_ih_doorbell_rptr(ih)); + + return 0; } /** @@ -284,36 +291,21 @@ static void navi10_ih_reroute_ih(struct amdgpu_device *adev) */ static int navi10_ih_irq_init(struct amdgpu_device *adev) { - struct amdgpu_ih_ring *ih = &adev->irq.ih; - u32 ih_rb_cntl, ih_chicken; + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2}; + u32 ih_chicken; u32 tmp; + int ret; + int i; /* disable irqs */ - navi10_ih_disable_interrupts(adev); + ret = navi10_ih_toggle_interrupts(adev, false); + if (ret) + return ret; adev->nbio.funcs->ih_control(adev); - /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff); - - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); - ih_rb_cntl = navi10_ih_rb_cntl(ih, ih_rb_cntl); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, - !!adev->irq.msi_enabled); - if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { - DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); - return -ETIMEDOUT; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); - } - if (adev->irq.ih1.ring_size) - navi10_ih_reroute_ih(adev); - if (unlikely(adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT)) { - if (ih->use_bus_addr) { + if (ih[0]->use_bus_addr) { switch (adev->asic_type) { case CHIP_SIENNA_CICHLID: case CHIP_NAVY_FLOUNDER: @@ -334,77 +326,17 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) } } - /* set the writeback address whether it's enabled or not */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, - lower_32_bits(ih->wptr_addr)); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, - upper_32_bits(ih->wptr_addr) & 0xFFFF); - - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); - - WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, - navi10_ih_doorbell_rptr(ih)); - - adev->nbio.funcs->ih_doorbell_range(adev, ih->use_doorbell, - ih->doorbell_index); - - ih = &adev->irq.ih1; - if (ih->ring_size) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_RING1, ih->gpu_addr >> 8); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING1, - (ih->gpu_addr >> 40) & 0xff); - - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); - ih_rb_cntl = navi10_ih_rb_cntl(ih, ih_rb_cntl); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, - WPTR_OVERFLOW_ENABLE, 0); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, - RB_FULL_DRAIN_ENABLE, 1); - if 
(amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); - return -ETIMEDOUT; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + ret = navi10_ih_enable_ring(adev, ih[i]); + if (ret) + return ret; } - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); - - WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1, - navi10_ih_doorbell_rptr(ih)); - } - - ih = &adev->irq.ih2; - if (ih->ring_size) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_RING2, ih->gpu_addr >> 8); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING2, - (ih->gpu_addr >> 40) & 0xff); - - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); - ih_rb_cntl = navi10_ih_rb_cntl(ih, ih_rb_cntl); - - if (amdgpu_sriov_vf(adev) && adev->asic_type < CHIP_NAVI10) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); - return -ETIMEDOUT; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); - } - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); - - WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2, - navi10_ih_doorbell_rptr(ih)); } + /* update doorbell range for ih ring 0*/ + adev->nbio.funcs->ih_doorbell_range(adev, ih[0]->use_doorbell, + ih[0]->doorbell_index); tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, @@ -418,10 +350,15 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) pci_set_master(adev->pdev); /* enable interrupts */ - navi10_ih_enable_interrupts(adev); + ret = navi10_ih_toggle_interrupts(adev, true); + if (ret) + return ret; /* enable wptr force update for self int */ force_update_wptr_for_self_int(adev, 0, 8, true); + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; + return 0; } @@ -435,7 +372,7 @@ static int navi10_ih_irq_init(struct amdgpu_device *adev) static void navi10_ih_irq_disable(struct amdgpu_device *adev) { force_update_wptr_for_self_int(adev, 0, 8, false); - navi10_ih_disable_interrupts(adev); + navi10_ih_toggle_interrupts(adev, false); /* Wait and acknowledge irq */ mdelay(1); @@ -455,23 +392,16 @@ static void navi10_ih_irq_disable(struct amdgpu_device *adev) static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { - u32 wptr, reg, tmp; + u32 wptr, tmp; + struct amdgpu_ih_regs *ih_regs; wptr = le32_to_cpu(*ih->wptr_cpu); + ih_regs = &ih->ih_regs; if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; - if (ih == &adev->irq.ih) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR); - else if (ih == &adev->irq.ih1) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1); - else if (ih == &adev->irq.ih2) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2); - else - BUG(); - - wptr = RREG32_NO_KIQ(reg); + wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); @@ -486,68 +416,14 @@ static u32 navi10_ih_get_wptr(struct amdgpu_device *adev, wptr, ih->rptr, tmp); ih->rptr = tmp; - if (ih == &adev->irq.ih) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL); - else if (ih == &adev->irq.ih1) - reg = SOC15_REG_OFFSET(OSSSYS, 0, 
mmIH_RB_CNTL_RING1); - else if (ih == &adev->irq.ih2) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2); - else - BUG(); - - tmp = RREG32_NO_KIQ(reg); + tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32_NO_KIQ(reg, tmp); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } /** - * navi10_ih_decode_iv - decode an interrupt vector - * - * @adev: amdgpu_device pointer - * @ih: IH ring buffer to decode - * @entry: IV entry to place decoded information into - * - * Decodes the interrupt vector at the current rptr - * position and also advance the position. - */ -static void navi10_ih_decode_iv(struct amdgpu_device *adev, - struct amdgpu_ih_ring *ih, - struct amdgpu_iv_entry *entry) -{ - /* wptr/rptr are in bytes! */ - u32 ring_index = ih->rptr >> 2; - uint32_t dw[8]; - - dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); - dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); - dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); - dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); - dw[4] = le32_to_cpu(ih->ring[ring_index + 4]); - dw[5] = le32_to_cpu(ih->ring[ring_index + 5]); - dw[6] = le32_to_cpu(ih->ring[ring_index + 6]); - dw[7] = le32_to_cpu(ih->ring[ring_index + 7]); - - entry->client_id = dw[0] & 0xff; - entry->src_id = (dw[0] >> 8) & 0xff; - entry->ring_id = (dw[0] >> 16) & 0xff; - entry->vmid = (dw[0] >> 24) & 0xf; - entry->vmid_src = (dw[0] >> 31); - entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); - entry->timestamp_src = dw[2] >> 31; - entry->pasid = dw[3] & 0xffff; - entry->pasid_src = dw[3] >> 31; - entry->src_data[0] = dw[4]; - entry->src_data[1] = dw[5]; - entry->src_data[2] = dw[6]; - entry->src_data[3] = dw[7]; - - /* wptr/rptr are in bytes! */ - ih->rptr += 32; -} - -/** * navi10_ih_irq_rearm - rearm IRQ if lost * * @adev: amdgpu_device pointer @@ -557,22 +433,15 @@ static void navi10_ih_decode_iv(struct amdgpu_device *adev, static void navi10_ih_irq_rearm(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { - uint32_t reg_rptr = 0; uint32_t v = 0; uint32_t i = 0; + struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) - reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR); - else if (ih == &adev->irq.ih1) - reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1); - else if (ih == &adev->irq.ih2) - reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2); - else - return; + ih_regs = &ih->ih_regs; /* Rearm IRQ / re-write doorbell if doorbell write is lost */ for (i = 0; i < MAX_REARM_RETRY; i++) { - v = RREG32_NO_KIQ(reg_rptr); + v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr); if ((v < ih->ring_size) && (v != ih->rptr)) WDOORBELL32(ih->doorbell_index, ih->rptr); else @@ -591,6 +460,8 @@ static void navi10_ih_irq_rearm(struct amdgpu_device *adev, static void navi10_ih_set_rptr(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { + struct amdgpu_ih_regs *ih_regs; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; @@ -598,12 +469,9 @@ static void navi10_ih_set_rptr(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) navi10_ih_irq_rearm(adev, ih); - } else if (ih == &adev->irq.ih) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr); - } else if (ih == &adev->irq.ih1) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, ih->rptr); - } else if (ih == &adev->irq.ih2) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, ih->rptr); + } else { + ih_regs = &ih->ih_regs; + WREG32(ih_regs->ih_rb_rptr, ih->rptr); } } @@ -685,23 +553,8 @@ static int 
navi10_ih_sw_init(void *handle) adev->irq.ih1.ring_size = 0; adev->irq.ih2.ring_size = 0; - if (adev->asic_type < CHIP_NAVI10) { - r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true); - if (r) - return r; - - adev->irq.ih1.use_doorbell = true; - adev->irq.ih1.doorbell_index = - (adev->doorbell_index.ih + 1) << 1; - - r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); - if (r) - return r; - - adev->irq.ih2.use_doorbell = true; - adev->irq.ih2.doorbell_index = - (adev->doorbell_index.ih + 2) << 1; - } + /* initialize ih control registers offset */ + navi10_ih_init_register_offset(adev); r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); if (r) @@ -717,6 +570,7 @@ static int navi10_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft); amdgpu_ih_ring_fini(adev, &adev->irq.ih2); amdgpu_ih_ring_fini(adev, &adev->irq.ih1); amdgpu_ih_ring_fini(adev, &adev->irq.ih); @@ -848,7 +702,7 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = { static const struct amdgpu_ih_funcs navi10_ih_funcs = { .get_wptr = navi10_ih_get_wptr, - .decode_iv = navi10_ih_decode_iv, + .decode_iv = amdgpu_ih_decode_iv_helper, .set_rptr = navi10_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c index b5c3db16c2b0..b860f1c7b5b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v2_3.c @@ -80,15 +80,6 @@ static void nbio_v2_3_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } -static void nbio_v2_3_hdp_flush(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - else - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); -} - static u32 nbio_v2_3_get_memsize(struct amdgpu_device *adev) { return RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_RCC_CONFIG_MEMSIZE); @@ -366,7 +357,6 @@ const struct amdgpu_nbio_funcs nbio_v2_3_funcs = { .get_pcie_data_offset = nbio_v2_3_get_pcie_data_offset, .get_rev_id = nbio_v2_3_get_rev_id, .mc_access_enable = nbio_v2_3_mc_access_enable, - .hdp_flush = nbio_v2_3_hdp_flush, .get_memsize = nbio_v2_3_get_memsize, .sdma_doorbell_range = nbio_v2_3_sdma_doorbell_range, .vcn_doorbell_range = nbio_v2_3_vcn_doorbell_range, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index d2f1fe55d388..83ea063388fd 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -29,6 +29,15 @@ #include "nbio/nbio_6_1_sh_mask.h" #include "nbio/nbio_6_1_smn.h" #include "vega10_enum.h" +#include <uapi/linux/kfd_ioctl.h> + +static void nbio_v6_1_remap_hdp_registers(struct amdgpu_device *adev) +{ + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL); + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL, + adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL); +} static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) { @@ -50,18 +59,6 @@ static void nbio_v6_1_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } -static void nbio_v6_1_hdp_flush(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) - 
WREG32_SOC15_NO_KIQ(NBIO, 0, - mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL, - 0); - else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - NBIO, 0, mmBIF_BX_PF0_HDP_MEM_COHERENCY_FLUSH_CNTL), 0); -} - static u32 nbio_v6_1_get_memsize(struct amdgpu_device *adev) { return RREG32_SOC15(NBIO, 0, mmRCC_PF_0_0_RCC_CONFIG_MEMSIZE); @@ -266,7 +263,6 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { .get_pcie_data_offset = nbio_v6_1_get_pcie_data_offset, .get_rev_id = nbio_v6_1_get_rev_id, .mc_access_enable = nbio_v6_1_mc_access_enable, - .hdp_flush = nbio_v6_1_hdp_flush, .get_memsize = nbio_v6_1_get_memsize, .sdma_doorbell_range = nbio_v6_1_sdma_doorbell_range, .enable_doorbell_aperture = nbio_v6_1_enable_doorbell_aperture, @@ -277,4 +273,5 @@ const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { .get_clockgating_state = nbio_v6_1_get_clockgating_state, .ih_control = nbio_v6_1_ih_control, .init_registers = nbio_v6_1_init_registers, + .remap_hdp_registers = nbio_v6_1_remap_hdp_registers, }; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index ae685813c419..3c00666a13e1 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -60,15 +60,6 @@ static void nbio_v7_0_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } -static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - else - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); -} - static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev) { return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE); @@ -292,7 +283,6 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = { .get_pcie_data_offset = nbio_v7_0_get_pcie_data_offset, .get_rev_id = nbio_v7_0_get_rev_id, .mc_access_enable = nbio_v7_0_mc_access_enable, - .hdp_flush = nbio_v7_0_hdp_flush, .get_memsize = nbio_v7_0_get_memsize, .sdma_doorbell_range = nbio_v7_0_sdma_doorbell_range, .vcn_doorbell_range = nbio_v7_0_vcn_doorbell_range, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c index aa36022670f9..598ce0e93627 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_2.c @@ -56,15 +56,6 @@ static void nbio_v7_2_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, regBIF_BX0_BIF_FB_EN, 0); } -static void nbio_v7_2_hdp_flush(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - else - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); -} - static u32 nbio_v7_2_get_memsize(struct amdgpu_device *adev) { return RREG32_SOC15(NBIO, 0, regRCC_DEV0_EPF0_0_RCC_CONFIG_MEMSIZE); @@ -325,7 +316,6 @@ const struct amdgpu_nbio_funcs nbio_v7_2_funcs = { .get_pcie_port_data_offset = nbio_v7_2_get_pcie_port_data_offset, .get_rev_id = nbio_v7_2_get_rev_id, .mc_access_enable = nbio_v7_2_mc_access_enable, - .hdp_flush = nbio_v7_2_hdp_flush, .get_memsize = nbio_v7_2_get_memsize, .sdma_doorbell_range = nbio_v7_2_sdma_doorbell_range, .vcn_doorbell_range = nbio_v7_2_vcn_doorbell_range, diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c 
index eadc9526d33f..4bc1d1434065 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -82,15 +82,6 @@ static void nbio_v7_4_mc_access_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); } -static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) - WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); - else - amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0); -} - static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) { return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE); @@ -541,7 +532,6 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { .get_pcie_data_offset = nbio_v7_4_get_pcie_data_offset, .get_rev_id = nbio_v7_4_get_rev_id, .mc_access_enable = nbio_v7_4_mc_access_enable, - .hdp_flush = nbio_v7_4_hdp_flush, .get_memsize = nbio_v7_4_get_memsize, .sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range, .vcn_doorbell_range = nbio_v7_4_vcn_doorbell_range, diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c index 6bee3677394a..1d785f06c79d 100644 --- a/drivers/gpu/drm/amd/amdgpu/nv.c +++ b/drivers/gpu/drm/amd/amdgpu/nv.c @@ -38,8 +38,6 @@ #include "gc/gc_10_1_0_offset.h" #include "gc/gc_10_1_0_sh_mask.h" -#include "hdp/hdp_5_0_0_offset.h" -#include "hdp/hdp_5_0_0_sh_mask.h" #include "smuio/smuio_11_0_0_offset.h" #include "mp/mp_11_0_offset.h" @@ -50,6 +48,7 @@ #include "mmhub_v2_0.h" #include "nbio_v2_3.h" #include "nbio_v7_2.h" +#include "hdp_v5_0.h" #include "nv.h" #include "navi10_ih.h" #include "gfx_v10_0.h" @@ -514,6 +513,7 @@ int nv_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v2_3_funcs; adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg; } + adev->hdp.funcs = &hdp_v5_0_funcs; if (adev->asic_type == CHIP_SIENNA_CICHLID) adev->gmc.xgmi.supported = true; @@ -669,22 +669,6 @@ static uint32_t nv_get_rev_id(struct amdgpu_device *adev) return adev->nbio.funcs->get_rev_id(adev); } -static void nv_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) -{ - adev->nbio.funcs->hdp_flush(adev, ring); -} - -static void nv_invalidate_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) { - WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); - } else { - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); - } -} - static bool nv_need_full_reset(struct amdgpu_device *adev) { return true; @@ -788,8 +772,6 @@ static const struct amdgpu_asic_funcs nv_asic_funcs = .set_uvd_clocks = &nv_set_uvd_clocks, .set_vce_clocks = &nv_set_vce_clocks, .get_config_memsize = &nv_get_config_memsize, - .flush_hdp = &nv_flush_hdp, - .invalidate_hdp = &nv_invalidate_hdp, .init_doorbell_index = &nv_init_doorbell_index, .need_full_reset = &nv_need_full_reset, .need_reset_on_init = &nv_need_reset_on_init, @@ -1080,120 +1062,6 @@ static int nv_common_soft_reset(void *handle) return 0; } -static void nv_update_hdp_mem_power_gating(struct amdgpu_device *adev, - bool enable) -{ - uint32_t hdp_clk_cntl, hdp_clk_cntl1; - uint32_t hdp_mem_pwr_cntl; - - if (!(adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | - AMD_CG_SUPPORT_HDP_DS | - AMD_CG_SUPPORT_HDP_SD))) - return; - - hdp_clk_cntl = hdp_clk_cntl1 = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); - hdp_mem_pwr_cntl = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL); - - /* Before doing clock/power 
mode switch, - * forced on IPH & RC clock */ - hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, - IPH_MEM_CLK_SOFT_OVERRIDE, 1); - hdp_clk_cntl = REG_SET_FIELD(hdp_clk_cntl, HDP_CLK_CNTL, - RC_MEM_CLK_SOFT_OVERRIDE, 1); - WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); - - /* HDP 5.0 doesn't support dynamic power mode switch, - * disable clock and power gating before any changing */ - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_CTRL_EN, 0); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_LS_EN, 0); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_DS_EN, 0); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_SD_EN, 0); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - RC_MEM_POWER_CTRL_EN, 0); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - RC_MEM_POWER_LS_EN, 0); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - RC_MEM_POWER_DS_EN, 0); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - RC_MEM_POWER_SD_EN, 0); - WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); - - /* only one clock gating mode (LS/DS/SD) can be enabled */ - if (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_LS_EN, enable); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_LS_EN, enable); - } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_DS) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_DS_EN, enable); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_DS_EN, enable); - } else if (adev->cg_flags & AMD_CG_SUPPORT_HDP_SD) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_SD_EN, enable); - /* RC should not use shut down mode, fallback to ds */ - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, - HDP_MEM_POWER_CTRL, - RC_MEM_POWER_DS_EN, enable); - } - - /* confirmed that IPH_MEM_POWER_CTRL_EN and RC_MEM_POWER_CTRL_EN have to - * be set for SRAM LS/DS/SD */ - if (adev->cg_flags & (AMD_CG_SUPPORT_HDP_LS | AMD_CG_SUPPORT_HDP_DS | - AMD_CG_SUPPORT_HDP_SD)) { - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - IPH_MEM_POWER_CTRL_EN, 1); - hdp_mem_pwr_cntl = REG_SET_FIELD(hdp_mem_pwr_cntl, HDP_MEM_POWER_CTRL, - RC_MEM_POWER_CTRL_EN, 1); - } - - WREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL, hdp_mem_pwr_cntl); - - /* restore IPH & RC clock override after clock/power mode changing */ - WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl1); -} - -static void nv_update_hdp_clock_gating(struct amdgpu_device *adev, - bool enable) -{ - uint32_t hdp_clk_cntl; - - if (!(adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG)) - return; - - hdp_clk_cntl = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); - - if (enable) { - hdp_clk_cntl &= - ~(uint32_t) - (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK); - } else { - hdp_clk_cntl |= HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | - 
HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK; - } - - WREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL, hdp_clk_cntl); -} - static int nv_common_set_clockgating_state(void *handle, enum amd_clockgating_state state) { @@ -1213,9 +1081,7 @@ static int nv_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE); - nv_update_hdp_mem_power_gating(adev, - state == AMD_CG_STATE_GATE); - nv_update_hdp_clock_gating(adev, + adev->hdp.funcs->update_clock_gating(adev, state == AMD_CG_STATE_GATE); break; default: @@ -1234,31 +1100,13 @@ static int nv_common_set_powergating_state(void *handle, static void nv_common_get_clockgating_state(void *handle, u32 *flags) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - uint32_t tmp; if (amdgpu_sriov_vf(adev)) *flags = 0; adev->nbio.funcs->get_clockgating_state(adev, flags); - /* AMD_CG_SUPPORT_HDP_MGCG */ - tmp = RREG32_SOC15(HDP, 0, mmHDP_CLK_CNTL); - if (!(tmp & (HDP_CLK_CNTL__IPH_MEM_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__RC_MEM_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__DBUS_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__DYN_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__XDP_REG_CLK_SOFT_OVERRIDE_MASK | - HDP_CLK_CNTL__HDP_REG_CLK_SOFT_OVERRIDE_MASK))) - *flags |= AMD_CG_SUPPORT_HDP_MGCG; - - /* AMD_CG_SUPPORT_HDP_LS/DS/SD */ - tmp = RREG32_SOC15(HDP, 0, mmHDP_MEM_POWER_CTRL); - if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK) - *flags |= AMD_CG_SUPPORT_HDP_LS; - else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_DS_EN_MASK) - *flags |= AMD_CG_SUPPORT_HDP_DS; - else if (tmp & HDP_MEM_POWER_CTRL__IPH_MEM_POWER_SD_EN_MASK) - *flags |= AMD_CG_SUPPORT_HDP_SD; + adev->hdp.funcs->get_clock_gating_state(adev, flags); return; } diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index d65a5339d354..3ba7bdfde65d 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -47,7 +47,7 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ GFX_CTRL_CMD_ID_GBR_IH_SET = 0x00080000, /* set Gbr IH_RB_CNTL registers */ - GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ + GFX_CTRL_CMD_ID_CONSUME_CMD = 0x00090000, /* send interrupt to psp for updating write pointer of vf */ GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c index bd4248c93c49..c325d6f53a71 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -392,37 +392,6 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) return ret; } -static void psp_v11_0_reroute_ih(struct psp_context *psp) -{ - struct amdgpu_device *adev = psp->adev; - uint32_t tmp; - - /* Change IH ring for VMC */ - tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1244b); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 3); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); - - 
mdelay(20); - psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); - - /* Change IH ring for UMC */ - tmp = REG_SET_FIELD(0, IH_CLIENT_CFG_DATA, CREDIT_RETURN_ADDR, 0x1216b); - tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); - - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, 4); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, tmp); - WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_GBR_IH_SET); - - mdelay(20); - psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), - 0x80000000, 0x8000FFFF, false); -} - static int psp_v11_0_ring_init(struct psp_context *psp, enum psp_ring_type ring_type) { @@ -430,11 +399,6 @@ static int psp_v11_0_ring_init(struct psp_context *psp, struct psp_ring *ring; struct amdgpu_device *adev = psp->adev; - if ((!amdgpu_sriov_vf(adev)) && - !(adev->asic_type >= CHIP_SIENNA_CICHLID && - adev->asic_type <= CHIP_DIMGREY_CAVEFISH)) - psp_v11_0_reroute_ih(psp); - ring = &psp->km_ring; ring->ring_type = ring_type; @@ -726,7 +690,7 @@ static int psp_v11_0_memory_training(struct psp_context *psp, uint32_t ops) } memcpy_toio(adev->mman.aper_base_kaddr, buf, sz); - adev->nbio.funcs->hdp_flush(adev, NULL); + adev->hdp.funcs->flush_hdp(adev, NULL); vfree(buf); } diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index ce56e93c6886..c8c22c1d1e65 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -46,7 +46,6 @@ #include "sdma6/sdma6_4_2_2_sh_mask.h" #include "sdma7/sdma7_4_2_2_offset.h" #include "sdma7/sdma7_4_2_2_sh_mask.h" -#include "hdp/hdp_4_0_offset.h" #include "sdma0/sdma0_4_1_default.h" #include "soc15_common.h" diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c index b208b81005bb..d345e324837d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c @@ -32,7 +32,6 @@ #include "gc/gc_10_1_0_offset.h" #include "gc/gc_10_1_0_sh_mask.h" -#include "hdp/hdp_5_0_0_offset.h" #include "ivsrcid/sdma0/irqsrcs_sdma0_5_0.h" #include "ivsrcid/sdma1/irqsrcs_sdma1_5_0.h" diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c index f1ba36a094da..690a5090475a 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c @@ -119,15 +119,7 @@ static int sdma_v5_2_init_inst_ctx(struct amdgpu_sdma_instance *sdma_inst) static void sdma_v5_2_destroy_inst_ctx(struct amdgpu_device *adev) { - int i; - - for (i = 0; i < adev->sdma.num_instances; i++) { - release_firmware(adev->sdma.instance[i].fw); - adev->sdma.instance[i].fw = NULL; - - if (adev->asic_type == CHIP_SIENNA_CICHLID) - break; - } + release_firmware(adev->sdma.instance[0].fw); memset((void *)adev->sdma.instance, 0, sizeof(struct amdgpu_sdma_instance) * AMDGPU_MAX_SDMA_INSTANCES); @@ -185,23 +177,10 @@ static int sdma_v5_2_init_microcode(struct amdgpu_device *adev) if (err) goto out; - for (i = 1; i < adev->sdma.num_instances; i++) { - if (adev->asic_type >= CHIP_SIENNA_CICHLID && - adev->asic_type <= CHIP_DIMGREY_CAVEFISH) { - memcpy((void *)&adev->sdma.instance[i], - (void *)&adev->sdma.instance[0], - sizeof(struct amdgpu_sdma_instance)); - } else { - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sdma%d.bin", chip_name, i); - err = request_firmware(&adev->sdma.instance[i].fw, fw_name, adev->dev); - if (err) - goto out; - - err = sdma_v5_2_init_inst_ctx(&adev->sdma.instance[i]); - if (err) - goto out; - } - } + for (i = 1; i < 
adev->sdma.num_instances; i++) + memcpy((void *)&adev->sdma.instance[i], + (void *)&adev->sdma.instance[0], + sizeof(struct amdgpu_sdma_instance)); DRM_DEBUG("psp_load == '%s'\n", adev->firmware.load_type == AMDGPU_FW_LOAD_PSP ? "true" : "false"); diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 8a23636ecc27..9a25accd48a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -40,8 +40,6 @@ #include "gc/gc_9_0_sh_mask.h" #include "sdma0/sdma0_4_0_offset.h" #include "sdma1/sdma1_4_0_offset.h" -#include "hdp/hdp_4_0_offset.h" -#include "hdp/hdp_4_0_sh_mask.h" #include "nbio/nbio_7_0_default.h" #include "nbio/nbio_7_0_offset.h" #include "nbio/nbio_7_0_sh_mask.h" @@ -59,7 +57,9 @@ #include "nbio_v6_1.h" #include "nbio_v7_0.h" #include "nbio_v7_4.h" +#include "hdp_v4_0.h" #include "vega10_ih.h" +#include "vega20_ih.h" #include "navi10_ih.h" #include "sdma_v4_0.h" #include "uvd_v7_0.h" @@ -83,14 +83,6 @@ #define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba #define mmMP0_MISC_LIGHT_SLEEP_CTRL_BASE_IDX 0 -/* for Vega20 register name change */ -#define mmHDP_MEM_POWER_CTRL 0x00d4 -#define HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK 0x00000001L -#define HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK 0x00000002L -#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK 0x00010000L -#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L -#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0 - /* * Indirect registers accessor */ @@ -699,6 +691,7 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) adev->nbio.funcs = &nbio_v6_1_funcs; adev->nbio.hdp_flush_reg = &nbio_v6_1_hdp_flush_reg; } + adev->hdp.funcs = &hdp_v4_0_funcs; if (adev->asic_type == CHIP_VEGA20 || adev->asic_type == CHIP_ARCTURUS) adev->df.funcs = &df_v3_6_funcs; @@ -729,12 +722,12 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); } if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); else amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); } else { if (adev->asic_type == CHIP_VEGA20) - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); else amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) { @@ -787,9 +780,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) if (amdgpu_sriov_vf(adev)) { if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); } else { - amdgpu_device_ip_block_add(adev, &navi10_ih_ip_block); + amdgpu_device_ip_block_add(adev, &vega20_ih_ip_block); if (likely(adev->firmware.load_type == AMDGPU_FW_LOAD_PSP)) amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); } @@ -834,35 +827,12 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) return 0; } -static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) -{ - adev->nbio.funcs->hdp_flush(adev, ring); -} - -static void soc15_invalidate_hdp(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - if (!ring || !ring->funcs->emit_wreg) - WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1); - else - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( - HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1); -} - static bool 
soc15_need_full_reset(struct amdgpu_device *adev) { /* change this when we implement soft reset */ return true; } -static void vega20_reset_hdp_ras_error_count(struct amdgpu_device *adev) -{ - if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__HDP)) - return; - /*read back hdp ras counter to reset it to 0 */ - RREG32_SOC15(HDP, 0, mmHDP_EDC_CNT); -} - static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0, uint64_t *count1) { @@ -1011,8 +981,6 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .set_uvd_clocks = &soc15_set_uvd_clocks, .set_vce_clocks = &soc15_set_vce_clocks, .get_config_memsize = &soc15_get_config_memsize, - .flush_hdp = &soc15_flush_hdp, - .invalidate_hdp = &soc15_invalidate_hdp, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega10_doorbell_index_init, .get_pcie_usage = &soc15_get_pcie_usage, @@ -1034,9 +1002,6 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs = .set_uvd_clocks = &soc15_set_uvd_clocks, .set_vce_clocks = &soc15_set_vce_clocks, .get_config_memsize = &soc15_get_config_memsize, - .flush_hdp = &soc15_flush_hdp, - .invalidate_hdp = &soc15_invalidate_hdp, - .reset_hdp_ras_error_count = &vega20_reset_hdp_ras_error_count, .need_full_reset = &soc15_need_full_reset, .init_doorbell_index = &vega20_doorbell_index_init, .get_pcie_usage = &vega20_get_pcie_usage, @@ -1293,9 +1258,8 @@ static int soc15_common_late_init(void *handle) if (amdgpu_sriov_vf(adev)) xgpu_ai_mailbox_get_irq(adev); - if (adev->asic_funcs && - adev->asic_funcs->reset_hdp_ras_error_count) - adev->asic_funcs->reset_hdp_ras_error_count(adev); + if (adev->hdp.funcs->reset_ras_error_count) + adev->hdp.funcs->reset_ras_error_count(adev); if (adev->nbio.funcs->ras_late_init) r = adev->nbio.funcs->ras_late_init(adev); @@ -1421,41 +1385,6 @@ static int soc15_common_soft_reset(void *handle) return 0; } -static void soc15_update_hdp_light_sleep(struct amdgpu_device *adev, bool enable) -{ - uint32_t def, data; - - if (adev->asic_type == CHIP_VEGA20 || - adev->asic_type == CHIP_ARCTURUS || - adev->asic_type == CHIP_RENOIR) { - def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL)); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) - data |= HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK | - HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK | - HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK | - HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK; - else - data &= ~(HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK | - HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK | - HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK | - HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK); - - if (def != data) - WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL), data); - } else { - def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); - - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) - data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK; - else - data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK; - - if (def != data) - WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS), data); - } -} - static void soc15_update_drm_clock_gating(struct amdgpu_device *adev, bool enable) { uint32_t def, data; @@ -1516,7 +1445,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE); - soc15_update_hdp_light_sleep(adev, + adev->hdp.funcs->update_clock_gating(adev, state == AMD_CG_STATE_GATE); soc15_update_drm_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -1533,7 
+1462,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE); adev->nbio.funcs->update_medium_grain_light_sleep(adev, state == AMD_CG_STATE_GATE); - soc15_update_hdp_light_sleep(adev, + adev->hdp.funcs->update_clock_gating(adev, state == AMD_CG_STATE_GATE); soc15_update_drm_clock_gating(adev, state == AMD_CG_STATE_GATE); @@ -1541,7 +1470,7 @@ static int soc15_common_set_clockgating_state(void *handle, state == AMD_CG_STATE_GATE); break; case CHIP_ARCTURUS: - soc15_update_hdp_light_sleep(adev, + adev->hdp.funcs->update_clock_gating(adev, state == AMD_CG_STATE_GATE); break; default: @@ -1560,10 +1489,7 @@ static void soc15_common_get_clockgating_state(void *handle, u32 *flags) adev->nbio.funcs->get_clockgating_state(adev, flags); - /* AMD_CG_SUPPORT_HDP_LS */ - data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); - if (data & HDP_MEM_POWER_LS__LS_ENABLE_MASK) - *flags |= AMD_CG_SUPPORT_HDP_LS; + adev->hdp.funcs->get_clock_gating_state(adev, flags); /* AMD_CG_SUPPORT_DRM_MGCG */ data = RREG32(SOC15_REG_OFFSET(MP0, 0, mmMP0_MISC_CGTT_CTRL0)); diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index ce3319993b4b..249fcbee7871 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -196,19 +196,30 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev, wptr = le32_to_cpu(*ih->wptr_cpu); - if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) { - wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); - /* When a ring buffer overflow happen start parsing interrupt - * from the last not overwritten vector (wptr + 16). Hopefully - * this should allow us to catchup. - */ - dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", - wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); - ih->rptr = (wptr + 16) & ih->ptr_mask; - tmp = RREG32(mmIH_RB_CNTL); - tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32(mmIH_RB_CNTL, tmp); - } + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32(mmIH_RB_WPTR); + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 16). Hopefully + * this should allow us to catchup. 
+ */ + + dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, (wptr + 16) & ih->ptr_mask); + ih->rptr = (wptr + 16) & ih->ptr_mask; + tmp = RREG32(mmIH_RB_CNTL); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32(mmIH_RB_CNTL, tmp); + +out: return (wptr & ih->ptr_mask); } diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 312ecf6d24a0..7cd67cb2ac5f 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -36,7 +36,6 @@ #include "vce/vce_4_0_default.h" #include "vce/vce_4_0_sh_mask.h" #include "nbif/nbif_6_1_offset.h" -#include "hdp/hdp_4_0_offset.h" #include "mmhub/mmhub_1_0_offset.h" #include "mmhub/mmhub_1_0_sh_mask.h" #include "ivsrcid/uvd/irqsrcs_uvd_7_0.h" diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index c734e31a9e65..6117931fa8d7 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -32,7 +32,6 @@ #include "vcn/vcn_1_0_offset.h" #include "vcn/vcn_1_0_sh_mask.h" -#include "hdp/hdp_4_0_offset.h" #include "mmhub/mmhub_9_1_offset.h" #include "mmhub/mmhub_9_1_sh_mask.h" diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index e5ae31eb744e..88626d83e07b 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -38,132 +38,120 @@ static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev); /** - * vega10_ih_enable_interrupts - Enable the interrupt ring buffer + * vega10_ih_init_register_offset - Initialize register offset for ih rings * * @adev: amdgpu_device pointer * - * Enable the interrupt ring buffer (VEGA10). + * Initialize register offset ih rings (VEGA10). 
*/ -static void vega10_ih_enable_interrupts(struct amdgpu_device *adev) +static void vega10_ih_init_register_offset(struct amdgpu_device *adev) { - u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); - - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 1); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 1); - if (amdgpu_sriov_vf(adev)) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { - DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + struct amdgpu_ih_regs *ih_regs; + + if (adev->irq.ih.ring_size) { + ih_regs = &adev->irq.ih.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR); + ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO); + ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL; } - adev->irq.ih.enabled = true; if (adev->irq.ih1.ring_size) { - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, - RB_ENABLE, 1); - if (amdgpu_sriov_vf(adev)) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); - } - adev->irq.ih1.enabled = true; + ih_regs = &adev->irq.ih1.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING1); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING1); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1; } if (adev->irq.ih2.ring_size) { - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, - RB_ENABLE, 1); - if (amdgpu_sriov_vf(adev)) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); - } - adev->irq.ih2.enabled = true; + ih_regs = &adev->irq.ih2.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING2); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING2); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING2; } - - if (adev->irq.ih_soft.ring_size) - adev->irq.ih_soft.enabled = true; } /** - * vega10_ih_disable_interrupts - Disable the interrupt ring buffer + * vega10_ih_toggle_ring_interrupts - toggle the interrupt ring buffer * * @adev: 
amdgpu_device pointer + * @ih: amdgpu_ih_ring pointet + * @enable: true - enable the interrupts, false - disable the interrupts * - * Disable the interrupt ring buffer (VEGA10). + * Toggle the interrupt ring buffer (VEGA10) */ -static void vega10_ih_disable_interrupts(struct amdgpu_device *adev) +static int vega10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + bool enable) { - u32 ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_ENABLE, 0); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, ENABLE_INTR, 0); + ih_regs = &ih->ih_regs; + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + /* enable_intr field is only valid in ring0 */ + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); if (amdgpu_sriov_vf(adev)) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { - DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); - return; + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { + dev_err(adev->dev, "PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; } } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); + WREG32(ih_regs->ih_rb_cntl, tmp); } - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); - adev->irq.ih.enabled = false; - adev->irq.ih.rptr = 0; - - if (adev->irq.ih1.ring_size) { - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING1, - RB_ENABLE, 0); - if (amdgpu_sriov_vf(adev)) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); - } + if (enable) { + ih->enabled = true; + } else { /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); - adev->irq.ih1.enabled = false; - adev->irq.ih1.rptr = 0; + WREG32(ih_regs->ih_rb_rptr, 0); + WREG32(ih_regs->ih_rb_wptr, 0); + ih->enabled = false; + ih->rptr = 0; } - if (adev->irq.ih2.ring_size) { - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL_RING2, - RB_ENABLE, 0); - if (amdgpu_sriov_vf(adev)) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); - return; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); - } + return 0; +} - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); - adev->irq.ih2.enabled = false; - adev->irq.ih2.rptr = 0; +/** + * vega10_ih_toggle_interrupts - Toggle all the available interrupt ring buffers + * + * @adev: amdgpu_device pointer + * @enable: enable or disable interrupt ring buffers + * + * Toggle all the available interrupt ring buffers (VEGA10). 
+ */ +static int vega10_ih_toggle_interrupts(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2}; + int i; + int r; + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + r = vega10_ih_toggle_ring_interrupts(adev, ih[i], enable); + if (r) + return r; + } } + + return 0; } static uint32_t vega10_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl) @@ -209,6 +197,58 @@ static uint32_t vega10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih) } /** + * vega10_ih_enable_ring - enable an ih ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * + * Enable an ih ring buffer (VEGA10) + */ +static int vega10_ih_enable_ring(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + /* Ring Buffer base. [39:8] of 40-bit address of the beginning of the ring buffer*/ + WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8); + WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff); + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = vega10_ih_rb_cntl(ih, tmp); + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); + if (ih == &adev->irq.ih1) { + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); + } + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { + dev_err(adev->dev, "PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (ih == &adev->irq.ih) { + /* set the ih ring 0 writeback address whether it's enabled or not */ + WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr)); + WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF); + } + + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_wptr, 0); + WREG32(ih_regs->ih_rb_rptr, 0); + + WREG32(ih_regs->ih_doorbell_rptr, vega10_ih_doorbell_rptr(ih)); + + return 0; +} + +/** * vega10_ih_irq_init - init and enable the interrupt ring * * @adev: amdgpu_device pointer @@ -221,116 +261,34 @@ static uint32_t vega10_ih_doorbell_rptr(struct amdgpu_ih_ring *ih) */ static int vega10_ih_irq_init(struct amdgpu_device *adev) { - struct amdgpu_ih_ring *ih; - u32 ih_rb_cntl, ih_chicken; - int ret = 0; + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2}; + u32 ih_chicken; + int ret; + int i; u32 tmp; /* disable irqs */ - vega10_ih_disable_interrupts(adev); + ret = vega10_ih_toggle_interrupts(adev, false); + if (ret) + return ret; adev->nbio.funcs->ih_control(adev); - ih = &adev->irq.ih; - /* Ring Buffer base. 
[39:8] of 40-bit address of the beginning of the ring buffer*/ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE, ih->gpu_addr >> 8); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI, (ih->gpu_addr >> 40) & 0xff); - - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL); - ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RPTR_REARM, - !!adev->irq.msi_enabled); - if (amdgpu_sriov_vf(adev)) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL, ih_rb_cntl)) { - DRM_ERROR("PSP program IH_RB_CNTL failed!\n"); - return -ETIMEDOUT; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL, ih_rb_cntl); - } - - if ((adev->asic_type == CHIP_ARCTURUS && - adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) || - adev->asic_type == CHIP_RENOIR) { + if (adev->asic_type == CHIP_RENOIR) { ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); if (adev->irq.ih.use_bus_addr) { ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, MC_SPACE_GPA_ENABLE, 1); - } else { - ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, - MC_SPACE_FBPA_ENABLE, 1); } WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); } - /* set the writeback address whether it's enabled or not */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, - lower_32_bits(ih->wptr_addr)); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, - upper_32_bits(ih->wptr_addr) & 0xFFFF); - - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); - - WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR, - vega10_ih_doorbell_rptr(ih)); - - ih = &adev->irq.ih1; - if (ih->ring_size) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_RING1, ih->gpu_addr >> 8); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING1, - (ih->gpu_addr >> 40) & 0xff); - - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1); - ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, - WPTR_OVERFLOW_ENABLE, 0); - ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, - RB_FULL_DRAIN_ENABLE, 1); - if (amdgpu_sriov_vf(adev)) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING1, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING1 failed!\n"); - return -ETIMEDOUT; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING1, ih_rb_cntl); + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + ret = vega10_ih_enable_ring(adev, ih[i]); + if (ret) + return ret; } - - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING1, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, 0); - - WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1, - vega10_ih_doorbell_rptr(ih)); - } - - ih = &adev->irq.ih2; - if (ih->ring_size) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_RING2, ih->gpu_addr >> 8); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_BASE_HI_RING2, - (ih->gpu_addr >> 40) & 0xff); - - ih_rb_cntl = RREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2); - ih_rb_cntl = vega10_ih_rb_cntl(ih, ih_rb_cntl); - - if (amdgpu_sriov_vf(adev)) { - if (psp_reg_program(&adev->psp, PSP_REG_IH_RB_CNTL_RING2, - ih_rb_cntl)) { - DRM_ERROR("program IH_RB_CNTL_RING2 failed!\n"); - return -ETIMEDOUT; - } - } else { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_CNTL_RING2, ih_rb_cntl); - } - - /* set rptr, wptr to 0 */ - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_RING2, 0); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, 0); - - WREG32_SOC15(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2, - vega10_ih_doorbell_rptr(ih)); } tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); @@ -345,9 +303,14 @@ static int vega10_ih_irq_init(struct amdgpu_device 
*adev) pci_set_master(adev->pdev); /* enable interrupts */ - vega10_ih_enable_interrupts(adev); + ret = vega10_ih_toggle_interrupts(adev, true); + if (ret) + return ret; - return ret; + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; + + return 0; } /** @@ -359,7 +322,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) */ static void vega10_ih_irq_disable(struct amdgpu_device *adev) { - vega10_ih_disable_interrupts(adev); + vega10_ih_toggle_interrupts(adev, false); /* Wait and acknowledge irq */ mdelay(1); @@ -379,25 +342,17 @@ static void vega10_ih_irq_disable(struct amdgpu_device *adev) static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { - u32 wptr, reg, tmp; + u32 wptr, tmp; + struct amdgpu_ih_regs *ih_regs; wptr = le32_to_cpu(*ih->wptr_cpu); + ih_regs = &ih->ih_regs; if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; /* Double check that the overflow wasn't already cleared. */ - - if (ih == &adev->irq.ih) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR); - else if (ih == &adev->irq.ih1) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1); - else if (ih == &adev->irq.ih2) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2); - else - BUG(); - - wptr = RREG32_NO_KIQ(reg); + wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) goto out; @@ -413,69 +368,15 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev, wptr, ih->rptr, tmp); ih->rptr = tmp; - if (ih == &adev->irq.ih) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL); - else if (ih == &adev->irq.ih1) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1); - else if (ih == &adev->irq.ih2) - reg = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2); - else - BUG(); - - tmp = RREG32_NO_KIQ(reg); + tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); - WREG32_NO_KIQ(reg, tmp); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); out: return (wptr & ih->ptr_mask); } /** - * vega10_ih_decode_iv - decode an interrupt vector - * - * @adev: amdgpu_device pointer - * @ih: IH ring buffer to decode - * @entry: IV entry to place decoded information into - * - * Decodes the interrupt vector at the current rptr - * position and also advance the position. - */ -static void vega10_ih_decode_iv(struct amdgpu_device *adev, - struct amdgpu_ih_ring *ih, - struct amdgpu_iv_entry *entry) -{ - /* wptr/rptr are in bytes! */ - u32 ring_index = ih->rptr >> 2; - uint32_t dw[8]; - - dw[0] = le32_to_cpu(ih->ring[ring_index + 0]); - dw[1] = le32_to_cpu(ih->ring[ring_index + 1]); - dw[2] = le32_to_cpu(ih->ring[ring_index + 2]); - dw[3] = le32_to_cpu(ih->ring[ring_index + 3]); - dw[4] = le32_to_cpu(ih->ring[ring_index + 4]); - dw[5] = le32_to_cpu(ih->ring[ring_index + 5]); - dw[6] = le32_to_cpu(ih->ring[ring_index + 6]); - dw[7] = le32_to_cpu(ih->ring[ring_index + 7]); - - entry->client_id = dw[0] & 0xff; - entry->src_id = (dw[0] >> 8) & 0xff; - entry->ring_id = (dw[0] >> 16) & 0xff; - entry->vmid = (dw[0] >> 24) & 0xf; - entry->vmid_src = (dw[0] >> 31); - entry->timestamp = dw[1] | ((u64)(dw[2] & 0xffff) << 32); - entry->timestamp_src = dw[2] >> 31; - entry->pasid = dw[3] & 0xffff; - entry->pasid_src = dw[3] >> 31; - entry->src_data[0] = dw[4]; - entry->src_data[1] = dw[5]; - entry->src_data[2] = dw[6]; - entry->src_data[3] = dw[7]; - - /* wptr/rptr are in bytes! 
*/ - ih->rptr += 32; -} - -/** * vega10_ih_irq_rearm - rearm IRQ if lost * * @adev: amdgpu_device pointer @@ -485,22 +386,14 @@ static void vega10_ih_decode_iv(struct amdgpu_device *adev, static void vega10_ih_irq_rearm(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { - uint32_t reg_rptr = 0; uint32_t v = 0; uint32_t i = 0; + struct amdgpu_ih_regs *ih_regs; - if (ih == &adev->irq.ih) - reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR); - else if (ih == &adev->irq.ih1) - reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1); - else if (ih == &adev->irq.ih2) - reg_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2); - else - return; - + ih_regs = &ih->ih_regs; /* Rearm IRQ / re-wwrite doorbell if doorbell write is lost */ for (i = 0; i < MAX_REARM_RETRY; i++) { - v = RREG32_NO_KIQ(reg_rptr); + v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr); if ((v < ih->ring_size) && (v != ih->rptr)) WDOORBELL32(ih->doorbell_index, ih->rptr); else @@ -519,6 +412,8 @@ static void vega10_ih_irq_rearm(struct amdgpu_device *adev, static void vega10_ih_set_rptr(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { + struct amdgpu_ih_regs *ih_regs; + if (ih->use_doorbell) { /* XXX check if swapping is necessary on BE */ *ih->rptr_cpu = ih->rptr; @@ -526,12 +421,9 @@ static void vega10_ih_set_rptr(struct amdgpu_device *adev, if (amdgpu_sriov_vf(adev)) vega10_ih_irq_rearm(adev, ih); - } else if (ih == &adev->irq.ih) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, ih->rptr); - } else if (ih == &adev->irq.ih1) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING1, ih->rptr); - } else if (ih == &adev->irq.ih2) { - WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR_RING2, ih->rptr); + } else { + ih_regs = &ih->ih_regs; + WREG32(ih_regs->ih_rb_rptr, ih->rptr); } } @@ -600,19 +492,23 @@ static int vega10_ih_sw_init(void *handle) adev->irq.ih.use_doorbell = true; adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; - r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true); - if (r) - return r; + if (!(adev->flags & AMD_IS_APU)) { + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true); + if (r) + return r; - adev->irq.ih1.use_doorbell = true; - adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; - r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); - if (r) - return r; + r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); + if (r) + return r; - adev->irq.ih2.use_doorbell = true; - adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1; + adev->irq.ih2.use_doorbell = true; + adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1; + } + /* initialize ih control registers offset */ + vega10_ih_init_register_offset(adev); r = amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); if (r) @@ -628,6 +524,7 @@ static int vega10_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft); amdgpu_ih_ring_fini(adev, &adev->irq.ih2); amdgpu_ih_ring_fini(adev, &adev->irq.ih1); amdgpu_ih_ring_fini(adev, &adev->irq.ih); @@ -698,15 +595,11 @@ static void vega10_ih_update_clockgating_state(struct amdgpu_device *adev, def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL); field_val = enable ? 0 : 1; /** - * Vega10 does not have IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE - * and IH_BUFFER_MEM_CLK_SOFT_OVERRIDE field. 
+ * Vega10/12 and RAVEN don't have IH_BUFFER_MEM_CLK_SOFT_OVERRIDE field. */ - if (adev->asic_type > CHIP_VEGA10) { - data = REG_SET_FIELD(data, IH_CLK_CTRL, - IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE, field_val); + if (adev->asic_type == CHIP_RENOIR) data = REG_SET_FIELD(data, IH_CLK_CTRL, IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val); - } data = REG_SET_FIELD(data, IH_CLK_CTRL, DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); @@ -759,7 +652,7 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = { static const struct amdgpu_ih_funcs vega10_ih_funcs = { .get_wptr = vega10_ih_get_wptr, - .decode_iv = vega10_ih_decode_iv, + .decode_iv = amdgpu_ih_decode_iv_helper, .set_rptr = vega10_ih_set_rptr }; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c new file mode 100644 index 000000000000..42032ca380cc --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c @@ -0,0 +1,700 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include <linux/pci.h> + +#include "amdgpu.h" +#include "amdgpu_ih.h" +#include "soc15.h" + +#include "oss/osssys_4_2_0_offset.h" +#include "oss/osssys_4_2_0_sh_mask.h" + +#include "soc15_common.h" +#include "vega20_ih.h" + +#define MAX_REARM_RETRY 10 + +static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev); + +/** + * vega20_ih_init_register_offset - Initialize register offset for ih rings + * + * @adev: amdgpu_device pointer + * + * Initialize register offset ih rings (VEGA20). 
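+ * Rings that have not been allocated (ring_size == 0) are skipped and keep their ih_regs unset.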
+ */ +static void vega20_ih_init_register_offset(struct amdgpu_device *adev) +{ + struct amdgpu_ih_regs *ih_regs; + + if (adev->irq.ih.ring_size) { + ih_regs = &adev->irq.ih.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR); + ih_regs->ih_rb_wptr_addr_lo = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO); + ih_regs->ih_rb_wptr_addr_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL; + } + + if (adev->irq.ih1.ring_size) { + ih_regs = &adev->irq.ih1.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING1); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING1); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING1); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING1); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING1); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING1); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING1; + } + + if (adev->irq.ih2.ring_size) { + ih_regs = &adev->irq.ih2.ih_regs; + ih_regs->ih_rb_base = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_RING2); + ih_regs->ih_rb_base_hi = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_BASE_HI_RING2); + ih_regs->ih_rb_cntl = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_CNTL_RING2); + ih_regs->ih_rb_wptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_WPTR_RING2); + ih_regs->ih_rb_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_RB_RPTR_RING2); + ih_regs->ih_doorbell_rptr = SOC15_REG_OFFSET(OSSSYS, 0, mmIH_DOORBELL_RPTR_RING2); + ih_regs->psp_reg_id = PSP_REG_IH_RB_CNTL_RING2; + } +} + +/** + * vega20_ih_toggle_ring_interrupts - toggle the interrupt ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointet + * @enable: true - enable the interrupts, false - disable the interrupts + * + * Toggle the interrupt ring buffer (VEGA20) + */ +static int vega20_ih_toggle_ring_interrupts(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih, + bool enable) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); + /* enable_intr field is only valid in ring0 */ + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { + dev_err(adev->dev, "PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (enable) { + ih->enabled = true; + } else { + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_rptr, 0); + WREG32(ih_regs->ih_rb_wptr, 0); + ih->enabled = false; + ih->rptr = 0; + } + + return 0; +} + +/** + * vega20_ih_toggle_interrupts - Toggle all the available interrupt ring buffers + * + * @adev: amdgpu_device pointer + * @enable: enable or disable interrupt ring buffers + * + * Toggle all the available interrupt ring buffers (VEGA20). 
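+ * Returns 0 on success or a negative error code if programming a ring buffer fails.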
+ */ +static int vega20_ih_toggle_interrupts(struct amdgpu_device *adev, bool enable) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2}; + int i; + int r; + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + r = vega20_ih_toggle_ring_interrupts(adev, ih[i], enable); + if (r) + return r; + } + } + + return 0; +} + +static uint32_t vega20_ih_rb_cntl(struct amdgpu_ih_ring *ih, uint32_t ih_rb_cntl) +{ + int rb_bufsz = order_base_2(ih->ring_size / 4); + + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + MC_SPACE, ih->use_bus_addr ? 1 : 4); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_CLEAR, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_OVERFLOW_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, RB_SIZE, rb_bufsz); + /* Ring Buffer write pointer writeback. If enabled, IH_RB_WPTR register + * value is written to memory + */ + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, + WPTR_WRITEBACK_ENABLE, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_SNOOP, 1); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_RO, 0); + ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, MC_VMID, 0); + + return ih_rb_cntl; +} + +static uint32_t vega20_ih_doorbell_rptr(struct amdgpu_ih_ring *ih) +{ + u32 ih_doorbell_rtpr = 0; + + if (ih->use_doorbell) { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, OFFSET, + ih->doorbell_index); + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 1); + } else { + ih_doorbell_rtpr = REG_SET_FIELD(ih_doorbell_rtpr, + IH_DOORBELL_RPTR, + ENABLE, 0); + } + return ih_doorbell_rtpr; +} + +/** + * vega20_ih_enable_ring - enable an ih ring buffer + * + * @adev: amdgpu_device pointer + * @ih: amdgpu_ih_ring pointer + * + * Enable an ih ring buffer (VEGA20) + */ +static int vega20_ih_enable_ring(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + uint32_t tmp; + + ih_regs = &ih->ih_regs; + + /* Ring Buffer base. 
[39:8] of 40-bit address of the beginning of the ring buffer */ + WREG32(ih_regs->ih_rb_base, ih->gpu_addr >> 8); + WREG32(ih_regs->ih_rb_base_hi, (ih->gpu_addr >> 40) & 0xff); + + tmp = RREG32(ih_regs->ih_rb_cntl); + tmp = vega20_ih_rb_cntl(ih, tmp); + if (ih == &adev->irq.ih) + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RPTR_REARM, !!adev->irq.msi_enabled); + if (ih == &adev->irq.ih1) { + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_ENABLE, 0); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_FULL_DRAIN_ENABLE, 1); + } + if (amdgpu_sriov_vf(adev)) { + if (psp_reg_program(&adev->psp, ih_regs->psp_reg_id, tmp)) { + dev_err(adev->dev, "PSP program IH_RB_CNTL failed!\n"); + return -ETIMEDOUT; + } + } else { + WREG32(ih_regs->ih_rb_cntl, tmp); + } + + if (ih == &adev->irq.ih) { + /* set the ih ring 0 writeback address whether it's enabled or not */ + WREG32(ih_regs->ih_rb_wptr_addr_lo, lower_32_bits(ih->wptr_addr)); + WREG32(ih_regs->ih_rb_wptr_addr_hi, upper_32_bits(ih->wptr_addr) & 0xFFFF); + } + + /* set rptr, wptr to 0 */ + WREG32(ih_regs->ih_rb_wptr, 0); + WREG32(ih_regs->ih_rb_rptr, 0); + + WREG32(ih_regs->ih_doorbell_rptr, vega20_ih_doorbell_rptr(ih)); + + return 0; +} + +/** + * vega20_ih_reroute_ih - reroute VMC/UTCL2 ih to an ih ring + * + * @adev: amdgpu_device pointer + * + * Reroute VMC and UTCL2 interrupts on the primary ih ring to + * ih ring 1 so they won't be lost when bunches of page fault + * interrupts overwhelm the interrupt handler (VEGA20) + */ +static void vega20_ih_reroute_ih(struct amdgpu_device *adev) +{ + uint32_t tmp; + + /* vega20 ih reroute will go through psp + * this function is only used for arcturus + */ + if (adev->asic_type == CHIP_ARCTURUS) { + /* Reroute to IH ring 1 for VMC */ + WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12); + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, CLIENT_TYPE, 1); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp); + + /* Reroute IH ring 1 for UTCL2 */ + WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x1B); + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA); + tmp = REG_SET_FIELD(tmp, IH_CLIENT_CFG_DATA, RING_ID, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA, tmp); + } +} + +/** + * vega20_ih_irq_init - init and enable the interrupt ring + * + * @adev: amdgpu_device pointer + * + * Allocate a ring buffer for the interrupt controller, + * enable the RLC, disable interrupts, enable the IH + * ring buffer and enable it (VEGA20). + * Called at device load and resume. + * Returns 0 for success, errors for failure.
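+ * On Arcturus, IH ring 1 additionally receives the VMC/UTCL2 interrupts rerouted to it by vega20_ih_reroute_ih().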
+ */ +static int vega20_ih_irq_init(struct amdgpu_device *adev) +{ + struct amdgpu_ih_ring *ih[] = {&adev->irq.ih, &adev->irq.ih1, &adev->irq.ih2}; + u32 ih_chicken; + int ret; + int i; + u32 tmp; + + /* disable irqs */ + ret = vega20_ih_toggle_interrupts(adev, false); + if (ret) + return ret; + + adev->nbio.funcs->ih_control(adev); + + if (adev->asic_type == CHIP_ARCTURUS && + adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { + ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN); + if (adev->irq.ih.use_bus_addr) { + ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN, + MC_SPACE_GPA_ENABLE, 1); + } + WREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN, ih_chicken); + } + + for (i = 0; i < ARRAY_SIZE(ih); i++) { + if (ih[i]->ring_size) { + if (i == 1) + vega20_ih_reroute_ih(adev); + ret = vega20_ih_enable_ring(adev, ih[i]); + if (ret) + return ret; + } + } + + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL); + tmp = REG_SET_FIELD(tmp, IH_STORM_CLIENT_LIST_CNTL, + CLIENT18_IS_STORM_CLIENT, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_STORM_CLIENT_LIST_CNTL, tmp); + + tmp = RREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL); + tmp = REG_SET_FIELD(tmp, IH_INT_FLOOD_CNTL, FLOOD_CNTL_ENABLE, 1); + WREG32_SOC15(OSSSYS, 0, mmIH_INT_FLOOD_CNTL, tmp); + + pci_set_master(adev->pdev); + + /* enable interrupts */ + ret = vega20_ih_toggle_interrupts(adev, true); + if (ret) + return ret; + + if (adev->irq.ih_soft.ring_size) + adev->irq.ih_soft.enabled = true; + + return 0; +} + +/** + * vega20_ih_irq_disable - disable interrupts + * + * @adev: amdgpu_device pointer + * + * Disable interrupts on the hw (VEGA20). + */ +static void vega20_ih_irq_disable(struct amdgpu_device *adev) +{ + vega20_ih_toggle_interrupts(adev, false); + + /* Wait and acknowledge irq */ + mdelay(1); +} + +/** + * vega20_ih_get_wptr - get the IH ring buffer wptr + * + * @adev: amdgpu_device pointer + * + * Get the IH ring buffer wptr from either the register + * or the writeback memory buffer (VEGA20). Also check for + * ring buffer overflow and deal with it. + * Returns the value of the wptr. + */ +static u32 vega20_ih_get_wptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + u32 wptr, tmp; + struct amdgpu_ih_regs *ih_regs; + + wptr = le32_to_cpu(*ih->wptr_cpu); + ih_regs = &ih->ih_regs; + + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + /* Double check that the overflow wasn't already cleared. */ + wptr = RREG32_NO_KIQ(ih_regs->ih_rb_wptr); + if (!REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) + goto out; + + wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0); + + /* When a ring buffer overflow happen start parsing interrupt + * from the last not overwritten vector (wptr + 32). Hopefully + * this should allow us to catchup. 
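+ * (IV entries are 32 bytes each, so wptr + 32 is the oldest entry that + * has not yet been overwritten.)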
+ */ + tmp = (wptr + 32) & ih->ptr_mask; + dev_warn(adev->dev, "IH ring buffer overflow " + "(0x%08X, 0x%08X, 0x%08X)\n", + wptr, ih->rptr, tmp); + ih->rptr = tmp; + + tmp = RREG32_NO_KIQ(ih_regs->ih_rb_cntl); + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1); + WREG32_NO_KIQ(ih_regs->ih_rb_cntl, tmp); + +out: + return (wptr & ih->ptr_mask); +} + +/** + * vega20_ih_irq_rearm - rearm IRQ if lost + * + * @adev: amdgpu_device pointer + * + */ +static void vega20_ih_irq_rearm(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + uint32_t v = 0; + uint32_t i = 0; + struct amdgpu_ih_regs *ih_regs; + + ih_regs = &ih->ih_regs; + + /* Rearm IRQ / re-wwrite doorbell if doorbell write is lost */ + for (i = 0; i < MAX_REARM_RETRY; i++) { + v = RREG32_NO_KIQ(ih_regs->ih_rb_rptr); + if ((v < ih->ring_size) && (v != ih->rptr)) + WDOORBELL32(ih->doorbell_index, ih->rptr); + else + break; + } +} + +/** + * vega20_ih_set_rptr - set the IH ring buffer rptr + * + * @adev: amdgpu_device pointer + * + * Set the IH ring buffer rptr. + */ +static void vega20_ih_set_rptr(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + struct amdgpu_ih_regs *ih_regs; + + if (ih->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + *ih->rptr_cpu = ih->rptr; + WDOORBELL32(ih->doorbell_index, ih->rptr); + + if (amdgpu_sriov_vf(adev)) + vega20_ih_irq_rearm(adev, ih); + } else { + ih_regs = &ih->ih_regs; + WREG32(ih_regs->ih_rb_rptr, ih->rptr); + } +} + +/** + * vega20_ih_self_irq - dispatch work for ring 1 and 2 + * + * @adev: amdgpu_device pointer + * @source: irq source + * @entry: IV with WPTR update + * + * Update the WPTR from the IV and schedule work to handle the entries. + */ +static int vega20_ih_self_irq(struct amdgpu_device *adev, + struct amdgpu_irq_src *source, + struct amdgpu_iv_entry *entry) +{ + uint32_t wptr = cpu_to_le32(entry->src_data[0]); + + switch (entry->ring_id) { + case 1: + *adev->irq.ih1.wptr_cpu = wptr; + schedule_work(&adev->irq.ih1_work); + break; + case 2: + *adev->irq.ih2.wptr_cpu = wptr; + schedule_work(&adev->irq.ih2_work); + break; + default: break; + } + return 0; +} + +static const struct amdgpu_irq_src_funcs vega20_ih_self_irq_funcs = { + .process = vega20_ih_self_irq, +}; + +static void vega20_ih_set_self_irq_funcs(struct amdgpu_device *adev) +{ + adev->irq.self_irq.num_types = 0; + adev->irq.self_irq.funcs = &vega20_ih_self_irq_funcs; +} + +static int vega20_ih_early_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + vega20_ih_set_interrupt_funcs(adev); + vega20_ih_set_self_irq_funcs(adev); + return 0; +} + +static int vega20_ih_sw_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_IH, 0, + &adev->irq.self_irq); + if (r) + return r; + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true); + if (r) + return r; + + adev->irq.ih.use_doorbell = true; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, PAGE_SIZE, true); + if (r) + return r; + + adev->irq.ih1.use_doorbell = true; + adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1; + + r = amdgpu_ih_ring_init(adev, &adev->irq.ih2, PAGE_SIZE, true); + if (r) + return r; + + adev->irq.ih2.use_doorbell = true; + adev->irq.ih2.doorbell_index = (adev->doorbell_index.ih + 2) << 1; + + /* initialize ih control registers offset */ + vega20_ih_init_register_offset(adev); + + r = 
amdgpu_ih_ring_init(adev, &adev->irq.ih_soft, PAGE_SIZE, true); + if (r) + return r; + + r = amdgpu_irq_init(adev); + + return r; +} + +static int vega20_ih_sw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + amdgpu_irq_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft); + amdgpu_ih_ring_fini(adev, &adev->irq.ih2); + amdgpu_ih_ring_fini(adev, &adev->irq.ih1); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); + + return 0; +} + +static int vega20_ih_hw_init(void *handle) +{ + int r; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + r = vega20_ih_irq_init(adev); + if (r) + return r; + + return 0; +} + +static int vega20_ih_hw_fini(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + vega20_ih_irq_disable(adev); + + return 0; +} + +static int vega20_ih_suspend(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return vega20_ih_hw_fini(adev); +} + +static int vega20_ih_resume(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + return vega20_ih_hw_init(adev); +} + +static bool vega20_ih_is_idle(void *handle) +{ + /* todo */ + return true; +} + +static int vega20_ih_wait_for_idle(void *handle) +{ + /* todo */ + return -ETIMEDOUT; +} + +static int vega20_ih_soft_reset(void *handle) +{ + /* todo */ + + return 0; +} + +static void vega20_ih_update_clockgating_state(struct amdgpu_device *adev, + bool enable) +{ + uint32_t data, def, field_val; + + if (adev->cg_flags & AMD_CG_SUPPORT_IH_CG) { + def = data = RREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL); + field_val = enable ? 0 : 1; + data = REG_SET_FIELD(data, IH_CLK_CTRL, + IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + IH_BUFFER_MEM_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DBUS_MUX_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + OSSSYS_SHARE_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + LIMIT_SMN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + DYN_CLK_SOFT_OVERRIDE, field_val); + data = REG_SET_FIELD(data, IH_CLK_CTRL, + REG_CLK_SOFT_OVERRIDE, field_val); + if (def != data) + WREG32_SOC15(OSSSYS, 0, mmIH_CLK_CTRL, data); + } +} + +static int vega20_ih_set_clockgating_state(void *handle, + enum amd_clockgating_state state) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + vega20_ih_update_clockgating_state(adev, + state == AMD_CG_STATE_GATE); + return 0; + +} + +static int vega20_ih_set_powergating_state(void *handle, + enum amd_powergating_state state) +{ + return 0; +} + +const struct amd_ip_funcs vega20_ih_ip_funcs = { + .name = "vega20_ih", + .early_init = vega20_ih_early_init, + .late_init = NULL, + .sw_init = vega20_ih_sw_init, + .sw_fini = vega20_ih_sw_fini, + .hw_init = vega20_ih_hw_init, + .hw_fini = vega20_ih_hw_fini, + .suspend = vega20_ih_suspend, + .resume = vega20_ih_resume, + .is_idle = vega20_ih_is_idle, + .wait_for_idle = vega20_ih_wait_for_idle, + .soft_reset = vega20_ih_soft_reset, + .set_clockgating_state = vega20_ih_set_clockgating_state, + .set_powergating_state = vega20_ih_set_powergating_state, +}; + +static const struct amdgpu_ih_funcs vega20_ih_funcs = { + .get_wptr = vega20_ih_get_wptr, + .decode_iv = amdgpu_ih_decode_iv_helper, + .set_rptr = vega20_ih_set_rptr +}; + +static void vega20_ih_set_interrupt_funcs(struct amdgpu_device *adev) +{ + adev->irq.ih_funcs = &vega20_ih_funcs; +} + +const 
struct amdgpu_ip_block_version vega20_ih_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_IH, + .major = 4, + .minor = 2, + .rev = 0, + .funcs = &vega20_ih_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.h b/drivers/gpu/drm/amd/amdgpu/vega20_ih.h new file mode 100644 index 000000000000..7af6d8758ee3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.h @@ -0,0 +1,30 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __VEGA20_IH_H__ +#define __VEGA20_IH_H__ + +extern const struct amd_ip_funcs vega20_ih_ip_funcs; +extern const struct amdgpu_ip_block_version vega20_ih_ip_block; + +#endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c index 241bd6ff79f4..74a460be077b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_int_process_v9.c @@ -44,6 +44,25 @@ static bool event_interrupt_isr_v9(struct kfd_dev *dev, client_id = SOC15_CLIENT_ID_FROM_IH_ENTRY(ih_ring_entry); pasid = SOC15_PASID_FROM_IH_ENTRY(ih_ring_entry); + /* Only handle clients we care about */ + if (client_id != SOC15_IH_CLIENTID_GRBM_CP && + client_id != SOC15_IH_CLIENTID_SDMA0 && + client_id != SOC15_IH_CLIENTID_SDMA1 && + client_id != SOC15_IH_CLIENTID_SDMA2 && + client_id != SOC15_IH_CLIENTID_SDMA3 && + client_id != SOC15_IH_CLIENTID_SDMA4 && + client_id != SOC15_IH_CLIENTID_SDMA5 && + client_id != SOC15_IH_CLIENTID_SDMA6 && + client_id != SOC15_IH_CLIENTID_SDMA7 && + client_id != SOC15_IH_CLIENTID_VMC && + client_id != SOC15_IH_CLIENTID_VMC1 && + client_id != SOC15_IH_CLIENTID_UTCL2 && + client_id != SOC15_IH_CLIENTID_SE0SH && + client_id != SOC15_IH_CLIENTID_SE1SH && + client_id != SOC15_IH_CLIENTID_SE2SH && + client_id != SOC15_IH_CLIENTID_SE3SH) + return false; + /* This is a known issue for gfx9. Under non HWS, pasid is not set * in the interrupt payload, so we need to find out the pasid on our * own. 
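The client-id whitelist added in the hunk above is one long chain of != comparisons; the same filter reads more compactly as a switch. A minimal sketch, assuming the SOC15_IH_CLIENTID_* values from the driver's soc15_ih_clientid.h; the helper name is hypothetical and the patch itself keeps the inline chain:

/* Hypothetical helper, equivalent to the comparison chain above;
 * the case labels are exactly the clients the ISR agrees to handle.
 */
static bool kfd_client_id_is_handled(uint16_t client_id)
{
	switch (client_id) {
	case SOC15_IH_CLIENTID_GRBM_CP:
	case SOC15_IH_CLIENTID_SDMA0:
	case SOC15_IH_CLIENTID_SDMA1:
	case SOC15_IH_CLIENTID_SDMA2:
	case SOC15_IH_CLIENTID_SDMA3:
	case SOC15_IH_CLIENTID_SDMA4:
	case SOC15_IH_CLIENTID_SDMA5:
	case SOC15_IH_CLIENTID_SDMA6:
	case SOC15_IH_CLIENTID_SDMA7:
	case SOC15_IH_CLIENTID_VMC:
	case SOC15_IH_CLIENTID_VMC1:
	case SOC15_IH_CLIENTID_UTCL2:
	case SOC15_IH_CLIENTID_SE0SH:
	case SOC15_IH_CLIENTID_SE1SH:
	case SOC15_IH_CLIENTID_SE2SH:
	case SOC15_IH_CLIENTID_SE3SH:
		return true;
	default:
		return false;
	}
}

Either form expresses the same whitelist; everything else is dropped before the more expensive work in the worker-queue handler below.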
@@ -96,17 +115,30 @@ static void event_interrupt_wq_v9(struct kfd_dev *dev, vmid = SOC15_VMID_FROM_IH_ENTRY(ih_ring_entry); context_id = SOC15_CONTEXT_ID0_FROM_IH_ENTRY(ih_ring_entry); - if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) - kfd_signal_event_interrupt(pasid, context_id, 32); - else if (source_id == SOC15_INTSRC_SDMA_TRAP) - kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28); - else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) - kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24); - else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) - kfd_signal_hw_exception_event(pasid); - else if (client_id == SOC15_IH_CLIENTID_VMC || - client_id == SOC15_IH_CLIENTID_VMC1 || - client_id == SOC15_IH_CLIENTID_UTCL2) { + if (client_id == SOC15_IH_CLIENTID_GRBM_CP || + client_id == SOC15_IH_CLIENTID_SE0SH || + client_id == SOC15_IH_CLIENTID_SE1SH || + client_id == SOC15_IH_CLIENTID_SE2SH || + client_id == SOC15_IH_CLIENTID_SE3SH) { + if (source_id == SOC15_INTSRC_CP_END_OF_PIPE) + kfd_signal_event_interrupt(pasid, context_id, 32); + else if (source_id == SOC15_INTSRC_SQ_INTERRUPT_MSG) + kfd_signal_event_interrupt(pasid, context_id & 0xffffff, 24); + else if (source_id == SOC15_INTSRC_CP_BAD_OPCODE) + kfd_signal_hw_exception_event(pasid); + } else if (client_id == SOC15_IH_CLIENTID_SDMA0 || + client_id == SOC15_IH_CLIENTID_SDMA1 || + client_id == SOC15_IH_CLIENTID_SDMA2 || + client_id == SOC15_IH_CLIENTID_SDMA3 || + client_id == SOC15_IH_CLIENTID_SDMA4 || + client_id == SOC15_IH_CLIENTID_SDMA5 || + client_id == SOC15_IH_CLIENTID_SDMA6 || + client_id == SOC15_IH_CLIENTID_SDMA7) { + if (source_id == SOC15_INTSRC_SDMA_TRAP) + kfd_signal_event_interrupt(pasid, context_id & 0xfffffff, 28); + } else if (client_id == SOC15_IH_CLIENTID_VMC || + client_id == SOC15_IH_CLIENTID_VMC1 || + client_id == SOC15_IH_CLIENTID_UTCL2) { struct kfd_vm_fault_info info = {0}; uint16_t ring_id = SOC15_RING_ID_FROM_IH_ENTRY(ih_ring_entry); diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig index 797b5d4b43e5..e509a175ed17 100644 --- a/drivers/gpu/drm/amd/display/Kconfig +++ b/drivers/gpu/drm/amd/display/Kconfig @@ -6,7 +6,7 @@ config DRM_AMD_DC bool "AMD DC - Enable new display engine" default y select SND_HDA_COMPONENT if SND_HDA_CORE - select DRM_AMD_DC_DCN if (X86 || PPC64 || (ARM64 && KERNEL_MODE_NEON)) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) + select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS) help Choose this option if you want to use the new display engine support for AMDGPU. 
This adds required support for Vega and diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 86c2b2c897bb..6bd495db7a04 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -60,7 +60,6 @@ #include <linux/module.h> #include <linux/moduleparam.h> -#include <linux/version.h> #include <linux/types.h> #include <linux/pm_runtime.h> #include <linux/pci.h> @@ -2386,8 +2385,7 @@ void amdgpu_dm_update_connector_after_detect( drm_connector_update_edid_property(connector, aconnector->edid); - aconnector->num_modes = drm_add_edid_modes(connector, aconnector->edid); - drm_connector_list_update(connector); + drm_add_edid_modes(connector, aconnector->edid); if (aconnector->dc_link->aux_mode) drm_dp_cec_set_edid(&aconnector->dm_dp_aux.aux, @@ -3760,10 +3758,53 @@ static const struct drm_encoder_funcs amdgpu_dm_encoder_funcs = { }; +static void get_min_max_dc_plane_scaling(struct drm_device *dev, + struct drm_framebuffer *fb, + int *min_downscale, int *max_upscale) +{ + struct amdgpu_device *adev = drm_to_adev(dev); + struct dc *dc = adev->dm.dc; + /* Caps for all supported planes are the same on DCE and DCN 1 - 3 */ + struct dc_plane_cap *plane_cap = &dc->caps.planes[0]; + + switch (fb->format->format) { + case DRM_FORMAT_P010: + case DRM_FORMAT_NV12: + case DRM_FORMAT_NV21: + *max_upscale = plane_cap->max_upscale_factor.nv12; + *min_downscale = plane_cap->max_downscale_factor.nv12; + break; + + case DRM_FORMAT_XRGB16161616F: + case DRM_FORMAT_ARGB16161616F: + case DRM_FORMAT_XBGR16161616F: + case DRM_FORMAT_ABGR16161616F: + *max_upscale = plane_cap->max_upscale_factor.fp16; + *min_downscale = plane_cap->max_downscale_factor.fp16; + break; + + default: + *max_upscale = plane_cap->max_upscale_factor.argb8888; + *min_downscale = plane_cap->max_downscale_factor.argb8888; + break; + } + + /* + * A factor of 1 in the plane_cap means to not allow scaling, ie. use a + * scaling factor of 1.0 == 1000 units. + */ + if (*max_upscale == 1) + *max_upscale = 1000; + + if (*min_downscale == 1) + *min_downscale = 1000; +} + + static int fill_dc_scaling_info(const struct drm_plane_state *state, struct dc_scaling_info *scaling_info) { - int scale_w, scale_h; + int scale_w, scale_h, min_downscale, max_upscale; memset(scaling_info, 0, sizeof(*scaling_info)); @@ -3795,17 +3836,25 @@ static int fill_dc_scaling_info(const struct drm_plane_state *state, /* DRM doesn't specify clipping on destination output. 
*/ scaling_info->clip_rect = scaling_info->dst_rect; - /* TODO: Validate scaling per-format with DC plane caps */ + /* Validate scaling per-format with DC plane caps */ + if (state->plane && state->plane->dev && state->fb) { + get_min_max_dc_plane_scaling(state->plane->dev, state->fb, + &min_downscale, &max_upscale); + } else { + min_downscale = 250; + max_upscale = 16000; + } + scale_w = scaling_info->dst_rect.width * 1000 / scaling_info->src_rect.width; - if (scale_w < 250 || scale_w > 16000) + if (scale_w < min_downscale || scale_w > max_upscale) return -EINVAL; scale_h = scaling_info->dst_rect.height * 1000 / scaling_info->src_rect.height; - if (scale_h < 250 || scale_h > 16000) + if (scale_h < min_downscale || scale_h > max_upscale) return -EINVAL; /* @@ -5414,6 +5463,7 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc); struct amdgpu_device *adev = drm_to_adev(crtc->dev); struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state); + struct amdgpu_display_manager *dm = &adev->dm; int rc = 0; if (enable) { @@ -5429,7 +5479,27 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable) return rc; irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst; - return dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY; + + if (!dc_interrupt_set(adev->dm.dc, irq_source, enable)) + return -EBUSY; + + mutex_lock(&dm->dc_lock); + + if (enable) + dm->active_vblank_irq_count++; + else + dm->active_vblank_irq_count--; + +#if defined(CONFIG_DRM_AMD_DC_DCN) + dc_allow_idle_optimizations( + adev->dm.dc, dm->active_vblank_irq_count == 0 ? true : false); + + DRM_DEBUG_DRIVER("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0); +#endif + + mutex_unlock(&dm->dc_lock); + + return 0; } static int dm_enable_vblank(struct drm_crtc *crtc) @@ -6424,12 +6494,26 @@ static void dm_plane_helper_cleanup_fb(struct drm_plane *plane, static int dm_plane_helper_check_state(struct drm_plane_state *state, struct drm_crtc_state *new_crtc_state) { - int max_downscale = 0; - int max_upscale = INT_MAX; + struct drm_framebuffer *fb = state->fb; + int min_downscale, max_upscale; + int min_scale = 0; + int max_scale = INT_MAX; + + /* Plane enabled? Get min/max allowed scaling factors from plane caps. */ + if (fb && state->crtc) { + get_min_max_dc_plane_scaling(state->crtc->dev, fb, + &min_downscale, &max_upscale); + /* + * Convert to drm convention: 16.16 fixed point, instead of dc's + * 1.0 == 1000. Also drm scaling is src/dst instead of dc's + * dst/src, so min_scale = 1.0 / max_upscale, etc. 
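+ * For example, a 16x upscale cap (max_upscale == 16000) becomes + * min_scale = (1000 << 16) / 16000 = 4096, i.e. 1/16 in 16.16 fixed point.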
+ */ + min_scale = (1000 << 16) / max_upscale; + max_scale = (1000 << 16) / min_downscale; + } - /* TODO: These should be checked against DC plane caps */ return drm_atomic_helper_check_plane_state( - state, new_crtc_state, max_downscale, max_upscale, true, true); + state, new_crtc_state, min_scale, max_scale, true, true); } static int dm_plane_atomic_check(struct drm_plane *plane, @@ -8378,8 +8462,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) acrtc->dm_irq_params.stream = dm_new_crtc_state->stream; manage_dm_interrupts(adev, acrtc, true); } -#ifdef CONFIG_DEBUG_FS - if (new_crtc_state->active && + if (IS_ENABLED(CONFIG_DEBUG_FS) && new_crtc_state->active && amdgpu_dm_is_valid_crc_source(dm_new_crtc_state->crc_src)) { /** * Frontend may have changed so reapply the CRC capture @@ -8400,7 +8483,6 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state) amdgpu_dm_crtc_configure_crc_source( crtc, dm_new_crtc_state, dm_new_crtc_state->crc_src); } -#endif } for_each_new_crtc_in_state(state, crtc, new_crtc_state, j) diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h index 2ee6edb3df93..f084e2fc9569 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h @@ -336,6 +336,13 @@ struct amdgpu_display_manager { */ const struct gpu_info_soc_bounding_box_v1_0 *soc_bounding_box; + /** + * @active_vblank_irq_count + * + * number of currently active vblank irqs + */ + uint32_t active_vblank_irq_count; + #ifdef CONFIG_DEBUG_FS /** * @crc_win_x_start_property: diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h index 0235bfb246e5..eba2f1d35d07 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h @@ -46,13 +46,13 @@ static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source } /* amdgpu_dm_crc.c */ -#ifdef CONFIG_DEBUG_FS bool amdgpu_dm_crc_window_is_default(struct dm_crtc_state *dm_crtc_state); bool amdgpu_dm_crc_window_changed(struct dm_crtc_state *dm_new_crtc_state, struct dm_crtc_state *dm_old_crtc_state); int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc, struct dm_crtc_state *dm_crtc_state, enum amdgpu_dm_pipe_crc_source source); +#ifdef CONFIG_DEBUG_FS int amdgpu_dm_crtc_set_crc_source(struct drm_crtc *crtc, const char *src_name); int amdgpu_dm_crtc_verify_crc_source(struct drm_crtc *crtc, const char *src_name, diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c index f6f487e9fe2d..3244a6ea7a65 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c @@ -25,7 +25,6 @@ #include <linux/string.h> #include <linux/acpi.h> -#include <linux/version.h> #include <linux/i2c.h> #include <drm/drm_probe_helper.h> diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c index 8ab0b9060d2b..5b0a4a7479e2 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c @@ -23,7 +23,6 @@ * */ -#include <linux/version.h> #include <drm/drm_atomic.h> #include <drm/drm_atomic_helper.h> #include <drm/drm_dp_mst_helper.h> diff --git 
a/drivers/gpu/drm/amd/display/dc/calcs/Makefile b/drivers/gpu/drm/amd/display/dc/calcs/Makefile index 64f515d74410..f3c00f479e1c 100644 --- a/drivers/gpu/drm/amd/display/dc/calcs/Makefile +++ b/drivers/gpu/drm/amd/display/dc/calcs/Makefile @@ -33,10 +33,6 @@ ifdef CONFIG_PPC64 calcs_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -calcs_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile index d59b380e7b7f..ff96bee57bfc 100644 --- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile +++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile @@ -104,13 +104,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21) @@ -125,13 +118,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30) @@ -146,13 +132,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute) endif -# prevent build errors: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# this file is unused on arm64, just like on ppc64 -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := -mgeneral-regs-only -endif - AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301)) AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301) diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c index 58eb0d69873a..8f1cadb823c7 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc.c @@ -964,19 +964,15 @@ struct dc *dc_create(const struct dc_init_data *init_params) struct dc *dc = kzalloc(sizeof(*dc), GFP_KERNEL); unsigned int full_pipe_count; - if (NULL == dc) - goto alloc_fail; + if (!dc) + return NULL; if (init_params->dce_environment == DCE_ENV_VIRTUAL_HW) { - if (false == dc_construct_ctx(dc, init_params)) { - dc_destruct(dc); - goto construct_fail; - } + if (!dc_construct_ctx(dc, init_params)) + goto destruct_dc; } else { - if (false == dc_construct(dc, init_params)) { - dc_destruct(dc); - goto construct_fail; - } + if (!dc_construct(dc, init_params)) + goto destruct_dc; full_pipe_count = dc->res_pool->pipe_count; if (dc->res_pool->underlay_pipe_index != NO_UNDERLAY_PIPE) @@ -1007,10 +1003,9 @@ struct dc *dc_create(const struct dc_init_data *init_params) return dc; -construct_fail: +destruct_dc: + dc_destruct(dc); kfree(dc); - -alloc_fail: return NULL; } @@ -1493,7 +1488,7 @@ bool dc_commit_state(struct dc *dc, struct 
dc_state *context) enum dc_status result = DC_ERROR_UNEXPECTED; int i; - if (false == context_changed(dc, context)) + if (!context_changed(dc, context)) return DC_OK; DC_LOG_DC("%s: %d streams\n", @@ -1540,7 +1535,7 @@ bool dc_acquire_release_mpc_3dlut( if (found_pipe_idx) { if (acquire && pool->funcs->acquire_post_bldn_3dlut) ret = pool->funcs->acquire_post_bldn_3dlut(res_ctx, pool, mpcc_id, lut, shaper); - else if (acquire == false && pool->funcs->release_post_bldn_3dlut) + else if (!acquire && pool->funcs->release_post_bldn_3dlut) ret = pool->funcs->release_post_bldn_3dlut(res_ctx, pool, lut, shaper); } } @@ -3143,9 +3138,11 @@ void dc_lock_memory_clock_frequency(struct dc *dc) core_link_enable_stream(dc->current_state, &dc->current_state->res_ctx.pipe_ctx[i]); } -bool dc_is_plane_eligible_for_idle_optimizaitons(struct dc *dc, - struct dc_plane_state *plane) +bool dc_is_plane_eligible_for_idle_optimizaitons(struct dc *dc, struct dc_plane_state *plane) { + if (dc->hwss.does_plane_fit_in_mall && dc->hwss.does_plane_fit_in_mall(dc, plane)) + return true; + return false; } diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c index 9e1071b2181f..f4a2088ab179 100644 --- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c +++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c @@ -2487,9 +2487,14 @@ enum dc_status dc_link_validate_mode_timing( static struct abm *get_abm_from_stream_res(const struct dc_link *link) { int i; - struct dc *dc = link->ctx->dc; + struct dc *dc = NULL; struct abm *abm = NULL; + if (!link || !link->ctx) + return NULL; + + dc = link->ctx->dc; + for (i = 0; i < MAX_PIPES; i++) { struct pipe_ctx pipe_ctx = dc->current_state->res_ctx.pipe_ctx[i]; struct dc_stream_state *stream = pipe_ctx.stream; diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h index 3aedadb34548..90fdddb72e3b 100644 --- a/drivers/gpu/drm/amd/display/dc/dc.h +++ b/drivers/gpu/drm/amd/display/dc/dc.h @@ -171,6 +171,9 @@ struct dc_caps { bool dmcub_support; uint32_t num_of_internal_disp; enum dp_protocol_version max_dp_protocol_version; + unsigned int mall_size_per_mem_channel; + unsigned int mall_size_total; + unsigned int cursor_cache_size; struct dc_plane_cap planes[MAX_PLANES]; struct dc_color_caps color; }; @@ -499,6 +502,7 @@ struct dc_debug_options { bool dmcub_emulation; #if defined(CONFIG_DRM_AMD_DC_DCN) bool disable_idle_power_optimizations; + unsigned int mall_size_override; #endif bool dmub_command_table; /* for testing only */ struct dc_bw_validation_profile bw_val_profile; diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h index 701aa7178a89..b41e6367b15e 100644 --- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h +++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h @@ -71,6 +71,7 @@ struct dc_plane_address { union { struct{ PHYSICAL_ADDRESS_LOC addr; + PHYSICAL_ADDRESS_LOC cursor_cache_addr; PHYSICAL_ADDRESS_LOC meta_addr; union large_integer dcc_const_color; } grph; diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c index 8ab9d6c79808..f20ed05a5050 100644 --- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_resource.c @@ -385,7 +385,7 @@ static const struct dc_plane_cap plane_cap = { .pixel_format_support = { .argb8888 = true, .nv12 = false, - .fp16 = false + .fp16 = true }, .max_upscale_factor = { diff --git 
a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c index 3f63822b8e28..af208f9bd03b 100644 --- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_resource.c @@ -410,7 +410,7 @@ static const struct dc_plane_cap plane_cap = { .pixel_format_support = { .argb8888 = true, .nv12 = false, - .fp16 = false + .fp16 = true }, .max_upscale_factor = { diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c index 390a0fa37239..26fe25caa281 100644 --- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c @@ -402,7 +402,7 @@ static const struct dc_plane_cap plane_cap = { .pixel_format_support = { .argb8888 = true, .nv12 = false, - .fp16 = false + .fp16 = true }, .max_upscale_factor = { diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile index 733e6e6e43bd..62ad1a11bff9 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn10/Makefile @@ -31,11 +31,4 @@ DCN10 = dcn10_init.o dcn10_resource.o dcn10_ipp.o dcn10_hw_sequencer.o \ AMD_DAL_DCN10 = $(addprefix $(AMDDALPATH)/dc/dcn10/,$(DCN10)) -# fix: -# ...: '-mgeneral-regs-only' is incompatible with the use of floating-point types -# aarch64 does not support soft-float, so use hard-float and handle this in code -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn10/dcn10_resource.o := -mgeneral-regs-only -endif - AMD_DISPLAY_FILES += $(AMD_DAL_DCN10) diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c index bdc37831535e..36745193c391 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c @@ -1534,15 +1534,8 @@ static bool dcn10_resource_construct( memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults)); memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults)); -#if defined(CONFIG_ARM64) - /* Aarch64 does not support -msoft-float/-mfloat-abi=soft */ - DC_FP_START(); - dcn10_resource_construct_fp(dc); - DC_FP_END(); -#else /* Other architectures we build for build this with soft-float */ dcn10_resource_construct_fp(dc); -#endif pool->base.pp_smu = dcn10_pp_smu_create(ctx); diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile index 624cb1341ef1..5fcaf78334ff 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn20/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn20/Makefile @@ -17,10 +17,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn20/dcn20_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile index 1ee5fc03b7b3..bb8c95141082 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn21/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn21/Makefile @@ -13,10 +13,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn21/dcn21_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) 
IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile index 248c2711aace..c20331eb62e0 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn30/Makefile @@ -41,11 +41,6 @@ CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mhard-float -maltivec CFLAGS_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_resource.o := -mgeneral-regs-only -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn30/dcn30_optc.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c index 3deb3fb1724d..b83c13d3d8b7 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c @@ -814,6 +814,19 @@ bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable) return true; } +bool dcn30_does_plane_fit_in_mall(struct dc *dc, struct dc_plane_state *plane) +{ + // add meta size? + unsigned int surface_size = plane->plane_size.surface_pitch * plane->plane_size.surface_size.height * + (plane->format >= SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 ? 8 : 4); + unsigned int mall_size = dc->caps.mall_size_total; + + if (dc->debug.mall_size_override) + mall_size = 1024 * 1024 * dc->debug.mall_size_override; + + return (surface_size + dc->caps.cursor_cache_size) < mall_size; +} + void dcn30_hardware_release(struct dc *dc) { /* if pstate unsupported, force it supported */ diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h index 7d32c43aafe0..bfc97e2ece61 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.h @@ -65,6 +65,8 @@ void dcn30_set_avmute(struct pipe_ctx *pipe_ctx, bool enable); void dcn30_update_info_frame(struct pipe_ctx *pipe_ctx); void dcn30_program_dmdata_engine(struct pipe_ctx *pipe_ctx); +bool dcn30_does_plane_fit_in_mall(struct dc *dc, struct dc_plane_state *plane); + bool dcn30_apply_idle_power_optimizations(struct dc *dc, bool enable); void dcn30_hardware_release(struct dc *dc); diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c index 6125fe440ad0..87c74aa84406 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_init.c @@ -91,6 +91,7 @@ static const struct hw_sequencer_funcs dcn30_funcs = { .get_vupdate_offset_from_vsync = dcn10_get_vupdate_offset_from_vsync, .calc_vupdate_position = dcn10_calc_vupdate_position, .apply_idle_power_optimizations = dcn30_apply_idle_power_optimizations, + .does_plane_fit_in_mall = dcn30_does_plane_fit_in_mall, .set_backlight_level = dcn21_set_backlight_level, .set_abm_immediate_disable = dcn21_set_abm_immediate_disable, .hardware_release = dcn30_hardware_release, diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c index 5e126fdf6ec1..e5bb15d8487b 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c @@ -2631,6 +2631,10 @@ static bool dcn30_resource_construct( dc->caps.max_cursor_size = 256; dc->caps.min_horizontal_blanking_period = 80; dc->caps.dmdata_alloc_size = 2048; + 
dc->caps.mall_size_per_mem_channel = 8; + /* total size = mall per channel * num channels * 1024 * 1024 */ + dc->caps.mall_size_total = dc->caps.mall_size_per_mem_channel * dc->ctx->dc_bios->vram_info.num_chans * 1048576; + dc->caps.cursor_cache_size = dc->caps.max_cursor_size * dc->caps.max_cursor_size * 8; dc->caps.max_slave_planes = 1; dc->caps.post_blend_color_processing = true; diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile index 2fd5d34e4ba6..3ca7d911d25c 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn301/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn301/Makefile @@ -21,10 +21,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn301/dcn301_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile index 36e44e1b07fa..8d4924b7dc22 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dcn302/Makefile @@ -20,10 +20,6 @@ ifdef CONFIG_PPC64 CFLAGS_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -CFLAGS_REMOVE_$(AMDDALPATH)/dc/dcn302/dcn302_resource.o := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c index 808c4dcdb3ac..22ba0be88faf 100644 --- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c +++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c @@ -53,6 +53,7 @@ #include "dce/dce_i2c_hw.h" #include "dce/dce_panel_cntl.h" #include "dce/dmub_abm.h" +#include "dce/dmub_psr.h" #include "hw_sequencer_private.h" #include "reg_helper.h" @@ -238,6 +239,7 @@ static const struct dc_debug_options debug_defaults_diags = { .dwb_fi_phase = -1, // -1 = disable .dmub_command_table = true, .enable_tri_buf = true, + .disable_psr = true, }; enum dcn302_clk_src_array_id { @@ -1213,6 +1215,9 @@ static void dcn302_resource_destruct(struct resource_pool *pool) dce_abm_destroy(&pool->multiple_abms[i]); } + if (pool->psr != NULL) + dmub_psr_destroy(&pool->psr); + if (pool->dccg != NULL) dcn_dccg_destroy(&pool->dccg); } @@ -1354,8 +1359,6 @@ static bool dcn302_resource_construct( if (dc->ctx->dce_environment == DCE_ENV_PRODUCTION_DRV) dc->debug = debug_defaults_drv; - else if (dc->ctx->dce_environment == DCE_ENV_FPGA_MAXIMUS) - dc->debug = debug_defaults_diags; else dc->debug = debug_defaults_diags; @@ -1469,6 +1472,14 @@ static bool dcn302_resource_construct( } pool->timing_generator_count = i; + /* PSR */ + pool->psr = dmub_psr_create(ctx); + if (pool->psr == NULL) { + dm_error("DC: failed to create psr!\n"); + BREAK_TO_DEBUGGER(); + goto create_fail; + } + /* ABMs */ for (i = 0; i < pool->res_cap->num_timing_generator; i++) { pool->multiple_abms[i] = dmub_abm_create(ctx, &abm_regs[i], &abm_shift, &abm_mask); diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile index a02a33dcd70b..6bb7f2905821 100644 --- a/drivers/gpu/drm/amd/display/dc/dml/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile @@ -33,10 +33,6 @@ ifdef CONFIG_PPC64 dml_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -dml_rcflags := 
-mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/dsc/Makefile b/drivers/gpu/drm/amd/display/dc/dsc/Makefile index f2624a1156e5..8d31eb75c6a6 100644 --- a/drivers/gpu/drm/amd/display/dc/dsc/Makefile +++ b/drivers/gpu/drm/amd/display/dc/dsc/Makefile @@ -10,10 +10,6 @@ ifdef CONFIG_PPC64 dsc_ccflags := -mhard-float -maltivec endif -ifdef CONFIG_ARM64 -dsc_rcflags := -mgeneral-regs-only -endif - ifdef CONFIG_CC_IS_GCC ifeq ($(call cc-ifversion, -lt, 0701, y), y) IS_OLD_GCC = 1 diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h index 62804dc7b698..7b12ffcdd4ec 100644 --- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h +++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h @@ -217,6 +217,8 @@ struct hw_sequencer_funcs { /* Idle Optimization Related */ bool (*apply_idle_power_optimizations)(struct dc *dc, bool enable); + bool (*does_plane_fit_in_mall)(struct dc *dc, struct dc_plane_state *plane); + bool (*is_abm_supported)(struct dc *dc, struct dc_state *context, struct dc_stream_state *stream); diff --git a/drivers/gpu/drm/amd/display/dc/os_types.h b/drivers/gpu/drm/amd/display/dc/os_types.h index 95cb56929e79..126c2f3a4dd3 100644 --- a/drivers/gpu/drm/amd/display/dc/os_types.h +++ b/drivers/gpu/drm/amd/display/dc/os_types.h @@ -55,10 +55,6 @@ #include <asm/fpu/api.h> #define DC_FP_START() kernel_fpu_begin() #define DC_FP_END() kernel_fpu_end() -#elif defined(CONFIG_ARM64) -#include <asm/neon.h> -#define DC_FP_START() kernel_neon_begin() -#define DC_FP_END() kernel_neon_end() #elif defined(CONFIG_PPC64) #include <asm/switch_to.h> #include <asm/cputable.h> diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h new file mode 100644 index 000000000000..bd129266ebfd --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_offset.h @@ -0,0 +1,345 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef _osssys_4_2_0_OFFSET_HEADER +#define _osssys_4_2_0_OFFSET_HEADER + + + +// addressBlock: osssys_osssysdec +// base address: 0x4280 +#define mmIH_VMID_0_LUT 0x0000 +#define mmIH_VMID_0_LUT_BASE_IDX 0 +#define mmIH_VMID_1_LUT 0x0001 +#define mmIH_VMID_1_LUT_BASE_IDX 0 +#define mmIH_VMID_2_LUT 0x0002 +#define mmIH_VMID_2_LUT_BASE_IDX 0 +#define mmIH_VMID_3_LUT 0x0003 +#define mmIH_VMID_3_LUT_BASE_IDX 0 +#define mmIH_VMID_4_LUT 0x0004 +#define mmIH_VMID_4_LUT_BASE_IDX 0 +#define mmIH_VMID_5_LUT 0x0005 +#define mmIH_VMID_5_LUT_BASE_IDX 0 +#define mmIH_VMID_6_LUT 0x0006 +#define mmIH_VMID_6_LUT_BASE_IDX 0 +#define mmIH_VMID_7_LUT 0x0007 +#define mmIH_VMID_7_LUT_BASE_IDX 0 +#define mmIH_VMID_8_LUT 0x0008 +#define mmIH_VMID_8_LUT_BASE_IDX 0 +#define mmIH_VMID_9_LUT 0x0009 +#define mmIH_VMID_9_LUT_BASE_IDX 0 +#define mmIH_VMID_10_LUT 0x000a +#define mmIH_VMID_10_LUT_BASE_IDX 0 +#define mmIH_VMID_11_LUT 0x000b +#define mmIH_VMID_11_LUT_BASE_IDX 0 +#define mmIH_VMID_12_LUT 0x000c +#define mmIH_VMID_12_LUT_BASE_IDX 0 +#define mmIH_VMID_13_LUT 0x000d +#define mmIH_VMID_13_LUT_BASE_IDX 0 +#define mmIH_VMID_14_LUT 0x000e +#define mmIH_VMID_14_LUT_BASE_IDX 0 +#define mmIH_VMID_15_LUT 0x000f +#define mmIH_VMID_15_LUT_BASE_IDX 0 +#define mmIH_VMID_0_LUT_MM 0x0010 +#define mmIH_VMID_0_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_1_LUT_MM 0x0011 +#define mmIH_VMID_1_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_2_LUT_MM 0x0012 +#define mmIH_VMID_2_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_3_LUT_MM 0x0013 +#define mmIH_VMID_3_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_4_LUT_MM 0x0014 +#define mmIH_VMID_4_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_5_LUT_MM 0x0015 +#define mmIH_VMID_5_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_6_LUT_MM 0x0016 +#define mmIH_VMID_6_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_7_LUT_MM 0x0017 +#define mmIH_VMID_7_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_8_LUT_MM 0x0018 +#define mmIH_VMID_8_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_9_LUT_MM 0x0019 +#define mmIH_VMID_9_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_10_LUT_MM 0x001a +#define mmIH_VMID_10_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_11_LUT_MM 0x001b +#define mmIH_VMID_11_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_12_LUT_MM 0x001c +#define mmIH_VMID_12_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_13_LUT_MM 0x001d +#define mmIH_VMID_13_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_14_LUT_MM 0x001e +#define mmIH_VMID_14_LUT_MM_BASE_IDX 0 +#define mmIH_VMID_15_LUT_MM 0x001f +#define mmIH_VMID_15_LUT_MM_BASE_IDX 0 +#define mmIH_COOKIE_0 0x0020 +#define mmIH_COOKIE_0_BASE_IDX 0 +#define mmIH_COOKIE_1 0x0021 +#define mmIH_COOKIE_1_BASE_IDX 0 +#define mmIH_COOKIE_2 0x0022 +#define mmIH_COOKIE_2_BASE_IDX 0 +#define mmIH_COOKIE_3 0x0023 +#define mmIH_COOKIE_3_BASE_IDX 0 +#define mmIH_COOKIE_4 0x0024 +#define mmIH_COOKIE_4_BASE_IDX 0 +#define mmIH_COOKIE_5 0x0025 +#define mmIH_COOKIE_5_BASE_IDX 0 +#define mmIH_COOKIE_6 0x0026 +#define mmIH_COOKIE_6_BASE_IDX 0 +#define mmIH_COOKIE_7 0x0027 +#define mmIH_COOKIE_7_BASE_IDX 0 +#define mmIH_REGISTER_LAST_PART0 0x003f +#define mmIH_REGISTER_LAST_PART0_BASE_IDX 0 +#define mmSEM_REQ_INPUT_0 0x0040 +#define mmSEM_REQ_INPUT_0_BASE_IDX 0 +#define mmSEM_REQ_INPUT_1 0x0041 +#define mmSEM_REQ_INPUT_1_BASE_IDX 0 +#define mmSEM_REQ_INPUT_2 0x0042 +#define mmSEM_REQ_INPUT_2_BASE_IDX 0 +#define mmSEM_REQ_INPUT_3 0x0043 +#define mmSEM_REQ_INPUT_3_BASE_IDX 0 +#define mmSEM_REGISTER_LAST_PART0 0x007f +#define mmSEM_REGISTER_LAST_PART0_BASE_IDX 0 +#define mmIH_RB_CNTL 0x0080 +#define mmIH_RB_CNTL_BASE_IDX 0 +#define mmIH_RB_BASE 0x0081 +#define mmIH_RB_BASE_BASE_IDX 0 +#define 
mmIH_RB_BASE_HI 0x0082 +#define mmIH_RB_BASE_HI_BASE_IDX 0 +#define mmIH_RB_RPTR 0x0083 +#define mmIH_RB_RPTR_BASE_IDX 0 +#define mmIH_RB_WPTR 0x0084 +#define mmIH_RB_WPTR_BASE_IDX 0 +#define mmIH_RB_WPTR_ADDR_HI 0x0085 +#define mmIH_RB_WPTR_ADDR_HI_BASE_IDX 0 +#define mmIH_RB_WPTR_ADDR_LO 0x0086 +#define mmIH_RB_WPTR_ADDR_LO_BASE_IDX 0 +#define mmIH_DOORBELL_RPTR 0x0087 +#define mmIH_DOORBELL_RPTR_BASE_IDX 0 +#define mmIH_RB_CNTL_RING1 0x008c +#define mmIH_RB_CNTL_RING1_BASE_IDX 0 +#define mmIH_RB_BASE_RING1 0x008d +#define mmIH_RB_BASE_RING1_BASE_IDX 0 +#define mmIH_RB_BASE_HI_RING1 0x008e +#define mmIH_RB_BASE_HI_RING1_BASE_IDX 0 +#define mmIH_RB_RPTR_RING1 0x008f +#define mmIH_RB_RPTR_RING1_BASE_IDX 0 +#define mmIH_RB_WPTR_RING1 0x0090 +#define mmIH_RB_WPTR_RING1_BASE_IDX 0 +#define mmIH_DOORBELL_RPTR_RING1 0x0093 +#define mmIH_DOORBELL_RPTR_RING1_BASE_IDX 0 +#define mmIH_RB_CNTL_RING2 0x0098 +#define mmIH_RB_CNTL_RING2_BASE_IDX 0 +#define mmIH_RB_BASE_RING2 0x0099 +#define mmIH_RB_BASE_RING2_BASE_IDX 0 +#define mmIH_RB_BASE_HI_RING2 0x009a +#define mmIH_RB_BASE_HI_RING2_BASE_IDX 0 +#define mmIH_RB_RPTR_RING2 0x009b +#define mmIH_RB_RPTR_RING2_BASE_IDX 0 +#define mmIH_RB_WPTR_RING2 0x009c +#define mmIH_RB_WPTR_RING2_BASE_IDX 0 +#define mmIH_DOORBELL_RPTR_RING2 0x009f +#define mmIH_DOORBELL_RPTR_RING2_BASE_IDX 0 +#define mmIH_VERSION 0x00a5 +#define mmIH_VERSION_BASE_IDX 0 +#define mmIH_CNTL 0x00c0 +#define mmIH_CNTL_BASE_IDX 0 +#define mmIH_CNTL2 0x00c1 +#define mmIH_CNTL2_BASE_IDX 0 +#define mmIH_STATUS 0x00c2 +#define mmIH_STATUS_BASE_IDX 0 +#define mmIH_PERFMON_CNTL 0x00c3 +#define mmIH_PERFMON_CNTL_BASE_IDX 0 +#define mmIH_PERFCOUNTER0_RESULT 0x00c4 +#define mmIH_PERFCOUNTER0_RESULT_BASE_IDX 0 +#define mmIH_PERFCOUNTER1_RESULT 0x00c5 +#define mmIH_PERFCOUNTER1_RESULT_BASE_IDX 0 +#define mmIH_DSM_MATCH_VALUE_BIT_31_0 0x00c7 +#define mmIH_DSM_MATCH_VALUE_BIT_31_0_BASE_IDX 0 +#define mmIH_DSM_MATCH_VALUE_BIT_63_32 0x00c8 +#define mmIH_DSM_MATCH_VALUE_BIT_63_32_BASE_IDX 0 +#define mmIH_DSM_MATCH_VALUE_BIT_95_64 0x00c9 +#define mmIH_DSM_MATCH_VALUE_BIT_95_64_BASE_IDX 0 +#define mmIH_DSM_MATCH_FIELD_CONTROL 0x00ca +#define mmIH_DSM_MATCH_FIELD_CONTROL_BASE_IDX 0 +#define mmIH_DSM_MATCH_DATA_CONTROL 0x00cb +#define mmIH_DSM_MATCH_DATA_CONTROL_BASE_IDX 0 +#define mmIH_DSM_MATCH_FCN_ID 0x00cc +#define mmIH_DSM_MATCH_FCN_ID_BASE_IDX 0 +#define mmIH_LIMIT_INT_RATE_CNTL 0x00cd +#define mmIH_LIMIT_INT_RATE_CNTL_BASE_IDX 0 +#define mmIH_VF_RB_STATUS 0x00ce +#define mmIH_VF_RB_STATUS_BASE_IDX 0 +#define mmIH_VF_RB_STATUS2 0x00cf +#define mmIH_VF_RB_STATUS2_BASE_IDX 0 +#define mmIH_VF_RB1_STATUS 0x00d0 +#define mmIH_VF_RB1_STATUS_BASE_IDX 0 +#define mmIH_VF_RB1_STATUS2 0x00d1 +#define mmIH_VF_RB1_STATUS2_BASE_IDX 0 +#define mmIH_VF_RB2_STATUS 0x00d2 +#define mmIH_VF_RB2_STATUS_BASE_IDX 0 +#define mmIH_VF_RB2_STATUS2 0x00d3 +#define mmIH_VF_RB2_STATUS2_BASE_IDX 0 +#define mmIH_INT_FLOOD_CNTL 0x00d5 +#define mmIH_INT_FLOOD_CNTL_BASE_IDX 0 +#define mmIH_RB0_INT_FLOOD_STATUS 0x00d6 +#define mmIH_RB0_INT_FLOOD_STATUS_BASE_IDX 0 +#define mmIH_RB1_INT_FLOOD_STATUS 0x00d7 +#define mmIH_RB1_INT_FLOOD_STATUS_BASE_IDX 0 +#define mmIH_RB2_INT_FLOOD_STATUS 0x00d8 +#define mmIH_RB2_INT_FLOOD_STATUS_BASE_IDX 0 +#define mmIH_INT_FLOOD_STATUS 0x00d9 +#define mmIH_INT_FLOOD_STATUS_BASE_IDX 0 +#define mmIH_STORM_CLIENT_LIST_CNTL 0x00da +#define mmIH_STORM_CLIENT_LIST_CNTL_BASE_IDX 0 +#define mmIH_CLK_CTRL 0x00db +#define mmIH_CLK_CTRL_BASE_IDX 0 +#define mmIH_INT_FLAGS 0x00dc +#define mmIH_INT_FLAGS_BASE_IDX 0 
+#define mmIH_LAST_INT_INFO0 0x00dd +#define mmIH_LAST_INT_INFO0_BASE_IDX 0 +#define mmIH_LAST_INT_INFO1 0x00de +#define mmIH_LAST_INT_INFO1_BASE_IDX 0 +#define mmIH_LAST_INT_INFO2 0x00df +#define mmIH_LAST_INT_INFO2_BASE_IDX 0 +#define mmIH_SCRATCH 0x00e0 +#define mmIH_SCRATCH_BASE_IDX 0 +#define mmIH_CLIENT_CREDIT_ERROR 0x00e1 +#define mmIH_CLIENT_CREDIT_ERROR_BASE_IDX 0 +#define mmIH_GPU_IOV_VIOLATION_LOG 0x00e2 +#define mmIH_GPU_IOV_VIOLATION_LOG_BASE_IDX 0 +#define mmIH_COOKIE_REC_VIOLATION_LOG 0x00e3 +#define mmIH_COOKIE_REC_VIOLATION_LOG_BASE_IDX 0 +#define mmIH_CREDIT_STATUS 0x00e4 +#define mmIH_CREDIT_STATUS_BASE_IDX 0 +#define mmIH_MMHUB_ERROR 0x00e5 +#define mmIH_MMHUB_ERROR_BASE_IDX 0 +#define mmIH_MEM_POWER_CTRL 0x00e8 +#define mmIH_MEM_POWER_CTRL_BASE_IDX 0 +#define mmIH_REGISTER_LAST_PART2 0x00ff +#define mmIH_REGISTER_LAST_PART2_BASE_IDX 0 +#define mmSEM_CLK_CTRL 0x0100 +#define mmSEM_CLK_CTRL_BASE_IDX 0 +#define mmSEM_UTC_CREDIT 0x0101 +#define mmSEM_UTC_CREDIT_BASE_IDX 0 +#define mmSEM_UTC_CONFIG 0x0102 +#define mmSEM_UTC_CONFIG_BASE_IDX 0 +#define mmSEM_UTCL2_TRAN_EN_LUT 0x0103 +#define mmSEM_UTCL2_TRAN_EN_LUT_BASE_IDX 0 +#define mmSEM_MCIF_CONFIG 0x0104 +#define mmSEM_MCIF_CONFIG_BASE_IDX 0 +#define mmSEM_PERFMON_CNTL 0x0105 +#define mmSEM_PERFMON_CNTL_BASE_IDX 0 +#define mmSEM_PERFCOUNTER0_RESULT 0x0106 +#define mmSEM_PERFCOUNTER0_RESULT_BASE_IDX 0 +#define mmSEM_PERFCOUNTER1_RESULT 0x0107 +#define mmSEM_PERFCOUNTER1_RESULT_BASE_IDX 0 +#define mmSEM_STATUS 0x0108 +#define mmSEM_STATUS_BASE_IDX 0 +#define mmSEM_MAILBOX_CLIENTCONFIG 0x0109 +#define mmSEM_MAILBOX_CLIENTCONFIG_BASE_IDX 0 +#define mmSEM_MAILBOX 0x010a +#define mmSEM_MAILBOX_BASE_IDX 0 +#define mmSEM_MAILBOX_CONTROL 0x010b +#define mmSEM_MAILBOX_CONTROL_BASE_IDX 0 +#define mmSEM_CHICKEN_BITS 0x010c +#define mmSEM_CHICKEN_BITS_BASE_IDX 0 +#define mmSEM_MAILBOX_CLIENTCONFIG_EXTRA 0x010d +#define mmSEM_MAILBOX_CLIENTCONFIG_EXTRA_BASE_IDX 0 +#define mmSEM_GPU_IOV_VIOLATION_LOG 0x010e +#define mmSEM_GPU_IOV_VIOLATION_LOG_BASE_IDX 0 +#define mmSEM_OUTSTANDING_THRESHOLD 0x010f +#define mmSEM_OUTSTANDING_THRESHOLD_BASE_IDX 0 +#define mmSEM_MEM_POWER_CTRL 0x0110 +#define mmSEM_MEM_POWER_CTRL_BASE_IDX 0 +#define mmSEM_REGISTER_LAST_PART2 0x017f +#define mmSEM_REGISTER_LAST_PART2_BASE_IDX 0 +#define mmIH_ACTIVE_FCN_ID 0x0180 +#define mmIH_ACTIVE_FCN_ID_BASE_IDX 0 +#define mmIH_VIRT_RESET_REQ 0x0181 +#define mmIH_VIRT_RESET_REQ_BASE_IDX 0 +#define mmIH_CLIENT_CFG 0x0184 +#define mmIH_CLIENT_CFG_BASE_IDX 0 +#define mmIH_CLIENT_CFG_INDEX 0x0188 +#define mmIH_CLIENT_CFG_INDEX_BASE_IDX 0 +#define mmIH_CLIENT_CFG_DATA 0x0189 +#define mmIH_CLIENT_CFG_DATA_BASE_IDX 0 +#define mmIH_CID_REMAP_INDEX 0x018a +#define mmIH_CID_REMAP_INDEX_BASE_IDX 0 +#define mmIH_CID_REMAP_DATA 0x018b +#define mmIH_CID_REMAP_DATA_BASE_IDX 0 +#define mmIH_CHICKEN 0x018c +#define mmIH_CHICKEN_BASE_IDX 0 +#define mmIH_MMHUB_CNTL 0x018d +#define mmIH_MMHUB_CNTL_BASE_IDX 0 +#define mmIH_INT_DROP_CNTL 0x018e +#define mmIH_INT_DROP_CNTL_BASE_IDX 0 +#define mmIH_INT_DROP_MATCH_VALUE0 0x018f +#define mmIH_INT_DROP_MATCH_VALUE0_BASE_IDX 0 +#define mmIH_INT_DROP_MATCH_VALUE1 0x0190 +#define mmIH_INT_DROP_MATCH_VALUE1_BASE_IDX 0 +#define mmIH_INT_DROP_MATCH_MASK0 0x0191 +#define mmIH_INT_DROP_MATCH_MASK0_BASE_IDX 0 +#define mmIH_INT_DROP_MATCH_MASK1 0x0192 +#define mmIH_INT_DROP_MATCH_MASK1_BASE_IDX 0 +#define mmIH_REGISTER_LAST_PART1 0x019f +#define mmIH_REGISTER_LAST_PART1_BASE_IDX 0 +#define mmSEM_ACTIVE_FCN_ID 0x01a0 +#define mmSEM_ACTIVE_FCN_ID_BASE_IDX 0 
+#define mmSEM_VIRT_RESET_REQ 0x01a1 +#define mmSEM_VIRT_RESET_REQ_BASE_IDX 0 +#define mmSEM_RESP_SDMA0 0x01a4 +#define mmSEM_RESP_SDMA0_BASE_IDX 0 +#define mmSEM_RESP_SDMA1 0x01a5 +#define mmSEM_RESP_SDMA1_BASE_IDX 0 +#define mmSEM_RESP_UVD 0x01a6 +#define mmSEM_RESP_UVD_BASE_IDX 0 +#define mmSEM_RESP_VCE_0 0x01a7 +#define mmSEM_RESP_VCE_0_BASE_IDX 0 +#define mmSEM_RESP_ACP 0x01a8 +#define mmSEM_RESP_ACP_BASE_IDX 0 +#define mmSEM_RESP_ISP 0x01a9 +#define mmSEM_RESP_ISP_BASE_IDX 0 +#define mmSEM_RESP_VCE_1 0x01aa +#define mmSEM_RESP_VCE_1_BASE_IDX 0 +#define mmSEM_RESP_VP8 0x01ab +#define mmSEM_RESP_VP8_BASE_IDX 0 +#define mmSEM_RESP_GC 0x01ac +#define mmSEM_RESP_GC_BASE_IDX 0 +#define mmSEM_RESP_UVD_1 0x01ad +#define mmSEM_RESP_UVD_1_BASE_IDX 0 +#define mmSEM_CID_REMAP_INDEX 0x01b0 +#define mmSEM_CID_REMAP_INDEX_BASE_IDX 0 +#define mmSEM_CID_REMAP_DATA 0x01b1 +#define mmSEM_CID_REMAP_DATA_BASE_IDX 0 +#define mmSEM_ATOMIC_OP_LUT 0x01b2 +#define mmSEM_ATOMIC_OP_LUT_BASE_IDX 0 +#define mmSEM_EDC_CONFIG 0x01b3 +#define mmSEM_EDC_CONFIG_BASE_IDX 0 +#define mmSEM_CHICKEN_BITS2 0x01b4 +#define mmSEM_CHICKEN_BITS2_BASE_IDX 0 +#define mmSEM_MMHUB_CNTL 0x01b5 +#define mmSEM_MMHUB_CNTL_BASE_IDX 0 +#define mmSEM_REGISTER_LAST_PART1 0x01bf +#define mmSEM_REGISTER_LAST_PART1_BASE_IDX 0 + +#endif diff --git a/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h new file mode 100644 index 000000000000..3ea83ea9ce3a --- /dev/null +++ b/drivers/gpu/drm/amd/include/asic_reg/oss/osssys_4_2_0_sh_mask.h @@ -0,0 +1,1300 @@ +/* + * Copyright 2020 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ * + */ +#ifndef _osssys_4_2_0_SH_MASK_HEADER +#define _osssys_4_2_0_SH_MASK_HEADER + + +// addressBlock: osssys_osssysdec +//IH_VMID_0_LUT +#define IH_VMID_0_LUT__PASID__SHIFT 0x0 +#define IH_VMID_0_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_1_LUT +#define IH_VMID_1_LUT__PASID__SHIFT 0x0 +#define IH_VMID_1_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_2_LUT +#define IH_VMID_2_LUT__PASID__SHIFT 0x0 +#define IH_VMID_2_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_3_LUT +#define IH_VMID_3_LUT__PASID__SHIFT 0x0 +#define IH_VMID_3_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_4_LUT +#define IH_VMID_4_LUT__PASID__SHIFT 0x0 +#define IH_VMID_4_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_5_LUT +#define IH_VMID_5_LUT__PASID__SHIFT 0x0 +#define IH_VMID_5_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_6_LUT +#define IH_VMID_6_LUT__PASID__SHIFT 0x0 +#define IH_VMID_6_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_7_LUT +#define IH_VMID_7_LUT__PASID__SHIFT 0x0 +#define IH_VMID_7_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_8_LUT +#define IH_VMID_8_LUT__PASID__SHIFT 0x0 +#define IH_VMID_8_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_9_LUT +#define IH_VMID_9_LUT__PASID__SHIFT 0x0 +#define IH_VMID_9_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_10_LUT +#define IH_VMID_10_LUT__PASID__SHIFT 0x0 +#define IH_VMID_10_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_11_LUT +#define IH_VMID_11_LUT__PASID__SHIFT 0x0 +#define IH_VMID_11_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_12_LUT +#define IH_VMID_12_LUT__PASID__SHIFT 0x0 +#define IH_VMID_12_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_13_LUT +#define IH_VMID_13_LUT__PASID__SHIFT 0x0 +#define IH_VMID_13_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_14_LUT +#define IH_VMID_14_LUT__PASID__SHIFT 0x0 +#define IH_VMID_14_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_15_LUT +#define IH_VMID_15_LUT__PASID__SHIFT 0x0 +#define IH_VMID_15_LUT__PASID_MASK 0x0000FFFFL +//IH_VMID_0_LUT_MM +#define IH_VMID_0_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_0_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_1_LUT_MM +#define IH_VMID_1_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_1_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_2_LUT_MM +#define IH_VMID_2_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_2_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_3_LUT_MM +#define IH_VMID_3_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_3_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_4_LUT_MM +#define IH_VMID_4_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_4_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_5_LUT_MM +#define IH_VMID_5_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_5_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_6_LUT_MM +#define IH_VMID_6_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_6_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_7_LUT_MM +#define IH_VMID_7_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_7_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_8_LUT_MM +#define IH_VMID_8_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_8_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_9_LUT_MM +#define IH_VMID_9_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_9_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_10_LUT_MM +#define IH_VMID_10_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_10_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_11_LUT_MM +#define IH_VMID_11_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_11_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_12_LUT_MM +#define IH_VMID_12_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_12_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_13_LUT_MM +#define IH_VMID_13_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_13_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_VMID_14_LUT_MM +#define IH_VMID_14_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_14_LUT_MM__PASID_MASK 0x0000FFFFL 
+//IH_VMID_15_LUT_MM +#define IH_VMID_15_LUT_MM__PASID__SHIFT 0x0 +#define IH_VMID_15_LUT_MM__PASID_MASK 0x0000FFFFL +//IH_COOKIE_0 +#define IH_COOKIE_0__CLIENT_ID__SHIFT 0x0 +#define IH_COOKIE_0__SOURCE_ID__SHIFT 0x8 +#define IH_COOKIE_0__RING_ID__SHIFT 0x10 +#define IH_COOKIE_0__VM_ID__SHIFT 0x18 +#define IH_COOKIE_0__RESERVED__SHIFT 0x1c +#define IH_COOKIE_0__VMID_TYPE__SHIFT 0x1f +#define IH_COOKIE_0__CLIENT_ID_MASK 0x000000FFL +#define IH_COOKIE_0__SOURCE_ID_MASK 0x0000FF00L +#define IH_COOKIE_0__RING_ID_MASK 0x00FF0000L +#define IH_COOKIE_0__VM_ID_MASK 0x0F000000L +#define IH_COOKIE_0__RESERVED_MASK 0x70000000L +#define IH_COOKIE_0__VMID_TYPE_MASK 0x80000000L +//IH_COOKIE_1 +#define IH_COOKIE_1__TIMESTAMP_31_0__SHIFT 0x0 +#define IH_COOKIE_1__TIMESTAMP_31_0_MASK 0xFFFFFFFFL +//IH_COOKIE_2 +#define IH_COOKIE_2__TIMESTAMP_47_32__SHIFT 0x0 +#define IH_COOKIE_2__RESERVED__SHIFT 0x10 +#define IH_COOKIE_2__TIMESTAMP_SRC__SHIFT 0x1f +#define IH_COOKIE_2__TIMESTAMP_47_32_MASK 0x0000FFFFL +#define IH_COOKIE_2__RESERVED_MASK 0x7FFF0000L +#define IH_COOKIE_2__TIMESTAMP_SRC_MASK 0x80000000L +//IH_COOKIE_3 +#define IH_COOKIE_3__PAS_ID__SHIFT 0x0 +#define IH_COOKIE_3__RESERVED__SHIFT 0x10 +#define IH_COOKIE_3__PASID_SRC__SHIFT 0x1f +#define IH_COOKIE_3__PAS_ID_MASK 0x0000FFFFL +#define IH_COOKIE_3__RESERVED_MASK 0x7FFF0000L +#define IH_COOKIE_3__PASID_SRC_MASK 0x80000000L +//IH_COOKIE_4 +#define IH_COOKIE_4__CONTEXT_ID_31_0__SHIFT 0x0 +#define IH_COOKIE_4__CONTEXT_ID_31_0_MASK 0xFFFFFFFFL +//IH_COOKIE_5 +#define IH_COOKIE_5__CONTEXT_ID_63_32__SHIFT 0x0 +#define IH_COOKIE_5__CONTEXT_ID_63_32_MASK 0xFFFFFFFFL +//IH_COOKIE_6 +#define IH_COOKIE_6__CONTEXT_ID_95_64__SHIFT 0x0 +#define IH_COOKIE_6__CONTEXT_ID_95_64_MASK 0xFFFFFFFFL +//IH_COOKIE_7 +#define IH_COOKIE_7__CONTEXT_ID_128_96__SHIFT 0x0 +#define IH_COOKIE_7__CONTEXT_ID_128_96_MASK 0xFFFFFFFFL +//IH_REGISTER_LAST_PART0 +#define IH_REGISTER_LAST_PART0__RESERVED__SHIFT 0x0 +#define IH_REGISTER_LAST_PART0__RESERVED_MASK 0xFFFFFFFFL +//SEM_REQ_INPUT_0 +#define SEM_REQ_INPUT_0__DATA__SHIFT 0x0 +#define SEM_REQ_INPUT_0__DATA_MASK 0xFFFFFFFFL +//SEM_REQ_INPUT_1 +#define SEM_REQ_INPUT_1__DATA__SHIFT 0x0 +#define SEM_REQ_INPUT_1__DATA_MASK 0xFFFFFFFFL +//SEM_REQ_INPUT_2 +#define SEM_REQ_INPUT_2__DATA__SHIFT 0x0 +#define SEM_REQ_INPUT_2__DATA_MASK 0xFFFFFFFFL +//SEM_REQ_INPUT_3 +#define SEM_REQ_INPUT_3__DATA__SHIFT 0x0 +#define SEM_REQ_INPUT_3__DATA_MASK 0xFFFFFFFFL +//SEM_REGISTER_LAST_PART0 +#define SEM_REGISTER_LAST_PART0__RESERVED__SHIFT 0x0 +#define SEM_REGISTER_LAST_PART0__RESERVED_MASK 0xFFFFFFFFL +//IH_RB_CNTL +#define IH_RB_CNTL__RB_ENABLE__SHIFT 0x0 +#define IH_RB_CNTL__RB_SIZE__SHIFT 0x1 +#define IH_RB_CNTL__RB_GPU_TS_ENABLE__SHIFT 0x7 +#define IH_RB_CNTL__WPTR_WRITEBACK_ENABLE__SHIFT 0x8 +#define IH_RB_CNTL__RB_FULL_DRAIN_ENABLE__SHIFT 0x9 +#define IH_RB_CNTL__FULL_DRAIN_CLEAR__SHIFT 0xa +#define IH_RB_CNTL__PAGE_RB_CLEAR__SHIFT 0xb +#define IH_RB_CNTL__RB_USED_INT_THRESHOLD__SHIFT 0xc +#define IH_RB_CNTL__WPTR_OVERFLOW_ENABLE__SHIFT 0x10 +#define IH_RB_CNTL__ENABLE_INTR__SHIFT 0x11 +#define IH_RB_CNTL__MC_SWAP__SHIFT 0x12 +#define IH_RB_CNTL__MC_SNOOP__SHIFT 0x14 +#define IH_RB_CNTL__RPTR_REARM__SHIFT 0x15 +#define IH_RB_CNTL__MC_RO__SHIFT 0x16 +#define IH_RB_CNTL__MC_VMID__SHIFT 0x18 +#define IH_RB_CNTL__MC_SPACE__SHIFT 0x1c +#define IH_RB_CNTL__WPTR_OVERFLOW_CLEAR__SHIFT 0x1f +#define IH_RB_CNTL__RB_ENABLE_MASK 0x00000001L +#define IH_RB_CNTL__RB_SIZE_MASK 0x0000003EL +#define IH_RB_CNTL__RB_GPU_TS_ENABLE_MASK 0x00000080L +#define 
IH_RB_CNTL__WPTR_WRITEBACK_ENABLE_MASK 0x00000100L +#define IH_RB_CNTL__RB_FULL_DRAIN_ENABLE_MASK 0x00000200L +#define IH_RB_CNTL__FULL_DRAIN_CLEAR_MASK 0x00000400L +#define IH_RB_CNTL__PAGE_RB_CLEAR_MASK 0x00000800L +#define IH_RB_CNTL__RB_USED_INT_THRESHOLD_MASK 0x0000F000L +#define IH_RB_CNTL__WPTR_OVERFLOW_ENABLE_MASK 0x00010000L +#define IH_RB_CNTL__ENABLE_INTR_MASK 0x00020000L +#define IH_RB_CNTL__MC_SWAP_MASK 0x000C0000L +#define IH_RB_CNTL__MC_SNOOP_MASK 0x00100000L +#define IH_RB_CNTL__RPTR_REARM_MASK 0x00200000L +#define IH_RB_CNTL__MC_RO_MASK 0x00400000L +#define IH_RB_CNTL__MC_VMID_MASK 0x0F000000L +#define IH_RB_CNTL__MC_SPACE_MASK 0x70000000L +#define IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK 0x80000000L +//IH_RB_BASE +#define IH_RB_BASE__ADDR__SHIFT 0x0 +#define IH_RB_BASE__ADDR_MASK 0xFFFFFFFFL +//IH_RB_BASE_HI +#define IH_RB_BASE_HI__ADDR__SHIFT 0x0 +#define IH_RB_BASE_HI__ADDR_MASK 0x000000FFL +//IH_RB_RPTR +#define IH_RB_RPTR__OFFSET__SHIFT 0x2 +#define IH_RB_RPTR__OFFSET_MASK 0x0003FFFCL +//IH_RB_WPTR +#define IH_RB_WPTR__RB_OVERFLOW__SHIFT 0x0 +#define IH_RB_WPTR__OFFSET__SHIFT 0x2 +#define IH_RB_WPTR__RB_LEFT_NONE__SHIFT 0x12 +#define IH_RB_WPTR__RB_MAY_OVERFLOW__SHIFT 0x13 +#define IH_RB_WPTR__RB_OVERFLOW_MASK 0x00000001L +#define IH_RB_WPTR__OFFSET_MASK 0x0003FFFCL +#define IH_RB_WPTR__RB_LEFT_NONE_MASK 0x00040000L +#define IH_RB_WPTR__RB_MAY_OVERFLOW_MASK 0x00080000L +//IH_RB_WPTR_ADDR_HI +#define IH_RB_WPTR_ADDR_HI__ADDR__SHIFT 0x0 +#define IH_RB_WPTR_ADDR_HI__ADDR_MASK 0x0000FFFFL +//IH_RB_WPTR_ADDR_LO +#define IH_RB_WPTR_ADDR_LO__ADDR__SHIFT 0x2 +#define IH_RB_WPTR_ADDR_LO__ADDR_MASK 0xFFFFFFFCL +//IH_DOORBELL_RPTR +#define IH_DOORBELL_RPTR__OFFSET__SHIFT 0x0 +#define IH_DOORBELL_RPTR__ENABLE__SHIFT 0x1c +#define IH_DOORBELL_RPTR__OFFSET_MASK 0x03FFFFFFL +#define IH_DOORBELL_RPTR__ENABLE_MASK 0x10000000L +//IH_RB_CNTL_RING1 +#define IH_RB_CNTL_RING1__RB_ENABLE__SHIFT 0x0 +#define IH_RB_CNTL_RING1__RB_SIZE__SHIFT 0x1 +#define IH_RB_CNTL_RING1__RB_GPU_TS_ENABLE__SHIFT 0x7 +#define IH_RB_CNTL_RING1__RB_FULL_DRAIN_ENABLE__SHIFT 0x9 +#define IH_RB_CNTL_RING1__FULL_DRAIN_CLEAR__SHIFT 0xa +#define IH_RB_CNTL_RING1__PAGE_RB_CLEAR__SHIFT 0xb +#define IH_RB_CNTL_RING1__RB_USED_INT_THRESHOLD__SHIFT 0xc +#define IH_RB_CNTL_RING1__WPTR_OVERFLOW_ENABLE__SHIFT 0x10 +#define IH_RB_CNTL_RING1__MC_SWAP__SHIFT 0x12 +#define IH_RB_CNTL_RING1__MC_SNOOP__SHIFT 0x14 +#define IH_RB_CNTL_RING1__MC_RO__SHIFT 0x16 +#define IH_RB_CNTL_RING1__MC_VMID__SHIFT 0x18 +#define IH_RB_CNTL_RING1__MC_SPACE__SHIFT 0x1c +#define IH_RB_CNTL_RING1__WPTR_OVERFLOW_CLEAR__SHIFT 0x1f +#define IH_RB_CNTL_RING1__RB_ENABLE_MASK 0x00000001L +#define IH_RB_CNTL_RING1__RB_SIZE_MASK 0x0000003EL +#define IH_RB_CNTL_RING1__RB_GPU_TS_ENABLE_MASK 0x00000080L +#define IH_RB_CNTL_RING1__RB_FULL_DRAIN_ENABLE_MASK 0x00000200L +#define IH_RB_CNTL_RING1__FULL_DRAIN_CLEAR_MASK 0x00000400L +#define IH_RB_CNTL_RING1__PAGE_RB_CLEAR_MASK 0x00000800L +#define IH_RB_CNTL_RING1__RB_USED_INT_THRESHOLD_MASK 0x0000F000L +#define IH_RB_CNTL_RING1__WPTR_OVERFLOW_ENABLE_MASK 0x00010000L +#define IH_RB_CNTL_RING1__MC_SWAP_MASK 0x000C0000L +#define IH_RB_CNTL_RING1__MC_SNOOP_MASK 0x00100000L +#define IH_RB_CNTL_RING1__MC_RO_MASK 0x00400000L +#define IH_RB_CNTL_RING1__MC_VMID_MASK 0x0F000000L +#define IH_RB_CNTL_RING1__MC_SPACE_MASK 0x70000000L +#define IH_RB_CNTL_RING1__WPTR_OVERFLOW_CLEAR_MASK 0x80000000L +//IH_RB_BASE_RING1 +#define IH_RB_BASE_RING1__ADDR__SHIFT 0x0 +#define IH_RB_BASE_RING1__ADDR_MASK 0xFFFFFFFFL +//IH_RB_BASE_HI_RING1 
+#define IH_RB_BASE_HI_RING1__ADDR__SHIFT 0x0 +#define IH_RB_BASE_HI_RING1__ADDR_MASK 0x000000FFL +//IH_RB_RPTR_RING1 +#define IH_RB_RPTR_RING1__OFFSET__SHIFT 0x2 +#define IH_RB_RPTR_RING1__OFFSET_MASK 0x0003FFFCL +//IH_RB_WPTR_RING1 +#define IH_RB_WPTR_RING1__RB_OVERFLOW__SHIFT 0x0 +#define IH_RB_WPTR_RING1__OFFSET__SHIFT 0x2 +#define IH_RB_WPTR_RING1__RB_LEFT_NONE__SHIFT 0x12 +#define IH_RB_WPTR_RING1__RB_MAY_OVERFLOW__SHIFT 0x13 +#define IH_RB_WPTR_RING1__RB_OVERFLOW_MASK 0x00000001L +#define IH_RB_WPTR_RING1__OFFSET_MASK 0x0003FFFCL +#define IH_RB_WPTR_RING1__RB_LEFT_NONE_MASK 0x00040000L +#define IH_RB_WPTR_RING1__RB_MAY_OVERFLOW_MASK 0x00080000L +//IH_DOORBELL_RPTR_RING1 +#define IH_DOORBELL_RPTR_RING1__OFFSET__SHIFT 0x0 +#define IH_DOORBELL_RPTR_RING1__ENABLE__SHIFT 0x1c +#define IH_DOORBELL_RPTR_RING1__OFFSET_MASK 0x03FFFFFFL +#define IH_DOORBELL_RPTR_RING1__ENABLE_MASK 0x10000000L +//IH_RB_CNTL_RING2 +#define IH_RB_CNTL_RING2__RB_ENABLE__SHIFT 0x0 +#define IH_RB_CNTL_RING2__RB_SIZE__SHIFT 0x1 +#define IH_RB_CNTL_RING2__RB_GPU_TS_ENABLE__SHIFT 0x7 +#define IH_RB_CNTL_RING2__RB_FULL_DRAIN_ENABLE__SHIFT 0x9 +#define IH_RB_CNTL_RING2__FULL_DRAIN_CLEAR__SHIFT 0xa +#define IH_RB_CNTL_RING2__PAGE_RB_CLEAR__SHIFT 0xb +#define IH_RB_CNTL_RING2__RB_USED_INT_THRESHOLD__SHIFT 0xc +#define IH_RB_CNTL_RING2__WPTR_OVERFLOW_ENABLE__SHIFT 0x10 +#define IH_RB_CNTL_RING2__MC_SWAP__SHIFT 0x12 +#define IH_RB_CNTL_RING2__MC_SNOOP__SHIFT 0x14 +#define IH_RB_CNTL_RING2__MC_RO__SHIFT 0x16 +#define IH_RB_CNTL_RING2__MC_VMID__SHIFT 0x18 +#define IH_RB_CNTL_RING2__MC_SPACE__SHIFT 0x1c +#define IH_RB_CNTL_RING2__WPTR_OVERFLOW_CLEAR__SHIFT 0x1f +#define IH_RB_CNTL_RING2__RB_ENABLE_MASK 0x00000001L +#define IH_RB_CNTL_RING2__RB_SIZE_MASK 0x0000003EL +#define IH_RB_CNTL_RING2__RB_GPU_TS_ENABLE_MASK 0x00000080L +#define IH_RB_CNTL_RING2__RB_FULL_DRAIN_ENABLE_MASK 0x00000200L +#define IH_RB_CNTL_RING2__FULL_DRAIN_CLEAR_MASK 0x00000400L +#define IH_RB_CNTL_RING2__PAGE_RB_CLEAR_MASK 0x00000800L +#define IH_RB_CNTL_RING2__RB_USED_INT_THRESHOLD_MASK 0x0000F000L +#define IH_RB_CNTL_RING2__WPTR_OVERFLOW_ENABLE_MASK 0x00010000L +#define IH_RB_CNTL_RING2__MC_SWAP_MASK 0x000C0000L +#define IH_RB_CNTL_RING2__MC_SNOOP_MASK 0x00100000L +#define IH_RB_CNTL_RING2__MC_RO_MASK 0x00400000L +#define IH_RB_CNTL_RING2__MC_VMID_MASK 0x0F000000L +#define IH_RB_CNTL_RING2__MC_SPACE_MASK 0x70000000L +#define IH_RB_CNTL_RING2__WPTR_OVERFLOW_CLEAR_MASK 0x80000000L +//IH_RB_BASE_RING2 +#define IH_RB_BASE_RING2__ADDR__SHIFT 0x0 +#define IH_RB_BASE_RING2__ADDR_MASK 0xFFFFFFFFL +//IH_RB_BASE_HI_RING2 +#define IH_RB_BASE_HI_RING2__ADDR__SHIFT 0x0 +#define IH_RB_BASE_HI_RING2__ADDR_MASK 0x000000FFL +//IH_RB_RPTR_RING2 +#define IH_RB_RPTR_RING2__OFFSET__SHIFT 0x2 +#define IH_RB_RPTR_RING2__OFFSET_MASK 0x0003FFFCL +//IH_RB_WPTR_RING2 +#define IH_RB_WPTR_RING2__RB_OVERFLOW__SHIFT 0x0 +#define IH_RB_WPTR_RING2__OFFSET__SHIFT 0x2 +#define IH_RB_WPTR_RING2__RB_LEFT_NONE__SHIFT 0x12 +#define IH_RB_WPTR_RING2__RB_MAY_OVERFLOW__SHIFT 0x13 +#define IH_RB_WPTR_RING2__RB_OVERFLOW_MASK 0x00000001L +#define IH_RB_WPTR_RING2__OFFSET_MASK 0x0003FFFCL +#define IH_RB_WPTR_RING2__RB_LEFT_NONE_MASK 0x00040000L +#define IH_RB_WPTR_RING2__RB_MAY_OVERFLOW_MASK 0x00080000L +//IH_DOORBELL_RPTR_RING2 +#define IH_DOORBELL_RPTR_RING2__OFFSET__SHIFT 0x0 +#define IH_DOORBELL_RPTR_RING2__ENABLE__SHIFT 0x1c +#define IH_DOORBELL_RPTR_RING2__OFFSET_MASK 0x03FFFFFFL +#define IH_DOORBELL_RPTR_RING2__ENABLE_MASK 0x10000000L +//IH_VERSION +#define IH_VERSION__MINVER__SHIFT 0x0 
+#define IH_VERSION__MAJVER__SHIFT 0x8 +#define IH_VERSION__REV__SHIFT 0x10 +#define IH_VERSION__MINVER_MASK 0x0000007FL +#define IH_VERSION__MAJVER_MASK 0x00007F00L +#define IH_VERSION__REV_MASK 0x003F0000L +//IH_CNTL +#define IH_CNTL__WPTR_WRITEBACK_TIMER__SHIFT 0x0 +#define IH_CNTL__IH_IDLE_HYSTERESIS_CNTL__SHIFT 0x6 +#define IH_CNTL__IH_FIFO_HIGHWATER__SHIFT 0x8 +#define IH_CNTL__MC_WR_CLEAN_CNT__SHIFT 0x14 +#define IH_CNTL__WPTR_WRITEBACK_TIMER_MASK 0x0000001FL +#define IH_CNTL__IH_IDLE_HYSTERESIS_CNTL_MASK 0x000000C0L +#define IH_CNTL__IH_FIFO_HIGHWATER_MASK 0x00007F00L +#define IH_CNTL__MC_WR_CLEAN_CNT_MASK 0x01F00000L +//IH_CNTL2 +#define IH_CNTL2__SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT__SHIFT 0x0 +#define IH_CNTL2__SELF_IV_FORCE_WPTR_UPDATE_ENABLE__SHIFT 0x8 +#define IH_CNTL2__SELF_IV_FORCE_WPTR_UPDATE_TIMEOUT_MASK 0x0000001FL +#define IH_CNTL2__SELF_IV_FORCE_WPTR_UPDATE_ENABLE_MASK 0x00000100L +//IH_STATUS +#define IH_STATUS__IDLE__SHIFT 0x0 +#define IH_STATUS__INPUT_IDLE__SHIFT 0x1 +#define IH_STATUS__BUFFER_IDLE__SHIFT 0x2 +#define IH_STATUS__RB_FULL__SHIFT 0x3 +#define IH_STATUS__RB_FULL_DRAIN__SHIFT 0x4 +#define IH_STATUS__RB_OVERFLOW__SHIFT 0x5 +#define IH_STATUS__MC_WR_IDLE__SHIFT 0x6 +#define IH_STATUS__MC_WR_STALL__SHIFT 0x7 +#define IH_STATUS__MC_WR_CLEAN_PENDING__SHIFT 0x8 +#define IH_STATUS__MC_WR_CLEAN_STALL__SHIFT 0x9 +#define IH_STATUS__BIF_INTERRUPT_LINE__SHIFT 0xa +#define IH_STATUS__SWITCH_READY__SHIFT 0xb +#define IH_STATUS__RB1_FULL__SHIFT 0xc +#define IH_STATUS__RB1_FULL_DRAIN__SHIFT 0xd +#define IH_STATUS__RB1_OVERFLOW__SHIFT 0xe +#define IH_STATUS__RB2_FULL__SHIFT 0xf +#define IH_STATUS__RB2_FULL_DRAIN__SHIFT 0x10 +#define IH_STATUS__RB2_OVERFLOW__SHIFT 0x11 +#define IH_STATUS__SELF_INT_GEN_IDLE__SHIFT 0x12 +#define IH_STATUS__IDLE_MASK 0x00000001L +#define IH_STATUS__INPUT_IDLE_MASK 0x00000002L +#define IH_STATUS__BUFFER_IDLE_MASK 0x00000004L +#define IH_STATUS__RB_FULL_MASK 0x00000008L +#define IH_STATUS__RB_FULL_DRAIN_MASK 0x00000010L +#define IH_STATUS__RB_OVERFLOW_MASK 0x00000020L +#define IH_STATUS__MC_WR_IDLE_MASK 0x00000040L +#define IH_STATUS__MC_WR_STALL_MASK 0x00000080L +#define IH_STATUS__MC_WR_CLEAN_PENDING_MASK 0x00000100L +#define IH_STATUS__MC_WR_CLEAN_STALL_MASK 0x00000200L +#define IH_STATUS__BIF_INTERRUPT_LINE_MASK 0x00000400L +#define IH_STATUS__SWITCH_READY_MASK 0x00000800L +#define IH_STATUS__RB1_FULL_MASK 0x00001000L +#define IH_STATUS__RB1_FULL_DRAIN_MASK 0x00002000L +#define IH_STATUS__RB1_OVERFLOW_MASK 0x00004000L +#define IH_STATUS__RB2_FULL_MASK 0x00008000L +#define IH_STATUS__RB2_FULL_DRAIN_MASK 0x00010000L +#define IH_STATUS__RB2_OVERFLOW_MASK 0x00020000L +#define IH_STATUS__SELF_INT_GEN_IDLE_MASK 0x00040000L +//IH_PERFMON_CNTL +#define IH_PERFMON_CNTL__ENABLE0__SHIFT 0x0 +#define IH_PERFMON_CNTL__CLEAR0__SHIFT 0x1 +#define IH_PERFMON_CNTL__PERF_SEL0__SHIFT 0x2 +#define IH_PERFMON_CNTL__ENABLE1__SHIFT 0x10 +#define IH_PERFMON_CNTL__CLEAR1__SHIFT 0x11 +#define IH_PERFMON_CNTL__PERF_SEL1__SHIFT 0x12 +#define IH_PERFMON_CNTL__ENABLE0_MASK 0x00000001L +#define IH_PERFMON_CNTL__CLEAR0_MASK 0x00000002L +#define IH_PERFMON_CNTL__PERF_SEL0_MASK 0x000007FCL +#define IH_PERFMON_CNTL__ENABLE1_MASK 0x00010000L +#define IH_PERFMON_CNTL__CLEAR1_MASK 0x00020000L +#define IH_PERFMON_CNTL__PERF_SEL1_MASK 0x07FC0000L +//IH_PERFCOUNTER0_RESULT +#define IH_PERFCOUNTER0_RESULT__PERF_COUNT__SHIFT 0x0 +#define IH_PERFCOUNTER0_RESULT__PERF_COUNT_MASK 0xFFFFFFFFL +//IH_PERFCOUNTER1_RESULT +#define IH_PERFCOUNTER1_RESULT__PERF_COUNT__SHIFT 0x0 +#define 
IH_PERFCOUNTER1_RESULT__PERF_COUNT_MASK 0xFFFFFFFFL +//IH_DSM_MATCH_VALUE_BIT_31_0 +#define IH_DSM_MATCH_VALUE_BIT_31_0__VALUE__SHIFT 0x0 +#define IH_DSM_MATCH_VALUE_BIT_31_0__VALUE_MASK 0xFFFFFFFFL +//IH_DSM_MATCH_VALUE_BIT_63_32 +#define IH_DSM_MATCH_VALUE_BIT_63_32__VALUE__SHIFT 0x0 +#define IH_DSM_MATCH_VALUE_BIT_63_32__VALUE_MASK 0xFFFFFFFFL +//IH_DSM_MATCH_VALUE_BIT_95_64 +#define IH_DSM_MATCH_VALUE_BIT_95_64__VALUE__SHIFT 0x0 +#define IH_DSM_MATCH_VALUE_BIT_95_64__VALUE_MASK 0xFFFFFFFFL +//IH_DSM_MATCH_FIELD_CONTROL +#define IH_DSM_MATCH_FIELD_CONTROL__SRC_EN__SHIFT 0x0 +#define IH_DSM_MATCH_FIELD_CONTROL__FCNID_EN__SHIFT 0x1 +#define IH_DSM_MATCH_FIELD_CONTROL__TIMESTAMP_EN__SHIFT 0x2 +#define IH_DSM_MATCH_FIELD_CONTROL__RINGID_EN__SHIFT 0x3 +#define IH_DSM_MATCH_FIELD_CONTROL__VMID_EN__SHIFT 0x4 +#define IH_DSM_MATCH_FIELD_CONTROL__PASID_EN__SHIFT 0x5 +#define IH_DSM_MATCH_FIELD_CONTROL__CLIENT_ID_EN__SHIFT 0x6 +#define IH_DSM_MATCH_FIELD_CONTROL__SRC_EN_MASK 0x00000001L +#define IH_DSM_MATCH_FIELD_CONTROL__FCNID_EN_MASK 0x00000002L +#define IH_DSM_MATCH_FIELD_CONTROL__TIMESTAMP_EN_MASK 0x00000004L +#define IH_DSM_MATCH_FIELD_CONTROL__RINGID_EN_MASK 0x00000008L +#define IH_DSM_MATCH_FIELD_CONTROL__VMID_EN_MASK 0x00000010L +#define IH_DSM_MATCH_FIELD_CONTROL__PASID_EN_MASK 0x00000020L +#define IH_DSM_MATCH_FIELD_CONTROL__CLIENT_ID_EN_MASK 0x00000040L +//IH_DSM_MATCH_DATA_CONTROL +#define IH_DSM_MATCH_DATA_CONTROL__VALUE__SHIFT 0x0 +#define IH_DSM_MATCH_DATA_CONTROL__VALUE_MASK 0x0FFFFFFFL +//IH_DSM_MATCH_FCN_ID +#define IH_DSM_MATCH_FCN_ID__PF_VF__SHIFT 0x0 +#define IH_DSM_MATCH_FCN_ID__VF_ID__SHIFT 0x1 +#define IH_DSM_MATCH_FCN_ID__PF_VF_MASK 0x00000001L +#define IH_DSM_MATCH_FCN_ID__VF_ID_MASK 0x0000001EL +//IH_LIMIT_INT_RATE_CNTL +#define IH_LIMIT_INT_RATE_CNTL__LIMIT_ENABLE__SHIFT 0x0 +#define IH_LIMIT_INT_RATE_CNTL__PERF_INTERVAL__SHIFT 0x1 +#define IH_LIMIT_INT_RATE_CNTL__PERF_THRESHOLD__SHIFT 0x5 +#define IH_LIMIT_INT_RATE_CNTL__RETURN_DELAY__SHIFT 0x11 +#define IH_LIMIT_INT_RATE_CNTL__PERF_RESULT__SHIFT 0x15 +#define IH_LIMIT_INT_RATE_CNTL__LIMIT_ENABLE_MASK 0x00000001L +#define IH_LIMIT_INT_RATE_CNTL__PERF_INTERVAL_MASK 0x0000001EL +#define IH_LIMIT_INT_RATE_CNTL__PERF_THRESHOLD_MASK 0x0000FFE0L +#define IH_LIMIT_INT_RATE_CNTL__RETURN_DELAY_MASK 0x001E0000L +#define IH_LIMIT_INT_RATE_CNTL__PERF_RESULT_MASK 0xFFE00000L +//IH_VF_RB_STATUS +#define IH_VF_RB_STATUS__RB_FULL_DRAIN_VF__SHIFT 0x0 +#define IH_VF_RB_STATUS__RB_OVERFLOW_VF__SHIFT 0x10 +#define IH_VF_RB_STATUS__RB_FULL_DRAIN_VF_MASK 0x0000FFFFL +#define IH_VF_RB_STATUS__RB_OVERFLOW_VF_MASK 0xFFFF0000L +//IH_VF_RB_STATUS2 +#define IH_VF_RB_STATUS2__RB_FULL_VF__SHIFT 0x0 +#define IH_VF_RB_STATUS2__BIF_INTERRUPT_LINE_VF__SHIFT 0x10 +#define IH_VF_RB_STATUS2__RB_FULL_VF_MASK 0x0000FFFFL +#define IH_VF_RB_STATUS2__BIF_INTERRUPT_LINE_VF_MASK 0xFFFF0000L +//IH_VF_RB1_STATUS +#define IH_VF_RB1_STATUS__RB_FULL_DRAIN_VF__SHIFT 0x0 +#define IH_VF_RB1_STATUS__RB_OVERFLOW_VF__SHIFT 0x10 +#define IH_VF_RB1_STATUS__RB_FULL_DRAIN_VF_MASK 0x0000FFFFL +#define IH_VF_RB1_STATUS__RB_OVERFLOW_VF_MASK 0xFFFF0000L +//IH_VF_RB1_STATUS2 +#define IH_VF_RB1_STATUS2__RB_FULL_VF__SHIFT 0x0 +#define IH_VF_RB1_STATUS2__RB_FULL_VF_MASK 0x0000FFFFL +//IH_VF_RB2_STATUS +#define IH_VF_RB2_STATUS__RB_FULL_DRAIN_VF__SHIFT 0x0 +#define IH_VF_RB2_STATUS__RB_OVERFLOW_VF__SHIFT 0x10 +#define IH_VF_RB2_STATUS__RB_FULL_DRAIN_VF_MASK 0x0000FFFFL +#define IH_VF_RB2_STATUS__RB_OVERFLOW_VF_MASK 0xFFFF0000L +//IH_VF_RB2_STATUS2 +#define 
IH_VF_RB2_STATUS2__RB_FULL_VF__SHIFT 0x0 +#define IH_VF_RB2_STATUS2__RB_FULL_VF_MASK 0x0000FFFFL +//IH_INT_FLOOD_CNTL +#define IH_INT_FLOOD_CNTL__HIGHWATER__SHIFT 0x0 +#define IH_INT_FLOOD_CNTL__FLOOD_CNTL_ENABLE__SHIFT 0x3 +#define IH_INT_FLOOD_CNTL__CLEAR_INT_FLOOD_STATUS__SHIFT 0x4 +#define IH_INT_FLOOD_CNTL__HIGHWATER_MASK 0x00000007L +#define IH_INT_FLOOD_CNTL__FLOOD_CNTL_ENABLE_MASK 0x00000008L +#define IH_INT_FLOOD_CNTL__CLEAR_INT_FLOOD_STATUS_MASK 0x00000010L +//IH_RB0_INT_FLOOD_STATUS +#define IH_RB0_INT_FLOOD_STATUS__RB_INT_DROPPED_VF__SHIFT 0x0 +#define IH_RB0_INT_FLOOD_STATUS__RB_INT_DROPPED__SHIFT 0x1f +#define IH_RB0_INT_FLOOD_STATUS__RB_INT_DROPPED_VF_MASK 0x0000FFFFL +#define IH_RB0_INT_FLOOD_STATUS__RB_INT_DROPPED_MASK 0x80000000L +//IH_RB1_INT_FLOOD_STATUS +#define IH_RB1_INT_FLOOD_STATUS__RB_INT_DROPPED_VF__SHIFT 0x0 +#define IH_RB1_INT_FLOOD_STATUS__RB_INT_DROPPED__SHIFT 0x1f +#define IH_RB1_INT_FLOOD_STATUS__RB_INT_DROPPED_VF_MASK 0x0000FFFFL +#define IH_RB1_INT_FLOOD_STATUS__RB_INT_DROPPED_MASK 0x80000000L +//IH_RB2_INT_FLOOD_STATUS +#define IH_RB2_INT_FLOOD_STATUS__RB_INT_DROPPED_VF__SHIFT 0x0 +#define IH_RB2_INT_FLOOD_STATUS__RB_INT_DROPPED__SHIFT 0x1f +#define IH_RB2_INT_FLOOD_STATUS__RB_INT_DROPPED_VF_MASK 0x0000FFFFL +#define IH_RB2_INT_FLOOD_STATUS__RB_INT_DROPPED_MASK 0x80000000L +//IH_INT_FLOOD_STATUS +#define IH_INT_FLOOD_STATUS__INT_DROP_CNT__SHIFT 0x0 +#define IH_INT_FLOOD_STATUS__FIRST_DROP_INT_CLIENT_ID__SHIFT 0x8 +#define IH_INT_FLOOD_STATUS__FIRST_DROP_INT_SOURCE_ID__SHIFT 0x10 +#define IH_INT_FLOOD_STATUS__FIRST_DROP_INT_VF_ID__SHIFT 0x18 +#define IH_INT_FLOOD_STATUS__FIRST_DROP_INT_VF__SHIFT 0x1c +#define IH_INT_FLOOD_STATUS__INT_DROPPED__SHIFT 0x1e +#define IH_INT_FLOOD_STATUS__INT_DROP_CNT_MASK 0x000000FFL +#define IH_INT_FLOOD_STATUS__FIRST_DROP_INT_CLIENT_ID_MASK 0x0000FF00L +#define IH_INT_FLOOD_STATUS__FIRST_DROP_INT_SOURCE_ID_MASK 0x00FF0000L +#define IH_INT_FLOOD_STATUS__FIRST_DROP_INT_VF_ID_MASK 0x0F000000L +#define IH_INT_FLOOD_STATUS__FIRST_DROP_INT_VF_MASK 0x10000000L +#define IH_INT_FLOOD_STATUS__INT_DROPPED_MASK 0x40000000L +//IH_STORM_CLIENT_LIST_CNTL +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT1_IS_STORM_CLIENT__SHIFT 0x1 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT2_IS_STORM_CLIENT__SHIFT 0x2 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT3_IS_STORM_CLIENT__SHIFT 0x3 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT4_IS_STORM_CLIENT__SHIFT 0x4 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT5_IS_STORM_CLIENT__SHIFT 0x5 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT6_IS_STORM_CLIENT__SHIFT 0x6 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT7_IS_STORM_CLIENT__SHIFT 0x7 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT8_IS_STORM_CLIENT__SHIFT 0x8 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT9_IS_STORM_CLIENT__SHIFT 0x9 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT10_IS_STORM_CLIENT__SHIFT 0xa +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT11_IS_STORM_CLIENT__SHIFT 0xb +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT12_IS_STORM_CLIENT__SHIFT 0xc +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT13_IS_STORM_CLIENT__SHIFT 0xd +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT14_IS_STORM_CLIENT__SHIFT 0xe +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT15_IS_STORM_CLIENT__SHIFT 0xf +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT16_IS_STORM_CLIENT__SHIFT 0x10 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT17_IS_STORM_CLIENT__SHIFT 0x11 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT18_IS_STORM_CLIENT__SHIFT 0x12 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT19_IS_STORM_CLIENT__SHIFT 0x13 +#define 
IH_STORM_CLIENT_LIST_CNTL__CLIENT20_IS_STORM_CLIENT__SHIFT 0x14 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT21_IS_STORM_CLIENT__SHIFT 0x15 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT22_IS_STORM_CLIENT__SHIFT 0x16 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT23_IS_STORM_CLIENT__SHIFT 0x17 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT24_IS_STORM_CLIENT__SHIFT 0x18 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT25_IS_STORM_CLIENT__SHIFT 0x19 +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT26_IS_STORM_CLIENT__SHIFT 0x1a +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT27_IS_STORM_CLIENT__SHIFT 0x1b +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT28_IS_STORM_CLIENT__SHIFT 0x1c +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT29_IS_STORM_CLIENT__SHIFT 0x1d +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT30_IS_STORM_CLIENT__SHIFT 0x1e +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT31_IS_STORM_CLIENT__SHIFT 0x1f +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT1_IS_STORM_CLIENT_MASK 0x00000002L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT2_IS_STORM_CLIENT_MASK 0x00000004L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT3_IS_STORM_CLIENT_MASK 0x00000008L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT4_IS_STORM_CLIENT_MASK 0x00000010L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT5_IS_STORM_CLIENT_MASK 0x00000020L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT6_IS_STORM_CLIENT_MASK 0x00000040L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT7_IS_STORM_CLIENT_MASK 0x00000080L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT8_IS_STORM_CLIENT_MASK 0x00000100L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT9_IS_STORM_CLIENT_MASK 0x00000200L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT10_IS_STORM_CLIENT_MASK 0x00000400L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT11_IS_STORM_CLIENT_MASK 0x00000800L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT12_IS_STORM_CLIENT_MASK 0x00001000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT13_IS_STORM_CLIENT_MASK 0x00002000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT14_IS_STORM_CLIENT_MASK 0x00004000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT15_IS_STORM_CLIENT_MASK 0x00008000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT16_IS_STORM_CLIENT_MASK 0x00010000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT17_IS_STORM_CLIENT_MASK 0x00020000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT18_IS_STORM_CLIENT_MASK 0x00040000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT19_IS_STORM_CLIENT_MASK 0x00080000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT20_IS_STORM_CLIENT_MASK 0x00100000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT21_IS_STORM_CLIENT_MASK 0x00200000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT22_IS_STORM_CLIENT_MASK 0x00400000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT23_IS_STORM_CLIENT_MASK 0x00800000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT24_IS_STORM_CLIENT_MASK 0x01000000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT25_IS_STORM_CLIENT_MASK 0x02000000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT26_IS_STORM_CLIENT_MASK 0x04000000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT27_IS_STORM_CLIENT_MASK 0x08000000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT28_IS_STORM_CLIENT_MASK 0x10000000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT29_IS_STORM_CLIENT_MASK 0x20000000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT30_IS_STORM_CLIENT_MASK 0x40000000L +#define IH_STORM_CLIENT_LIST_CNTL__CLIENT31_IS_STORM_CLIENT_MASK 0x80000000L +//IH_CLK_CTRL +#define IH_CLK_CTRL__IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE__SHIFT 0x19 +#define IH_CLK_CTRL__IH_BUFFER_MEM_CLK_SOFT_OVERRIDE__SHIFT 0x1a +#define IH_CLK_CTRL__DBUS_MUX_CLK_SOFT_OVERRIDE__SHIFT 0x1b +#define IH_CLK_CTRL__OSSSYS_SHARE_CLK_SOFT_OVERRIDE__SHIFT 
0x1c +#define IH_CLK_CTRL__LIMIT_SMN_CLK_SOFT_OVERRIDE__SHIFT 0x1d +#define IH_CLK_CTRL__DYN_CLK_SOFT_OVERRIDE__SHIFT 0x1e +#define IH_CLK_CTRL__REG_CLK_SOFT_OVERRIDE__SHIFT 0x1f +#define IH_CLK_CTRL__IH_RETRY_INT_CAM_MEM_CLK_SOFT_OVERRIDE_MASK 0x02000000L +#define IH_CLK_CTRL__IH_BUFFER_MEM_CLK_SOFT_OVERRIDE_MASK 0x04000000L +#define IH_CLK_CTRL__DBUS_MUX_CLK_SOFT_OVERRIDE_MASK 0x08000000L +#define IH_CLK_CTRL__OSSSYS_SHARE_CLK_SOFT_OVERRIDE_MASK 0x10000000L +#define IH_CLK_CTRL__LIMIT_SMN_CLK_SOFT_OVERRIDE_MASK 0x20000000L +#define IH_CLK_CTRL__DYN_CLK_SOFT_OVERRIDE_MASK 0x40000000L +#define IH_CLK_CTRL__REG_CLK_SOFT_OVERRIDE_MASK 0x80000000L +//IH_INT_FLAGS +#define IH_INT_FLAGS__CLIENT_0_FLAG__SHIFT 0x0 +#define IH_INT_FLAGS__CLIENT_1_FLAG__SHIFT 0x1 +#define IH_INT_FLAGS__CLIENT_2_FLAG__SHIFT 0x2 +#define IH_INT_FLAGS__CLIENT_3_FLAG__SHIFT 0x3 +#define IH_INT_FLAGS__CLIENT_4_FLAG__SHIFT 0x4 +#define IH_INT_FLAGS__CLIENT_5_FLAG__SHIFT 0x5 +#define IH_INT_FLAGS__CLIENT_6_FLAG__SHIFT 0x6 +#define IH_INT_FLAGS__CLIENT_7_FLAG__SHIFT 0x7 +#define IH_INT_FLAGS__CLIENT_8_FLAG__SHIFT 0x8 +#define IH_INT_FLAGS__CLIENT_9_FLAG__SHIFT 0x9 +#define IH_INT_FLAGS__CLIENT_10_FLAG__SHIFT 0xa +#define IH_INT_FLAGS__CLIENT_11_FLAG__SHIFT 0xb +#define IH_INT_FLAGS__CLIENT_12_FLAG__SHIFT 0xc +#define IH_INT_FLAGS__CLIENT_13_FLAG__SHIFT 0xd +#define IH_INT_FLAGS__CLIENT_14_FLAG__SHIFT 0xe +#define IH_INT_FLAGS__CLIENT_15_FLAG__SHIFT 0xf +#define IH_INT_FLAGS__CLIENT_16_FLAG__SHIFT 0x10 +#define IH_INT_FLAGS__CLIENT_17_FLAG__SHIFT 0x11 +#define IH_INT_FLAGS__CLIENT_18_FLAG__SHIFT 0x12 +#define IH_INT_FLAGS__CLIENT_19_FLAG__SHIFT 0x13 +#define IH_INT_FLAGS__CLIENT_20_FLAG__SHIFT 0x14 +#define IH_INT_FLAGS__CLIENT_21_FLAG__SHIFT 0x15 +#define IH_INT_FLAGS__CLIENT_22_FLAG__SHIFT 0x16 +#define IH_INT_FLAGS__CLIENT_23_FLAG__SHIFT 0x17 +#define IH_INT_FLAGS__CLIENT_24_FLAG__SHIFT 0x18 +#define IH_INT_FLAGS__CLIENT_25_FLAG__SHIFT 0x19 +#define IH_INT_FLAGS__CLIENT_26_FLAG__SHIFT 0x1a +#define IH_INT_FLAGS__CLIENT_27_FLAG__SHIFT 0x1b +#define IH_INT_FLAGS__CLIENT_28_FLAG__SHIFT 0x1c +#define IH_INT_FLAGS__CLIENT_29_FLAG__SHIFT 0x1d +#define IH_INT_FLAGS__CLIENT_30_FLAG__SHIFT 0x1e +#define IH_INT_FLAGS__CLIENT_31_FLAG__SHIFT 0x1f +#define IH_INT_FLAGS__CLIENT_0_FLAG_MASK 0x00000001L +#define IH_INT_FLAGS__CLIENT_1_FLAG_MASK 0x00000002L +#define IH_INT_FLAGS__CLIENT_2_FLAG_MASK 0x00000004L +#define IH_INT_FLAGS__CLIENT_3_FLAG_MASK 0x00000008L +#define IH_INT_FLAGS__CLIENT_4_FLAG_MASK 0x00000010L +#define IH_INT_FLAGS__CLIENT_5_FLAG_MASK 0x00000020L +#define IH_INT_FLAGS__CLIENT_6_FLAG_MASK 0x00000040L +#define IH_INT_FLAGS__CLIENT_7_FLAG_MASK 0x00000080L +#define IH_INT_FLAGS__CLIENT_8_FLAG_MASK 0x00000100L +#define IH_INT_FLAGS__CLIENT_9_FLAG_MASK 0x00000200L +#define IH_INT_FLAGS__CLIENT_10_FLAG_MASK 0x00000400L +#define IH_INT_FLAGS__CLIENT_11_FLAG_MASK 0x00000800L +#define IH_INT_FLAGS__CLIENT_12_FLAG_MASK 0x00001000L +#define IH_INT_FLAGS__CLIENT_13_FLAG_MASK 0x00002000L +#define IH_INT_FLAGS__CLIENT_14_FLAG_MASK 0x00004000L +#define IH_INT_FLAGS__CLIENT_15_FLAG_MASK 0x00008000L +#define IH_INT_FLAGS__CLIENT_16_FLAG_MASK 0x00010000L +#define IH_INT_FLAGS__CLIENT_17_FLAG_MASK 0x00020000L +#define IH_INT_FLAGS__CLIENT_18_FLAG_MASK 0x00040000L +#define IH_INT_FLAGS__CLIENT_19_FLAG_MASK 0x00080000L +#define IH_INT_FLAGS__CLIENT_20_FLAG_MASK 0x00100000L +#define IH_INT_FLAGS__CLIENT_21_FLAG_MASK 0x00200000L +#define IH_INT_FLAGS__CLIENT_22_FLAG_MASK 0x00400000L +#define IH_INT_FLAGS__CLIENT_23_FLAG_MASK 0x00800000L 
+#define IH_INT_FLAGS__CLIENT_24_FLAG_MASK 0x01000000L +#define IH_INT_FLAGS__CLIENT_25_FLAG_MASK 0x02000000L +#define IH_INT_FLAGS__CLIENT_26_FLAG_MASK 0x04000000L +#define IH_INT_FLAGS__CLIENT_27_FLAG_MASK 0x08000000L +#define IH_INT_FLAGS__CLIENT_28_FLAG_MASK 0x10000000L +#define IH_INT_FLAGS__CLIENT_29_FLAG_MASK 0x20000000L +#define IH_INT_FLAGS__CLIENT_30_FLAG_MASK 0x40000000L +#define IH_INT_FLAGS__CLIENT_31_FLAG_MASK 0x80000000L +//IH_LAST_INT_INFO0 +#define IH_LAST_INT_INFO0__CLIENT_ID__SHIFT 0x0 +#define IH_LAST_INT_INFO0__SOURCE_ID__SHIFT 0x8 +#define IH_LAST_INT_INFO0__RING_ID__SHIFT 0x10 +#define IH_LAST_INT_INFO0__VM_ID__SHIFT 0x18 +#define IH_LAST_INT_INFO0__VMID_TYPE__SHIFT 0x1f +#define IH_LAST_INT_INFO0__CLIENT_ID_MASK 0x000000FFL +#define IH_LAST_INT_INFO0__SOURCE_ID_MASK 0x0000FF00L +#define IH_LAST_INT_INFO0__RING_ID_MASK 0x00FF0000L +#define IH_LAST_INT_INFO0__VM_ID_MASK 0x0F000000L +#define IH_LAST_INT_INFO0__VMID_TYPE_MASK 0x80000000L +//IH_LAST_INT_INFO1 +#define IH_LAST_INT_INFO1__CONTEXT_ID__SHIFT 0x0 +#define IH_LAST_INT_INFO1__CONTEXT_ID_MASK 0xFFFFFFFFL +//IH_LAST_INT_INFO2 +#define IH_LAST_INT_INFO2__PAS_ID__SHIFT 0x0 +#define IH_LAST_INT_INFO2__VF_ID__SHIFT 0x10 +#define IH_LAST_INT_INFO2__VF__SHIFT 0x14 +#define IH_LAST_INT_INFO2__PAS_ID_MASK 0x0000FFFFL +#define IH_LAST_INT_INFO2__VF_ID_MASK 0x000F0000L +#define IH_LAST_INT_INFO2__VF_MASK 0x00100000L +//IH_SCRATCH +#define IH_SCRATCH__DATA__SHIFT 0x0 +#define IH_SCRATCH__DATA_MASK 0xFFFFFFFFL +//IH_CLIENT_CREDIT_ERROR +#define IH_CLIENT_CREDIT_ERROR__CLEAR__SHIFT 0x0 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_1_ERROR__SHIFT 0x1 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_2_ERROR__SHIFT 0x2 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_3_ERROR__SHIFT 0x3 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_4_ERROR__SHIFT 0x4 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_5_ERROR__SHIFT 0x5 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_6_ERROR__SHIFT 0x6 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_7_ERROR__SHIFT 0x7 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_8_ERROR__SHIFT 0x8 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_9_ERROR__SHIFT 0x9 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_10_ERROR__SHIFT 0xa +#define IH_CLIENT_CREDIT_ERROR__CLIENT_11_ERROR__SHIFT 0xb +#define IH_CLIENT_CREDIT_ERROR__CLIENT_12_ERROR__SHIFT 0xc +#define IH_CLIENT_CREDIT_ERROR__CLIENT_13_ERROR__SHIFT 0xd +#define IH_CLIENT_CREDIT_ERROR__CLIENT_14_ERROR__SHIFT 0xe +#define IH_CLIENT_CREDIT_ERROR__CLIENT_15_ERROR__SHIFT 0xf +#define IH_CLIENT_CREDIT_ERROR__CLIENT_16_ERROR__SHIFT 0x10 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_17_ERROR__SHIFT 0x11 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_18_ERROR__SHIFT 0x12 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_19_ERROR__SHIFT 0x13 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_20_ERROR__SHIFT 0x14 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_21_ERROR__SHIFT 0x15 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_22_ERROR__SHIFT 0x16 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_23_ERROR__SHIFT 0x17 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_24_ERROR__SHIFT 0x18 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_25_ERROR__SHIFT 0x19 +#define IH_CLIENT_CREDIT_ERROR__CLIENT_26_ERROR__SHIFT 0x1a +#define IH_CLIENT_CREDIT_ERROR__CLIENT_27_ERROR__SHIFT 0x1b +#define IH_CLIENT_CREDIT_ERROR__CLIENT_28_ERROR__SHIFT 0x1c +#define IH_CLIENT_CREDIT_ERROR__CLIENT_29_ERROR__SHIFT 0x1d +#define IH_CLIENT_CREDIT_ERROR__CLIENT_30_ERROR__SHIFT 0x1e +#define IH_CLIENT_CREDIT_ERROR__CLIENT_31_ERROR__SHIFT 0x1f +#define IH_CLIENT_CREDIT_ERROR__CLEAR_MASK 0x00000001L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_1_ERROR_MASK 
0x00000002L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_2_ERROR_MASK 0x00000004L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_3_ERROR_MASK 0x00000008L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_4_ERROR_MASK 0x00000010L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_5_ERROR_MASK 0x00000020L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_6_ERROR_MASK 0x00000040L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_7_ERROR_MASK 0x00000080L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_8_ERROR_MASK 0x00000100L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_9_ERROR_MASK 0x00000200L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_10_ERROR_MASK 0x00000400L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_11_ERROR_MASK 0x00000800L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_12_ERROR_MASK 0x00001000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_13_ERROR_MASK 0x00002000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_14_ERROR_MASK 0x00004000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_15_ERROR_MASK 0x00008000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_16_ERROR_MASK 0x00010000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_17_ERROR_MASK 0x00020000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_18_ERROR_MASK 0x00040000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_19_ERROR_MASK 0x00080000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_20_ERROR_MASK 0x00100000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_21_ERROR_MASK 0x00200000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_22_ERROR_MASK 0x00400000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_23_ERROR_MASK 0x00800000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_24_ERROR_MASK 0x01000000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_25_ERROR_MASK 0x02000000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_26_ERROR_MASK 0x04000000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_27_ERROR_MASK 0x08000000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_28_ERROR_MASK 0x10000000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_29_ERROR_MASK 0x20000000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_30_ERROR_MASK 0x40000000L +#define IH_CLIENT_CREDIT_ERROR__CLIENT_31_ERROR_MASK 0x80000000L +//IH_GPU_IOV_VIOLATION_LOG +#define IH_GPU_IOV_VIOLATION_LOG__VIOLATION_STATUS__SHIFT 0x0 +#define IH_GPU_IOV_VIOLATION_LOG__MULTIPLE_VIOLATION_STATUS__SHIFT 0x1 +#define IH_GPU_IOV_VIOLATION_LOG__ADDRESS__SHIFT 0x2 +#define IH_GPU_IOV_VIOLATION_LOG__OPCODE__SHIFT 0x12 +#define IH_GPU_IOV_VIOLATION_LOG__VF__SHIFT 0x13 +#define IH_GPU_IOV_VIOLATION_LOG__VF_ID__SHIFT 0x14 +#define IH_GPU_IOV_VIOLATION_LOG__INITIATOR_ID__SHIFT 0x18 +#define IH_GPU_IOV_VIOLATION_LOG__VIOLATION_STATUS_MASK 0x00000001L +#define IH_GPU_IOV_VIOLATION_LOG__MULTIPLE_VIOLATION_STATUS_MASK 0x00000002L +#define IH_GPU_IOV_VIOLATION_LOG__ADDRESS_MASK 0x0003FFFCL +#define IH_GPU_IOV_VIOLATION_LOG__OPCODE_MASK 0x00040000L +#define IH_GPU_IOV_VIOLATION_LOG__VF_MASK 0x00080000L +#define IH_GPU_IOV_VIOLATION_LOG__VF_ID_MASK 0x00F00000L +#define IH_GPU_IOV_VIOLATION_LOG__INITIATOR_ID_MASK 0xFF000000L +//IH_COOKIE_REC_VIOLATION_LOG +#define IH_COOKIE_REC_VIOLATION_LOG__VIOLATION_STATUS__SHIFT 0x0 +#define IH_COOKIE_REC_VIOLATION_LOG__CLIENT_ID__SHIFT 0x10 +#define IH_COOKIE_REC_VIOLATION_LOG__INITIATOR_ID__SHIFT 0x18 +#define IH_COOKIE_REC_VIOLATION_LOG__VIOLATION_STATUS_MASK 0x00000001L +#define IH_COOKIE_REC_VIOLATION_LOG__CLIENT_ID_MASK 0x00FF0000L +#define IH_COOKIE_REC_VIOLATION_LOG__INITIATOR_ID_MASK 0xFF000000L +//IH_CREDIT_STATUS +#define IH_CREDIT_STATUS__CLIENT_1_CREDIT_RETURNED__SHIFT 0x1 +#define IH_CREDIT_STATUS__CLIENT_2_CREDIT_RETURNED__SHIFT 0x2 +#define IH_CREDIT_STATUS__CLIENT_3_CREDIT_RETURNED__SHIFT 0x3 +#define IH_CREDIT_STATUS__CLIENT_4_CREDIT_RETURNED__SHIFT 
0x4 +#define IH_CREDIT_STATUS__CLIENT_5_CREDIT_RETURNED__SHIFT 0x5 +#define IH_CREDIT_STATUS__CLIENT_6_CREDIT_RETURNED__SHIFT 0x6 +#define IH_CREDIT_STATUS__CLIENT_7_CREDIT_RETURNED__SHIFT 0x7 +#define IH_CREDIT_STATUS__CLIENT_8_CREDIT_RETURNED__SHIFT 0x8 +#define IH_CREDIT_STATUS__CLIENT_9_CREDIT_RETURNED__SHIFT 0x9 +#define IH_CREDIT_STATUS__CLIENT_10_CREDIT_RETURNED__SHIFT 0xa +#define IH_CREDIT_STATUS__CLIENT_11_CREDIT_RETURNED__SHIFT 0xb +#define IH_CREDIT_STATUS__CLIENT_12_CREDIT_RETURNED__SHIFT 0xc +#define IH_CREDIT_STATUS__CLIENT_13_CREDIT_RETURNED__SHIFT 0xd +#define IH_CREDIT_STATUS__CLIENT_14_CREDIT_RETURNED__SHIFT 0xe +#define IH_CREDIT_STATUS__CLIENT_15_CREDIT_RETURNED__SHIFT 0xf +#define IH_CREDIT_STATUS__CLIENT_16_CREDIT_RETURNED__SHIFT 0x10 +#define IH_CREDIT_STATUS__CLIENT_17_CREDIT_RETURNED__SHIFT 0x11 +#define IH_CREDIT_STATUS__CLIENT_18_CREDIT_RETURNED__SHIFT 0x12 +#define IH_CREDIT_STATUS__CLIENT_19_CREDIT_RETURNED__SHIFT 0x13 +#define IH_CREDIT_STATUS__CLIENT_20_CREDIT_RETURNED__SHIFT 0x14 +#define IH_CREDIT_STATUS__CLIENT_21_CREDIT_RETURNED__SHIFT 0x15 +#define IH_CREDIT_STATUS__CLIENT_22_CREDIT_RETURNED__SHIFT 0x16 +#define IH_CREDIT_STATUS__CLIENT_23_CREDIT_RETURNED__SHIFT 0x17 +#define IH_CREDIT_STATUS__CLIENT_24_CREDIT_RETURNED__SHIFT 0x18 +#define IH_CREDIT_STATUS__CLIENT_25_CREDIT_RETURNED__SHIFT 0x19 +#define IH_CREDIT_STATUS__CLIENT_26_CREDIT_RETURNED__SHIFT 0x1a +#define IH_CREDIT_STATUS__CLIENT_27_CREDIT_RETURNED__SHIFT 0x1b +#define IH_CREDIT_STATUS__CLIENT_28_CREDIT_RETURNED__SHIFT 0x1c +#define IH_CREDIT_STATUS__CLIENT_29_CREDIT_RETURNED__SHIFT 0x1d +#define IH_CREDIT_STATUS__CLIENT_30_CREDIT_RETURNED__SHIFT 0x1e +#define IH_CREDIT_STATUS__CLIENT_31_CREDIT_RETURNED__SHIFT 0x1f +#define IH_CREDIT_STATUS__CLIENT_1_CREDIT_RETURNED_MASK 0x00000002L +#define IH_CREDIT_STATUS__CLIENT_2_CREDIT_RETURNED_MASK 0x00000004L +#define IH_CREDIT_STATUS__CLIENT_3_CREDIT_RETURNED_MASK 0x00000008L +#define IH_CREDIT_STATUS__CLIENT_4_CREDIT_RETURNED_MASK 0x00000010L +#define IH_CREDIT_STATUS__CLIENT_5_CREDIT_RETURNED_MASK 0x00000020L +#define IH_CREDIT_STATUS__CLIENT_6_CREDIT_RETURNED_MASK 0x00000040L +#define IH_CREDIT_STATUS__CLIENT_7_CREDIT_RETURNED_MASK 0x00000080L +#define IH_CREDIT_STATUS__CLIENT_8_CREDIT_RETURNED_MASK 0x00000100L +#define IH_CREDIT_STATUS__CLIENT_9_CREDIT_RETURNED_MASK 0x00000200L +#define IH_CREDIT_STATUS__CLIENT_10_CREDIT_RETURNED_MASK 0x00000400L +#define IH_CREDIT_STATUS__CLIENT_11_CREDIT_RETURNED_MASK 0x00000800L +#define IH_CREDIT_STATUS__CLIENT_12_CREDIT_RETURNED_MASK 0x00001000L +#define IH_CREDIT_STATUS__CLIENT_13_CREDIT_RETURNED_MASK 0x00002000L +#define IH_CREDIT_STATUS__CLIENT_14_CREDIT_RETURNED_MASK 0x00004000L +#define IH_CREDIT_STATUS__CLIENT_15_CREDIT_RETURNED_MASK 0x00008000L +#define IH_CREDIT_STATUS__CLIENT_16_CREDIT_RETURNED_MASK 0x00010000L +#define IH_CREDIT_STATUS__CLIENT_17_CREDIT_RETURNED_MASK 0x00020000L +#define IH_CREDIT_STATUS__CLIENT_18_CREDIT_RETURNED_MASK 0x00040000L +#define IH_CREDIT_STATUS__CLIENT_19_CREDIT_RETURNED_MASK 0x00080000L +#define IH_CREDIT_STATUS__CLIENT_20_CREDIT_RETURNED_MASK 0x00100000L +#define IH_CREDIT_STATUS__CLIENT_21_CREDIT_RETURNED_MASK 0x00200000L +#define IH_CREDIT_STATUS__CLIENT_22_CREDIT_RETURNED_MASK 0x00400000L +#define IH_CREDIT_STATUS__CLIENT_23_CREDIT_RETURNED_MASK 0x00800000L +#define IH_CREDIT_STATUS__CLIENT_24_CREDIT_RETURNED_MASK 0x01000000L +#define IH_CREDIT_STATUS__CLIENT_25_CREDIT_RETURNED_MASK 0x02000000L +#define IH_CREDIT_STATUS__CLIENT_26_CREDIT_RETURNED_MASK 0x04000000L 
+#define IH_CREDIT_STATUS__CLIENT_27_CREDIT_RETURNED_MASK 0x08000000L +#define IH_CREDIT_STATUS__CLIENT_28_CREDIT_RETURNED_MASK 0x10000000L +#define IH_CREDIT_STATUS__CLIENT_29_CREDIT_RETURNED_MASK 0x20000000L +#define IH_CREDIT_STATUS__CLIENT_30_CREDIT_RETURNED_MASK 0x40000000L +#define IH_CREDIT_STATUS__CLIENT_31_CREDIT_RETURNED_MASK 0x80000000L +//IH_MMHUB_ERROR +#define IH_MMHUB_ERROR__IH_BRESP_01__SHIFT 0x1 +#define IH_MMHUB_ERROR__IH_BRESP_10__SHIFT 0x2 +#define IH_MMHUB_ERROR__IH_BRESP_11__SHIFT 0x3 +#define IH_MMHUB_ERROR__IH_BUSER_NACK_01__SHIFT 0x5 +#define IH_MMHUB_ERROR__IH_BUSER_NACK_10__SHIFT 0x6 +#define IH_MMHUB_ERROR__IH_BUSER_NACK_11__SHIFT 0x7 +#define IH_MMHUB_ERROR__IH_BRESP_01_MASK 0x00000002L +#define IH_MMHUB_ERROR__IH_BRESP_10_MASK 0x00000004L +#define IH_MMHUB_ERROR__IH_BRESP_11_MASK 0x00000008L +#define IH_MMHUB_ERROR__IH_BUSER_NACK_01_MASK 0x00000020L +#define IH_MMHUB_ERROR__IH_BUSER_NACK_10_MASK 0x00000040L +#define IH_MMHUB_ERROR__IH_BUSER_NACK_11_MASK 0x00000080L +//IH_MEM_POWER_CTRL +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_CTRL_EN__SHIFT 0x0 +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_LS_EN__SHIFT 0x1 +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_DS_EN__SHIFT 0x2 +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_SD_EN__SHIFT 0x3 +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_IDLE_HYSTERESIS__SHIFT 0x4 +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_UP_RECOVER_DELAY__SHIFT 0x8 +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_DOWN_LS_ENTER_DELAY__SHIFT 0xe +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_CTRL_EN_MASK 0x00000001L +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_LS_EN_MASK 0x00000002L +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_DS_EN_MASK 0x00000004L +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_SD_EN_MASK 0x00000008L +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_IDLE_HYSTERESIS_MASK 0x00000070L +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_UP_RECOVER_DELAY_MASK 0x00003F00L +#define IH_MEM_POWER_CTRL__IH_BUFFER_MEM_POWER_DOWN_LS_ENTER_DELAY_MASK 0x0000C000L +//IH_REGISTER_LAST_PART2 +#define IH_REGISTER_LAST_PART2__RESERVED__SHIFT 0x0 +#define IH_REGISTER_LAST_PART2__RESERVED_MASK 0xFFFFFFFFL +//SEM_CLK_CTRL +#define SEM_CLK_CTRL__ON_DELAY__SHIFT 0x0 +#define SEM_CLK_CTRL__OFF_HYSTERESIS__SHIFT 0x4 +#define SEM_CLK_CTRL__RESERVED__SHIFT 0xc +#define SEM_CLK_CTRL__SOFT_OVERRIDE7__SHIFT 0x18 +#define SEM_CLK_CTRL__SOFT_OVERRIDE6__SHIFT 0x19 +#define SEM_CLK_CTRL__MEM_CLK_SOFT_OVERRIDE__SHIFT 0x1a +#define SEM_CLK_CTRL__SOFT_OVERRIDE4__SHIFT 0x1b +#define SEM_CLK_CTRL__SOFT_OVERRIDE3__SHIFT 0x1c +#define SEM_CLK_CTRL__SOFT_OVERRIDE2__SHIFT 0x1d +#define SEM_CLK_CTRL__DYN_CLK_SOFT_OVERRIDE__SHIFT 0x1e +#define SEM_CLK_CTRL__REG_CLK_SOFT_OVERRIDE__SHIFT 0x1f +#define SEM_CLK_CTRL__ON_DELAY_MASK 0x0000000FL +#define SEM_CLK_CTRL__OFF_HYSTERESIS_MASK 0x00000FF0L +#define SEM_CLK_CTRL__RESERVED_MASK 0x00FFF000L +#define SEM_CLK_CTRL__SOFT_OVERRIDE7_MASK 0x01000000L +#define SEM_CLK_CTRL__SOFT_OVERRIDE6_MASK 0x02000000L +#define SEM_CLK_CTRL__MEM_CLK_SOFT_OVERRIDE_MASK 0x04000000L +#define SEM_CLK_CTRL__SOFT_OVERRIDE4_MASK 0x08000000L +#define SEM_CLK_CTRL__SOFT_OVERRIDE3_MASK 0x10000000L +#define SEM_CLK_CTRL__SOFT_OVERRIDE2_MASK 0x20000000L +#define SEM_CLK_CTRL__DYN_CLK_SOFT_OVERRIDE_MASK 0x40000000L +#define SEM_CLK_CTRL__REG_CLK_SOFT_OVERRIDE_MASK 0x80000000L +//SEM_UTC_CREDIT +#define SEM_UTC_CREDIT__UTCL2_CREDIT__SHIFT 0x0 +#define SEM_UTC_CREDIT__WATERMARK__SHIFT 0x8 +#define SEM_UTC_CREDIT__UTCL2_CREDIT_MASK 0x0000001FL +#define 
SEM_UTC_CREDIT__WATERMARK_MASK 0x00000F00L +//SEM_UTC_CONFIG +#define SEM_UTC_CONFIG__USE_MTYPE__SHIFT 0x0 +#define SEM_UTC_CONFIG__FORCE_SNOOP__SHIFT 0x3 +#define SEM_UTC_CONFIG__FORCE_GCC__SHIFT 0x4 +#define SEM_UTC_CONFIG__USE_PT_SNOOP__SHIFT 0x5 +#define SEM_UTC_CONFIG__USE_MTYPE_MASK 0x00000007L +#define SEM_UTC_CONFIG__FORCE_SNOOP_MASK 0x00000008L +#define SEM_UTC_CONFIG__FORCE_GCC_MASK 0x00000010L +#define SEM_UTC_CONFIG__USE_PT_SNOOP_MASK 0x00000020L +//SEM_UTCL2_TRAN_EN_LUT +#define SEM_UTCL2_TRAN_EN_LUT__SDMA0_UTCL2_EN__SHIFT 0x0 +#define SEM_UTCL2_TRAN_EN_LUT__SDMA1_UTCL2_EN__SHIFT 0x1 +#define SEM_UTCL2_TRAN_EN_LUT__UVD_UTCL2_EN__SHIFT 0x2 +#define SEM_UTCL2_TRAN_EN_LUT__VCE0_UTCL2_EN__SHIFT 0x3 +#define SEM_UTCL2_TRAN_EN_LUT__ACP_UTCL2_EN__SHIFT 0x4 +#define SEM_UTCL2_TRAN_EN_LUT__ISP_UTCL2_EN__SHIFT 0x5 +#define SEM_UTCL2_TRAN_EN_LUT__VCE1_UTCL2_EN__SHIFT 0x6 +#define SEM_UTCL2_TRAN_EN_LUT__VP8_UTCL2_EN__SHIFT 0x7 +#define SEM_UTCL2_TRAN_EN_LUT__UVD1_UTCL2_EN__SHIFT 0x8 +#define SEM_UTCL2_TRAN_EN_LUT__RESERVED__SHIFT 0x9 +#define SEM_UTCL2_TRAN_EN_LUT__CP_UTCL2_EN__SHIFT 0x1f +#define SEM_UTCL2_TRAN_EN_LUT__SDMA0_UTCL2_EN_MASK 0x00000001L +#define SEM_UTCL2_TRAN_EN_LUT__SDMA1_UTCL2_EN_MASK 0x00000002L +#define SEM_UTCL2_TRAN_EN_LUT__UVD_UTCL2_EN_MASK 0x00000004L +#define SEM_UTCL2_TRAN_EN_LUT__VCE0_UTCL2_EN_MASK 0x00000008L +#define SEM_UTCL2_TRAN_EN_LUT__ACP_UTCL2_EN_MASK 0x00000010L +#define SEM_UTCL2_TRAN_EN_LUT__ISP_UTCL2_EN_MASK 0x00000020L +#define SEM_UTCL2_TRAN_EN_LUT__VCE1_UTCL2_EN_MASK 0x00000040L +#define SEM_UTCL2_TRAN_EN_LUT__VP8_UTCL2_EN_MASK 0x00000080L +#define SEM_UTCL2_TRAN_EN_LUT__UVD1_UTCL2_EN_MASK 0x00000100L +#define SEM_UTCL2_TRAN_EN_LUT__RESERVED_MASK 0x7FFFFE00L +#define SEM_UTCL2_TRAN_EN_LUT__CP_UTCL2_EN_MASK 0x80000000L +//SEM_MCIF_CONFIG +#define SEM_MCIF_CONFIG__MC_REQ_SWAP__SHIFT 0x0 +#define SEM_MCIF_CONFIG__MC_WRREQ_CREDIT__SHIFT 0x2 +#define SEM_MCIF_CONFIG__MC_RDREQ_CREDIT__SHIFT 0x8 +#define SEM_MCIF_CONFIG__MC_REQ_SWAP_MASK 0x00000003L +#define SEM_MCIF_CONFIG__MC_WRREQ_CREDIT_MASK 0x000000FCL +#define SEM_MCIF_CONFIG__MC_RDREQ_CREDIT_MASK 0x00003F00L +//SEM_PERFMON_CNTL +#define SEM_PERFMON_CNTL__PERF_ENABLE0__SHIFT 0x0 +#define SEM_PERFMON_CNTL__PERF_CLEAR0__SHIFT 0x1 +#define SEM_PERFMON_CNTL__PERF_SEL0__SHIFT 0x2 +#define SEM_PERFMON_CNTL__PERF_ENABLE1__SHIFT 0xa +#define SEM_PERFMON_CNTL__PERF_CLEAR1__SHIFT 0xb +#define SEM_PERFMON_CNTL__PERF_SEL1__SHIFT 0xc +#define SEM_PERFMON_CNTL__PERF_ENABLE0_MASK 0x00000001L +#define SEM_PERFMON_CNTL__PERF_CLEAR0_MASK 0x00000002L +#define SEM_PERFMON_CNTL__PERF_SEL0_MASK 0x000003FCL +#define SEM_PERFMON_CNTL__PERF_ENABLE1_MASK 0x00000400L +#define SEM_PERFMON_CNTL__PERF_CLEAR1_MASK 0x00000800L +#define SEM_PERFMON_CNTL__PERF_SEL1_MASK 0x000FF000L +//SEM_PERFCOUNTER0_RESULT +#define SEM_PERFCOUNTER0_RESULT__PERF_COUNT__SHIFT 0x0 +#define SEM_PERFCOUNTER0_RESULT__PERF_COUNT_MASK 0xFFFFFFFFL +//SEM_PERFCOUNTER1_RESULT +#define SEM_PERFCOUNTER1_RESULT__PERF_COUNT__SHIFT 0x0 +#define SEM_PERFCOUNTER1_RESULT__PERF_COUNT_MASK 0xFFFFFFFFL +//SEM_STATUS +#define SEM_STATUS__SEM_IDLE__SHIFT 0x0 +#define SEM_STATUS__SEM_INTERNAL_IDLE__SHIFT 0x1 +#define SEM_STATUS__MC_RDREQ_FIFO_FULL__SHIFT 0x2 +#define SEM_STATUS__MC_WRREQ_FIFO_FULL__SHIFT 0x3 +#define SEM_STATUS__WRITE1_FIFO_FULL__SHIFT 0x4 +#define SEM_STATUS__CHECK0_FIFO_FULL__SHIFT 0x5 +#define SEM_STATUS__MC_RDREQ_PENDING__SHIFT 0x6 +#define SEM_STATUS__MC_WRREQ_PENDING__SHIFT 0x7 +#define SEM_STATUS__SDMA0_MAILBOX_PENDING__SHIFT 0x8 +#define 
SEM_STATUS__SDMA1_MAILBOX_PENDING__SHIFT 0x9 +#define SEM_STATUS__UVD_MAILBOX_PENDING__SHIFT 0xa +#define SEM_STATUS__VCE_MAILBOX_PENDING__SHIFT 0xb +#define SEM_STATUS__CPG1_MAILBOX_PENDING__SHIFT 0xc +#define SEM_STATUS__CPG2_MAILBOX_PENDING__SHIFT 0xd +#define SEM_STATUS__VCE1_MAILBOX_PENDING__SHIFT 0xe +#define SEM_STATUS__ATC_REQ_PENDING__SHIFT 0xf +#define SEM_STATUS__OUTSTANDING_CLEAN__SHIFT 0x10 +#define SEM_STATUS__INVREQ_FLUSH_VF_MISMATCH__SHIFT 0x11 +#define SEM_STATUS__INVREQ_NONFLUSH_VF_MISMATCH__SHIFT 0x12 +#define SEM_STATUS__INVREQ_CNT_IDLE__SHIFT 0x13 +#define SEM_STATUS__ENTRYLIST_IDLE__SHIFT 0x14 +#define SEM_STATUS__MIF_IDLE__SHIFT 0x15 +#define SEM_STATUS__REGISTER_IDLE__SHIFT 0x16 +#define SEM_STATUS__ATCL2_INVREQ_IDLE__SHIFT 0x17 +#define SEM_STATUS__UVD1_MAILBOX_PENDING__SHIFT 0x18 +#define SEM_STATUS__SWITCH_READY__SHIFT 0x1f +#define SEM_STATUS__SEM_IDLE_MASK 0x00000001L +#define SEM_STATUS__SEM_INTERNAL_IDLE_MASK 0x00000002L +#define SEM_STATUS__MC_RDREQ_FIFO_FULL_MASK 0x00000004L +#define SEM_STATUS__MC_WRREQ_FIFO_FULL_MASK 0x00000008L +#define SEM_STATUS__WRITE1_FIFO_FULL_MASK 0x00000010L +#define SEM_STATUS__CHECK0_FIFO_FULL_MASK 0x00000020L +#define SEM_STATUS__MC_RDREQ_PENDING_MASK 0x00000040L +#define SEM_STATUS__MC_WRREQ_PENDING_MASK 0x00000080L +#define SEM_STATUS__SDMA0_MAILBOX_PENDING_MASK 0x00000100L +#define SEM_STATUS__SDMA1_MAILBOX_PENDING_MASK 0x00000200L +#define SEM_STATUS__UVD_MAILBOX_PENDING_MASK 0x00000400L +#define SEM_STATUS__VCE_MAILBOX_PENDING_MASK 0x00000800L +#define SEM_STATUS__CPG1_MAILBOX_PENDING_MASK 0x00001000L +#define SEM_STATUS__CPG2_MAILBOX_PENDING_MASK 0x00002000L +#define SEM_STATUS__VCE1_MAILBOX_PENDING_MASK 0x00004000L +#define SEM_STATUS__ATC_REQ_PENDING_MASK 0x00008000L +#define SEM_STATUS__OUTSTANDING_CLEAN_MASK 0x00010000L +#define SEM_STATUS__INVREQ_FLUSH_VF_MISMATCH_MASK 0x00020000L +#define SEM_STATUS__INVREQ_NONFLUSH_VF_MISMATCH_MASK 0x00040000L +#define SEM_STATUS__INVREQ_CNT_IDLE_MASK 0x00080000L +#define SEM_STATUS__ENTRYLIST_IDLE_MASK 0x00100000L +#define SEM_STATUS__MIF_IDLE_MASK 0x00200000L +#define SEM_STATUS__REGISTER_IDLE_MASK 0x00400000L +#define SEM_STATUS__ATCL2_INVREQ_IDLE_MASK 0x00800000L +#define SEM_STATUS__UVD1_MAILBOX_PENDING_MASK 0x01000000L +#define SEM_STATUS__SWITCH_READY_MASK 0x80000000L +//SEM_MAILBOX_CLIENTCONFIG +#define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT0__SHIFT 0x0 +#define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT1__SHIFT 0x3 +#define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT2__SHIFT 0x6 +#define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT3__SHIFT 0x9 +#define SEM_MAILBOX_CLIENTCONFIG__SDMA_CLIENT0__SHIFT 0xc +#define SEM_MAILBOX_CLIENTCONFIG__UVD_CLIENT0__SHIFT 0xf +#define SEM_MAILBOX_CLIENTCONFIG__SDMA1_CLIENT0__SHIFT 0x12 +#define SEM_MAILBOX_CLIENTCONFIG__VCE_CLIENT0__SHIFT 0x15 +#define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT0_MASK 0x00000007L +#define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT1_MASK 0x00000038L +#define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT2_MASK 0x000001C0L +#define SEM_MAILBOX_CLIENTCONFIG__CP_CLIENT3_MASK 0x00000E00L +#define SEM_MAILBOX_CLIENTCONFIG__SDMA_CLIENT0_MASK 0x00007000L +#define SEM_MAILBOX_CLIENTCONFIG__UVD_CLIENT0_MASK 0x00038000L +#define SEM_MAILBOX_CLIENTCONFIG__SDMA1_CLIENT0_MASK 0x001C0000L +#define SEM_MAILBOX_CLIENTCONFIG__VCE_CLIENT0_MASK 0x00E00000L +//SEM_MAILBOX +#define SEM_MAILBOX__HOSTPORT__SHIFT 0x0 +#define SEM_MAILBOX__RESERVED__SHIFT 0x10 +#define SEM_MAILBOX__HOSTPORT_MASK 0x0000FFFFL +#define SEM_MAILBOX__RESERVED_MASK 0xFFFF0000L +//SEM_MAILBOX_CONTROL 
+#define SEM_MAILBOX_CONTROL__HOSTPORT_ENABLE__SHIFT 0x0 +#define SEM_MAILBOX_CONTROL__RESERVED__SHIFT 0x10 +#define SEM_MAILBOX_CONTROL__HOSTPORT_ENABLE_MASK 0x0000FFFFL +#define SEM_MAILBOX_CONTROL__RESERVED_MASK 0xFFFF0000L +//SEM_CHICKEN_BITS +#define SEM_CHICKEN_BITS__VMID_PIPELINE_EN__SHIFT 0x0 +#define SEM_CHICKEN_BITS__ENTRY_PIPELINE_EN__SHIFT 0x1 +#define SEM_CHICKEN_BITS__CHECK_COUNTER_EN__SHIFT 0x2 +#define SEM_CHICKEN_BITS__ECC_BEHAVIOR__SHIFT 0x3 +#define SEM_CHICKEN_BITS__PHY_TRAN_EN__SHIFT 0x6 +#define SEM_CHICKEN_BITS__ADDR_CMP_UNTRAN_EN__SHIFT 0x7 +#define SEM_CHICKEN_BITS__IDLE_COUNTER_INDEX__SHIFT 0x8 +#define SEM_CHICKEN_BITS__OUTSTANDING_CLEAN_COUNTER_INDEX__SHIFT 0xa +#define SEM_CHICKEN_BITS__ATCL2_BUS_ID__SHIFT 0xc +#define SEM_CHICKEN_BITS__ATOMIC_EN__SHIFT 0xe +#define SEM_CHICKEN_BITS__EXTERNAL_ATOMIC_CHECK__SHIFT 0xf +#define SEM_CHICKEN_BITS__CLEAR_MAILBOX__SHIFT 0x10 +#define SEM_CHICKEN_BITS__INVACK_AFTER_OUTSTANDING_CLEAN__SHIFT 0x12 +#define SEM_CHICKEN_BITS__UTC_TAG_CONFLICT_CHECK__SHIFT 0x13 +#define SEM_CHICKEN_BITS__VMID_PIPELINE_EN_MASK 0x00000001L +#define SEM_CHICKEN_BITS__ENTRY_PIPELINE_EN_MASK 0x00000002L +#define SEM_CHICKEN_BITS__CHECK_COUNTER_EN_MASK 0x00000004L +#define SEM_CHICKEN_BITS__ECC_BEHAVIOR_MASK 0x00000018L +#define SEM_CHICKEN_BITS__PHY_TRAN_EN_MASK 0x00000040L +#define SEM_CHICKEN_BITS__ADDR_CMP_UNTRAN_EN_MASK 0x00000080L +#define SEM_CHICKEN_BITS__IDLE_COUNTER_INDEX_MASK 0x00000300L +#define SEM_CHICKEN_BITS__OUTSTANDING_CLEAN_COUNTER_INDEX_MASK 0x00000C00L +#define SEM_CHICKEN_BITS__ATCL2_BUS_ID_MASK 0x00003000L +#define SEM_CHICKEN_BITS__ATOMIC_EN_MASK 0x00004000L +#define SEM_CHICKEN_BITS__EXTERNAL_ATOMIC_CHECK_MASK 0x00008000L +#define SEM_CHICKEN_BITS__CLEAR_MAILBOX_MASK 0x00030000L +#define SEM_CHICKEN_BITS__INVACK_AFTER_OUTSTANDING_CLEAN_MASK 0x00040000L +#define SEM_CHICKEN_BITS__UTC_TAG_CONFLICT_CHECK_MASK 0x00080000L +//SEM_MAILBOX_CLIENTCONFIG_EXTRA +#define SEM_MAILBOX_CLIENTCONFIG_EXTRA__VCE1_CLIENT0__SHIFT 0x0 +#define SEM_MAILBOX_CLIENTCONFIG_EXTRA__UVD1_CLIENT0__SHIFT 0x4 +#define SEM_MAILBOX_CLIENTCONFIG_EXTRA__VCE1_CLIENT0_MASK 0x0000000FL +#define SEM_MAILBOX_CLIENTCONFIG_EXTRA__UVD1_CLIENT0_MASK 0x000000F0L +//SEM_GPU_IOV_VIOLATION_LOG +#define SEM_GPU_IOV_VIOLATION_LOG__VIOLATION_STATUS__SHIFT 0x0 +#define SEM_GPU_IOV_VIOLATION_LOG__MULTIPLE_VIOLATION_STATUS__SHIFT 0x1 +#define SEM_GPU_IOV_VIOLATION_LOG__ADDRESS__SHIFT 0x2 +#define SEM_GPU_IOV_VIOLATION_LOG__OPCODE__SHIFT 0x12 +#define SEM_GPU_IOV_VIOLATION_LOG__VF__SHIFT 0x13 +#define SEM_GPU_IOV_VIOLATION_LOG__VF_ID__SHIFT 0x14 +#define SEM_GPU_IOV_VIOLATION_LOG__INITIATOR_ID__SHIFT 0x18 +#define SEM_GPU_IOV_VIOLATION_LOG__VIOLATION_STATUS_MASK 0x00000001L +#define SEM_GPU_IOV_VIOLATION_LOG__MULTIPLE_VIOLATION_STATUS_MASK 0x00000002L +#define SEM_GPU_IOV_VIOLATION_LOG__ADDRESS_MASK 0x0003FFFCL +#define SEM_GPU_IOV_VIOLATION_LOG__OPCODE_MASK 0x00040000L +#define SEM_GPU_IOV_VIOLATION_LOG__VF_MASK 0x00080000L +#define SEM_GPU_IOV_VIOLATION_LOG__VF_ID_MASK 0x00F00000L +#define SEM_GPU_IOV_VIOLATION_LOG__INITIATOR_ID_MASK 0xFF000000L +//SEM_OUTSTANDING_THRESHOLD +#define SEM_OUTSTANDING_THRESHOLD__VALUE__SHIFT 0x0 +#define SEM_OUTSTANDING_THRESHOLD__VALUE_MASK 0x000000FFL +//SEM_MEM_POWER_CTRL +#define SEM_MEM_POWER_CTRL__MEM_POWER_CTRL_EN__SHIFT 0x0 +#define SEM_MEM_POWER_CTRL__MEM_POWER_LS_EN__SHIFT 0x1 +#define SEM_MEM_POWER_CTRL__MEM_POWER_DS_EN__SHIFT 0x2 +#define SEM_MEM_POWER_CTRL__MEM_POWER_SD_EN__SHIFT 0x3 +#define 
SEM_MEM_POWER_CTRL__MEM_IDLE_HYSTERESIS__SHIFT 0x4 +#define SEM_MEM_POWER_CTRL__MEM_POWER_UP_RECOVER_DELAY__SHIFT 0x8 +#define SEM_MEM_POWER_CTRL__MEM_POWER_DOWN_LS_ENTER_DELAY__SHIFT 0xe +#define SEM_MEM_POWER_CTRL__MEM_POWER_CTRL_EN_MASK 0x00000001L +#define SEM_MEM_POWER_CTRL__MEM_POWER_LS_EN_MASK 0x00000002L +#define SEM_MEM_POWER_CTRL__MEM_POWER_DS_EN_MASK 0x00000004L +#define SEM_MEM_POWER_CTRL__MEM_POWER_SD_EN_MASK 0x00000008L +#define SEM_MEM_POWER_CTRL__MEM_IDLE_HYSTERESIS_MASK 0x00000070L +#define SEM_MEM_POWER_CTRL__MEM_POWER_UP_RECOVER_DELAY_MASK 0x00003F00L +#define SEM_MEM_POWER_CTRL__MEM_POWER_DOWN_LS_ENTER_DELAY_MASK 0x0000C000L +//SEM_REGISTER_LAST_PART2 +#define SEM_REGISTER_LAST_PART2__RESERVED__SHIFT 0x0 +#define SEM_REGISTER_LAST_PART2__RESERVED_MASK 0xFFFFFFFFL +//IH_ACTIVE_FCN_ID +#define IH_ACTIVE_FCN_ID__VF_ID__SHIFT 0x0 +#define IH_ACTIVE_FCN_ID__RESERVED__SHIFT 0x4 +#define IH_ACTIVE_FCN_ID__PF_VF__SHIFT 0x1f +#define IH_ACTIVE_FCN_ID__VF_ID_MASK 0x0000000FL +#define IH_ACTIVE_FCN_ID__RESERVED_MASK 0x7FFFFFF0L +#define IH_ACTIVE_FCN_ID__PF_VF_MASK 0x80000000L +//IH_VIRT_RESET_REQ +#define IH_VIRT_RESET_REQ__VF__SHIFT 0x0 +#define IH_VIRT_RESET_REQ__PF__SHIFT 0x1f +#define IH_VIRT_RESET_REQ__VF_MASK 0x0000FFFFL +#define IH_VIRT_RESET_REQ__PF_MASK 0x80000000L +//IH_CLIENT_CFG +#define IH_CLIENT_CFG__TOTAL_CLIENT_NUM__SHIFT 0x0 +#define IH_CLIENT_CFG__TOTAL_CLIENT_NUM_MASK 0x0000001FL +//IH_CLIENT_CFG_INDEX +#define IH_CLIENT_CFG_INDEX__INDEX__SHIFT 0x0 +#define IH_CLIENT_CFG_INDEX__INDEX_MASK 0x0000001FL +//IH_CLIENT_CFG_DATA +#define IH_CLIENT_CFG_DATA__CREDIT_RETURN_ADDR__SHIFT 0x0 +#define IH_CLIENT_CFG_DATA__CLIENT_TYPE__SHIFT 0x12 +#define IH_CLIENT_CFG_DATA__RING_ID__SHIFT 0x14 +#define IH_CLIENT_CFG_DATA__VF_RB_SELECT__SHIFT 0x16 +#define IH_CLIENT_CFG_DATA__OVERWRITE_RING_ID_WITH_ACTIVE_FCN_ID__SHIFT 0x18 +#define IH_CLIENT_CFG_DATA__CREDIT_RETURN_ADDR_MASK 0x0003FFFFL +#define IH_CLIENT_CFG_DATA__CLIENT_TYPE_MASK 0x000C0000L +#define IH_CLIENT_CFG_DATA__RING_ID_MASK 0x00300000L +#define IH_CLIENT_CFG_DATA__VF_RB_SELECT_MASK 0x00C00000L +#define IH_CLIENT_CFG_DATA__OVERWRITE_RING_ID_WITH_ACTIVE_FCN_ID_MASK 0x01000000L +//IH_CID_REMAP_INDEX +#define IH_CID_REMAP_INDEX__INDEX__SHIFT 0x0 +#define IH_CID_REMAP_INDEX__INDEX_MASK 0x00000003L +//IH_CID_REMAP_DATA +#define IH_CID_REMAP_DATA__CLIENT_ID__SHIFT 0x0 +#define IH_CID_REMAP_DATA__INITIATOR_ID__SHIFT 0x8 +#define IH_CID_REMAP_DATA__CLIENT_ID_REMAP__SHIFT 0x10 +#define IH_CID_REMAP_DATA__CLIENT_ID_MASK 0x000000FFL +#define IH_CID_REMAP_DATA__INITIATOR_ID_MASK 0x0000FF00L +#define IH_CID_REMAP_DATA__CLIENT_ID_REMAP_MASK 0x00FF0000L +//IH_CHICKEN +#define IH_CHICKEN__ACTIVE_FCN_ID_PROT_ENABLE__SHIFT 0x0 +#define IH_CHICKEN__MC_SPACE_FBPA_ENABLE__SHIFT 0x3 +#define IH_CHICKEN__MC_SPACE_GPA_ENABLE__SHIFT 0x4 +#define IH_CHICKEN__ACTIVE_FCN_ID_PROT_ENABLE_MASK 0x00000001L +#define IH_CHICKEN__MC_SPACE_FBPA_ENABLE_MASK 0x00000008L +#define IH_CHICKEN__MC_SPACE_GPA_ENABLE_MASK 0x00000010L +//IH_MMHUB_CNTL +#define IH_MMHUB_CNTL__UNITID__SHIFT 0x0 +#define IH_MMHUB_CNTL__IV_TLVL__SHIFT 0x8 +#define IH_MMHUB_CNTL__WPTR_WB_TLVL__SHIFT 0xc +#define IH_MMHUB_CNTL__UNITID_MASK 0x0000003FL +#define IH_MMHUB_CNTL__IV_TLVL_MASK 0x00000700L +#define IH_MMHUB_CNTL__WPTR_WB_TLVL_MASK 0x00007000L +//IH_INT_DROP_CNTL +#define IH_INT_DROP_CNTL__INT_DROP_EN__SHIFT 0x0 +#define IH_INT_DROP_CNTL__CLIENT_ID_MATCH_EN__SHIFT 0x1 +#define IH_INT_DROP_CNTL__SOURCE_ID_MATCH_EN__SHIFT 0x2 +#define IH_INT_DROP_CNTL__VF_ID_MATCH_EN__SHIFT 
0x3 +#define IH_INT_DROP_CNTL__VF_MATCH_EN__SHIFT 0x4 +#define IH_INT_DROP_CNTL__CONTEXT_ID_MATCH_EN__SHIFT 0x5 +#define IH_INT_DROP_CNTL__INT_DROP_MODE__SHIFT 0x6 +#define IH_INT_DROP_CNTL__UTCL2_RETRY_INT_DROP_EN__SHIFT 0x8 +#define IH_INT_DROP_CNTL__INT_DROPPED__SHIFT 0x10 +#define IH_INT_DROP_CNTL__INT_DROP_EN_MASK 0x00000001L +#define IH_INT_DROP_CNTL__CLIENT_ID_MATCH_EN_MASK 0x00000002L +#define IH_INT_DROP_CNTL__SOURCE_ID_MATCH_EN_MASK 0x00000004L +#define IH_INT_DROP_CNTL__VF_ID_MATCH_EN_MASK 0x00000008L +#define IH_INT_DROP_CNTL__VF_MATCH_EN_MASK 0x00000010L +#define IH_INT_DROP_CNTL__CONTEXT_ID_MATCH_EN_MASK 0x00000020L +#define IH_INT_DROP_CNTL__INT_DROP_MODE_MASK 0x000000C0L +#define IH_INT_DROP_CNTL__UTCL2_RETRY_INT_DROP_EN_MASK 0x00000100L +#define IH_INT_DROP_CNTL__INT_DROPPED_MASK 0x00010000L +//IH_INT_DROP_MATCH_VALUE0 +#define IH_INT_DROP_MATCH_VALUE0__CLIENT_ID_MATCH_VALUE__SHIFT 0x0 +#define IH_INT_DROP_MATCH_VALUE0__SOURCE_ID_MATCH_VALUE__SHIFT 0x8 +#define IH_INT_DROP_MATCH_VALUE0__VF_ID_MATCH_VALUE__SHIFT 0x10 +#define IH_INT_DROP_MATCH_VALUE0__VF_MATCH_VALUE__SHIFT 0x17 +#define IH_INT_DROP_MATCH_VALUE0__CONTEXT_ID_39_32_MATCH_VALUE__SHIFT 0x18 +#define IH_INT_DROP_MATCH_VALUE0__CLIENT_ID_MATCH_VALUE_MASK 0x000000FFL +#define IH_INT_DROP_MATCH_VALUE0__SOURCE_ID_MATCH_VALUE_MASK 0x0000FF00L +#define IH_INT_DROP_MATCH_VALUE0__VF_ID_MATCH_VALUE_MASK 0x000F0000L +#define IH_INT_DROP_MATCH_VALUE0__VF_MATCH_VALUE_MASK 0x00800000L +#define IH_INT_DROP_MATCH_VALUE0__CONTEXT_ID_39_32_MATCH_VALUE_MASK 0xFF000000L +//IH_INT_DROP_MATCH_VALUE1 +#define IH_INT_DROP_MATCH_VALUE1__CONTEXT_ID_31_0_MATCH_VALUE__SHIFT 0x0 +#define IH_INT_DROP_MATCH_VALUE1__CONTEXT_ID_31_0_MATCH_VALUE_MASK 0xFFFFFFFFL +//IH_INT_DROP_MATCH_MASK0 +#define IH_INT_DROP_MATCH_MASK0__CLIENT_ID_MATCH_MASK__SHIFT 0x0 +#define IH_INT_DROP_MATCH_MASK0__SOURCE_ID_MATCH_MASK__SHIFT 0x8 +#define IH_INT_DROP_MATCH_MASK0__VF_ID_MATCH_MASK__SHIFT 0x10 +#define IH_INT_DROP_MATCH_MASK0__VF_MATCH_MASK__SHIFT 0x17 +#define IH_INT_DROP_MATCH_MASK0__CONTEXT_ID_39_32_MATCH_MASK__SHIFT 0x18 +#define IH_INT_DROP_MATCH_MASK0__CLIENT_ID_MATCH_MASK_MASK 0x000000FFL +#define IH_INT_DROP_MATCH_MASK0__SOURCE_ID_MATCH_MASK_MASK 0x0000FF00L +#define IH_INT_DROP_MATCH_MASK0__VF_ID_MATCH_MASK_MASK 0x000F0000L +#define IH_INT_DROP_MATCH_MASK0__VF_MATCH_MASK_MASK 0x00800000L +#define IH_INT_DROP_MATCH_MASK0__CONTEXT_ID_39_32_MATCH_MASK_MASK 0xFF000000L +//IH_INT_DROP_MATCH_MASK1 +#define IH_INT_DROP_MATCH_MASK1__CONTEXT_ID_31_0_MATCH_MASK__SHIFT 0x0 +#define IH_INT_DROP_MATCH_MASK1__CONTEXT_ID_31_0_MATCH_MASK_MASK 0xFFFFFFFFL +//IH_REGISTER_LAST_PART1 +#define IH_REGISTER_LAST_PART1__RESERVED__SHIFT 0x0 +#define IH_REGISTER_LAST_PART1__RESERVED_MASK 0xFFFFFFFFL +//SEM_ACTIVE_FCN_ID +#define SEM_ACTIVE_FCN_ID__VFID__SHIFT 0x0 +#define SEM_ACTIVE_FCN_ID__VF__SHIFT 0x1f +#define SEM_ACTIVE_FCN_ID__VFID_MASK 0x0000000FL +#define SEM_ACTIVE_FCN_ID__VF_MASK 0x80000000L +//SEM_VIRT_RESET_REQ +#define SEM_VIRT_RESET_REQ__VF__SHIFT 0x0 +#define SEM_VIRT_RESET_REQ__PF__SHIFT 0x1f +#define SEM_VIRT_RESET_REQ__VF_MASK 0x0000FFFFL +#define SEM_VIRT_RESET_REQ__PF_MASK 0x80000000L +//SEM_RESP_SDMA0 +#define SEM_RESP_SDMA0__ADDR__SHIFT 0x2 +#define SEM_RESP_SDMA0__ADDR_MASK 0x000FFFFCL +//SEM_RESP_SDMA1 +#define SEM_RESP_SDMA1__ADDR__SHIFT 0x2 +#define SEM_RESP_SDMA1__ADDR_MASK 0x000FFFFCL +//SEM_RESP_UVD +#define SEM_RESP_UVD__ADDR__SHIFT 0x2 +#define SEM_RESP_UVD__ADDR_MASK 0x000FFFFCL +//SEM_RESP_VCE_0 +#define SEM_RESP_VCE_0__ADDR__SHIFT 0x2 +#define 
SEM_RESP_VCE_0__ADDR_MASK 0x000FFFFCL +//SEM_RESP_ACP +#define SEM_RESP_ACP__ADDR__SHIFT 0x2 +#define SEM_RESP_ACP__ADDR_MASK 0x000FFFFCL +//SEM_RESP_ISP +#define SEM_RESP_ISP__ADDR__SHIFT 0x2 +#define SEM_RESP_ISP__ADDR_MASK 0x000FFFFCL +//SEM_RESP_VCE_1 +#define SEM_RESP_VCE_1__ADDR__SHIFT 0x2 +#define SEM_RESP_VCE_1__ADDR_MASK 0x000FFFFCL +//SEM_RESP_VP8 +#define SEM_RESP_VP8__ADDR__SHIFT 0x2 +#define SEM_RESP_VP8__ADDR_MASK 0x000FFFFCL +//SEM_RESP_GC +#define SEM_RESP_GC__ADDR__SHIFT 0x2 +#define SEM_RESP_GC__ADDR_MASK 0x000FFFFCL +//SEM_RESP_UVD_1 +#define SEM_RESP_UVD_1__ADDR__SHIFT 0x2 +#define SEM_RESP_UVD_1__ADDR_MASK 0x000FFFFCL +//SEM_CID_REMAP_INDEX +#define SEM_CID_REMAP_INDEX__INDEX__SHIFT 0x0 +#define SEM_CID_REMAP_INDEX__INDEX_MASK 0x00000003L +//SEM_CID_REMAP_DATA +#define SEM_CID_REMAP_DATA__CLIENT_ID__SHIFT 0x0 +#define SEM_CID_REMAP_DATA__INITIATOR_ID__SHIFT 0x8 +#define SEM_CID_REMAP_DATA__CLIENT_ID_REMAP__SHIFT 0x10 +#define SEM_CID_REMAP_DATA__CLIENT_ID_MASK 0x000000FFL +#define SEM_CID_REMAP_DATA__INITIATOR_ID_MASK 0x0000FF00L +#define SEM_CID_REMAP_DATA__CLIENT_ID_REMAP_MASK 0x00FF0000L +//SEM_ATOMIC_OP_LUT +#define SEM_ATOMIC_OP_LUT__SIGNAL_NORMAL__SHIFT 0x0 +#define SEM_ATOMIC_OP_LUT__SIGNAL_WRITE1__SHIFT 0x7 +#define SEM_ATOMIC_OP_LUT__WAIT_NORMAL__SHIFT 0xe +#define SEM_ATOMIC_OP_LUT__WAIT_CHECK0__SHIFT 0x15 +#define SEM_ATOMIC_OP_LUT__SIGNAL_NORMAL_MASK 0x0000007FL +#define SEM_ATOMIC_OP_LUT__SIGNAL_WRITE1_MASK 0x00003F80L +#define SEM_ATOMIC_OP_LUT__WAIT_NORMAL_MASK 0x001FC000L +#define SEM_ATOMIC_OP_LUT__WAIT_CHECK0_MASK 0x0FE00000L +//SEM_EDC_CONFIG +#define SEM_EDC_CONFIG__WRITE_DIS__SHIFT 0x0 +#define SEM_EDC_CONFIG__DIS_EDC__SHIFT 0x1 +#define SEM_EDC_CONFIG__WRITE_DIS_MASK 0x00000001L +#define SEM_EDC_CONFIG__DIS_EDC_MASK 0x00000002L +//SEM_CHICKEN_BITS2 +#define SEM_CHICKEN_BITS2__ACTIVE_FCN_ID_PROT_ENABLE__SHIFT 0x0 +#define SEM_CHICKEN_BITS2__MM_CLIENT_USE_CONFIG_VFID__SHIFT 0x1 +#define SEM_CHICKEN_BITS2__ACTIVE_FCN_ID_PROT_ENABLE_MASK 0x00000001L +#define SEM_CHICKEN_BITS2__MM_CLIENT_USE_CONFIG_VFID_MASK 0x00000002L +//SEM_MMHUB_CNTL +#define SEM_MMHUB_CNTL__UNIT_ID__SHIFT 0x0 +#define SEM_MMHUB_CNTL__TLVL_VALUE__SHIFT 0x8 +#define SEM_MMHUB_CNTL__UNIT_ID_MASK 0x0000003FL +#define SEM_MMHUB_CNTL__TLVL_VALUE_MASK 0x00000700L +//SEM_REGISTER_LAST_PART1 +#define SEM_REGISTER_LAST_PART1__RESERVED__SHIFT 0x0 +#define SEM_REGISTER_LAST_PART1__RESERVED_MASK 0xFFFFFFFFL + +#endif diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h index f775aac6c1bd..270f8db5115a 100644 --- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h +++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h @@ -157,7 +157,8 @@ enum PP_OD_DPM_TABLE_COMMAND { PP_OD_EDIT_MCLK_VDDC_TABLE, PP_OD_EDIT_VDDC_CURVE, PP_OD_RESTORE_DEFAULT_TABLE, - PP_OD_COMMIT_DPM_TABLE + PP_OD_COMMIT_DPM_TABLE, + PP_OD_EDIT_VDDGFX_OFFSET }; struct pp_states_info { diff --git a/drivers/gpu/drm/amd/pm/amdgpu_pm.c b/drivers/gpu/drm/amd/pm/amdgpu_pm.c index 7b6ef05a1d35..f5d97b97353a 100644 --- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c @@ -730,11 +730,18 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, * * - minimum and maximum engine clock labeled OD_SCLK * - * - maximum memory clock labeled OD_MCLK + * - minimum(not available for Vega20 and Navi1x) and maximum memory + * clock labeled OD_MCLK * * - three <frequency, voltage> points labeled OD_VDDC_CURVE. * They can be used to calibrate the sclk voltage curve. 
* + * - voltage offset(in mV) applied on target voltage calculation. + * This is available for Sienna Cichlid, Navy Flounder and Dimgrey + * Cavefish. For these ASICs, the target voltage calculation can be + * illustrated by "voltage = voltage calculated from v/f curve + + * overdrive vddgfx offset" + * * - a list of valid ranges for sclk, mclk, and voltage curve points * labeled OD_RANGE * @@ -755,6 +762,11 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, * 600mV. "vc 2 1000 1000" will update point3 with clock set * as 1000Mhz and voltage 1000mV. * + * To update the voltage offset applied for gfxclk/voltage calculation, + * enter the new value by writing a string that contains "vo offset". + * This is supported by Sienna Cichlid, Navy Flounder and Dimgrey Cavefish. + * And the offset can be a positive or negative value. + * * - When you have edited all of the states as needed, write "c" (commit) * to the file to commit your changes * @@ -795,6 +807,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, type = PP_OD_COMMIT_DPM_TABLE; else if (!strncmp(buf, "vc", 2)) type = PP_OD_EDIT_VDDC_CURVE; + else if (!strncmp(buf, "vo", 2)) + type = PP_OD_EDIT_VDDGFX_OFFSET; else return -EINVAL; @@ -802,7 +816,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, tmp_str = buf_cpy; - if (type == PP_OD_EDIT_VDDC_CURVE) + if ((type == PP_OD_EDIT_VDDC_CURVE) || + (type == PP_OD_EDIT_VDDGFX_OFFSET)) tmp_str++; while (isspace(*++tmp_str)); @@ -898,6 +913,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, size = smu_print_clk_levels(&adev->smu, SMU_OD_SCLK, buf); size += smu_print_clk_levels(&adev->smu, SMU_OD_MCLK, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDC_CURVE, buf+size); + size += smu_print_clk_levels(&adev->smu, SMU_OD_VDDGFX_OFFSET, buf+size); size += smu_print_clk_levels(&adev->smu, SMU_OD_RANGE, buf+size); } else if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); @@ -1346,6 +1362,138 @@ static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev, return count; } +static ssize_t amdgpu_get_pp_dpm_vclk(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size; + int ret; + + if (amdgpu_in_reset(adev)) + return -EPERM; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + if (is_support_sw_smu(adev)) + size = smu_print_clk_levels(&adev->smu, SMU_VCLK, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; +} + +static ssize_t amdgpu_set_pp_dpm_vclk(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int ret; + uint32_t mask = 0; + + if (amdgpu_in_reset(adev)) + return -EPERM; + + ret = amdgpu_read_mask(buf, count, &mask); + if (ret) + return ret; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, SMU_VCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + if (ret) + return -EINVAL; + + return count; +} + +static ssize_t 
amdgpu_get_pp_dpm_dclk(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + ssize_t size; + int ret; + + if (amdgpu_in_reset(adev)) + return -EPERM; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + if (is_support_sw_smu(adev)) + size = smu_print_clk_levels(&adev->smu, SMU_DCLK, buf); + else + size = snprintf(buf, PAGE_SIZE, "\n"); + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + return size; +} + +static ssize_t amdgpu_set_pp_dpm_dclk(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct drm_device *ddev = dev_get_drvdata(dev); + struct amdgpu_device *adev = drm_to_adev(ddev); + int ret; + uint32_t mask = 0; + + if (amdgpu_in_reset(adev)) + return -EPERM; + + ret = amdgpu_read_mask(buf, count, &mask); + if (ret) + return ret; + + ret = pm_runtime_get_sync(ddev->dev); + if (ret < 0) { + pm_runtime_put_autosuspend(ddev->dev); + return ret; + } + + if (is_support_sw_smu(adev)) + ret = smu_force_clk_levels(&adev->smu, SMU_DCLK, mask); + else + ret = 0; + + pm_runtime_mark_last_busy(ddev->dev); + pm_runtime_put_autosuspend(ddev->dev); + + if (ret) + return -EINVAL; + + return count; +} + static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev, struct device_attribute *attr, char *buf) @@ -2025,6 +2173,8 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = { AMDGPU_DEVICE_ATTR_RW(pp_dpm_mclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(pp_dpm_socclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(pp_dpm_fclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(pp_dpm_vclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), + AMDGPU_DEVICE_ATTR_RW(pp_dpm_dclk, ATTR_FLAG_BASIC|ATTR_FLAG_ONEVF), AMDGPU_DEVICE_ATTR_RW(pp_dpm_dcefclk, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(pp_dpm_pcie, ATTR_FLAG_BASIC), AMDGPU_DEVICE_ATTR_RW(pp_sclk_od, ATTR_FLAG_BASIC), @@ -2067,7 +2217,8 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(pp_od_clk_voltage)) { *states = ATTR_STATE_UNSUPPORTED; if ((is_support_sw_smu(adev) && adev->smu.od_enabled) || - (!is_support_sw_smu(adev) && hwmgr->od_enabled)) + (is_support_sw_smu(adev) && adev->smu.fine_grain_enabled) || + (!is_support_sw_smu(adev) && hwmgr->od_enabled)) *states = ATTR_STATE_SUPPORTED; } else if (DEVICE_ATTR_IS(mem_busy_percent)) { if (adev->flags & AMD_IS_APU || asic_type == CHIP_VEGA10) @@ -2087,6 +2238,12 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_ } else if (DEVICE_ATTR_IS(gpu_metrics)) { if (asic_type < CHIP_VEGA12) *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_vclk)) { + if (!(asic_type == CHIP_VANGOGH)) + *states = ATTR_STATE_UNSUPPORTED; + } else if (DEVICE_ATTR_IS(pp_dpm_dclk)) { + if (!(asic_type == CHIP_VANGOGH)) + *states = ATTR_STATE_UNSUPPORTED; } if (asic_type == CHIP_ARCTURUS) { diff --git a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h index 4bdbcce7092d..e2e59fb0f754 100644 --- a/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h +++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h @@ -465,124 +465,653 @@ struct smu_context uint32_t gfx_default_soft_max_freq; uint32_t gfx_actual_hard_min_freq; uint32_t gfx_actual_soft_max_freq; + + bool fine_grain_enabled; + bool fine_grain_started; }; struct i2c_adapter; +/** + 
* struct pptable_funcs - Callbacks used to interact with the SMU. + */ struct pptable_funcs { + /** + * @run_btc: Calibrate voltage/frequency curve to fit the system's + * power delivery and voltage margins. Required for adaptive + * voltage frequency scaling (AVFS). + */ int (*run_btc)(struct smu_context *smu); + + /** + * @get_allowed_feature_mask: Get allowed feature mask. + * &feature_mask: Array to store feature mask. + * &num: Elements in &feature_mask. + */ int (*get_allowed_feature_mask)(struct smu_context *smu, uint32_t *feature_mask, uint32_t num); + + /** + * @get_current_power_state: Get the current power state. + * + * Return: Current power state on success, negative errno on failure. + */ enum amd_pm_state_type (*get_current_power_state)(struct smu_context *smu); + + /** + * @set_default_dpm_table: Retrieve the default overdrive settings from + * the SMU. + */ int (*set_default_dpm_table)(struct smu_context *smu); + int (*set_power_state)(struct smu_context *smu); + + /** + * @populate_umd_state_clk: Populate the UMD power state table with + * defaults. + */ int (*populate_umd_state_clk)(struct smu_context *smu); + + /** + * @print_clk_levels: Print DPM clock levels for a clock domain + * to buffer. Star current level. + * + * Used for sysfs interfaces. + */ int (*print_clk_levels)(struct smu_context *smu, enum smu_clk_type clk_type, char *buf); + + /** + * @force_clk_levels: Set a range of allowed DPM levels for a clock + * domain. + * &clk_type: Clock domain. + * &mask: Range of allowed DPM levels. + */ int (*force_clk_levels)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t mask); + + /** + * @od_edit_dpm_table: Edit the custom overdrive DPM table. + * &type: Type of edit. + * &input: Edit parameters. + * &size: Size of &input. + */ int (*od_edit_dpm_table)(struct smu_context *smu, enum PP_OD_DPM_TABLE_COMMAND type, long *input, uint32_t size); + + /** + * @get_clock_by_type_with_latency: Get the speed and latency of a clock + * domain. + */ int (*get_clock_by_type_with_latency)(struct smu_context *smu, enum smu_clk_type clk_type, struct pp_clock_levels_with_latency *clocks); + /** + * @get_clock_by_type_with_voltage: Get the speed and voltage of a clock + * domain. + */ + int (*get_clock_by_type_with_voltage)(struct smu_context *smu, + enum amd_pp_clock_type type, + struct + pp_clock_levels_with_voltage + *clocks); + + /** + * @get_power_profile_mode: Print all power profile modes to + * buffer. Star current mode. + */ int (*get_power_profile_mode)(struct smu_context *smu, char *buf); + + /** + * @set_power_profile_mode: Set a power profile mode. Also used to + * create/set custom power profile modes. + * &input: Power profile mode parameters. + * &size: Size of &input. + */ int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size); + + /** + * @dpm_set_vcn_enable: Enable/disable VCN engine dynamic power + * management. + */ int (*dpm_set_vcn_enable)(struct smu_context *smu, bool enable); + + /** + * @dpm_set_jpeg_enable: Enable/disable JPEG engine dynamic power + * management. + */ int (*dpm_set_jpeg_enable)(struct smu_context *smu, bool enable); + + /** + * @read_sensor: Read data from a sensor. + * &sensor: Sensor to read data from. + * &data: Sensor reading. + * &size: Size of &data. + */ int (*read_sensor)(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, uint32_t *size); + + /** + * @pre_display_config_changed: Prepare GPU for a display configuration + * change. 
+ * + * Disable display tracking and pin memory clock speed to maximum. Used + * in display component synchronization. + */ int (*pre_display_config_changed)(struct smu_context *smu); + + /** + * @display_config_changed: Notify the SMU of the current display + * configuration. + * + * Allows SMU to properly track blanking periods for memory clock + * adjustment. Used in display component synchronization. + */ int (*display_config_changed)(struct smu_context *smu); + int (*apply_clocks_adjust_rules)(struct smu_context *smu); + + /** + * @notify_smc_display_config: Applies display requirements to the + * current power state. + * + * Optimize deep sleep DCEFclk and mclk for the current display + * configuration. Used in display component synchronization. + */ int (*notify_smc_display_config)(struct smu_context *smu); + + /** + * @is_dpm_running: Check if DPM is running. + * + * Return: True if DPM is running, false otherwise. + */ bool (*is_dpm_running)(struct smu_context *smu); + + /** + * @get_fan_speed_rpm: Get the current fan speed in RPM. + */ int (*get_fan_speed_rpm)(struct smu_context *smu, uint32_t *speed); + + /** + * @set_watermarks_table: Configure and upload the watermarks tables to + * the SMU. + */ int (*set_watermarks_table)(struct smu_context *smu, struct pp_smu_wm_range_sets *clock_ranges); + + /** + * @get_thermal_temperature_range: Get safe thermal limits in Celsius. + */ int (*get_thermal_temperature_range)(struct smu_context *smu, struct smu_temperature_range *range); + + /** + * @get_uclk_dpm_states: Get memory clock DPM levels in kHz. + * &clocks_in_khz: Array of DPM levels. + * &num_states: Elements in &clocks_in_khz. + */ int (*get_uclk_dpm_states)(struct smu_context *smu, uint32_t *clocks_in_khz, uint32_t *num_states); + + /** + * @set_default_od_settings: Set the overdrive tables to defaults. + */ int (*set_default_od_settings)(struct smu_context *smu); + + /** + * @set_performance_level: Set a performance level. + */ int (*set_performance_level)(struct smu_context *smu, enum amd_dpm_forced_level level); + + /** + * @display_disable_memory_clock_switch: Enable/disable dynamic memory + * clock switching. + * + * Disabling this feature forces memory clock speed to maximum. + * Enabling sets the minimum memory clock capable of driving the + * current display configuration. + */ int (*display_disable_memory_clock_switch)(struct smu_context *smu, bool disable_memory_clock_switch); + + /** + * @dump_pptable: Print the power play table to the system log. + */ void (*dump_pptable)(struct smu_context *smu); + + /** + * @get_power_limit: Get the device's power limits. + */ int (*get_power_limit)(struct smu_context *smu); + + /** + * @set_df_cstate: Set data fabric cstate. + */ int (*set_df_cstate)(struct smu_context *smu, enum pp_df_cstate state); + + /** + * @allow_xgmi_power_down: Enable/disable external global memory + * interconnect power down. + */ int (*allow_xgmi_power_down)(struct smu_context *smu, bool en); + + /** + * @update_pcie_parameters: Update and upload the system's PCIe + * capabilities to the SMU. + * &pcie_gen_cap: Maximum allowed PCIe generation. + * &pcie_width_cap: Maximum allowed PCIe width. + */ int (*update_pcie_parameters)(struct smu_context *smu, uint32_t pcie_gen_cap, uint32_t pcie_width_cap); + + /** + * @i2c_init: Initialize i2c. + * + * The i2c bus is used internally by the SMU voltage regulators and + * other devices. The i2c's EEPROM also stores bad page tables on boards + * with ECC.
+ */ int (*i2c_init)(struct smu_context *smu, struct i2c_adapter *control); + + /** + * @i2c_fini: Tear down i2c. + */ void (*i2c_fini)(struct smu_context *smu, struct i2c_adapter *control); + + /** + * @get_unique_id: Get the GPU's unique id. Used for asset tracking. + */ void (*get_unique_id)(struct smu_context *smu); + + /** + * @get_dpm_clock_table: Get a copy of the DPM clock table. + * + * Used by display component in bandwidth and watermark calculations. + */ int (*get_dpm_clock_table)(struct smu_context *smu, struct dpm_clocks *clock_table); + + /** + * @init_microcode: Request the SMU's firmware from the kernel. + */ int (*init_microcode)(struct smu_context *smu); + + /** + * @load_microcode: Load firmware onto the SMU. + */ int (*load_microcode)(struct smu_context *smu); + + /** + * @fini_microcode: Release the SMU's firmware. + */ void (*fini_microcode)(struct smu_context *smu); + + /** + * @init_smc_tables: Initialize the SMU tables. + */ int (*init_smc_tables)(struct smu_context *smu); + + /** + * @fini_smc_tables: Release the SMU tables. + */ int (*fini_smc_tables)(struct smu_context *smu); + + /** + * @init_power: Initialize the power gate table context. + */ int (*init_power)(struct smu_context *smu); + + /** + * @fini_power: Release the power gate table context. + */ int (*fini_power)(struct smu_context *smu); + + /** + * @check_fw_status: Check the SMU's firmware status. + * + * Return: Zero if check passes, negative errno on failure. + */ int (*check_fw_status)(struct smu_context *smu); + + /** + * @setup_pptable: Initialize the power play table and populate it with + * default values. + */ int (*setup_pptable)(struct smu_context *smu); + + /** + * @get_vbios_bootup_values: Get default boot values from the VBIOS. + */ int (*get_vbios_bootup_values)(struct smu_context *smu); + + /** + * @check_fw_version: Print driver and SMU interface versions to the + * system log. + * + * Interface mismatch is not a critical failure. + */ int (*check_fw_version)(struct smu_context *smu); + + /** + * @powergate_sdma: Power up/down system direct memory access. + */ int (*powergate_sdma)(struct smu_context *smu, bool gate); + + /** + * @set_gfx_cgpg: Enable/disable graphics engine coarse grain power + * gating. + */ int (*set_gfx_cgpg)(struct smu_context *smu, bool enable); + + /** + * @write_pptable: Write the power play table to the SMU. + */ int (*write_pptable)(struct smu_context *smu); + + /** + * @set_driver_table_location: Send the location of the driver table to + * the SMU. + */ int (*set_driver_table_location)(struct smu_context *smu); + + /** + * @set_tool_table_location: Send the location of the tool table to the + * SMU. + */ int (*set_tool_table_location)(struct smu_context *smu); + + /** + * @notify_memory_pool_location: Send the location of the memory pool to + * the SMU. + */ int (*notify_memory_pool_location)(struct smu_context *smu); + + /** + * @system_features_control: Enable/disable all SMU features. + */ int (*system_features_control)(struct smu_context *smu, bool en); + + /** + * @send_smc_msg_with_param: Send a message with a parameter to the SMU. + * &msg: Type of message. + * &param: Message parameter. + * &read_arg: SMU response (optional). + */ int (*send_smc_msg_with_param)(struct smu_context *smu, enum smu_message_type msg, uint32_t param, uint32_t *read_arg); + + /** + * @send_smc_msg: Send a message to the SMU. + * &msg: Type of message. + * &read_arg: SMU response (optional).
+ */ int (*send_smc_msg)(struct smu_context *smu, enum smu_message_type msg, uint32_t *read_arg); + + /** + * @init_display_count: Notify the SMU of the number of display + * components in current display configuration. + */ int (*init_display_count)(struct smu_context *smu, uint32_t count); + + /** + * @set_allowed_mask: Notify the SMU of the features currently allowed + * by the driver. + */ int (*set_allowed_mask)(struct smu_context *smu); + + /** + * @get_enabled_mask: Get a mask of features that are currently enabled + * on the SMU. + * &feature_mask: Array representing enabled feature mask. + * &num: Elements in &feature_mask. + */ int (*get_enabled_mask)(struct smu_context *smu, uint32_t *feature_mask, uint32_t num); + + /** + * @feature_is_enabled: Test if a feature is enabled. + * + * Return: One if enabled, zero if disabled. + */ int (*feature_is_enabled)(struct smu_context *smu, enum smu_feature_mask mask); + + /** + * @disable_all_features_with_exception: Disable all features with + * exception to those in &mask. + */ int (*disable_all_features_with_exception)(struct smu_context *smu, enum smu_feature_mask mask); + + /** + * @notify_display_change: Enable fast memory clock switching. + * + * Allows for fine grained memory clock switching but has more stringent + * timing requirements. + */ int (*notify_display_change)(struct smu_context *smu); + + /** + * @set_power_limit: Set power limit in watts. + */ int (*set_power_limit)(struct smu_context *smu, uint32_t n); + + /** + * @init_max_sustainable_clocks: Populate max sustainable clock speed + * table with values from the SMU. + */ int (*init_max_sustainable_clocks)(struct smu_context *smu); + + /** + * @enable_thermal_alert: Enable thermal alert interrupts. + */ int (*enable_thermal_alert)(struct smu_context *smu); + + /** + * @disable_thermal_alert: Disable thermal alert interrupts. + */ int (*disable_thermal_alert)(struct smu_context *smu); + + /** + * @set_min_dcef_deep_sleep: Set a minimum display fabric deep sleep + * clock speed in MHz. + */ int (*set_min_dcef_deep_sleep)(struct smu_context *smu, uint32_t clk); + + /** + * @display_clock_voltage_request: Set a hard minimum frequency + * for a clock domain. + */ int (*display_clock_voltage_request)(struct smu_context *smu, struct pp_display_clock_request *clock_req); + + /** + * @get_fan_control_mode: Get the current fan control mode. + */ uint32_t (*get_fan_control_mode)(struct smu_context *smu); + + /** + * @set_fan_control_mode: Set the fan control mode. + */ int (*set_fan_control_mode)(struct smu_context *smu, uint32_t mode); + + /** + * @set_fan_speed_rpm: Set a static fan speed in RPM. + */ int (*set_fan_speed_rpm)(struct smu_context *smu, uint32_t speed); + + /** + * @set_xgmi_pstate: Set inter-chip global memory interconnect pstate. + * &pstate: Pstate to set. D0 if Nonzero, D3 otherwise. + */ int (*set_xgmi_pstate)(struct smu_context *smu, uint32_t pstate); + + /** + * @gfx_off_control: Enable/disable graphics engine poweroff. + */ int (*gfx_off_control)(struct smu_context *smu, bool enable); + + + /** + * @get_gfx_off_status: Get graphics engine poweroff status. + * + * Return: + * 0 - GFXOFF(default). + * 1 - Transition out of GFX State. + * 2 - Not in GFXOFF. + * 3 - Transition into GFXOFF. + */ uint32_t (*get_gfx_off_status)(struct smu_context *smu); + + /** + * @register_irq_handler: Register interrupt request handlers. + */ int (*register_irq_handler)(struct smu_context *smu); + + /** + * @set_azalia_d3_pme: Wake the audio decode engine from d3 sleep.
+ */ int (*set_azalia_d3_pme)(struct smu_context *smu); + + /** + * @get_max_sustainable_clocks_by_dc: Get a copy of the max sustainable + * clock speeds table. + * + * Provides a way for the display component (DC) to get the max + * sustainable clocks from the SMU. + */ int (*get_max_sustainable_clocks_by_dc)(struct smu_context *smu, struct pp_smu_nv_clock_table *max_clocks); + + /** + * @baco_is_support: Check if GPU supports BACO (Bus Active, Chip Off). + */ bool (*baco_is_support)(struct smu_context *smu); + + /** + * @baco_get_state: Get the current BACO state. + * + * Return: Current BACO state. + */ enum smu_baco_state (*baco_get_state)(struct smu_context *smu); + + /** + * @baco_set_state: Enter/exit BACO. + */ int (*baco_set_state)(struct smu_context *smu, enum smu_baco_state state); + + /** + * @baco_enter: Enter BACO. + */ int (*baco_enter)(struct smu_context *smu); + + /** + * @baco_exit: Exit Baco. + */ int (*baco_exit)(struct smu_context *smu); + + /** + * @mode1_reset_is_support: Check if GPU supports mode1 reset. + */ bool (*mode1_reset_is_support)(struct smu_context *smu); + + /** + * @mode1_reset: Perform mode1 reset. + * + * Complete GPU reset. + */ int (*mode1_reset)(struct smu_context *smu); + + /** + * @mode2_reset: Perform mode2 reset. + * + * Mode2 reset generally does not reset as many IPs as mode1 reset. The + * IPs reset varies by asic. + */ int (*mode2_reset)(struct smu_context *smu); + + /** + * @get_dpm_ultimate_freq: Get the hard frequency range of a clock + * domain in MHz. + */ int (*get_dpm_ultimate_freq)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t *min, uint32_t *max); + + /** + * @set_soft_freq_limited_range: Set the soft frequency range of a clock + * domain in MHz. + */ int (*set_soft_freq_limited_range)(struct smu_context *smu, enum smu_clk_type clk_type, uint32_t min, uint32_t max); + + /** + * @set_power_source: Notify the SMU of the current power source. + */ int (*set_power_source)(struct smu_context *smu, enum smu_power_src_type power_src); + + /** + * @log_thermal_throttling_event: Print a thermal throttling warning to + * the system's log. + */ void (*log_thermal_throttling_event)(struct smu_context *smu); + + /** + * @get_pp_feature_mask: Print a human readable table of enabled + * features to buffer. + */ size_t (*get_pp_feature_mask)(struct smu_context *smu, char *buf); + + /** + * @set_pp_feature_mask: Request the SMU enable/disable features to + * match those enabled in &new_mask. + */ int (*set_pp_feature_mask)(struct smu_context *smu, uint64_t new_mask); + + /** + * @get_gpu_metrics: Get a copy of the GPU metrics table from the SMU. + * + * Return: Size of &table + */ ssize_t (*get_gpu_metrics)(struct smu_context *smu, void **table); + + /** + * @enable_mgpu_fan_boost: Enable multi-GPU fan boost. + */ int (*enable_mgpu_fan_boost)(struct smu_context *smu); + + /** + * @gfx_ulv_control: Enable/disable ultra low voltage. + */ int (*gfx_ulv_control)(struct smu_context *smu, bool enablement); + + /** + * @deep_sleep_control: Enable/disable deep sleep. + */ int (*deep_sleep_control)(struct smu_context *smu, bool enablement); + + /** + * @get_fan_parameters: Get fan parameters. + * + * Get maximum fan speed from the power play table. + */ int (*get_fan_parameters)(struct smu_context *smu); + + /** + * @post_init: Helper function for asic specific workarounds. + */ int (*post_init)(struct smu_context *smu); + + /** + * @interrupt_work: Work task scheduled from SMU interrupt handler. 
+ */ void (*interrupt_work)(struct smu_context *smu); + + /** + * @gpo_control: Enable/disable graphics power optimization if supported. + */ int (*gpo_control)(struct smu_context *smu, bool enablement); + + /** + * @gfx_state_change_set: Send the current graphics state to the SMU. + */ int (*gfx_state_change_set)(struct smu_context *smu, uint32_t state); + + /** + * @set_fine_grain_gfx_freq_parameters: Set fine grain graphics clock + * parameters to defaults. + */ int (*set_fine_grain_gfx_freq_parameters)(struct smu_context *smu); }; @@ -636,6 +1165,12 @@ enum smu_cmn2asic_mapping_type { #define FEA_MAP(fea) \ [SMU_FEATURE_##fea##_BIT] = {1, FEATURE_##fea##_BIT} +#define FEA_MAP_REVERSE(fea) \ + [SMU_FEATURE_DPM_##fea##_BIT] = {1, FEATURE_##fea##_DPM_BIT} + +#define FEA_MAP_HALF_REVERSE(fea) \ + [SMU_FEATURE_DPM_##fea##CLK_BIT] = {1, FEATURE_##fea##_DPM_BIT} + #define TAB_MAP(tab) \ [SMU_TABLE_##tab] = {1, TABLE_##tab} diff --git a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_vangogh.h b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_vangogh.h index 1c19eae93ff1..6e23a3f803a7 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_vangogh.h +++ b/drivers/gpu/drm/amd/pm/inc/smu11_driver_if_vangogh.h @@ -141,7 +141,6 @@ typedef struct { uint32_t MaxGfxClk; uint8_t NumDfPstatesEnabled; - uint8_t NumDpmLevelsEnabled; uint8_t NumDcfclkLevelsEnabled; uint8_t NumDispClkLevelsEnabled; //applies to both dispclk and dppclk uint8_t NumSocClkLevelsEnabled; diff --git a/drivers/gpu/drm/amd/pm/inc/smu_types.h b/drivers/gpu/drm/amd/pm/inc/smu_types.h index 720d15612fe1..8e428c728e0e 100644 --- a/drivers/gpu/drm/amd/pm/inc/smu_types.h +++ b/drivers/gpu/drm/amd/pm/inc/smu_types.h @@ -211,6 +211,7 @@ __SMU_DUMMY_MAP(SetGpoFeaturePMask), \ __SMU_DUMMY_MAP(DisallowGpo), \ __SMU_DUMMY_MAP(Enable2ndUSB20Port), \ + __SMU_DUMMY_MAP(RequestActiveWgp), \ #undef __SMU_DUMMY_MAP #define __SMU_DUMMY_MAP(type) SMU_MSG_##type @@ -240,6 +241,7 @@ enum smu_clk_type { SMU_OD_MCLK, SMU_OD_VDDC_CURVE, SMU_OD_RANGE, + SMU_OD_VDDGFX_OFFSET, SMU_CLK_COUNT, }; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c index e57e64bbacdc..88322781e447 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c @@ -251,7 +251,7 @@ static int smu10_set_hard_min_gfxclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t cl smu10_data->gfx_actual_soft_min_freq = clock; smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, - smu10_data->gfx_actual_soft_min_freq, + clock, NULL); } return 0; @@ -558,7 +558,8 @@ static int smu10_hwmgr_backend_init(struct pp_hwmgr *hwmgr) /* enable the pp_od_clk_voltage sysfs file */ hwmgr->od_enabled = 1; - + /* disabled fine grain tuning function by default */ + data->fine_grain_enabled = 0; return result; } @@ -597,6 +598,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, uint32_t min_mclk = hwmgr->display_config->min_mem_set_clock/100; uint32_t index_fclk = data->clock_vol_info.vdd_dep_on_fclk->count - 1; uint32_t index_socclk = data->clock_vol_info.vdd_dep_on_socclk->count - 1; + uint32_t fine_grain_min_freq = 0, fine_grain_max_freq = 0; if (hwmgr->smu_version < 0x1E3700) { pr_info("smu firmware version too old, can not set dpm level\n"); @@ -613,6 +615,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + data->fine_grain_enabled = 0; + + 
smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, data->gfx_max_freq_limit/100, @@ -648,6 +658,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, min_sclk, @@ -658,6 +676,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinFclkByFreq, min_mclk, @@ -668,6 +694,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, SMU10_UMD_PSTATE_GFXCLK, @@ -703,6 +737,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_AUTO: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, min_sclk, @@ -741,6 +783,14 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_LOW: + data->fine_grain_enabled = 0; + + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &fine_grain_min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &fine_grain_max_freq); + + data->gfx_actual_soft_min_freq = fine_grain_min_freq; + data->gfx_actual_soft_max_freq = fine_grain_max_freq; + smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_SetHardMinGfxClk, data->gfx_min_freq_limit/100, @@ -759,6 +809,7 @@ static int smu10_dpm_force_dpm_level(struct pp_hwmgr *hwmgr, NULL); break; case AMD_DPM_FORCED_LEVEL_MANUAL: + data->fine_grain_enabled = 1; case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: default: break; @@ -948,6 +999,8 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, struct smu10_voltage_dependency_table *mclk_table = data->clock_vol_info.vdd_dep_on_fclk; uint32_t i, now, size = 0; + uint32_t min_freq, 
max_freq = 0; + uint32_t ret = 0; switch (type) { case PP_SCLK: @@ -983,18 +1036,28 @@ static int smu10_print_clock_levels(struct pp_hwmgr *hwmgr, break; case OD_SCLK: if (hwmgr->od_enabled) { - size = sprintf(buf, "%s:\n", "OD_SCLK"); + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (ret) + return ret; + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (ret) + return ret; + size = sprintf(buf, "%s:\n", "OD_SCLK"); size += sprintf(buf + size, "0: %10uMhz\n", - (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : data->gfx_min_freq_limit/100); - size += sprintf(buf + size, "1: %10uMhz\n", data->gfx_max_freq_limit/100); + (data->gfx_actual_soft_min_freq > 0) ? data->gfx_actual_soft_min_freq : min_freq); + size += sprintf(buf + size, "1: %10uMhz\n", + (data->gfx_actual_soft_max_freq > 0) ? data->gfx_actual_soft_max_freq : max_freq); } break; case OD_RANGE: if (hwmgr->od_enabled) { - uint32_t min_freq, max_freq = 0; - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); - smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (ret) + return ret; + ret = smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (ret) + return ret; size = sprintf(buf, "%s:\n", "OD_RANGE"); size += sprintf(buf + size, "SCLK: %7uMHz %10uMHz\n", @@ -1414,23 +1477,96 @@ static int smu10_set_fine_grain_clk_vol(struct pp_hwmgr *hwmgr, enum PP_OD_DPM_TABLE_COMMAND type, long *input, uint32_t size) { + uint32_t min_freq, max_freq = 0; + struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend); + int ret = 0; + if (!hwmgr->od_enabled) { pr_err("Fine grain not support\n"); return -EINVAL; } - if (size != 2) { - pr_err("Input parameter number not correct\n"); + if (!smu10_data->fine_grain_enabled) { + pr_err("Fine grain not started\n"); return -EINVAL; } if (type == PP_OD_EDIT_SCLK_VDDC_TABLE) { - if (input[0] == 0) - smu10_set_hard_min_gfxclk_by_freq(hwmgr, input[1]); - else if (input[0] == 1) - smu10_set_soft_max_gfxclk_by_freq(hwmgr, input[1]); - else + if (size != 2) { + pr_err("Input parameter number not correct\n"); return -EINVAL; + } + + if (input[0] == 0) { + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + if (input[1] < min_freq) { + pr_err("Fine grain setting minimum sclk (%ld) MHz is less than the minimum allowed (%d) MHz\n", + input[1], min_freq); + return -EINVAL; + } + smu10_data->gfx_actual_soft_min_freq = input[1]; + } else if (input[0] == 1) { + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + if (input[1] > max_freq) { + pr_err("Fine grain setting maximum sclk (%ld) MHz is greater than the maximum allowed (%d) MHz\n", + input[1], max_freq); + return -EINVAL; + } + smu10_data->gfx_actual_soft_max_freq = input[1]; + } else { + return -EINVAL; + } + } else if (type == PP_OD_RESTORE_DEFAULT_TABLE) { + if (size != 0) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMinGfxclkFrequency, &min_freq); + smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetMaxGfxclkFrequency, &max_freq); + + smu10_data->gfx_actual_soft_min_freq = min_freq; + smu10_data->gfx_actual_soft_max_freq = max_freq; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + min_freq, + NULL); + if (ret) + return ret; + + ret = 
smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + max_freq, + NULL); + if (ret) + return ret; + } else if (type == PP_OD_COMMIT_DPM_TABLE) { + if (size != 0) { + pr_err("Input parameter number not correct\n"); + return -EINVAL; + } + + if (smu10_data->gfx_actual_soft_min_freq > smu10_data->gfx_actual_soft_max_freq) { + pr_err("The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", + smu10_data->gfx_actual_soft_min_freq, smu10_data->gfx_actual_soft_max_freq); + return -EINVAL; + } + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetHardMinGfxClk, + smu10_data->gfx_actual_soft_min_freq, + NULL); + if (ret) + return ret; + + ret = smum_send_msg_to_smc_with_parameter(hwmgr, + PPSMC_MSG_SetSoftMaxGfxClk, + smu10_data->gfx_actual_soft_max_freq, + NULL); + if (ret) + return ret; + } else { + return -EINVAL; } return 0; diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h index 6c9b5f060902..808e0ecbe1f0 100644 --- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h +++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.h @@ -283,6 +283,7 @@ struct smu10_hwmgr { uint32_t vclk_soft_min; uint32_t dclk_soft_min; uint32_t gfx_actual_soft_min_freq; + uint32_t gfx_actual_soft_max_freq; uint32_t gfx_min_freq_limit; uint32_t gfx_max_freq_limit; /* in 10Khz*/ @@ -299,6 +300,8 @@ struct smu10_hwmgr { bool need_min_deep_sleep_dcefclk; uint32_t deep_sleep_dcefclk; uint32_t num_active_display; + + bool fine_grain_enabled; }; struct pp_hwmgr; diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c index 8b867a6d52b5..8e1e97e31411 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c +++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c @@ -402,6 +402,10 @@ static int smu_set_funcs(struct amdgpu_device *adev) break; case CHIP_RENOIR: renoir_set_ppt_funcs(smu); + /* enable the fine grain tuning function by default */ + smu->fine_grain_enabled = true; + /* close the fine grain tuning function by default */ + smu->fine_grain_started = false; break; case CHIP_VANGOGH: vangogh_set_ppt_funcs(smu); @@ -478,9 +482,6 @@ static int smu_late_init(void *handle) smu_set_fine_grain_gfx_freq_parameters(smu); - if (adev->asic_type == CHIP_VANGOGH) - return 0; - if (!smu->pm_enabled) return 0; @@ -490,6 +491,9 @@ static int smu_late_init(void *handle) return ret; } + if (adev->asic_type == CHIP_VANGOGH) + return 0; + ret = smu_set_default_od_settings(smu); if (ret) { dev_err(adev->dev, "Failed to setup default OD settings!\n"); diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c index 51e83123f72a..7ebf9588983f 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c @@ -1673,7 +1673,7 @@ static int navi10_read_sensor(struct smu_context *smu, *size = 4; break; case AMDGPU_PP_SENSOR_GFX_SCLK: - ret = navi10_get_current_clk_freq_by_table(smu, SMU_GFXCLK, (uint32_t *)data); + ret = navi10_get_smu_metrics_data(smu, METRICS_AVERAGE_GFXCLK, (uint32_t *)data); *(uint32_t *)data *= 100; *size = 4; break; diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c index 9608745d732f..24f3c96a5e5e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c @@ -314,6 +314,12 @@ static int 
sienna_cichlid_check_powerplay_table(struct smu_context *smu) table_context->thermal_controller_type = powerplay_table->thermal_controller_type; + /* + * Instead of having its own buffer space and get overdrive_table copied, + * smu->od_settings just points to the actual overdrive_table + */ + smu->od_settings = &powerplay_table->overdrive_table; + return 0; } @@ -907,6 +913,22 @@ static bool sienna_cichlid_is_support_fine_grained_dpm(struct smu_context *smu, return dpm_desc->SnapToDiscrete == 0 ? true : false; } +static bool sienna_cichlid_is_od_feature_supported(struct smu_11_0_7_overdrive_table *od_table, + enum SMU_11_0_7_ODFEATURE_CAP cap) +{ + return od_table->cap[cap]; +} + +static void sienna_cichlid_get_od_setting_range(struct smu_11_0_7_overdrive_table *od_table, + enum SMU_11_0_7_ODSETTING_ID setting, + uint32_t *min, uint32_t *max) +{ + if (min) + *min = od_table->min[setting]; + if (max) + *max = od_table->max[setting]; +} + static int sienna_cichlid_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { @@ -915,11 +937,16 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, struct smu_dpm_context *smu_dpm = &smu->smu_dpm; struct smu_11_0_dpm_context *dpm_context = smu_dpm->dpm_context; PPTable_t *pptable = (PPTable_t *)table_context->driver_pptable; + struct smu_11_0_7_overdrive_table *od_settings = smu->od_settings; + OverDriveTable_t *od_table = + (OverDriveTable_t *)table_context->overdrive_table; int i, size = 0, ret = 0; uint32_t cur_value = 0, value = 0, count = 0; uint32_t freq_values[3] = {0}; uint32_t mark_index = 0; uint32_t gen_speed, lane_width; + uint32_t min_value, max_value; + uint32_t smu_version; switch (clk_type) { case SMU_GFXCLK: @@ -995,6 +1022,70 @@ static int sienna_cichlid_print_clk_levels(struct smu_context *smu, (lane_width == dpm_context->dpm_tables.pcie_table.pcie_lane[i]) ? "*" : ""); break; + case SMU_OD_SCLK: + if (!smu->od_enabled || !od_table || !od_settings) + break; + + if (!sienna_cichlid_is_od_feature_supported(od_settings, SMU_11_0_7_ODCAP_GFXCLK_LIMITS)) + break; + + size += sprintf(buf + size, "OD_SCLK:\n"); + size += sprintf(buf + size, "0: %uMhz\n1: %uMhz\n", od_table->GfxclkFmin, od_table->GfxclkFmax); + break; + + case SMU_OD_MCLK: + if (!smu->od_enabled || !od_table || !od_settings) + break; + + if (!sienna_cichlid_is_od_feature_supported(od_settings, SMU_11_0_7_ODCAP_UCLK_LIMITS)) + break; + + size += sprintf(buf + size, "OD_MCLK:\n"); + size += sprintf(buf + size, "0: %uMhz\n1: %uMHz\n", od_table->UclkFmin, od_table->UclkFmax); + break; + + case SMU_OD_VDDGFX_OFFSET: + if (!smu->od_enabled || !od_table || !od_settings) + break; + + /* + * OD GFX Voltage Offset functionality is supported only by 58.41.0 + * and onwards SMU firmwares. 
+ */ + smu_cmn_get_smc_version(smu, NULL, &smu_version); + if ((adev->asic_type == CHIP_SIENNA_CICHLID) && + (smu_version < 0x003a2900)) + break; + + size += sprintf(buf + size, "OD_VDDGFX_OFFSET:\n"); + size += sprintf(buf + size, "%dmV\n", od_table->VddGfxOffset); + break; + + case SMU_OD_RANGE: + if (!smu->od_enabled || !od_table || !od_settings) + break; + + size = sprintf(buf, "%s:\n", "OD_RANGE"); + + if (sienna_cichlid_is_od_feature_supported(od_settings, SMU_11_0_7_ODCAP_GFXCLK_LIMITS)) { + sienna_cichlid_get_od_setting_range(od_settings, SMU_11_0_7_ODSETTING_GFXCLKFMIN, + &min_value, NULL); + sienna_cichlid_get_od_setting_range(od_settings, SMU_11_0_7_ODSETTING_GFXCLKFMAX, + NULL, &max_value); + size += sprintf(buf + size, "SCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + + if (sienna_cichlid_is_od_feature_supported(od_settings, SMU_11_0_7_ODCAP_UCLK_LIMITS)) { + sienna_cichlid_get_od_setting_range(od_settings, SMU_11_0_7_ODSETTING_UCLKFMIN, + &min_value, NULL); + sienna_cichlid_get_od_setting_range(od_settings, SMU_11_0_7_ODSETTING_UCLKFMAX, + NULL, &max_value); + size += sprintf(buf + size, "MCLK: %7uMhz %10uMhz\n", + min_value, max_value); + } + break; + default: break; } @@ -1694,6 +1785,243 @@ static int sienna_cichlid_get_dpm_ultimate_freq(struct smu_context *smu, return ret; } +static void sienna_cichlid_dump_od_table(struct smu_context *smu, + OverDriveTable_t *od_table) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t smu_version; + + dev_dbg(smu->adev->dev, "OD: Gfxclk: (%d, %d)\n", od_table->GfxclkFmin, + od_table->GfxclkFmax); + dev_dbg(smu->adev->dev, "OD: Uclk: (%d, %d)\n", od_table->UclkFmin, + od_table->UclkFmax); + + smu_cmn_get_smc_version(smu, NULL, &smu_version); + if (!((adev->asic_type == CHIP_SIENNA_CICHLID) && + (smu_version < 0x003a2900))) + dev_dbg(smu->adev->dev, "OD: VddGfxOffset: %d\n", od_table->VddGfxOffset); +} + +static int sienna_cichlid_set_default_od_settings(struct smu_context *smu) +{ + OverDriveTable_t *od_table = + (OverDriveTable_t *)smu->smu_table.overdrive_table; + OverDriveTable_t *boot_od_table = + (OverDriveTable_t *)smu->smu_table.boot_overdrive_table; + int ret = 0; + + ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, + 0, (void *)od_table, false); + if (ret) { + dev_err(smu->adev->dev, "Failed to get overdrive table!\n"); + return ret; + } + + memcpy(boot_od_table, od_table, sizeof(OverDriveTable_t)); + + sienna_cichlid_dump_od_table(smu, od_table); + + return 0; +} + +static int sienna_cichlid_od_setting_check_range(struct smu_context *smu, + struct smu_11_0_7_overdrive_table *od_table, + enum SMU_11_0_7_ODSETTING_ID setting, + uint32_t value) +{ + if (value < od_table->min[setting]) { + dev_warn(smu->adev->dev, "OD setting (%d, %d) is less than the minimum allowed (%d)\n", + setting, value, od_table->min[setting]); + return -EINVAL; + } + if (value > od_table->max[setting]) { + dev_warn(smu->adev->dev, "OD setting (%d, %d) is greater than the maximum allowed (%d)\n", + setting, value, od_table->max[setting]); + return -EINVAL; + } + + return 0; +} + +static int sienna_cichlid_od_edit_dpm_table(struct smu_context *smu, + enum PP_OD_DPM_TABLE_COMMAND type, + long input[], uint32_t size) +{ + struct smu_table_context *table_context = &smu->smu_table; + OverDriveTable_t *od_table = + (OverDriveTable_t *)table_context->overdrive_table; + struct smu_11_0_7_overdrive_table *od_settings = + (struct smu_11_0_7_overdrive_table *)smu->od_settings; + struct amdgpu_device *adev = smu->adev; + enum SMU_11_0_7_ODSETTING_ID 
freq_setting; + uint16_t *freq_ptr; + int i, ret = 0; + uint32_t smu_version; + + if (!smu->od_enabled) { + dev_warn(smu->adev->dev, "OverDrive is not enabled!\n"); + return -EINVAL; + } + + if (!smu->od_settings) { + dev_err(smu->adev->dev, "OD board limits are not set!\n"); + return -ENOENT; + } + + if (!(table_context->overdrive_table && table_context->boot_overdrive_table)) { + dev_err(smu->adev->dev, "Overdrive table was not initialized!\n"); + return -EINVAL; + } + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (!sienna_cichlid_is_od_feature_supported(od_settings, + SMU_11_0_7_ODCAP_GFXCLK_LIMITS)) { + dev_warn(smu->adev->dev, "GFXCLK_LIMITS not supported!\n"); + return -ENOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + dev_info(smu->adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } + + switch (input[i]) { + case 0: + if (input[i + 1] > od_table->GfxclkFmax) { + dev_info(smu->adev->dev, "GfxclkFmin (%ld) must be <= GfxclkFmax (%u)!\n", + input[i + 1], od_table->GfxclkFmax); + return -EINVAL; + } + + freq_setting = SMU_11_0_7_ODSETTING_GFXCLKFMIN; + freq_ptr = &od_table->GfxclkFmin; + break; + + case 1: + if (input[i + 1] < od_table->GfxclkFmin) { + dev_info(smu->adev->dev, "GfxclkFmax (%ld) must be >= GfxclkFmin (%u)!\n", + input[i + 1], od_table->GfxclkFmin); + return -EINVAL; + } + + freq_setting = SMU_11_0_7_ODSETTING_GFXCLKFMAX; + freq_ptr = &od_table->GfxclkFmax; + break; + + default: + dev_info(smu->adev->dev, "Invalid SCLK_VDDC_TABLE index: %ld\n", input[i]); + dev_info(smu->adev->dev, "Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + + ret = sienna_cichlid_od_setting_check_range(smu, od_settings, + freq_setting, input[i + 1]); + if (ret) + return ret; + + *freq_ptr = (uint16_t)input[i + 1]; + } + break; + + case PP_OD_EDIT_MCLK_VDDC_TABLE: + if (!sienna_cichlid_is_od_feature_supported(od_settings, SMU_11_0_7_ODCAP_UCLK_LIMITS)) { + dev_warn(smu->adev->dev, "UCLK_LIMITS not supported!\n"); + return -ENOTSUPP; + } + + for (i = 0; i < size; i += 2) { + if (i + 2 > size) { + dev_info(smu->adev->dev, "invalid number of input parameters %d\n", size); + return -EINVAL; + } + + switch (input[i]) { + case 0: + if (input[i + 1] > od_table->UclkFmax) { + dev_info(smu->adev->dev, "UclkFmin (%ld) must be <= UclkFmax (%u)!\n", + input[i + 1], od_table->UclkFmax); + return -EINVAL; + } + + freq_setting = SMU_11_0_7_ODSETTING_UCLKFMIN; + freq_ptr = &od_table->UclkFmin; + break; + + case 1: + if (input[i + 1] < od_table->UclkFmin) { + dev_info(smu->adev->dev, "UclkFmax (%ld) must be >= UclkFmin (%u)!\n", + input[i + 1], od_table->UclkFmin); + return -EINVAL; + } + + freq_setting = SMU_11_0_7_ODSETTING_UCLKFMAX; + freq_ptr = &od_table->UclkFmax; + break; + + default: + dev_info(smu->adev->dev, "Invalid MCLK_VDDC_TABLE index: %ld\n", input[i]); + dev_info(smu->adev->dev, "Supported indices: [0:min,1:max]\n"); + return -EINVAL; + } + + ret = sienna_cichlid_od_setting_check_range(smu, od_settings, + freq_setting, input[i + 1]); + if (ret) + return ret; + + *freq_ptr = (uint16_t)input[i + 1]; + } + break; + + case PP_OD_RESTORE_DEFAULT_TABLE: + memcpy(table_context->overdrive_table, + table_context->boot_overdrive_table, + sizeof(OverDriveTable_t)); + fallthrough; + + case PP_OD_COMMIT_DPM_TABLE: + sienna_cichlid_dump_od_table(smu, od_table); + + ret = smu_cmn_update_table(smu, SMU_TABLE_OVERDRIVE, + 0, (void *)od_table, true); + if (ret) { + dev_err(smu->adev->dev, "Failed to import overdrive table!\n"); + return 
ret; + } + break; + + case PP_OD_EDIT_VDDGFX_OFFSET: + if (size != 1) { + dev_info(smu->adev->dev, "invalid number of parameters: %d\n", size); + return -EINVAL; + } + + /* + * OD GFX Voltage Offset functionality is supported only by 58.41.0 + * and onwards SMU firmwares. + */ + smu_cmn_get_smc_version(smu, NULL, &smu_version); + if ((adev->asic_type == CHIP_SIENNA_CICHLID) && + (smu_version < 0x003a2900)) { + dev_err(smu->adev->dev, "OD GFX Voltage offset functionality is supported " + "only by 58.41.0 and onwards SMU firmwares!\n"); + return -EOPNOTSUPP; + } + + od_table->VddGfxOffset = (int16_t)input[0]; + + sienna_cichlid_dump_od_table(smu, od_table); + break; + + default: + return -ENOSYS; + } + + return ret; +} + static int sienna_cichlid_run_btc(struct smu_context *smu) { return smu_cmn_send_smc_msg(smu, SMU_MSG_RunDcBtc, NULL); @@ -2372,7 +2700,7 @@ static void sienna_cichlid_fill_i2c_req(SwI2cRequest_t *req, bool write, { int i; - req->I2CcontrollerPort = 0; + req->I2CcontrollerPort = 1; req->I2CSpeed = 2; req->SlaveAddress = address; req->NumCmds = numbytes; @@ -2817,6 +3145,8 @@ static const struct pptable_funcs sienna_cichlid_ppt_funcs = { .mode1_reset = smu_v11_0_mode1_reset, .get_dpm_ultimate_freq = sienna_cichlid_get_dpm_ultimate_freq, .set_soft_freq_limited_range = smu_v11_0_set_soft_freq_limited_range, + .set_default_od_settings = sienna_cichlid_set_default_od_settings, + .od_edit_dpm_table = sienna_cichlid_od_edit_dpm_table, .run_btc = sienna_cichlid_run_btc, .set_power_source = smu_v11_0_set_power_source, .get_pp_feature_mask = smu_cmn_get_pp_feature_mask, diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c index 8cb4fcee9a2c..a79dd04f81a2 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c @@ -31,6 +31,9 @@ #include "smu_v11_5_ppsmc.h" #include "smu_v11_5_pmfw.h" #include "smu_cmn.h" +#include "soc15_common.h" +#include "asic_reg/gc/gc_10_3_0_offset.h" +#include "asic_reg/gc/gc_10_3_0_sh_mask.h" /* * DO NOT use these for err/warn/info/debug messages. 
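[Editorial note, not part of the patch] For context on the firmware gate used in the sienna_cichlid overdrive hunks above (the `smu_version < 0x003a2900` check annotated as "58.41.0 and onwards"): pairing 58.41.0 with 0x003a2900 is consistent with the SMU version being packed one byte per component, major in bits 23:16, minor in bits 15:8, patch in bits 7:0. The standalone sketch below only illustrates that decoding and the same comparison; the macro and helper name are made up for illustration and do not exist in the driver.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Illustrative sketch only. Assumes the (major << 16) | (minor << 8) | patch
	 * packing implied by the 58.41.0 <-> 0x003a2900 pairing in the patch comments.
	 */
	#define SMU_FW_MIN_VDDGFX_OFFSET 0x003a2900u	/* 58.41.0 */

	static bool vddgfx_offset_supported(uint32_t smu_version)
	{
		/* Same shape as the patch's check: strictly older firmware is rejected. */
		return smu_version >= SMU_FW_MIN_VDDGFX_OFFSET;
	}

	int main(void)
	{
		uint32_t v = 0x003a2900;	/* boundary case: first supported firmware */

		printf("SMU fw %u.%u.%u: VddGfxOffset %s\n",
		       (v >> 16) & 0xff, (v >> 8) & 0xff, v & 0xff,
		       vddgfx_offset_supported(v) ? "supported" : "not supported");
		return 0;
	}

Compiled with any C compiler (e.g. cc sketch.c), this prints "SMU fw 58.41.0: VddGfxOffset supported", matching the boundary the patch treats as the first firmware that accepts the OD GFX voltage offset.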
@@ -118,6 +121,7 @@ static struct cmn2asic_msg_mapping vangogh_message_map[SMU_MSG_MAX_COUNT] = { MSG_MAP(StopDramLogging, PPSMC_MSG_StopDramLogging, 0), MSG_MAP(SetSoftMinCclk, PPSMC_MSG_SetSoftMinCclk, 0), MSG_MAP(SetSoftMaxCclk, PPSMC_MSG_SetSoftMaxCclk, 0), + MSG_MAP(RequestActiveWgp, PPSMC_MSG_RequestActiveWgp, 0), }; static struct cmn2asic_mapping vangogh_feature_mask_map[SMU_FEATURE_COUNT] = { @@ -162,6 +166,9 @@ static struct cmn2asic_mapping vangogh_feature_mask_map[SMU_FEATURE_COUNT] = { FEA_MAP(A55_DPM), FEA_MAP(CVIP_DSP_DPM), FEA_MAP(MSMU_LOW_POWER), + FEA_MAP_REVERSE(SOCCLK), + FEA_MAP_REVERSE(FCLK), + FEA_MAP_HALF_REVERSE(GFX), }; static struct cmn2asic_mapping vangogh_table_map[SMU_TABLE_COUNT] = { @@ -242,6 +249,12 @@ static int vangogh_get_smu_metrics_data(struct smu_context *smu, case METRICS_AVERAGE_SOCCLK: *value = metrics->SocclkFrequency; break; + case METRICS_AVERAGE_VCLK: + *value = metrics->VclkFrequency; + break; + case METRICS_AVERAGE_DCLK: + *value = metrics->DclkFrequency; + break; case METRICS_AVERAGE_UCLK: *value = metrics->MemclkFrequency; break; @@ -252,7 +265,8 @@ static int vangogh_get_smu_metrics_data(struct smu_context *smu, *value = metrics->UvdActivity; break; case METRICS_AVERAGE_SOCKETPOWER: - *value = metrics->CurrentSocketPower; + *value = (metrics->CurrentSocketPower << 8) / + 1000 ; break; case METRICS_TEMPERATURE_EDGE: *value = metrics->GfxTemperature / 100 * @@ -366,6 +380,10 @@ static int vangogh_get_allowed_feature_mask(struct smu_context *smu, *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_DPM_BIT) | FEATURE_MASK(FEATURE_MP0CLK_DPM_BIT) + | FEATURE_MASK(FEATURE_SOCCLK_DPM_BIT) + | FEATURE_MASK(FEATURE_VCN_DPM_BIT) + | FEATURE_MASK(FEATURE_FCLK_DPM_BIT) + | FEATURE_MASK(FEATURE_DCFCLK_DPM_BIT) | FEATURE_MASK(FEATURE_DS_SOCCLK_BIT) | FEATURE_MASK(FEATURE_PPT_BIT) | FEATURE_MASK(FEATURE_TDC_BIT) @@ -379,6 +397,12 @@ static int vangogh_get_allowed_feature_mask(struct smu_context *smu, if (adev->pm.pp_feature & PP_DCEFCLK_DPM_MASK) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_DCFCLK_DPM_BIT); + if (adev->pm.pp_feature & PP_MCLK_DPM_MASK) + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_FCLK_DPM_BIT); + + if (adev->pm.pp_feature & PP_SCLK_DPM_MASK) + *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_GFX_DPM_BIT); + if (smu->adev->pg_flags & AMD_PG_SUPPORT_ATHUB) *(uint64_t *)feature_mask |= FEATURE_MASK(FEATURE_ATHUB_PG_BIT); @@ -402,10 +426,63 @@ static bool vangogh_is_dpm_running(struct smu_context *smu) return !!(feature_enabled & SMC_DPM_FEATURE); } +static int vangogh_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type clk_type, + uint32_t dpm_level, uint32_t *freq) +{ + DpmClocks_t *clk_table = smu->smu_table.clocks_table; + + if (!clk_table || clk_type >= SMU_CLK_COUNT) + return -EINVAL; + + switch (clk_type) { + case SMU_SOCCLK: + if (dpm_level >= clk_table->NumSocClkLevelsEnabled) + return -EINVAL; + *freq = clk_table->SocClocks[dpm_level]; + break; + case SMU_VCLK: + if (dpm_level >= clk_table->VcnClkLevelsEnabled) + return -EINVAL; + *freq = clk_table->VcnClocks[dpm_level].vclk; + break; + case SMU_DCLK: + if (dpm_level >= clk_table->VcnClkLevelsEnabled) + return -EINVAL; + *freq = clk_table->VcnClocks[dpm_level].dclk; + break; + case SMU_UCLK: + case SMU_MCLK: + if (dpm_level >= clk_table->NumDfPstatesEnabled) + return -EINVAL; + *freq = clk_table->DfPstateTable[dpm_level].memclk; + + break; + case SMU_FCLK: + if (dpm_level >= clk_table->NumDfPstatesEnabled) + return -EINVAL; + *freq = 
clk_table->DfPstateTable[dpm_level].fclk; + break; + default: + return -EINVAL; + } + + return 0; +} + static int vangogh_print_fine_grain_clk(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { - int size = 0; + DpmClocks_t *clk_table = smu->smu_table.clocks_table; + SmuMetrics_t metrics; + int i, size = 0, ret = 0; + uint32_t cur_value = 0, value = 0, count = 0; + bool cur_value_match_level = false; + + memset(&metrics, 0, sizeof(metrics)); + + ret = smu_cmn_get_metrics_table(smu, &metrics, false); + if (ret) + return ret; switch (clk_type) { case SMU_OD_SCLK: @@ -424,13 +501,733 @@ static int vangogh_print_fine_grain_clk(struct smu_context *smu, smu->gfx_default_hard_min_freq, smu->gfx_default_soft_max_freq); } break; + case SMU_SOCCLK: + /* the level 3 ~ 6 of socclk use the same frequency for vangogh */ + count = clk_table->NumSocClkLevelsEnabled; + cur_value = metrics.SocclkFrequency; + break; + case SMU_VCLK: + count = clk_table->VcnClkLevelsEnabled; + cur_value = metrics.VclkFrequency; + break; + case SMU_DCLK: + count = clk_table->VcnClkLevelsEnabled; + cur_value = metrics.DclkFrequency; + break; + case SMU_MCLK: + count = clk_table->NumDfPstatesEnabled; + cur_value = metrics.MemclkFrequency; + break; + case SMU_FCLK: + count = clk_table->NumDfPstatesEnabled; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_GetFclkFrequency, 0, &cur_value); + if (ret) + return ret; + break; default: break; } + switch (clk_type) { + case SMU_SOCCLK: + case SMU_VCLK: + case SMU_DCLK: + case SMU_MCLK: + case SMU_FCLK: + for (i = 0; i < count; i++) { + ret = vangogh_get_dpm_clk_limited(smu, clk_type, i, &value); + if (ret) + return ret; + if (!value) + continue; + size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + cur_value == value ? "*" : ""); + if (cur_value == value) + cur_value_match_level = true; + } + + if (!cur_value_match_level) + size += sprintf(buf + size, " %uMhz *\n", cur_value); + break; + default: + break; + } + + return size; +} + +static int vangogh_get_profiling_clk_mask(struct smu_context *smu, + enum amd_dpm_forced_level level, + uint32_t *vclk_mask, + uint32_t *dclk_mask, + uint32_t *mclk_mask, + uint32_t *fclk_mask, + uint32_t *soc_mask) +{ + DpmClocks_t *clk_table = smu->smu_table.clocks_table; + + if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK) { + if (mclk_mask) + *mclk_mask = clk_table->NumDfPstatesEnabled - 1; + + if (fclk_mask) + *fclk_mask = clk_table->NumDfPstatesEnabled - 1; + + if (soc_mask) + *soc_mask = 0; + } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK) { + if (mclk_mask) + *mclk_mask = 0; + + if (fclk_mask) + *fclk_mask = 0; + + if (soc_mask) + *soc_mask = 1; + + if (vclk_mask) + *vclk_mask = 1; + + if (dclk_mask) + *dclk_mask = 1; + } else if (level == AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD) { + if (mclk_mask) + *mclk_mask = 0; + + if (fclk_mask) + *fclk_mask = 0; + + if (soc_mask) + *soc_mask = 1; + + if (vclk_mask) + *vclk_mask = 1; + + if (dclk_mask) + *dclk_mask = 1; + } + + return 0; +} + +bool vangogh_clk_dpm_is_enabled(struct smu_context *smu, + enum smu_clk_type clk_type) +{ + enum smu_feature_mask feature_id = 0; + + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + case SMU_FCLK: + feature_id = SMU_FEATURE_DPM_FCLK_BIT; + break; + case SMU_GFXCLK: + case SMU_SCLK: + feature_id = SMU_FEATURE_DPM_GFXCLK_BIT; + break; + case SMU_SOCCLK: + feature_id = SMU_FEATURE_DPM_SOCCLK_BIT; + break; + case SMU_VCLK: + case SMU_DCLK: + feature_id = SMU_FEATURE_VCN_DPM_BIT; + break; + default: + return true; + } + + if 
(!smu_cmn_feature_is_enabled(smu, feature_id)) + return false; + + return true; +} + +static int vangogh_get_dpm_ultimate_freq(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t *min, + uint32_t *max) +{ + int ret = 0; + uint32_t soc_mask; + uint32_t vclk_mask; + uint32_t dclk_mask; + uint32_t mclk_mask; + uint32_t fclk_mask; + uint32_t clock_limit; + + if (!vangogh_clk_dpm_is_enabled(smu, clk_type)) { + switch (clk_type) { + case SMU_MCLK: + case SMU_UCLK: + clock_limit = smu->smu_table.boot_values.uclk; + break; + case SMU_FCLK: + clock_limit = smu->smu_table.boot_values.fclk; + break; + case SMU_GFXCLK: + case SMU_SCLK: + clock_limit = smu->smu_table.boot_values.gfxclk; + break; + case SMU_SOCCLK: + clock_limit = smu->smu_table.boot_values.socclk; + break; + case SMU_VCLK: + clock_limit = smu->smu_table.boot_values.vclk; + break; + case SMU_DCLK: + clock_limit = smu->smu_table.boot_values.dclk; + break; + default: + clock_limit = 0; + break; + } + + /* clock in Mhz unit */ + if (min) + *min = clock_limit / 100; + if (max) + *max = clock_limit / 100; + + return 0; + } + if (max) { + ret = vangogh_get_profiling_clk_mask(smu, + AMD_DPM_FORCED_LEVEL_PROFILE_PEAK, + &vclk_mask, + &dclk_mask, + &mclk_mask, + &fclk_mask, + &soc_mask); + if (ret) + goto failed; + + switch (clk_type) { + case SMU_UCLK: + case SMU_MCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, mclk_mask, max); + if (ret) + goto failed; + break; + case SMU_SOCCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, soc_mask, max); + if (ret) + goto failed; + break; + case SMU_FCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, fclk_mask, max); + if (ret) + goto failed; + break; + case SMU_VCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, vclk_mask, max); + if (ret) + goto failed; + break; + case SMU_DCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, dclk_mask, max); + if (ret) + goto failed; + break; + default: + ret = -EINVAL; + goto failed; + } + } + if (min) { + switch (clk_type) { + case SMU_UCLK: + case SMU_MCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, mclk_mask, min); + if (ret) + goto failed; + break; + case SMU_SOCCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, soc_mask, min); + if (ret) + goto failed; + break; + case SMU_FCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, fclk_mask, min); + if (ret) + goto failed; + break; + case SMU_VCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, vclk_mask, min); + if (ret) + goto failed; + break; + case SMU_DCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, dclk_mask, min); + if (ret) + goto failed; + break; + default: + ret = -EINVAL; + goto failed; + } + } +failed: + return ret; +} + +static int vangogh_get_power_profile_mode(struct smu_context *smu, + char *buf) +{ + static const char *profile_name[] = { + "FULL_SCREEN_3D", + "VIDEO", + "VR", + "COMPUTE", + "CUSTOM"}; + uint32_t i, size = 0; + int16_t workload_type = 0; + + if (!buf) + return -EINVAL; + + for (i = 0; i <= PP_SMC_POWER_PROFILE_CUSTOM; i++) { + /* + * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT + * Not all profile modes are supported on vangogh. + */ + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + i); + + if (workload_type < 0) + continue; + + size += sprintf(buf + size, "%2d %14s%s\n", + i, profile_name[i], (i == smu->power_profile_mode) ? 
"*" : " "); + } + return size; } +static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size) +{ + int workload_type, ret; + uint32_t profile_mode = input[size]; + + if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) { + dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode); + return -EINVAL; + } + + /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */ + workload_type = smu_cmn_to_asic_specific_index(smu, + CMN2ASIC_MAPPING_WORKLOAD, + profile_mode); + if (workload_type < 0) { + dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n", + profile_mode); + return -EINVAL; + } + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify, + 1 << workload_type, + NULL); + if (ret) { + dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", + workload_type); + return ret; + } + + smu->power_profile_mode = profile_mode; + + return 0; +} + +static int vangogh_set_soft_freq_limited_range(struct smu_context *smu, + enum smu_clk_type clk_type, + uint32_t min, + uint32_t max) +{ + int ret = 0; + + if (!vangogh_clk_dpm_is_enabled(smu, clk_type)) + return 0; + + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinGfxClk, + min, NULL); + if (ret) + return ret; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxGfxClk, + max, NULL); + if (ret) + return ret; + break; + case SMU_FCLK: + case SMU_MCLK: + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinFclkByFreq, + min, NULL); + if (ret) + return ret; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxFclkByFreq, + max, NULL); + if (ret) + return ret; + break; + case SMU_SOCCLK: + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinSocclkByFreq, + min, NULL); + if (ret) + return ret; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxSocclkByFreq, + max, NULL); + if (ret) + return ret; + break; + case SMU_VCLK: + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinVcn, + min << 16, NULL); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxVcn, + max << 16, NULL); + if (ret) + return ret; + break; + case SMU_DCLK: + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinVcn, + min, NULL); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxVcn, + max, NULL); + if (ret) + return ret; + break; + default: + return -EINVAL; + } + + return ret; +} + +static int vangogh_force_clk_levels(struct smu_context *smu, + enum smu_clk_type clk_type, uint32_t mask) +{ + uint32_t soft_min_level = 0, soft_max_level = 0; + uint32_t min_freq = 0, max_freq = 0; + int ret = 0 ; + + soft_min_level = mask ? (ffs(mask) - 1) : 0; + soft_max_level = mask ? 
(fls(mask) - 1) : 0; + + switch (clk_type) { + case SMU_SOCCLK: + ret = vangogh_get_dpm_clk_limited(smu, clk_type, + soft_min_level, &min_freq); + if (ret) + return ret; + ret = vangogh_get_dpm_clk_limited(smu, clk_type, + soft_max_level, &max_freq); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxSocclkByFreq, + max_freq, NULL); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinSocclkByFreq, + min_freq, NULL); + if (ret) + return ret; + break; + case SMU_MCLK: + case SMU_FCLK: + ret = vangogh_get_dpm_clk_limited(smu, + clk_type, soft_min_level, &min_freq); + if (ret) + return ret; + ret = vangogh_get_dpm_clk_limited(smu, + clk_type, soft_max_level, &max_freq); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxFclkByFreq, + max_freq, NULL); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinFclkByFreq, + min_freq, NULL); + if (ret) + return ret; + break; + case SMU_VCLK: + ret = vangogh_get_dpm_clk_limited(smu, + clk_type, soft_min_level, &min_freq); + if (ret) + return ret; + + ret = vangogh_get_dpm_clk_limited(smu, + clk_type, soft_max_level, &max_freq); + if (ret) + return ret; + + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinVcn, + min_freq << 16, NULL); + if (ret) + return ret; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxVcn, + max_freq << 16, NULL); + if (ret) + return ret; + + break; + case SMU_DCLK: + ret = vangogh_get_dpm_clk_limited(smu, + clk_type, soft_min_level, &min_freq); + if (ret) + return ret; + + ret = vangogh_get_dpm_clk_limited(smu, + clk_type, soft_max_level, &max_freq); + if (ret) + return ret; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinVcn, + min_freq, NULL); + if (ret) + return ret; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxVcn, + max_freq, NULL); + if (ret) + return ret; + + break; + default: + break; + } + + return ret; +} + +static int vangogh_force_dpm_limit_value(struct smu_context *smu, bool highest) +{ + int ret = 0, i = 0; + uint32_t min_freq, max_freq, force_freq; + enum smu_clk_type clk_type; + + enum smu_clk_type clks[] = { + SMU_SOCCLK, + SMU_VCLK, + SMU_DCLK, + SMU_MCLK, + SMU_FCLK, + }; + + for (i = 0; i < ARRAY_SIZE(clks); i++) { + clk_type = clks[i]; + ret = vangogh_get_dpm_ultimate_freq(smu, clk_type, &min_freq, &max_freq); + if (ret) + return ret; + + force_freq = highest ? 
max_freq : min_freq; + ret = vangogh_set_soft_freq_limited_range(smu, clk_type, force_freq, force_freq); + if (ret) + return ret; + } + + return ret; +} + +static int vangogh_unforce_dpm_levels(struct smu_context *smu) +{ + int ret = 0, i = 0; + uint32_t min_freq, max_freq; + enum smu_clk_type clk_type; + + struct clk_feature_map { + enum smu_clk_type clk_type; + uint32_t feature; + } clk_feature_map[] = { + {SMU_MCLK, SMU_FEATURE_DPM_FCLK_BIT}, + {SMU_FCLK, SMU_FEATURE_DPM_FCLK_BIT}, + {SMU_SOCCLK, SMU_FEATURE_DPM_SOCCLK_BIT}, + {SMU_VCLK, SMU_FEATURE_VCN_DPM_BIT}, + {SMU_DCLK, SMU_FEATURE_VCN_DPM_BIT}, + }; + + for (i = 0; i < ARRAY_SIZE(clk_feature_map); i++) { + + if (!smu_cmn_feature_is_enabled(smu, clk_feature_map[i].feature)) + continue; + + clk_type = clk_feature_map[i].clk_type; + + ret = vangogh_get_dpm_ultimate_freq(smu, clk_type, &min_freq, &max_freq); + + if (ret) + return ret; + + ret = vangogh_set_soft_freq_limited_range(smu, clk_type, min_freq, max_freq); + + if (ret) + return ret; + } + + return ret; +} + +static int vangogh_set_peak_clock_by_device(struct smu_context *smu) +{ + int ret = 0; + uint32_t socclk_freq = 0, fclk_freq = 0; + uint32_t vclk_freq = 0, dclk_freq = 0; + + ret = vangogh_get_dpm_ultimate_freq(smu, SMU_FCLK, NULL, &fclk_freq); + if (ret) + return ret; + + ret = vangogh_set_soft_freq_limited_range(smu, SMU_FCLK, fclk_freq, fclk_freq); + if (ret) + return ret; + + ret = vangogh_get_dpm_ultimate_freq(smu, SMU_SOCCLK, NULL, &socclk_freq); + if (ret) + return ret; + + ret = vangogh_set_soft_freq_limited_range(smu, SMU_SOCCLK, socclk_freq, socclk_freq); + if (ret) + return ret; + + ret = vangogh_get_dpm_ultimate_freq(smu, SMU_VCLK, NULL, &vclk_freq); + if (ret) + return ret; + + ret = vangogh_set_soft_freq_limited_range(smu, SMU_VCLK, vclk_freq, vclk_freq); + if (ret) + return ret; + + ret = vangogh_get_dpm_ultimate_freq(smu, SMU_DCLK, NULL, &dclk_freq); + if (ret) + return ret; + + ret = vangogh_set_soft_freq_limited_range(smu, SMU_DCLK, dclk_freq, dclk_freq); + if (ret) + return ret; + + return ret; +} + +static int vangogh_set_performance_level(struct smu_context *smu, + enum amd_dpm_forced_level level) +{ + int ret = 0; + uint32_t soc_mask, mclk_mask, fclk_mask; + uint32_t vclk_mask = 0, dclk_mask = 0; + + switch (level) { + case AMD_DPM_FORCED_LEVEL_HIGH: + ret = vangogh_force_dpm_limit_value(smu, true); + break; + case AMD_DPM_FORCED_LEVEL_LOW: + ret = vangogh_force_dpm_limit_value(smu, false); + break; + case AMD_DPM_FORCED_LEVEL_AUTO: + ret = vangogh_unforce_dpm_levels(smu); + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinGfxClk, + VANGOGH_UMD_PSTATE_STANDARD_GFXCLK, NULL); + if (ret) + return ret; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxGfxClk, + VANGOGH_UMD_PSTATE_STANDARD_GFXCLK, NULL); + if (ret) + return ret; + + ret = vangogh_get_profiling_clk_mask(smu, level, + &vclk_mask, + &dclk_mask, + &mclk_mask, + &fclk_mask, + &soc_mask); + if (ret) + return ret; + + vangogh_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask); + vangogh_force_clk_levels(smu, SMU_FCLK, 1 << fclk_mask); + vangogh_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask); + vangogh_force_clk_levels(smu, SMU_VCLK, 1 << vclk_mask); + vangogh_force_clk_levels(smu, SMU_DCLK, 1 << dclk_mask); + + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinVcn, + VANGOGH_UMD_PSTATE_PEAK_DCLK, NULL); + if (ret) + return ret; + + ret = 
smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxVcn, + VANGOGH_UMD_PSTATE_PEAK_DCLK, NULL); + if (ret) + return ret; + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + ret = vangogh_get_profiling_clk_mask(smu, level, + NULL, + NULL, + &mclk_mask, + &fclk_mask, + NULL); + if (ret) + return ret; + + vangogh_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask); + vangogh_force_clk_levels(smu, SMU_FCLK, 1 << fclk_mask); + break; + case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk, + VANGOGH_UMD_PSTATE_PEAK_GFXCLK, NULL); + if (ret) + return ret; + + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetSoftMaxGfxClk, + VANGOGH_UMD_PSTATE_PEAK_GFXCLK, NULL); + if (ret) + return ret; + + ret = vangogh_set_peak_clock_by_device(smu); + break; + case AMD_DPM_FORCED_LEVEL_MANUAL: + case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: + default: + break; + } + return ret; +} + static int vangogh_read_sensor(struct smu_context *smu, enum amd_pp_sensors sensor, void *data, uint32_t *size) @@ -513,7 +1310,7 @@ static int vangogh_set_watermarks_table(struct smu_context *smu, if (clock_ranges) { if (clock_ranges->num_reader_wm_sets > NUM_WM_RANGES || - clock_ranges->num_writer_wm_sets > NUM_WM_RANGES) + clock_ranges->num_writer_wm_sets > NUM_WM_RANGES) return -EINVAL; for (i = 0; i < clock_ranges->num_reader_wm_sets; i++) { @@ -631,14 +1428,16 @@ static int vangogh_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TAB if (input[0] == 0) { if (input[1] < smu->gfx_default_hard_min_freq) { - dev_warn(smu->adev->dev, "Fine grain setting minimum sclk (%ld) MHz is less than the minimum allowed (%d) MHz\n", + dev_warn(smu->adev->dev, + "Fine grain setting minimum sclk (%ld) MHz is less than the minimum allowed (%d) MHz\n", input[1], smu->gfx_default_hard_min_freq); return -EINVAL; } smu->gfx_actual_hard_min_freq = input[1]; } else if (input[0] == 1) { if (input[1] > smu->gfx_default_soft_max_freq) { - dev_warn(smu->adev->dev, "Fine grain setting maximum sclk (%ld) MHz is greater than the maximum allowed (%d) MHz\n", + dev_warn(smu->adev->dev, + "Fine grain setting maximum sclk (%ld) MHz is greater than the maximum allowed (%d) MHz\n", input[1], smu->gfx_default_soft_max_freq); return -EINVAL; } @@ -676,8 +1475,10 @@ static int vangogh_od_edit_dpm_table(struct smu_context *smu, enum PP_OD_DPM_TAB return -EINVAL; } else { if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) { - dev_err(smu->adev->dev, "The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", - smu->gfx_actual_hard_min_freq, smu->gfx_actual_soft_max_freq); + dev_err(smu->adev->dev, + "The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", + smu->gfx_actual_hard_min_freq, + smu->gfx_actual_soft_max_freq); return -EINVAL; } @@ -722,6 +1523,33 @@ static int vangogh_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) return 0; } +static int vangogh_get_dpm_clock_table(struct smu_context *smu, struct dpm_clocks *clock_table) +{ + DpmClocks_t *table = smu->smu_table.clocks_table; + int i; + + if (!clock_table || !table) + return -EINVAL; + + for (i = 0; i < NUM_SOCCLK_DPM_LEVELS; i++) { + clock_table->SocClocks[i].Freq = table->SocClocks[i]; + clock_table->SocClocks[i].Vol = table->SocVoltage[i]; + } + + for (i = 0; i < NUM_FCLK_DPM_LEVELS; i++) { + clock_table->FClocks[i].Freq = table->DfPstateTable[i].fclk; + clock_table->FClocks[i].Vol = table->DfPstateTable[i].voltage; + } + + for (i = 0; i < 
NUM_FCLK_DPM_LEVELS; i++) { + clock_table->MemClocks[i].Freq = table->DfPstateTable[i].memclk; + clock_table->MemClocks[i].Vol = table->DfPstateTable[i].voltage; + } + + return 0; +} + + static int vangogh_system_features_control(struct smu_context *smu, bool en) { struct amdgpu_device *adev = smu->adev; @@ -733,6 +1561,38 @@ static int vangogh_system_features_control(struct smu_context *smu, bool en) return 0; } +static int vangogh_post_smu_init(struct smu_context *smu) +{ + struct amdgpu_device *adev = smu->adev; + uint32_t tmp; + uint8_t aon_bits = 0; + /* Two CUs in one WGP */ + uint32_t req_active_wgps = adev->gfx.cu_info.number/2; + uint32_t total_cu = adev->gfx.config.max_cu_per_sh * + adev->gfx.config.max_sh_per_se * adev->gfx.config.max_shader_engines; + + /* if all CUs are active, no need to power off any WGPs */ + if (total_cu == adev->gfx.cu_info.number) + return 0; + + /* + * Calculate the total bits number of always on WGPs for all SA/SEs in + * RLC_PG_ALWAYS_ON_WGP_MASK. + */ + tmp = RREG32_KIQ(SOC15_REG_OFFSET(GC, 0, mmRLC_PG_ALWAYS_ON_WGP_MASK)); + tmp &= RLC_PG_ALWAYS_ON_WGP_MASK__AON_WGP_MASK_MASK; + + aon_bits = hweight32(tmp) * adev->gfx.config.max_sh_per_se * adev->gfx.config.max_shader_engines; + + /* Do not request any WGPs less than set in the AON_WGP_MASK */ + if (aon_bits > req_active_wgps) { + dev_info(adev->dev, "Number of always on WGPs greater than active WGPs: WGP power save not requested.\n"); + return 0; + } else { + return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RequestActiveWgp, req_active_wgps, NULL); + } +} + static const struct pptable_funcs vangogh_ppt_funcs = { .check_fw_status = smu_v11_0_check_fw_status, @@ -761,6 +1621,13 @@ static const struct pptable_funcs vangogh_ppt_funcs = { .set_default_dpm_table = vangogh_set_default_dpm_tables, .set_fine_grain_gfx_freq_parameters = vangogh_set_fine_grain_gfx_freq_parameters, .system_features_control = vangogh_system_features_control, + .feature_is_enabled = smu_cmn_feature_is_enabled, + .set_power_profile_mode = vangogh_set_power_profile_mode, + .get_power_profile_mode = vangogh_get_power_profile_mode, + .get_dpm_clock_table = vangogh_get_dpm_clock_table, + .force_clk_levels = vangogh_force_clk_levels, + .set_performance_level = vangogh_set_performance_level, + .post_init = vangogh_post_smu_init, }; void vangogh_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h index eab455493076..c56d4583dc72 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h +++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.h @@ -28,9 +28,29 @@ extern void vangogh_set_ppt_funcs(struct smu_context *smu); /* UMD PState Vangogh Msg Parameters in MHz */ -#define VANGOGH_UMD_PSTATE_GFXCLK 700 -#define VANGOGH_UMD_PSTATE_SOCCLK 678 -#define VANGOGH_UMD_PSTATE_FCLK 800 +#define VANGOGH_UMD_PSTATE_STANDARD_GFXCLK 1100 +#define VANGOGH_UMD_PSTATE_STANDARD_SOCCLK 600 +#define VANGOGH_UMD_PSTATE_STANDARD_FCLK 800 +#define VANGOGH_UMD_PSTATE_STANDARD_VCLK 705 +#define VANGOGH_UMD_PSTATE_STANDARD_DCLK 600 + +#define VANGOGH_UMD_PSTATE_PEAK_GFXCLK 1300 +#define VANGOGH_UMD_PSTATE_PEAK_SOCCLK 600 +#define VANGOGH_UMD_PSTATE_PEAK_FCLK 800 +#define VANGOGH_UMD_PSTATE_PEAK_VCLK 705 +#define VANGOGH_UMD_PSTATE_PEAK_DCLK 600 + +#define VANGOGH_UMD_PSTATE_MIN_SCLK_GFXCLK 400 +#define VANGOGH_UMD_PSTATE_MIN_SCLK_SOCCLK 1000 +#define VANGOGH_UMD_PSTATE_MIN_SCLK_FCLK 800 +#define VANGOGH_UMD_PSTATE_MIN_SCLK_VCLK 1000 +#define 
VANGOGH_UMD_PSTATE_MIN_SCLK_DCLK 800 + +#define VANGOGH_UMD_PSTATE_MIN_MCLK_GFXCLK 1100 +#define VANGOGH_UMD_PSTATE_MIN_MCLK_SOCCLK 1000 +#define VANGOGH_UMD_PSTATE_MIN_MCLK_FCLK 400 +#define VANGOGH_UMD_PSTATE_MIN_MCLK_VCLK 1000 +#define VANGOGH_UMD_PSTATE_MIN_MCLK_DCLK 800 /* RLC Power Status */ #define RLC_STATUS_OFF 0 diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c index dc75db8af371..1f6a774278b1 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c @@ -188,6 +188,7 @@ static int renoir_get_dpm_clk_limited(struct smu_context *smu, enum smu_clk_type return -EINVAL; *freq = clk_table->SocClocks[dpm_level].Freq; break; + case SMU_UCLK: case SMU_MCLK: if (dpm_level >= NUM_FCLK_DPM_LEVELS) return -EINVAL; @@ -343,6 +344,138 @@ failed: return ret; } +static int renoir_od_edit_dpm_table(struct smu_context *smu, + enum PP_OD_DPM_TABLE_COMMAND type, + long input[], uint32_t size) +{ + int ret = 0; + + if (!smu->fine_grain_enabled) { + dev_warn(smu->adev->dev, "Fine grain is not enabled!\n"); + return -EINVAL; + } + + if (!smu->fine_grain_started) { + dev_warn(smu->adev->dev, "Fine grain is enabled but not started!\n"); + return -EINVAL; + } + + switch (type) { + case PP_OD_EDIT_SCLK_VDDC_TABLE: + if (size != 2) { + dev_err(smu->adev->dev, "Input parameter number not correct\n"); + return -EINVAL; + } + + if (input[0] == 0) { + if (input[1] < smu->gfx_default_hard_min_freq) { + dev_warn(smu->adev->dev, + "Fine grain setting minimum sclk (%ld) MHz is less than the minimum allowed (%d) MHz\n", + input[1], smu->gfx_default_hard_min_freq); + return -EINVAL; + } + smu->gfx_actual_hard_min_freq = input[1]; + } else if (input[0] == 1) { + if (input[1] > smu->gfx_default_soft_max_freq) { + dev_warn(smu->adev->dev, + "Fine grain setting maximum sclk (%ld) MHz is greater than the maximum allowed (%d) MHz\n", + input[1], smu->gfx_default_soft_max_freq); + return -EINVAL; + } + smu->gfx_actual_soft_max_freq = input[1]; + } else { + return -EINVAL; + } + break; + case PP_OD_RESTORE_DEFAULT_TABLE: + if (size != 0) { + dev_err(smu->adev->dev, "Input parameter number not correct\n"); + return -EINVAL; + } + smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; + smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinGfxClk, + smu->gfx_actual_hard_min_freq, + NULL); + if (ret) { + dev_err(smu->adev->dev, "Restore the default hard min sclk failed!"); + return ret; + } + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxGfxClk, + smu->gfx_actual_soft_max_freq, + NULL); + if (ret) { + dev_err(smu->adev->dev, "Restore the default soft max sclk failed!"); + return ret; + } + break; + case PP_OD_COMMIT_DPM_TABLE: + if (size != 0) { + dev_err(smu->adev->dev, "Input parameter number not correct\n"); + return -EINVAL; + } else { + if (smu->gfx_actual_hard_min_freq > smu->gfx_actual_soft_max_freq) { + dev_err(smu->adev->dev, + "The setting minimun sclk (%d) MHz is greater than the setting maximum sclk (%d) MHz\n", + smu->gfx_actual_hard_min_freq, + smu->gfx_actual_soft_max_freq); + return -EINVAL; + } + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetHardMinGfxClk, + smu->gfx_actual_hard_min_freq, + NULL); + if (ret) { + dev_err(smu->adev->dev, "Set hard min sclk failed!"); + return ret; + } + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_SetSoftMaxGfxClk, + 
smu->gfx_actual_soft_max_freq, + NULL); + if (ret) { + dev_err(smu->adev->dev, "Set soft max sclk failed!"); + return ret; + } + } + break; + default: + return -ENOSYS; + } + + return ret; +} + +static int renoir_set_fine_grain_gfx_freq_parameters(struct smu_context *smu) +{ + uint32_t min = 0, max = 0; + uint32_t ret = 0; + + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GetMinGfxclkFrequency, + 0, &min); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GetMaxGfxclkFrequency, + 0, &max); + if (ret) + return ret; + + smu->gfx_default_hard_min_freq = min; + smu->gfx_default_soft_max_freq = max; + smu->gfx_actual_hard_min_freq = 0; + smu->gfx_actual_soft_max_freq = 0; + + return 0; +} + static int renoir_print_clk_levels(struct smu_context *smu, enum smu_clk_type clk_type, char *buf) { @@ -358,6 +491,30 @@ static int renoir_print_clk_levels(struct smu_context *smu, return ret; switch (clk_type) { + case SMU_OD_RANGE: + if (smu->fine_grain_enabled) { + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GetMinGfxclkFrequency, + 0, &min); + if (ret) + return ret; + ret = smu_cmn_send_smc_msg_with_param(smu, + SMU_MSG_GetMaxGfxclkFrequency, + 0, &max); + if (ret) + return ret; + size += sprintf(buf + size, "OD_RANGE\nSCLK: %10uMhz %10uMhz\n", min, max); + } + break; + case SMU_OD_SCLK: + if (smu->fine_grain_enabled) { + min = (smu->gfx_actual_hard_min_freq > 0) ? smu->gfx_actual_hard_min_freq : smu->gfx_default_hard_min_freq; + max = (smu->gfx_actual_soft_max_freq > 0) ? smu->gfx_actual_soft_max_freq : smu->gfx_default_soft_max_freq; + size += sprintf(buf + size, "OD_SCLK\n"); + size += sprintf(buf + size, "0:%10uMhz\n", min); + size += sprintf(buf + size, "1:%10uMhz\n", max); + } + break; case SMU_GFXCLK: case SMU_SCLK: /* retirve table returned paramters unit is MHz */ @@ -398,23 +555,35 @@ static int renoir_print_clk_levels(struct smu_context *smu, cur_value = metrics.ClockFrequency[CLOCK_FCLK]; break; default: - return -EINVAL; + break; } - for (i = 0; i < count; i++) { - ret = renoir_get_dpm_clk_limited(smu, clk_type, i, &value); - if (ret) - return ret; - if (!value) - continue; - size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, - cur_value == value ? "*" : ""); - if (cur_value == value) - cur_value_match_level = true; - } + switch (clk_type) { + case SMU_GFXCLK: + case SMU_SCLK: + case SMU_SOCCLK: + case SMU_MCLK: + case SMU_DCEFCLK: + case SMU_FCLK: + for (i = 0; i < count; i++) { + ret = renoir_get_dpm_clk_limited(smu, clk_type, i, &value); + if (ret) + return ret; + if (!value) + continue; + size += sprintf(buf + size, "%d: %uMhz %s\n", i, value, + cur_value == value ? 
"*" : ""); + if (cur_value == value) + cur_value_match_level = true; + } - if (!cur_value_match_level) - size += sprintf(buf + size, " %uMhz *\n", cur_value); + if (!cur_value_match_level) + size += sprintf(buf + size, " %uMhz *\n", cur_value); + + break; + default: + break; + } return size; } @@ -724,15 +893,31 @@ static int renoir_set_performance_level(struct smu_context *smu, switch (level) { case AMD_DPM_FORCED_LEVEL_HIGH: + smu->fine_grain_started = 0; + smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; + smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; + ret = renoir_force_dpm_limit_value(smu, true); break; case AMD_DPM_FORCED_LEVEL_LOW: + smu->fine_grain_started = 0; + smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; + smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; + ret = renoir_force_dpm_limit_value(smu, false); break; case AMD_DPM_FORCED_LEVEL_AUTO: + smu->fine_grain_started = 0; + smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; + smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; + ret = renoir_unforce_dpm_levels(smu); break; case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD: + smu->fine_grain_started = 0; + smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; + smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; + ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinGfxClk, RENOIR_UMD_PSTATE_GFXCLK, @@ -785,6 +970,10 @@ static int renoir_set_performance_level(struct smu_context *smu, break; case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK: case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK: + smu->fine_grain_started = 0; + smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; + smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; + ret = renoir_get_profiling_clk_mask(smu, level, &sclk_mask, &mclk_mask, @@ -796,9 +985,14 @@ static int renoir_set_performance_level(struct smu_context *smu, renoir_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask); break; case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK: + smu->fine_grain_started = 0; + smu->gfx_actual_hard_min_freq = smu->gfx_default_hard_min_freq; + smu->gfx_actual_soft_max_freq = smu->gfx_default_soft_max_freq; + ret = renoir_set_peak_clock_by_device(smu); break; case AMD_DPM_FORCED_LEVEL_MANUAL: + smu->fine_grain_started = 1; case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT: default: break; @@ -1159,6 +1353,8 @@ static const struct pptable_funcs renoir_ppt_funcs = { .set_pp_feature_mask = smu_cmn_set_pp_feature_mask, .get_gpu_metrics = renoir_get_gpu_metrics, .gfx_state_change_set = renoir_gfx_state_change_set, + .set_fine_grain_gfx_freq_parameters = renoir_set_fine_grain_gfx_freq_parameters, + .od_edit_dpm_table = renoir_od_edit_dpm_table, }; void renoir_set_ppt_funcs(struct smu_context *smu) diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c index 522d55004655..06abf2a7ce9e 100644 --- a/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c +++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/smu_v12_0.c @@ -225,6 +225,7 @@ int smu_v12_0_set_soft_freq_limited_range(struct smu_context *smu, enum smu_clk_ break; case SMU_FCLK: case SMU_MCLK: + case SMU_UCLK: ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetHardMinFclkByFreq, min, NULL); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile index ca545b39814b..e245a036613e 100644 --- a/drivers/gpu/drm/i915/Makefile +++ b/drivers/gpu/drm/i915/Makefile @@ -38,6 +38,7 @@ i915-y += i915_drv.o \ 
i915_config.o \ i915_irq.o \ i915_getparam.o \ + i915_mitigations.o \ i915_params.o \ i915_pci.o \ i915_scatterlist.o \ @@ -58,6 +59,7 @@ i915-y += i915_drv.o \ # core library code i915-y += \ + dma_resv_utils.o \ i915_memcpy.o \ i915_mm.o \ i915_sw_fence.o \ @@ -82,6 +84,7 @@ gt-y += \ gt/gen6_engine_cs.o \ gt/gen6_ppgtt.o \ gt/gen7_renderclear.o \ + gt/gen8_engine_cs.o \ gt/gen8_ppgtt.o \ gt/intel_breadcrumbs.o \ gt/intel_context.o \ @@ -91,6 +94,7 @@ gt-y += \ gt/intel_engine_heartbeat.o \ gt/intel_engine_pm.o \ gt/intel_engine_user.o \ + gt/intel_execlists_submission.o \ gt/intel_ggtt.o \ gt/intel_ggtt_fencing.o \ gt/intel_gt.o \ @@ -106,6 +110,7 @@ gt-y += \ gt/intel_mocs.o \ gt/intel_ppgtt.o \ gt/intel_rc6.o \ + gt/intel_region_lmem.o \ gt/intel_renderstate.o \ gt/intel_reset.o \ gt/intel_ring.o \ @@ -166,7 +171,6 @@ i915-y += \ i915_scheduler.o \ i915_trace_points.o \ i915_vma.o \ - intel_region_lmem.o \ intel_wopcm.o # general-purpose microcontroller (GuC) support diff --git a/drivers/gpu/drm/i915/display/intel_overlay.c b/drivers/gpu/drm/i915/display/intel_overlay.c index 52b4f6193b4c..6be5d8946c69 100644 --- a/drivers/gpu/drm/i915/display/intel_overlay.c +++ b/drivers/gpu/drm/i915/display/intel_overlay.c @@ -29,6 +29,7 @@ #include <drm/drm_fourcc.h> #include "gem/i915_gem_pm.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/dma_resv_utils.c b/drivers/gpu/drm/i915/dma_resv_utils.c new file mode 100644 index 000000000000..9e508e7d4629 --- /dev/null +++ b/drivers/gpu/drm/i915/dma_resv_utils.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2020 Intel Corporation + */ + +#include <linux/dma-resv.h> + +#include "dma_resv_utils.h" + +void dma_resv_prune(struct dma_resv *resv) +{ + if (dma_resv_trylock(resv)) { + if (dma_resv_test_signaled_rcu(resv, true)) + dma_resv_add_excl_fence(resv, NULL); + dma_resv_unlock(resv); + } +} diff --git a/drivers/gpu/drm/i915/dma_resv_utils.h b/drivers/gpu/drm/i915/dma_resv_utils.h new file mode 100644 index 000000000000..b9d8fb5f8367 --- /dev/null +++ b/drivers/gpu/drm/i915/dma_resv_utils.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2020 Intel Corporation + */ + +#ifndef DMA_RESV_UTILS_H +#define DMA_RESV_UTILS_H + +struct dma_resv; + +void dma_resv_prune(struct dma_resv *resv); + +#endif /* DMA_RESV_UTILS_H */ diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c index 4fd38101bb56..68f58762d5e3 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c @@ -72,6 +72,8 @@ #include "gt/intel_context_param.h" #include "gt/intel_engine_heartbeat.h" #include "gt/intel_engine_user.h" +#include "gt/intel_execlists_submission.h" /* virtual_engine */ +#include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "i915_gem_context.h" @@ -333,13 +335,12 @@ static struct i915_gem_engines *default_engines(struct i915_gem_context *ctx) return e; } -static void i915_gem_context_free(struct i915_gem_context *ctx) +void i915_gem_context_release(struct kref *ref) { - GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); + struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); - spin_lock(&ctx->i915->gem.contexts.lock); - list_del(&ctx->link); - spin_unlock(&ctx->i915->gem.contexts.lock); + trace_i915_context_free(ctx); + GEM_BUG_ON(!i915_gem_context_is_closed(ctx)); mutex_destroy(&ctx->engines_mutex); mutex_destroy(&ctx->lut_mutex); @@ 
-353,37 +354,6 @@ static void i915_gem_context_free(struct i915_gem_context *ctx) kfree_rcu(ctx, rcu); } -static void contexts_free_all(struct llist_node *list) -{ - struct i915_gem_context *ctx, *cn; - - llist_for_each_entry_safe(ctx, cn, list, free_link) - i915_gem_context_free(ctx); -} - -static void contexts_flush_free(struct i915_gem_contexts *gc) -{ - contexts_free_all(llist_del_all(&gc->free_list)); -} - -static void contexts_free_worker(struct work_struct *work) -{ - struct i915_gem_contexts *gc = - container_of(work, typeof(*gc), free_work); - - contexts_flush_free(gc); -} - -void i915_gem_context_release(struct kref *ref) -{ - struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref); - struct i915_gem_contexts *gc = &ctx->i915->gem.contexts; - - trace_i915_context_free(ctx); - if (llist_add(&ctx->free_link, &gc->free_list)) - schedule_work(&gc->free_work); -} - static inline struct i915_gem_engines * __context_engines_static(const struct i915_gem_context *ctx) { @@ -453,6 +423,9 @@ static struct intel_engine_cs *active_engine(struct intel_context *ce) struct intel_engine_cs *engine = NULL; struct i915_request *rq; + if (intel_context_has_inflight(ce)) + return intel_context_inflight(ce); + if (!ce->timeline) return NULL; @@ -632,6 +605,10 @@ static void context_close(struct i915_gem_context *ctx) */ lut_close(ctx); + spin_lock(&ctx->i915->gem.contexts.lock); + list_del(&ctx->link); + spin_unlock(&ctx->i915->gem.contexts.lock); + mutex_unlock(&ctx->mutex); /* @@ -849,9 +826,6 @@ i915_gem_create_context(struct drm_i915_private *i915, unsigned int flags) !HAS_EXECLISTS(i915)) return ERR_PTR(-EINVAL); - /* Reap the stale contexts */ - contexts_flush_free(&i915->gem.contexts); - ctx = __create_context(i915); if (IS_ERR(ctx)) return ctx; @@ -896,23 +870,11 @@ static void init_contexts(struct i915_gem_contexts *gc) { spin_lock_init(&gc->lock); INIT_LIST_HEAD(&gc->list); - - INIT_WORK(&gc->free_work, contexts_free_worker); - init_llist_head(&gc->free_list); } void i915_gem_init__contexts(struct drm_i915_private *i915) { init_contexts(&i915->gem.contexts); - drm_dbg(&i915->drm, "%s context support initialized\n", - DRIVER_CAPS(i915)->has_logical_contexts ? 
- "logical" : "fake"); -} - -void i915_gem_driver_release__contexts(struct drm_i915_private *i915) -{ - flush_work(&i915->gem.contexts.free_work); - rcu_barrier(); /* and flush the left over RCU frees */ } static int gem_context_register(struct i915_gem_context *ctx, @@ -988,7 +950,6 @@ err: void i915_gem_context_close(struct drm_file *file) { struct drm_i915_file_private *file_priv = file->driver_priv; - struct drm_i915_private *i915 = file_priv->dev_priv; struct i915_address_space *vm; struct i915_gem_context *ctx; unsigned long idx; @@ -1000,8 +961,6 @@ void i915_gem_context_close(struct drm_file *file) xa_for_each(&file_priv->vm_xa, idx, vm) i915_vm_put(vm); xa_destroy(&file_priv->vm_xa); - - contexts_flush_free(&i915->gem.contexts); } int i915_gem_vm_create_ioctl(struct drm_device *dev, void *data, diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.h b/drivers/gpu/drm/i915/gem/i915_gem_context.h index a133f92bbedb..b5c908f3f4f2 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context.h @@ -110,7 +110,6 @@ i915_gem_context_clear_user_engines(struct i915_gem_context *ctx) /* i915_gem_context.c */ void i915_gem_init__contexts(struct drm_i915_private *i915); -void i915_gem_driver_release__contexts(struct drm_i915_private *i915); int i915_gem_context_open(struct drm_i915_private *i915, struct drm_file *file); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h index ae14ca24a11f..1449f54924e0 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_context_types.h +++ b/drivers/gpu/drm/i915/gem/i915_gem_context_types.h @@ -108,7 +108,6 @@ struct i915_gem_context { /** link: place with &drm_i915_private.context_list */ struct list_head link; - struct llist_node free_link; /** * @ref: reference count diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c index bcc80f428172..b91b32195dcf 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c @@ -15,6 +15,7 @@ #include "gem/i915_gem_ioctls.h" #include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_buffer_pool.h" #include "gt/intel_gt_pm.h" @@ -534,8 +535,6 @@ eb_add_vma(struct i915_execbuffer *eb, struct drm_i915_gem_exec_object2 *entry = &eb->exec[i]; struct eb_vma *ev = &eb->vma[i]; - GEM_BUG_ON(i915_vma_is_closed(vma)); - ev->vma = vma; ev->exec = entry; ev->flags = entry->flags; @@ -1046,7 +1045,7 @@ static void reloc_gpu_flush(struct i915_execbuffer *eb, struct reloc_cache *cach GEM_BUG_ON(cache->rq_size >= obj->base.size / sizeof(u32)); cache->rq_cmd[cache->rq_size] = MI_BATCH_BUFFER_END; - __i915_gem_object_flush_map(obj, 0, sizeof(u32) * (cache->rq_size + 1)); + i915_gem_object_flush_map(obj); i915_gem_object_unpin_map(obj); intel_gt_chipset_flush(cache->rq->engine->gt); @@ -1296,6 +1295,8 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb, goto err_pool; } + memset32(cmd, 0, pool->obj->base.size / sizeof(u32)); + batch = i915_vma_instance(pool->obj, vma->vm, NULL); if (IS_ERR(batch)) { err = PTR_ERR(batch); @@ -2533,6 +2534,9 @@ static int eb_submit(struct i915_execbuffer *eb, struct i915_vma *batch) { int err; + if (intel_context_nopreempt(eb->context)) + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags); + err = eb_move_to_gpu(eb); if (err) return err; @@ -2573,15 +2577,12 @@ static int eb_submit(struct i915_execbuffer *eb, struct i915_vma 
*batch) return err; } - if (intel_context_nopreempt(eb->context)) - __set_bit(I915_FENCE_FLAG_NOPREEMPT, &eb->request->fence.flags); - return 0; } static int num_vcs_engines(const struct drm_i915_private *i915) { - return hweight64(VDBOX_MASK(&i915->gt)); + return hweight_long(VDBOX_MASK(&i915->gt)); } /* diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c index aee7ad3cc3c6..10cac9fac79b 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_object_blt.c @@ -6,6 +6,7 @@ #include "i915_drv.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_buffer_pool.h" #include "gt/intel_ring.h" diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c index e2c7b2a7895f..3db3c667c486 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c @@ -238,7 +238,7 @@ unlock: /* The 'mapping' part of i915_gem_object_pin_map() below */ static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj, - enum i915_map_type type) + enum i915_map_type type) { unsigned long n_pages = obj->base.size >> PAGE_SHIFT, i; struct page *stack[32], **pages = stack, *page; @@ -281,7 +281,7 @@ static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj, /* Too big for stack -- allocate temporary array instead */ pages = kvmalloc_array(n_pages, sizeof(*pages), GFP_KERNEL); if (!pages) - return NULL; + return ERR_PTR(-ENOMEM); } i = 0; @@ -290,11 +290,12 @@ static void *i915_gem_object_map_page(struct drm_i915_gem_object *obj, vaddr = vmap(pages, n_pages, 0, pgprot); if (pages != stack) kvfree(pages); - return vaddr; + + return vaddr ?: ERR_PTR(-ENOMEM); } static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, - enum i915_map_type type) + enum i915_map_type type) { resource_size_t iomap = obj->mm.region->iomap.base - obj->mm.region->region.start; @@ -305,13 +306,13 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, void *vaddr; if (type != I915_MAP_WC) - return NULL; + return ERR_PTR(-ENODEV); if (n_pfn > ARRAY_SIZE(stack)) { /* Too big for stack -- allocate temporary array instead */ pfns = kvmalloc_array(n_pfn, sizeof(*pfns), GFP_KERNEL); if (!pfns) - return NULL; + return ERR_PTR(-ENOMEM); } i = 0; @@ -320,7 +321,8 @@ static void *i915_gem_object_map_pfn(struct drm_i915_gem_object *obj, vaddr = vmap_pfn(pfns, n_pfn, pgprot_writecombine(PAGE_KERNEL_IO)); if (pfns != stack) kvfree(pfns); - return vaddr; + + return vaddr ?: ERR_PTR(-ENOMEM); } /* get, pin, and map the pages of the object into kernel space */ @@ -349,8 +351,10 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, GEM_BUG_ON(i915_gem_object_has_pinned_pages(obj)); err = ____i915_gem_object_get_pages(obj); - if (err) - goto err_unlock; + if (err) { + ptr = ERR_PTR(err); + goto out_unlock; + } smp_mb__before_atomic(); } @@ -362,7 +366,7 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, ptr = page_unpack_bits(obj->mm.mapping, &has_type); if (ptr && has_type != type) { if (pinned) { - err = -EBUSY; + ptr = ERR_PTR(-EBUSY); goto err_unpin; } @@ -374,15 +378,13 @@ void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, if (!ptr) { if (GEM_WARN_ON(type == I915_MAP_WC && !static_cpu_has(X86_FEATURE_PAT))) - ptr = NULL; + ptr = ERR_PTR(-ENODEV); else if (i915_gem_object_has_struct_page(obj)) ptr = 
i915_gem_object_map_page(obj, type); else ptr = i915_gem_object_map_pfn(obj, type); - if (!ptr) { - err = -ENOMEM; + if (IS_ERR(ptr)) goto err_unpin; - } obj->mm.mapping = page_pack_bits(ptr, type); } @@ -393,8 +395,6 @@ out_unlock: err_unpin: atomic_dec(&obj->mm.pages_pin_count); -err_unlock: - ptr = ERR_PTR(err); goto out_unlock; } diff --git a/drivers/gpu/drm/i915/gem/i915_gem_region.c b/drivers/gpu/drm/i915/gem/i915_gem_region.c index 1515384d7e0e..835bd01f2e5d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_region.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_region.c @@ -22,6 +22,7 @@ i915_gem_object_put_pages_buddy(struct drm_i915_gem_object *obj, int i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) { + const u64 max_segment = i915_sg_segment_size(); struct intel_memory_region *mem = obj->mm.region; struct list_head *blocks = &obj->mm.blocks; resource_size_t size = obj->base.size; @@ -37,7 +38,7 @@ i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) if (!st) return -ENOMEM; - if (sg_alloc_table(st, size >> ilog2(mem->mm.chunk_size), GFP_KERNEL)) { + if (sg_alloc_table(st, size >> PAGE_SHIFT, GFP_KERNEL)) { kfree(st); return -ENOMEM; } @@ -64,27 +65,30 @@ i915_gem_object_get_pages_buddy(struct drm_i915_gem_object *obj) i915_buddy_block_size(&mem->mm, block)); offset = i915_buddy_block_offset(block); - GEM_BUG_ON(overflows_type(block_size, sg->length)); + while (block_size) { + u64 len; - if (offset != prev_end || - add_overflows_t(typeof(sg->length), sg->length, block_size)) { - if (st->nents) { - sg_page_sizes |= sg->length; - sg = __sg_next(sg); + if (offset != prev_end || sg->length >= max_segment) { + if (st->nents) { + sg_page_sizes |= sg->length; + sg = __sg_next(sg); + } + + sg_dma_address(sg) = mem->region.start + offset; + sg_dma_len(sg) = 0; + sg->length = 0; + st->nents++; } - sg_dma_address(sg) = mem->region.start + offset; - sg_dma_len(sg) = block_size; + len = min(block_size, max_segment - sg->length); + sg->length += len; + sg_dma_len(sg) += len; - sg->length = block_size; + offset += len; + block_size -= len; - st->nents++; - } else { - sg->length += block_size; - sg_dma_len(sg) += block_size; + prev_end = offset; } - - prev_end = offset + block_size; } sg_page_sizes |= sg->length; diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c index dc8f052a0ffe..c2dba1cd9532 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c @@ -15,6 +15,7 @@ #include "gt/intel_gt_requests.h" +#include "dma_resv_utils.h" #include "i915_trace.h" static bool swap_available(void) @@ -209,6 +210,8 @@ i915_gem_shrink(struct drm_i915_private *i915, mutex_unlock(&obj->mm.lock); } + dma_resv_prune(obj->base.resv); + scanned += obj->base.size >> PAGE_SHIFT; i915_gem_object_put(obj); diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c index 29bffc6afcc1..41b9fbf4dbcc 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c @@ -608,11 +608,10 @@ i915_gem_object_release_stolen(struct drm_i915_gem_object *obj) struct drm_mm_node *stolen = fetch_and_zero(&obj->stolen); GEM_BUG_ON(!stolen); - - i915_gem_object_release_memory_region(obj); - i915_gem_stolen_remove_node(i915, stolen); kfree(stolen); + + i915_gem_object_release_memory_region(obj); } static const struct drm_i915_gem_object_ops i915_gem_object_stolen_ops = { diff --git a/drivers/gpu/drm/i915/gem/i915_gem_wait.c 
b/drivers/gpu/drm/i915/gem/i915_gem_wait.c index 8af55cd3e690..c1b13ac50d0f 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_wait.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_wait.c @@ -9,6 +9,7 @@ #include "gt/intel_engine.h" +#include "dma_resv_utils.h" #include "i915_gem_ioctls.h" #include "i915_gem_object.h" @@ -84,11 +85,8 @@ i915_gem_object_wait_reservation(struct dma_resv *resv, * Opportunistically prune the fences iff we know they have *all* been * signaled. */ - if (prune_fences && dma_resv_trylock(resv)) { - if (dma_resv_test_signaled_rcu(resv, true)) - dma_resv_add_excl_fence(resv, NULL); - dma_resv_unlock(resv); - } + if (prune_fences) + dma_resv_prune(resv); return timeout; } diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c index a768ec61e966..2fb501a78a85 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_gem_object.c @@ -27,7 +27,7 @@ static void huge_free_pages(struct drm_i915_gem_object *obj, static int huge_get_pages(struct drm_i915_gem_object *obj) { -#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY) +#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL) const unsigned long nreal = obj->scratch / PAGE_SIZE; const unsigned long npages = obj->base.size / PAGE_SIZE; struct scatterlist *sg, *src, *end; diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c index 1f35e71429b4..aacf4856ccb4 100644 --- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c @@ -368,6 +368,27 @@ static int igt_check_page_sizes(struct i915_vma *vma) err = -EINVAL; } + /* + * The dma-api is like a box of chocolates when it comes to the + * alignment of dma addresses, however for LMEM we have total control + * and so can guarantee alignment, likewise when we allocate our blocks + * they should appear in descending order, and if we know that we align + * to the largest page size for the GTT address, we should be able to + * assert that if we see 2M physical pages then we should also get 2M + * GTT pages. If we don't then something might be wrong in our + * construction of the backing pages. + * + * Maintaining alignment is required to utilise huge pages in the ppGGT. 
+ */ + if (i915_gem_object_is_lmem(obj) && + IS_ALIGNED(vma->node.start, SZ_2M) && + vma->page_sizes.sg & SZ_2M && + vma->page_sizes.gtt < SZ_2M) { + pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n", + vma->page_sizes.sg, vma->page_sizes.gtt); + err = -EINVAL; + } + if (obj->mm.page_sizes.gtt) { pr_err("obj->page_sizes.gtt(%u) should never be set\n", obj->mm.page_sizes.gtt); @@ -1333,6 +1354,7 @@ static int igt_ppgtt_sanity_check(void *arg) unsigned int flags; } backends[] = { { igt_create_system, 0, }, + { igt_create_local, 0, }, { igt_create_local, I915_BO_ALLOC_CONTIGUOUS, }, }; struct { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c index 4e36d4897ea6..6a674a7994df 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_client_blt.c @@ -20,13 +20,11 @@ static int __igt_client_fill(struct intel_engine_cs *engine) { struct intel_context *ce = engine->kernel_context; struct drm_i915_gem_object *obj; - struct rnd_state prng; + I915_RND_STATE(prng); IGT_TIMEOUT(end); u32 *vaddr; int err = 0; - prandom_seed_state(&prng, i915_selftest.random_seed); - intel_engine_pm_get(engine); do { const u32 max_block_size = S16_MAX * PAGE_SIZE; diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c index 7049a6bbc03d..1117d2a44518 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_coherency.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gt/intel_engine_pm.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gt/intel_ring.h" diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c index d27d87a678c8..d429c7643ff2 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c @@ -7,6 +7,7 @@ #include <linux/prime_numbers.h> #include "gt/intel_engine_pm.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" #include "gem/i915_gem_region.h" diff --git a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c index e21b5023ca7d..d6783061bc72 100644 --- a/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c +++ b/drivers/gpu/drm/i915/gem/selftests/igt_gem_utils.c @@ -9,6 +9,7 @@ #include "gem/i915_gem_context.h" #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "i915_vma.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c index 174a24553322..d4f4452ce5ed 100644 --- a/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/debugfs_gt_pm.c @@ -11,6 +11,7 @@ #include "i915_drv.h" #include "intel_gt.h" #include "intel_gt_clock_utils.h" +#include "intel_gt_pm.h" #include "intel_llc.h" #include "intel_rc6.h" #include "intel_rps.h" @@ -403,34 +404,34 @@ static int frequency_show(struct seq_file *m, void *unused) seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); seq_printf(m, "CAGF: %dMHz\n", cagf); - seq_printf(m, "RP CUR UP EI: %d (%dns)\n", + seq_printf(m, "RP CUR UP EI: %d (%lldns)\n", rpcurupei, intel_gt_pm_interval_to_ns(gt, rpcurupei)); - 
seq_printf(m, "RP CUR UP: %d (%dns)\n", + seq_printf(m, "RP CUR UP: %d (%lldns)\n", rpcurup, intel_gt_pm_interval_to_ns(gt, rpcurup)); - seq_printf(m, "RP PREV UP: %d (%dns)\n", + seq_printf(m, "RP PREV UP: %d (%lldns)\n", rpprevup, intel_gt_pm_interval_to_ns(gt, rpprevup)); seq_printf(m, "Up threshold: %d%%\n", rps->power.up_threshold); - seq_printf(m, "RP UP EI: %d (%dns)\n", + seq_printf(m, "RP UP EI: %d (%lldns)\n", rpupei, intel_gt_pm_interval_to_ns(gt, rpupei)); - seq_printf(m, "RP UP THRESHOLD: %d (%dns)\n", + seq_printf(m, "RP UP THRESHOLD: %d (%lldns)\n", rpupt, intel_gt_pm_interval_to_ns(gt, rpupt)); - seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n", + seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n", rpcurdownei, intel_gt_pm_interval_to_ns(gt, rpcurdownei)); - seq_printf(m, "RP CUR DOWN: %d (%dns)\n", + seq_printf(m, "RP CUR DOWN: %d (%lldns)\n", rpcurdown, intel_gt_pm_interval_to_ns(gt, rpcurdown)); - seq_printf(m, "RP PREV DOWN: %d (%dns)\n", + seq_printf(m, "RP PREV DOWN: %d (%lldns)\n", rpprevdown, intel_gt_pm_interval_to_ns(gt, rpprevdown)); seq_printf(m, "Down threshold: %d%%\n", rps->power.down_threshold); - seq_printf(m, "RP DOWN EI: %d (%dns)\n", + seq_printf(m, "RP DOWN EI: %d (%lldns)\n", rpdownei, intel_gt_pm_interval_to_ns(gt, rpdownei)); - seq_printf(m, "RP DOWN THRESHOLD: %d (%dns)\n", + seq_printf(m, "RP DOWN THRESHOLD: %d (%lldns)\n", rpdownt, intel_gt_pm_interval_to_ns(gt, rpdownt)); max_freq = (IS_GEN9_LP(i915) ? rp_state_cap >> 0 : @@ -558,7 +559,9 @@ static int rps_boost_show(struct seq_file *m, void *data) seq_printf(m, "RPS enabled? %s\n", yesno(intel_rps_is_enabled(rps))); seq_printf(m, "RPS active? %s\n", yesno(intel_rps_is_active(rps))); - seq_printf(m, "GPU busy? %s\n", yesno(gt->awake)); + seq_printf(m, "GPU busy? %s, %llums\n", + yesno(gt->awake), + ktime_to_ms(intel_gt_get_awake_time(gt))); seq_printf(m, "Boosts outstanding? %d\n", atomic_read(&rps->num_waiters)); seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive)); @@ -575,7 +578,7 @@ static int rps_boost_show(struct seq_file *m, void *data) intel_gpu_freq(rps, rps->efficient_freq), intel_gpu_freq(rps, rps->boost_freq)); - seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); + seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts)); if (INTEL_GEN(i915) >= 6 && intel_rps_is_active(rps)) { struct intel_uncore *uncore = gt->uncore; diff --git a/drivers/gpu/drm/i915/gt/gen7_renderclear.c b/drivers/gpu/drm/i915/gt/gen7_renderclear.c index d93d85cd3027..94465374ca2f 100644 --- a/drivers/gpu/drm/i915/gt/gen7_renderclear.c +++ b/drivers/gpu/drm/i915/gt/gen7_renderclear.c @@ -7,8 +7,6 @@ #include "i915_drv.h" #include "intel_gpu_commands.h" -#define MAX_URB_ENTRIES 64 -#define STATE_SIZE (4 * 1024) #define GT3_INLINE_DATA_DELAYS 0x1E00 #define batch_advance(Y, CS) GEM_BUG_ON((Y)->end != (CS)) @@ -34,38 +32,59 @@ struct batch_chunk { }; struct batch_vals { - u32 max_primitives; - u32 max_urb_entries; - u32 cmd_size; - u32 state_size; + u32 max_threads; u32 state_start; - u32 batch_size; + u32 surface_start; u32 surface_height; u32 surface_width; - u32 scratch_size; - u32 max_size; + u32 size; }; +static inline int num_primitives(const struct batch_vals *bv) +{ + /* + * We need to saturate the GPU with work in order to dispatch + * a shader on every HW thread, and clear the thread-local registers. + * In short, we have to dispatch work faster than the shaders can + * run in order to fill the EU and occupy each HW thread. 
+ */ + return bv->max_threads; +} + static void batch_get_defaults(struct drm_i915_private *i915, struct batch_vals *bv) { if (IS_HASWELL(i915)) { - bv->max_primitives = 280; - bv->max_urb_entries = MAX_URB_ENTRIES; + switch (INTEL_INFO(i915)->gt) { + default: + case 1: + bv->max_threads = 70; + break; + case 2: + bv->max_threads = 140; + break; + case 3: + bv->max_threads = 280; + break; + } bv->surface_height = 16 * 16; bv->surface_width = 32 * 2 * 16; } else { - bv->max_primitives = 128; - bv->max_urb_entries = MAX_URB_ENTRIES / 2; + switch (INTEL_INFO(i915)->gt) { + default: + case 1: /* including vlv */ + bv->max_threads = 36; + break; + case 2: + bv->max_threads = 128; + break; + } bv->surface_height = 16 * 8; bv->surface_width = 32 * 16; } - bv->cmd_size = bv->max_primitives * 4096; - bv->state_size = STATE_SIZE; - bv->state_start = bv->cmd_size; - bv->batch_size = bv->cmd_size + bv->state_size; - bv->scratch_size = bv->surface_height * bv->surface_width; - bv->max_size = bv->batch_size + bv->scratch_size; + bv->state_start = round_up(SZ_1K + num_primitives(bv) * 64, SZ_4K); + bv->surface_start = bv->state_start + SZ_4K; + bv->size = bv->surface_start + bv->surface_height * bv->surface_width; } static void batch_init(struct batch_chunk *bc, @@ -155,7 +174,8 @@ static u32 gen7_fill_binding_table(struct batch_chunk *state, const struct batch_vals *bv) { - u32 surface_start = gen7_fill_surface_state(state, bv->batch_size, bv); + u32 surface_start = + gen7_fill_surface_state(state, bv->surface_start, bv); u32 *cs = batch_alloc_items(state, 32, 8); u32 offset = batch_offset(state, cs); @@ -214,9 +234,9 @@ static void gen7_emit_state_base_address(struct batch_chunk *batch, u32 surface_state_base) { - u32 *cs = batch_alloc_items(batch, 0, 12); + u32 *cs = batch_alloc_items(batch, 0, 10); - *cs++ = STATE_BASE_ADDRESS | (12 - 2); + *cs++ = STATE_BASE_ADDRESS | (10 - 2); /* general */ *cs++ = batch_addr(batch) | BASE_ADDRESS_MODIFY; /* surface */ @@ -233,8 +253,6 @@ gen7_emit_state_base_address(struct batch_chunk *batch, *cs++ = BASE_ADDRESS_MODIFY; *cs++ = 0; *cs++ = BASE_ADDRESS_MODIFY; - *cs++ = 0; - *cs++ = 0; batch_advance(batch, cs); } @@ -244,8 +262,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch, u32 urb_size, u32 curbe_size, u32 mode) { - u32 urb_entries = bv->max_urb_entries; - u32 threads = bv->max_primitives - 1; + u32 threads = bv->max_threads - 1; u32 *cs = batch_alloc_items(batch, 32, 8); *cs++ = MEDIA_VFE_STATE | (8 - 2); @@ -254,7 +271,7 @@ gen7_emit_vfe_state(struct batch_chunk *batch, *cs++ = 0; /* number of threads & urb entries for GPGPU vs Media Mode */ - *cs++ = threads << 16 | urb_entries << 8 | mode << 2; + *cs++ = threads << 16 | 1 << 8 | mode << 2; *cs++ = 0; @@ -293,17 +310,12 @@ gen7_emit_media_object(struct batch_chunk *batch, { unsigned int x_offset = (media_object_index % 16) * 64; unsigned int y_offset = (media_object_index / 16) * 16; - unsigned int inline_data_size; - unsigned int media_batch_size; - unsigned int i; + unsigned int pkt = 6 + 3; u32 *cs; - inline_data_size = 112 * 8; - media_batch_size = inline_data_size + 6; - - cs = batch_alloc_items(batch, 8, media_batch_size); + cs = batch_alloc_items(batch, 8, pkt); - *cs++ = MEDIA_OBJECT | (media_batch_size - 2); + *cs++ = MEDIA_OBJECT | (pkt - 2); /* interface descriptor offset */ *cs++ = 0; @@ -317,25 +329,44 @@ gen7_emit_media_object(struct batch_chunk *batch, *cs++ = 0; /* inline */ - *cs++ = (y_offset << 16) | (x_offset); + *cs++ = y_offset << 16 | x_offset; *cs++ = 0; *cs++ = 
GT3_INLINE_DATA_DELAYS; - for (i = 3; i < inline_data_size; i++) - *cs++ = 0; batch_advance(batch, cs); } static void gen7_emit_pipeline_flush(struct batch_chunk *batch) { - u32 *cs = batch_alloc_items(batch, 0, 5); + u32 *cs = batch_alloc_items(batch, 0, 4); - *cs++ = GFX_OP_PIPE_CONTROL(5); - *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE | - PIPE_CONTROL_GLOBAL_GTT_IVB; + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL; *cs++ = 0; *cs++ = 0; + + batch_advance(batch, cs); +} + +static void gen7_emit_pipeline_invalidate(struct batch_chunk *batch) +{ + u32 *cs = batch_alloc_items(batch, 0, 8); + + /* ivb: Stall before STATE_CACHE_INVALIDATE */ + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_STALL_AT_SCOREBOARD | + PIPE_CONTROL_CS_STALL; + *cs++ = 0; + *cs++ = 0; + + *cs++ = GFX_OP_PIPE_CONTROL(4); + *cs++ = PIPE_CONTROL_STATE_CACHE_INVALIDATE; *cs++ = 0; + *cs++ = 0; + batch_advance(batch, cs); } @@ -344,34 +375,34 @@ static void emit_batch(struct i915_vma * const vma, const struct batch_vals *bv) { struct drm_i915_private *i915 = vma->vm->i915; - unsigned int desc_count = 64; - const u32 urb_size = 112; + const unsigned int desc_count = 1; + const unsigned int urb_size = 1; struct batch_chunk cmds, state; - u32 interface_descriptor; + u32 descriptors; unsigned int i; - batch_init(&cmds, vma, start, 0, bv->cmd_size); - batch_init(&state, vma, start, bv->state_start, bv->state_size); + batch_init(&cmds, vma, start, 0, bv->state_start); + batch_init(&state, vma, start, bv->state_start, SZ_4K); - interface_descriptor = - gen7_fill_interface_descriptor(&state, bv, - IS_HASWELL(i915) ? - &cb_kernel_hsw : - &cb_kernel_ivb, - desc_count); - gen7_emit_pipeline_flush(&cmds); + descriptors = gen7_fill_interface_descriptor(&state, bv, + IS_HASWELL(i915) ? 
+ &cb_kernel_hsw : + &cb_kernel_ivb, + desc_count); + + gen7_emit_pipeline_invalidate(&cmds); batch_add(&cmds, PIPELINE_SELECT | PIPELINE_SELECT_MEDIA); batch_add(&cmds, MI_NOOP); - gen7_emit_state_base_address(&cmds, interface_descriptor); + gen7_emit_pipeline_invalidate(&cmds); + gen7_emit_pipeline_flush(&cmds); + gen7_emit_state_base_address(&cmds, descriptors); + gen7_emit_pipeline_invalidate(&cmds); gen7_emit_vfe_state(&cmds, bv, urb_size - 1, 0, 0); + gen7_emit_interface_descriptor_load(&cmds, descriptors, desc_count); - gen7_emit_interface_descriptor_load(&cmds, - interface_descriptor, - desc_count); - - for (i = 0; i < bv->max_primitives; i++) + for (i = 0; i < num_primitives(bv); i++) gen7_emit_media_object(&cmds, i); batch_add(&cmds, MI_BATCH_BUFFER_END); @@ -385,15 +416,15 @@ int gen7_setup_clear_gpr_bb(struct intel_engine_cs * const engine, batch_get_defaults(engine->i915, &bv); if (!vma) - return bv.max_size; + return bv.size; - GEM_BUG_ON(vma->obj->base.size < bv.max_size); + GEM_BUG_ON(vma->obj->base.size < bv.size); batch = i915_gem_object_pin_map(vma->obj, I915_MAP_WC); if (IS_ERR(batch)) return PTR_ERR(batch); - emit_batch(vma, memset(batch, 0, bv.max_size), &bv); + emit_batch(vma, memset(batch, 0, bv.size), &bv); i915_gem_object_flush_map(vma->obj); __i915_gem_object_release_map(vma->obj); diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.c b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c new file mode 100644 index 000000000000..8066b93e6dc4 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.c @@ -0,0 +1,633 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014 Intel Corporation + */ + +#include "gen8_engine_cs.h" +#include "i915_drv.h" +#include "intel_lrc.h" +#include "intel_gpu_commands.h" +#include "intel_ring.h" + +int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode) +{ + bool vf_flush_wa = false, dc_flush_wa = false; + u32 *cs, flags = 0; + int len; + + flags |= PIPE_CONTROL_CS_STALL; + + if (mode & EMIT_FLUSH) { + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + } + + if (mode & EMIT_INVALIDATE) { + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + + /* + * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL + * pipe control. 
+ */ + if (IS_GEN(rq->engine->i915, 9)) + vf_flush_wa = true; + + /* WaForGAMHang:kbl */ + if (IS_KBL_GT_REVID(rq->engine->i915, 0, KBL_REVID_B0)) + dc_flush_wa = true; + } + + len = 6; + + if (vf_flush_wa) + len += 6; + + if (dc_flush_wa) + len += 12; + + cs = intel_ring_begin(rq, len); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (vf_flush_wa) + cs = gen8_emit_pipe_control(cs, 0, 0); + + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + + if (dc_flush_wa) + cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); + + intel_ring_advance(rq, cs); + + return 0; +} + +int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode) +{ + u32 cmd, *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cmd = MI_FLUSH_DW + 1; + + /* + * We always require a command barrier so that subsequent + * commands, such as breadcrumb interrupts, are strictly ordered + * wrt the contents of the write cache being flushed to memory + * (and thus being coherent from the CPU). + */ + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; + + if (mode & EMIT_INVALIDATE) { + cmd |= MI_INVALIDATE_TLB; + if (rq->engine->class == VIDEO_DECODE_CLASS) + cmd |= MI_INVALIDATE_BSD; + } + + *cs++ = cmd; + *cs++ = LRC_PPHWSP_SCRATCH_ADDR; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + intel_ring_advance(rq, cs); + + return 0; +} + +int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode) +{ + if (mode & EMIT_FLUSH) { + u32 *cs; + u32 flags = 0; + + flags |= PIPE_CONTROL_CS_STALL; + + flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(rq, cs); + } + + if (mode & EMIT_INVALIDATE) { + u32 *cs; + u32 flags = 0; + + flags |= PIPE_CONTROL_CS_STALL; + + flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_QW_WRITE; + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(rq, cs); + } + + return 0; +} + +static u32 preparser_disable(bool state) +{ + return MI_ARB_CHECK | 1 << 8 | state; +} + +static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) +{ + static const i915_reg_t vd[] = { + GEN12_VD0_AUX_NV, + GEN12_VD1_AUX_NV, + GEN12_VD2_AUX_NV, + GEN12_VD3_AUX_NV, + }; + + static const i915_reg_t ve[] = { + GEN12_VE0_AUX_NV, + GEN12_VE1_AUX_NV, + }; + + if (engine->class == VIDEO_DECODE_CLASS) + return vd[engine->instance]; + + if (engine->class == VIDEO_ENHANCEMENT_CLASS) + return ve[engine->instance]; + + GEM_BUG_ON("unknown aux_inv reg\n"); + return INVALID_MMIO_REG; +} + +static u32 *gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) +{ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = i915_mmio_reg_offset(inv_reg); + *cs++ = 
AUX_INV; + *cs++ = MI_NOOP; + + return cs; +} + +int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode) +{ + if (mode & EMIT_FLUSH) { + u32 flags = 0; + u32 *cs; + + flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; + flags |= PIPE_CONTROL_FLUSH_L3; + flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; + flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; + /* Wa_1409600907:tgl */ + flags |= PIPE_CONTROL_DEPTH_STALL; + flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; + flags |= PIPE_CONTROL_FLUSH_ENABLE; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + flags |= PIPE_CONTROL_CS_STALL; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + cs = gen12_emit_pipe_control(cs, + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, + flags, LRC_PPHWSP_SCRATCH_ADDR); + intel_ring_advance(rq, cs); + } + + if (mode & EMIT_INVALIDATE) { + u32 flags = 0; + u32 *cs; + + flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TLB_INVALIDATE; + flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; + flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; + + flags |= PIPE_CONTROL_STORE_DATA_INDEX; + flags |= PIPE_CONTROL_QW_WRITE; + + flags |= PIPE_CONTROL_CS_STALL; + + cs = intel_ring_begin(rq, 8 + 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * Prevent the pre-parser from skipping past the TLB + * invalidate and loading a stale page for the batch + * buffer / request payload. + */ + *cs++ = preparser_disable(true); + + cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); + + /* hsdes: 1809175790 */ + cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs); + + *cs++ = preparser_disable(false); + intel_ring_advance(rq, cs); + } + + return 0; +} + +int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode) +{ + intel_engine_mask_t aux_inv = 0; + u32 cmd, *cs; + + cmd = 4; + if (mode & EMIT_INVALIDATE) + cmd += 2; + if (mode & EMIT_INVALIDATE) + aux_inv = rq->engine->mask & ~BIT(BCS0); + if (aux_inv) + cmd += 2 * hweight8(aux_inv) + 2; + + cs = intel_ring_begin(rq, cmd); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + if (mode & EMIT_INVALIDATE) + *cs++ = preparser_disable(true); + + cmd = MI_FLUSH_DW + 1; + + /* + * We always require a command barrier so that subsequent + * commands, such as breadcrumb interrupts, are strictly ordered + * wrt the contents of the write cache being flushed to memory + * (and thus being coherent from the CPU). 
+ */ + cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; + + if (mode & EMIT_INVALIDATE) { + cmd |= MI_INVALIDATE_TLB; + if (rq->engine->class == VIDEO_DECODE_CLASS) + cmd |= MI_INVALIDATE_BSD; + } + + *cs++ = cmd; + *cs++ = LRC_PPHWSP_SCRATCH_ADDR; + *cs++ = 0; /* upper addr */ + *cs++ = 0; /* value */ + + if (aux_inv) { /* hsdes: 1809175790 */ + struct intel_engine_cs *engine; + unsigned int tmp; + + *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); + for_each_engine_masked(engine, rq->engine->gt, + aux_inv, tmp) { + *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); + *cs++ = AUX_INV; + } + *cs++ = MI_NOOP; + } + + if (mode & EMIT_INVALIDATE) + *cs++ = preparser_disable(false); + + intel_ring_advance(rq, cs); + + return 0; +} + +static inline u32 preempt_address(struct intel_engine_cs *engine) +{ + return (i915_ggtt_offset(engine->status_page.vma) + + I915_GEM_HWS_PREEMPT_ADDR); +} + +static u32 hwsp_offset(const struct i915_request *rq) +{ + const struct intel_timeline_cacheline *cl; + + /* Before the request is executed, the timeline/cachline is fixed */ + + cl = rcu_dereference_protected(rq->hwsp_cacheline, 1); + if (cl) + return cl->ggtt_offset; + + return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset; +} + +int gen8_emit_init_breadcrumb(struct i915_request *rq) +{ + u32 *cs; + + GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq)); + if (!i915_request_timeline(rq)->has_initial_breadcrumb) + return 0; + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = hwsp_offset(rq); + *cs++ = 0; + *cs++ = rq->fence.seqno - 1; + + /* + * Check if we have been preempted before we even get started. + * + * After this point i915_request_started() reports true, even if + * we get preempted and so are no longer running. + * + * i915_request_started() is used during preemption processing + * to decide if the request is currently inside the user payload + * or spinning on a kernel semaphore (or earlier). For no-preemption + * requests, we do allow preemption on the semaphore before the user + * payload, but do not allow preemption once the request is started. + * + * i915_request_started() is similarly used during GPU hangs to + * determine if the user's payload was guilty, and if so, the + * request is banned. Before the request is started, it is assumed + * to be unharmed and an innocent victim of another's hang. + */ + *cs++ = MI_NOOP; + *cs++ = MI_ARB_CHECK; + + intel_ring_advance(rq, cs); + + /* Record the updated position of the request's payload */ + rq->infix = intel_ring_offset(rq, cs); + + __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); + + return 0; +} + +int gen8_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* + * WaDisableCtxRestoreArbitration:bdw,chv + * + * We don't need to perform MI_ARB_ENABLE as often as we do (in + * particular all the gen that do not need the w/a at all!), if we + * took care to make sure that on every switch into this context + * (both ordinary and for preemption) that arbitrartion was enabled + * we would be fine. However, for gen8 there is another w/a that + * requires us to not preempt inside GPGPU execution, so we keep + * arbitration disabled for gen8 batches. Arbitration will be + * re-enabled before we close the request + * (engine->emit_fini_breadcrumb). 
+ */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + /* FIXME(BDW+): Address space and security selectors. */ + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + intel_ring_advance(rq, cs); + + return 0; +} + +int gen8_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags) +{ + u32 *cs; + + if (unlikely(i915_request_has_nopreempt(rq))) + return gen8_emit_bb_start_noarb(rq, offset, len, flags); + + cs = intel_ring_begin(rq, 6); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + *cs++ = MI_BATCH_BUFFER_START_GEN8 | + (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; + + intel_ring_advance(rq, cs); + + return 0; +} + +static void assert_request_valid(struct i915_request *rq) +{ + struct intel_ring *ring __maybe_unused = rq->ring; + + /* Can we unwind this request without appearing to go forwards? */ + GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0); +} + +/* + * Reserve space for 2 NOOPs at the end of each request to be + * used as a workaround for not being allowed to do lite + * restore with HEAD==TAIL (WaIdleLiteRestore). + */ +static u32 *gen8_emit_wa_tail(struct i915_request *rq, u32 *cs) +{ + /* Ensure there's always at least one preemption point per-request. */ + *cs++ = MI_ARB_CHECK; + *cs++ = MI_NOOP; + rq->wa_tail = intel_ring_offset(rq, cs); + + /* Check that entire request is less than half the ring */ + assert_request_valid(rq); + + return cs; +} + +static u32 *emit_preempt_busywait(struct i915_request *rq, u32 *cs) +{ + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = preempt_address(rq->engine); + *cs++ = 0; + + return cs; +} + +static __always_inline u32* +gen8_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) +{ + *cs++ = MI_USER_INTERRUPT; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + if (intel_engine_has_semaphores(rq->engine)) + cs = emit_preempt_busywait(rq, cs); + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return gen8_emit_wa_tail(rq, cs); +} + +static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs) +{ + return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0); +} + +u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) +{ + return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); +} + +u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) +{ + cs = gen8_emit_pipe_control(cs, + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE, + 0); + + /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ + cs = gen8_emit_ggtt_write_rcs(cs, + rq->fence.seqno, + hwsp_offset(rq), + PIPE_CONTROL_FLUSH_ENABLE | + PIPE_CONTROL_CS_STALL); + + return gen8_emit_fini_breadcrumb_tail(rq, cs); +} + +u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) +{ + cs = gen8_emit_ggtt_write_rcs(cs, + rq->fence.seqno, + hwsp_offset(rq), + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + return gen8_emit_fini_breadcrumb_tail(rq, cs); +} + +/* + * Note that 
the CS instruction pre-parser will not stall on the breadcrumb + * flush and will continue pre-fetching the instructions after it before the + * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at + * BB_START/END instructions, so, even though we might pre-fetch the pre-amble + * of the next request before the memory has been flushed, we're guaranteed that + * we won't access the batch itself too early. + * However, on gen12+ the parser can pre-fetch across the BB_START/END commands, + * so, if the current request is modifying an instruction in the next request on + * the same intel_context, we might pre-fetch and then execute the pre-update + * instruction. To avoid this, the users of self-modifying code should either + * disable the parser around the code emitting the memory writes, via a new flag + * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For + * the in-kernel use-cases we've opted to use a separate context, see + * reloc_gpu() as an example. + * All the above applies only to the instructions themselves. Non-inline data + * used by the instructions is not pre-fetched. + */ + +static u32 *gen12_emit_preempt_busywait(struct i915_request *rq, u32 *cs) +{ + *cs++ = MI_ARB_CHECK; /* trigger IDLE->ACTIVE first */ + *cs++ = MI_SEMAPHORE_WAIT_TOKEN | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = preempt_address(rq->engine); + *cs++ = 0; + *cs++ = 0; + + return cs; +} + +static __always_inline u32* +gen12_emit_fini_breadcrumb_tail(struct i915_request *rq, u32 *cs) +{ + *cs++ = MI_USER_INTERRUPT; + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + if (intel_engine_has_semaphores(rq->engine)) + cs = gen12_emit_preempt_busywait(rq, cs); + + rq->tail = intel_ring_offset(rq, cs); + assert_ring_tail_valid(rq->ring, rq->tail); + + return gen8_emit_wa_tail(rq, cs); +} + +u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs) +{ + /* XXX Stalling flush before seqno write; post-sync not */ + cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0)); + return gen12_emit_fini_breadcrumb_tail(rq, cs); +} + +u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs) +{ + cs = gen12_emit_ggtt_write_rcs(cs, + rq->fence.seqno, + hwsp_offset(rq), + PIPE_CONTROL0_HDC_PIPELINE_FLUSH, + PIPE_CONTROL_CS_STALL | + PIPE_CONTROL_TILE_CACHE_FLUSH | + PIPE_CONTROL_FLUSH_L3 | + PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | + PIPE_CONTROL_DEPTH_CACHE_FLUSH | + /* Wa_1409600907:tgl */ + PIPE_CONTROL_DEPTH_STALL | + PIPE_CONTROL_DC_FLUSH_ENABLE | + PIPE_CONTROL_FLUSH_ENABLE); + + return gen12_emit_fini_breadcrumb_tail(rq, cs); +} diff --git a/drivers/gpu/drm/i915/gt/gen8_engine_cs.h b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h new file mode 100644 index 000000000000..cc6e21d3662a --- /dev/null +++ b/drivers/gpu/drm/i915/gt/gen8_engine_cs.h @@ -0,0 +1,127 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014 Intel Corporation + */ + +#ifndef __GEN8_ENGINE_CS_H__ +#define __GEN8_ENGINE_CS_H__ + +#include <linux/string.h> +#include <linux/types.h> + +#include "i915_gem.h" /* GEM_BUG_ON */ + +#include "intel_gpu_commands.h" + +struct i915_request; + +int gen8_emit_flush_rcs(struct i915_request *rq, u32 mode); +int gen11_emit_flush_rcs(struct i915_request *rq, u32 mode); +int gen12_emit_flush_rcs(struct i915_request *rq, u32 mode); + +int gen8_emit_flush_xcs(struct i915_request *rq, u32 mode); +int gen12_emit_flush_xcs(struct i915_request *rq, u32 mode); + +int gen8_emit_init_breadcrumb(struct 
i915_request *rq); + +int gen8_emit_bb_start_noarb(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); +int gen8_emit_bb_start(struct i915_request *rq, + u64 offset, u32 len, + const unsigned int flags); + +u32 *gen8_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs); +u32 *gen12_emit_fini_breadcrumb_xcs(struct i915_request *rq, u32 *cs); + +u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); +u32 *gen11_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); +u32 *gen12_emit_fini_breadcrumb_rcs(struct i915_request *rq, u32 *cs); + +static inline u32 * +__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +{ + memset(batch, 0, 6 * sizeof(u32)); + + batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0; + batch[1] = flags1; + batch[2] = offset; + + return batch + 6; +} + +static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) +{ + return __gen8_emit_pipe_control(batch, 0, flags, offset); +} + +static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) +{ + return __gen8_emit_pipe_control(batch, flags0, flags1, offset); +} + +static inline u32 * +__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1) +{ + *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0; + *cs++ = flags1 | PIPE_CONTROL_QW_WRITE; + *cs++ = offset; + *cs++ = 0; + *cs++ = value; + *cs++ = 0; /* We're thrashing one extra dword. */ + + return cs; +} + +static inline u32* +gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + return __gen8_emit_write_rcs(cs, + value, + gtt_offset, + 0, + flags | PIPE_CONTROL_GLOBAL_GTT_IVB); +} + +static inline u32* +gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) +{ + /* We're using qword write, offset should be aligned to 8 bytes. */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + return __gen8_emit_write_rcs(cs, + value, + gtt_offset, + flags0, + flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB); +} + +static inline u32 * +__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + *cs++ = (MI_FLUSH_DW + 1) | flags; + *cs++ = gtt_offset; + *cs++ = 0; + *cs++ = value; + + return cs; +} + +static inline u32 * +gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) +{ + /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. 
*/ + GEM_BUG_ON(gtt_offset & (1 << 5)); + /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ + GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); + + return __gen8_emit_flush_dw(cs, + value, + gtt_offset | MI_FLUSH_DW_USE_GTT, + flags | MI_FLUSH_DW_OP_STOREDW); +} + +#endif /* __GEN8_ENGINE_CS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c index a24cc1ff08a0..be2c285a0ac7 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c @@ -134,11 +134,6 @@ static bool remove_signaling_context(struct intel_breadcrumbs *b, return true; } -static inline bool __request_completed(const struct i915_request *rq) -{ - return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); -} - __maybe_unused static bool check_signal_order(struct intel_context *ce, struct i915_request *rq) { @@ -192,18 +187,6 @@ static void add_retire(struct intel_breadcrumbs *b, struct intel_timeline *tl) intel_engine_add_retire(b->irq_engine, tl); } -static bool __signal_request(struct i915_request *rq) -{ - GEM_BUG_ON(test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)); - - if (!__dma_fence_signal(&rq->fence)) { - i915_request_put(rq); - return false; - } - - return true; -} - static struct llist_node * slist_add(struct llist_node *node, struct llist_node *head) { @@ -251,13 +234,14 @@ static void signal_irq_work(struct irq_work *work) intel_breadcrumbs_disarm_irq(b); rcu_read_lock(); + atomic_inc(&b->signaler_active); list_for_each_entry_rcu(ce, &b->signalers, signal_link) { struct i915_request *rq; list_for_each_entry_rcu(rq, &ce->signals, signal_link) { bool release; - if (!__request_completed(rq)) + if (!__i915_request_is_complete(rq)) break; if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, @@ -273,17 +257,20 @@ static void signal_irq_work(struct irq_work *work) list_del_rcu(&rq->signal_link); release = remove_signaling_context(b, ce); spin_unlock(&ce->signal_lock); - - if (__signal_request(rq)) - /* We own signal_node now, xfer to local list */ - signal = slist_add(&rq->signal_node, signal); - if (release) { - add_retire(b, ce->timeline); + if (intel_timeline_is_last(ce->timeline, rq)) + add_retire(b, ce->timeline); intel_context_put(ce); } + + if (__dma_fence_signal(&rq->fence)) + /* We own signal_node now, xfer to local list */ + signal = slist_add(&rq->signal_node, signal); + else + i915_request_put(rq); } } + atomic_dec(&b->signaler_active); rcu_read_unlock(); llist_for_each_safe(signal, sn, signal) { @@ -342,17 +329,19 @@ void intel_breadcrumbs_reset(struct intel_breadcrumbs *b) spin_unlock_irqrestore(&b->irq_lock, flags); } -void intel_breadcrumbs_park(struct intel_breadcrumbs *b) +void __intel_breadcrumbs_park(struct intel_breadcrumbs *b) { - /* Kick the work once more to drain the signalers */ + if (!READ_ONCE(b->irq_armed)) + return; + + /* Kick the work once more to drain the signalers, and disarm the irq */ irq_work_sync(&b->irq_work); - while (unlikely(READ_ONCE(b->irq_armed))) { + while (READ_ONCE(b->irq_armed) && !atomic_read(&b->active)) { local_irq_disable(); signal_irq_work(&b->irq_work); local_irq_enable(); cond_resched(); } - GEM_BUG_ON(!list_empty(&b->signalers)); } void intel_breadcrumbs_free(struct intel_breadcrumbs *b) @@ -363,6 +352,17 @@ void intel_breadcrumbs_free(struct intel_breadcrumbs *b) kfree(b); } +static void irq_signal_request(struct i915_request *rq, + struct intel_breadcrumbs *b) +{ + if (!__dma_fence_signal(&rq->fence)) + return; + + i915_request_get(rq); + if 
(llist_add(&rq->signal_node, &b->signaled_requests)) + irq_work_queue(&b->irq_work); +} + static void insert_breadcrumb(struct i915_request *rq) { struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs; @@ -372,17 +372,13 @@ static void insert_breadcrumb(struct i915_request *rq) if (test_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) return; - i915_request_get(rq); - /* * If the request is already completed, we can transfer it * straight onto a signaled list, and queue the irq worker for * its signal completion. */ - if (__request_completed(rq)) { - if (__signal_request(rq) && - llist_add(&rq->signal_node, &b->signaled_requests)) - irq_work_queue(&b->irq_work); + if (__i915_request_is_complete(rq)) { + irq_signal_request(rq, b); return; } @@ -413,6 +409,8 @@ static void insert_breadcrumb(struct i915_request *rq) break; } } + + i915_request_get(rq); list_add_rcu(&rq->signal_link, pos); GEM_BUG_ON(!check_signal_order(ce, rq)); GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)); @@ -453,22 +451,60 @@ bool i915_request_enable_breadcrumb(struct i915_request *rq) void i915_request_cancel_breadcrumb(struct i915_request *rq) { + struct intel_breadcrumbs *b = READ_ONCE(rq->engine)->breadcrumbs; struct intel_context *ce = rq->context; + unsigned long flags; bool release; if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, &rq->fence.flags)) return; - spin_lock(&ce->signal_lock); + spin_lock_irqsave(&ce->signal_lock, flags); list_del_rcu(&rq->signal_link); - release = remove_signaling_context(rq->engine->breadcrumbs, ce); - spin_unlock(&ce->signal_lock); + release = remove_signaling_context(b, ce); + spin_unlock_irqrestore(&ce->signal_lock, flags); if (release) intel_context_put(ce); + if (__i915_request_is_complete(rq)) + irq_signal_request(rq, b); + i915_request_put(rq); } +void intel_context_remove_breadcrumbs(struct intel_context *ce, + struct intel_breadcrumbs *b) +{ + struct i915_request *rq, *rn; + bool release = false; + unsigned long flags; + + spin_lock_irqsave(&ce->signal_lock, flags); + + if (list_empty(&ce->signals)) + goto unlock; + + list_for_each_entry_safe(rq, rn, &ce->signals, signal_link) { + GEM_BUG_ON(!__i915_request_is_complete(rq)); + if (!test_and_clear_bit(I915_FENCE_FLAG_SIGNAL, + &rq->fence.flags)) + continue; + + list_del_rcu(&rq->signal_link); + irq_signal_request(rq, b); + i915_request_put(rq); + } + release = remove_signaling_context(b, ce); + +unlock: + spin_unlock_irqrestore(&ce->signal_lock, flags); + if (release) + intel_context_put(ce); + + while (atomic_read(&b->signaler_active)) + cpu_relax(); +} + static void print_signals(struct intel_breadcrumbs *b, struct drm_printer *p) { struct intel_context *ce; diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h index ed3d1deabfbd..3ce5ce270b04 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.h @@ -6,6 +6,7 @@ #ifndef __INTEL_BREADCRUMBS__ #define __INTEL_BREADCRUMBS__ +#include <linux/atomic.h> #include <linux/irq_work.h> #include "intel_engine_types.h" @@ -19,7 +20,18 @@ intel_breadcrumbs_create(struct intel_engine_cs *irq_engine); void intel_breadcrumbs_free(struct intel_breadcrumbs *b); void intel_breadcrumbs_reset(struct intel_breadcrumbs *b); -void intel_breadcrumbs_park(struct intel_breadcrumbs *b); +void __intel_breadcrumbs_park(struct intel_breadcrumbs *b); + +static inline void intel_breadcrumbs_unpark(struct intel_breadcrumbs *b) +{ + atomic_inc(&b->active); +} + +static inline void 
intel_breadcrumbs_park(struct intel_breadcrumbs *b) +{ + if (atomic_dec_and_test(&b->active)) + __intel_breadcrumbs_park(b); +} static inline void intel_engine_signal_breadcrumbs(struct intel_engine_cs *engine) @@ -33,4 +45,7 @@ void intel_engine_print_breadcrumbs(struct intel_engine_cs *engine, bool i915_request_enable_breadcrumb(struct i915_request *request); void i915_request_cancel_breadcrumb(struct i915_request *request); +void intel_context_remove_breadcrumbs(struct intel_context *ce, + struct intel_breadcrumbs *b); + #endif /* __INTEL_BREADCRUMBS__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h index a74bb3062bd8..3a084ce8ff5e 100644 --- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h +++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs_types.h @@ -29,17 +29,20 @@ * the overhead of waking that client is much preferred. */ struct intel_breadcrumbs { - /* Not all breadcrumbs are attached to physical HW */ - struct intel_engine_cs *irq_engine; + atomic_t active; spinlock_t signalers_lock; /* protects the list of signalers */ struct list_head signalers; struct llist_head signaled_requests; + atomic_t signaler_active; spinlock_t irq_lock; /* protects the interrupt from hardirq context */ struct irq_work irq_work; /* for use from inside irq_lock */ unsigned int irq_enabled; bool irq_armed; + + /* Not all breadcrumbs are attached to physical HW */ + struct intel_engine_cs *irq_engine; }; #endif /* __INTEL_BREADCRUMBS_TYPES__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_context.h b/drivers/gpu/drm/i915/gt/intel_context.h index fda2eba81e22..d24ab6fa0ee5 100644 --- a/drivers/gpu/drm/i915/gt/intel_context.h +++ b/drivers/gpu/drm/i915/gt/intel_context.h @@ -191,6 +191,11 @@ static inline bool intel_context_is_closed(const struct intel_context *ce) return test_bit(CONTEXT_CLOSED_BIT, &ce->flags); } +static inline bool intel_context_has_inflight(const struct intel_context *ce) +{ + return test_bit(COPS_HAS_INFLIGHT_BIT, &ce->ops->flags); +} + static inline bool intel_context_use_semaphores(const struct intel_context *ce) { return test_bit(CONTEXT_USE_SEMAPHORES, &ce->flags); @@ -248,16 +253,14 @@ intel_context_clear_nopreempt(struct intel_context *ce) static inline u64 intel_context_get_total_runtime_ns(struct intel_context *ce) { - const u32 period = - RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns; + const u32 period = ce->engine->gt->clock_period_ns; return READ_ONCE(ce->runtime.total) * period; } static inline u64 intel_context_get_avg_runtime_ns(struct intel_context *ce) { - const u32 period = - RUNTIME_INFO(ce->engine->i915)->cs_timestamp_period_ns; + const u32 period = ce->engine->gt->clock_period_ns; return mul_u32_u32(ewma_runtime_read(&ce->runtime.avg), period); } diff --git a/drivers/gpu/drm/i915/gt/intel_context_sseu.c b/drivers/gpu/drm/i915/gt/intel_context_sseu.c index b9c8163978a3..8dfd8f656aaa 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_sseu.c +++ b/drivers/gpu/drm/i915/gt/intel_context_sseu.c @@ -9,7 +9,6 @@ #include "intel_engine_pm.h" #include "intel_gpu_commands.h" #include "intel_lrc.h" -#include "intel_lrc_reg.h" #include "intel_ring.h" #include "intel_sseu.h" diff --git a/drivers/gpu/drm/i915/gt/intel_context_types.h b/drivers/gpu/drm/i915/gt/intel_context_types.h index 52fa9c132746..e10d78601bbd 100644 --- a/drivers/gpu/drm/i915/gt/intel_context_types.h +++ b/drivers/gpu/drm/i915/gt/intel_context_types.h @@ -30,6 +30,10 @@ struct intel_context; struct intel_ring; struct intel_context_ops 
{ + unsigned long flags; +#define COPS_HAS_INFLIGHT_BIT 0 +#define COPS_HAS_INFLIGHT BIT(COPS_HAS_INFLIGHT_BIT) + int (*alloc)(struct intel_context *ce); int (*pre_pin)(struct intel_context *ce, struct i915_gem_ww_ctx *ww, void **vaddr); @@ -58,8 +62,12 @@ struct intel_context { struct intel_engine_cs *engine; struct intel_engine_cs *inflight; -#define intel_context_inflight(ce) ptr_mask_bits(READ_ONCE((ce)->inflight), 2) -#define intel_context_inflight_count(ce) ptr_unmask_bits(READ_ONCE((ce)->inflight), 2) +#define __intel_context_inflight(engine) ptr_mask_bits(engine, 3) +#define __intel_context_inflight_count(engine) ptr_unmask_bits(engine, 3) +#define intel_context_inflight(ce) \ + __intel_context_inflight(READ_ONCE((ce)->inflight)) +#define intel_context_inflight_count(ce) \ + __intel_context_inflight_count(READ_ONCE((ce)->inflight)) struct i915_address_space *vm; struct i915_gem_context __rcu *gem_context; @@ -81,12 +89,13 @@ struct intel_context { unsigned long flags; #define CONTEXT_BARRIER_BIT 0 #define CONTEXT_ALLOC_BIT 1 -#define CONTEXT_VALID_BIT 2 -#define CONTEXT_CLOSED_BIT 3 -#define CONTEXT_USE_SEMAPHORES 4 -#define CONTEXT_BANNED 5 -#define CONTEXT_FORCE_SINGLE_SUBMISSION 6 -#define CONTEXT_NOPREEMPT 7 +#define CONTEXT_INIT_BIT 2 +#define CONTEXT_VALID_BIT 3 +#define CONTEXT_CLOSED_BIT 4 +#define CONTEXT_USE_SEMAPHORES 5 +#define CONTEXT_BANNED 6 +#define CONTEXT_FORCE_SINGLE_SUBMISSION 7 +#define CONTEXT_NOPREEMPT 8 u32 *lrc_reg_state; union { diff --git a/drivers/gpu/drm/i915/gt/intel_engine.h b/drivers/gpu/drm/i915/gt/intel_engine.h index 760fefdfe392..47ee8578e511 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine.h +++ b/drivers/gpu/drm/i915/gt/intel_engine.h @@ -15,7 +15,6 @@ #include "i915_selftest.h" #include "gt/intel_timeline.h" #include "intel_engine_types.h" -#include "intel_gpu_commands.h" #include "intel_workarounds.h" struct drm_printer; @@ -223,91 +222,6 @@ void intel_engine_get_instdone(const struct intel_engine_cs *engine, void intel_engine_init_execlists(struct intel_engine_cs *engine); -static inline u32 *__gen8_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) -{ - memset(batch, 0, 6 * sizeof(u32)); - - batch[0] = GFX_OP_PIPE_CONTROL(6) | flags0; - batch[1] = flags1; - batch[2] = offset; - - return batch + 6; -} - -static inline u32 *gen8_emit_pipe_control(u32 *batch, u32 flags, u32 offset) -{ - return __gen8_emit_pipe_control(batch, 0, flags, offset); -} - -static inline u32 *gen12_emit_pipe_control(u32 *batch, u32 flags0, u32 flags1, u32 offset) -{ - return __gen8_emit_pipe_control(batch, flags0, flags1, offset); -} - -static inline u32 * -__gen8_emit_write_rcs(u32 *cs, u32 value, u32 offset, u32 flags0, u32 flags1) -{ - *cs++ = GFX_OP_PIPE_CONTROL(6) | flags0; - *cs++ = flags1 | PIPE_CONTROL_QW_WRITE; - *cs++ = offset; - *cs++ = 0; - *cs++ = value; - *cs++ = 0; /* We're thrashing one extra dword. */ - - return cs; -} - -static inline u32* -gen8_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags) -{ - /* We're using qword write, offset should be aligned to 8 bytes. */ - GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - - return __gen8_emit_write_rcs(cs, - value, - gtt_offset, - 0, - flags | PIPE_CONTROL_GLOBAL_GTT_IVB); -} - -static inline u32* -gen12_emit_ggtt_write_rcs(u32 *cs, u32 value, u32 gtt_offset, u32 flags0, u32 flags1) -{ - /* We're using qword write, offset should be aligned to 8 bytes. 
*/ - GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - - return __gen8_emit_write_rcs(cs, - value, - gtt_offset, - flags0, - flags1 | PIPE_CONTROL_GLOBAL_GTT_IVB); -} - -static inline u32 * -__gen8_emit_flush_dw(u32 *cs, u32 value, u32 gtt_offset, u32 flags) -{ - *cs++ = (MI_FLUSH_DW + 1) | flags; - *cs++ = gtt_offset; - *cs++ = 0; - *cs++ = value; - - return cs; -} - -static inline u32 * -gen8_emit_ggtt_write(u32 *cs, u32 value, u32 gtt_offset, u32 flags) -{ - /* w/a: bit 5 needs to be zero for MI_FLUSH_DW address. */ - GEM_BUG_ON(gtt_offset & (1 << 5)); - /* Offset should be aligned to 8 bytes for both (QW/DW) write types */ - GEM_BUG_ON(!IS_ALIGNED(gtt_offset, 8)); - - return __gen8_emit_flush_dw(cs, - value, - gtt_offset | MI_FLUSH_DW_USE_GTT, - flags | MI_FLUSH_DW_OP_STOREDW); -} - static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool stalled) { @@ -318,7 +232,12 @@ static inline void __intel_engine_reset(struct intel_engine_cs *engine, bool intel_engines_are_idle(struct intel_gt *gt); bool intel_engine_is_idle(struct intel_engine_cs *engine); -void intel_engine_flush_submission(struct intel_engine_cs *engine); + +void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync); +static inline void intel_engine_flush_submission(struct intel_engine_cs *engine) +{ + __intel_engine_flush_submission(engine, true); +} void intel_engines_reset_default_submission(struct intel_gt *gt); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c index 0b31670343f5..fa76602f9852 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c @@ -33,12 +33,14 @@ #include "intel_engine.h" #include "intel_engine_pm.h" #include "intel_engine_user.h" +#include "intel_execlists_submission.h" #include "intel_gt.h" #include "intel_gt_requests.h" #include "intel_gt_pm.h" -#include "intel_lrc.h" +#include "intel_lrc_reg.h" #include "intel_reset.h" #include "intel_ring.h" +#include "uc/intel_guc_submission.h" /* Haswell does have the CXT_SIZE register however it does not appear to be * valid. Now, docs explain in dwords what is in the context object. 
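The __intel_context_inflight()/__intel_context_inflight_count() macros in the intel_context_types.h hunk above widen the tag packed into the low bits of ce->inflight from 2 to 3 bits. Below is a minimal userspace sketch of that low-bit packing idea, assuming an 8-byte-aligned pointer; struct engine, pack() and the unpack helpers are illustrative names only, not the driver's ptr_mask_bits()/ptr_unmask_bits() helpers.

/* Sketch: stash a small submission count in the low bits of an aligned
 * pointer, in the spirit of the inflight macros above. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define INFLIGHT_BITS 3
#define INFLIGHT_MASK ((1ul << INFLIGHT_BITS) - 1)

struct engine { int id; };      /* stands in for struct intel_engine_cs */

static void *pack(struct engine *e, unsigned int count)
{
        uintptr_t v = (uintptr_t)e;

        assert((v & INFLIGHT_MASK) == 0);       /* needs >= 8-byte alignment */
        assert(count <= INFLIGHT_MASK);
        return (void *)(v | count);
}

static struct engine *unpack_engine(void *p)    /* ~ intel_context_inflight() */
{
        return (struct engine *)((uintptr_t)p & ~INFLIGHT_MASK);
}

static unsigned int unpack_count(void *p)       /* ~ intel_context_inflight_count() */
{
        return (unsigned int)((uintptr_t)p & INFLIGHT_MASK);
}

int main(void)
{
        static _Alignas(8) struct engine rcs0 = { .id = 0 };
        void *inflight = pack(&rcs0, 2);        /* two ELSP submissions outstanding */

        printf("engine=%d count=%u\n",
               unpack_engine(inflight)->id, unpack_count(inflight));
        return 0;
}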
The full @@ -647,6 +649,8 @@ static int init_status_page(struct intel_engine_cs *engine) void *vaddr; int ret; + INIT_LIST_HEAD(&engine->status_page.timelines); + /* * Though the HWS register does support 36bit addresses, historically * we have had hangs and corruption reported due to wild writes if @@ -723,6 +727,9 @@ static int engine_setup_common(struct intel_engine_cs *engine) intel_engine_init_whitelist(engine); intel_engine_init_ctx_wa(engine); + if (INTEL_GEN(engine->i915) >= 12) + engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; + return 0; err_status: @@ -829,6 +836,21 @@ create_pinned_context(struct intel_engine_cs *engine, return ce; } +static void destroy_pinned_context(struct intel_context *ce) +{ + struct intel_engine_cs *engine = ce->engine; + struct i915_vma *hwsp = engine->status_page.vma; + + GEM_BUG_ON(ce->timeline->hwsp_ggtt != hwsp); + + mutex_lock(&hwsp->vm->mutex); + list_del(&ce->timeline->engine_link); + mutex_unlock(&hwsp->vm->mutex); + + intel_context_unpin(ce); + intel_context_put(ce); +} + static struct intel_context * create_kernel_context(struct intel_engine_cs *engine) { @@ -889,7 +911,9 @@ int intel_engines_init(struct intel_gt *gt) enum intel_engine_id id; int err; - if (HAS_EXECLISTS(gt->i915)) + if (intel_uc_uses_guc_submission(>->uc)) + setup = intel_guc_submission_setup; + else if (HAS_EXECLISTS(gt->i915)) setup = intel_execlists_submission_setup; else setup = intel_ring_submission_setup; @@ -925,7 +949,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) GEM_BUG_ON(!list_empty(&engine->active.requests)); tasklet_kill(&engine->execlists.tasklet); /* flush the callback */ - cleanup_status_page(engine); intel_breadcrumbs_free(engine->breadcrumbs); intel_engine_fini_retire(engine); @@ -934,11 +957,11 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine) if (engine->default_state) fput(engine->default_state); - if (engine->kernel_context) { - intel_context_unpin(engine->kernel_context); - intel_context_put(engine->kernel_context); - } + if (engine->kernel_context) + destroy_pinned_context(engine->kernel_context); + GEM_BUG_ON(!llist_empty(&engine->barrier_tasks)); + cleanup_status_page(engine); intel_wa_list_free(&engine->ctx_wa_list); intel_wa_list_free(&engine->wa_list); @@ -1002,32 +1025,50 @@ static unsigned long stop_timeout(const struct intel_engine_cs *engine) return READ_ONCE(engine->props.stop_timeout_ms); } -int intel_engine_stop_cs(struct intel_engine_cs *engine) +static int __intel_engine_stop_cs(struct intel_engine_cs *engine, + int fast_timeout_us, + int slow_timeout_ms) { struct intel_uncore *uncore = engine->uncore; - const u32 base = engine->mmio_base; - const i915_reg_t mode = RING_MI_MODE(base); + const i915_reg_t mode = RING_MI_MODE(engine->mmio_base); int err; + intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); + err = __intel_wait_for_register_fw(engine->uncore, mode, + MODE_IDLE, MODE_IDLE, + fast_timeout_us, + slow_timeout_ms, + NULL); + + /* A final mmio read to let GPU writes be hopefully flushed to memory */ + intel_uncore_posting_read_fw(uncore, mode); + return err; +} + +int intel_engine_stop_cs(struct intel_engine_cs *engine) +{ + int err = 0; + if (INTEL_GEN(engine->i915) < 3) return -ENODEV; ENGINE_TRACE(engine, "\n"); + if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) { + ENGINE_TRACE(engine, + "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n", + ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR, + ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR); - 
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); - - err = 0; - if (__intel_wait_for_register_fw(uncore, - mode, MODE_IDLE, MODE_IDLE, - 1000, stop_timeout(engine), - NULL)) { - ENGINE_TRACE(engine, "timed out on STOP_RING -> IDLE\n"); - err = -ETIMEDOUT; + /* + * Sometimes we observe that the idle flag is not + * set even though the ring is empty. So double + * check before giving up. + */ + if ((ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) != + (ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR)) + err = -ETIMEDOUT; } - /* A final mmio read to let GPU writes be hopefully flushed to memory */ - intel_uncore_posting_read_fw(uncore, mode); - return err; } @@ -1189,17 +1230,13 @@ static bool ring_is_idle(struct intel_engine_cs *engine) return idle; } -void intel_engine_flush_submission(struct intel_engine_cs *engine) +void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync) { struct tasklet_struct *t = &engine->execlists.tasklet; if (!t->func) return; - /* Synchronise and wait for the tasklet on another CPU */ - tasklet_kill(t); - - /* Having cancelled the tasklet, ensure that is run */ local_bh_disable(); if (tasklet_trylock(t)) { /* Must wait for any GPU reset in progress. */ @@ -1208,6 +1245,10 @@ void intel_engine_flush_submission(struct intel_engine_cs *engine) tasklet_unlock(t); } local_bh_enable(); + + /* Synchronise and wait for the tasklet on another CPU */ + if (sync) + tasklet_unlock_wait(t); } /** @@ -1273,8 +1314,12 @@ void intel_engines_reset_default_submission(struct intel_gt *gt) struct intel_engine_cs *engine; enum intel_engine_id id; - for_each_engine(engine, gt, id) + for_each_engine(engine, gt, id) { + if (engine->sanitize) + engine->sanitize(engine); + engine->set_default_submission(engine); + } } bool intel_engine_can_store_dword(struct intel_engine_cs *engine) @@ -1294,44 +1339,6 @@ bool intel_engine_can_store_dword(struct intel_engine_cs *engine) } } -static int print_sched_attr(const struct i915_sched_attr *attr, - char *buf, int x, int len) -{ - if (attr->priority == I915_PRIORITY_INVALID) - return x; - - x += snprintf(buf + x, len - x, - " prio=%d", attr->priority); - - return x; -} - -static void print_request(struct drm_printer *m, - struct i915_request *rq, - const char *prefix) -{ - const char *name = rq->fence.ops->get_timeline_name(&rq->fence); - char buf[80] = ""; - int x = 0; - - x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf)); - - drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n", - prefix, - rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : - "", - test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, - &rq->fence.flags) ? "+" : - test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, - &rq->fence.flags) ? "-" : - "", - buf, - jiffies_to_msecs(jiffies - rq->emitted_jiffies), - name); -} - static struct intel_timeline *get_timeline(struct i915_request *rq) { struct intel_timeline *tl; @@ -1480,7 +1487,9 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR)); } - if (HAS_EXECLISTS(dev_priv)) { + if (intel_engine_in_guc_submission_mode(engine)) { + /* nothing to print yet */ + } else if (HAS_EXECLISTS(dev_priv)) { struct i915_request * const *port, *rq; const u32 *hws = &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; @@ -1529,7 +1538,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, intel_context_is_banned(rq->context) ? 
"*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); - print_request(m, rq, hdr); + i915_request_show(m, rq, hdr, 0); } for (port = execlists->pending; (rq = *port); port++) { char hdr[160]; @@ -1543,7 +1552,7 @@ static void intel_engine_print_registers(struct intel_engine_cs *engine, intel_context_is_banned(rq->context) ? "*" : ""); len += print_ring(hdr + len, sizeof(hdr) - len, rq); scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); - print_request(m, rq, hdr); + i915_request_show(m, rq, hdr, 0); } rcu_read_unlock(); execlists_active_unlock_bh(execlists); @@ -1687,7 +1696,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, if (rq) { struct intel_timeline *tl = get_timeline(rq); - print_request(m, rq, "\t\tactive "); + i915_request_show(m, rq, "\t\tactive ", 0); drm_printf(m, "\t\tring->start: 0x%08x\n", i915_ggtt_offset(rq->ring->vma)); @@ -1725,7 +1734,7 @@ void intel_engine_dump(struct intel_engine_cs *engine, drm_printf(m, "\tDevice is asleep; skipping register dump\n"); } - intel_execlists_show_requests(engine, m, print_request, 8); + intel_execlists_show_requests(engine, m, i915_request_show, 8); drm_printf(m, "HWSP:\n"); hexdump(m, engine->status_page.addr, PAGE_SIZE); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c index 9060385cd69e..d7be2b9339f9 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_heartbeat.c @@ -37,6 +37,18 @@ static bool next_heartbeat(struct intel_engine_cs *engine) return true; } +static struct i915_request * +heartbeat_create(struct intel_context *ce, gfp_t gfp) +{ + struct i915_request *rq; + + intel_context_enter(ce); + rq = __i915_request_create(ce, gfp); + intel_context_exit(ce); + + return rq; +} + static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq) { engine->wakeref_serial = READ_ONCE(engine->serial) + 1; @@ -45,6 +57,15 @@ static void idle_pulse(struct intel_engine_cs *engine, struct i915_request *rq) engine->heartbeat.systole = i915_request_get(rq); } +static void heartbeat_commit(struct i915_request *rq, + const struct i915_sched_attr *attr) +{ + idle_pulse(rq->engine, rq); + + __i915_request_commit(rq); + __i915_request_queue(rq, attr); +} + static void show_heartbeat(const struct i915_request *rq, struct intel_engine_cs *engine) { @@ -139,16 +160,11 @@ static void heartbeat(struct work_struct *wrk) goto out; } - intel_context_enter(ce); - rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); - intel_context_exit(ce); + rq = heartbeat_create(ce, GFP_NOWAIT | __GFP_NOWARN); if (IS_ERR(rq)) goto unlock; - idle_pulse(engine, rq); - - __i915_request_commit(rq); - __i915_request_queue(rq, &attr); + heartbeat_commit(rq, &attr); unlock: mutex_unlock(&ce->timeline->mutex); @@ -187,17 +203,13 @@ static int __intel_engine_pulse(struct intel_engine_cs *engine) GEM_BUG_ON(!intel_engine_has_preemption(engine)); GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); - intel_context_enter(ce); - rq = __i915_request_create(ce, GFP_NOWAIT | __GFP_NOWARN); - intel_context_exit(ce); + rq = heartbeat_create(ce, GFP_NOWAIT | __GFP_NOWARN); if (IS_ERR(rq)) return PTR_ERR(rq); __set_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags); - idle_pulse(engine, rq); - __i915_request_commit(rq); - __i915_request_queue(rq, &attr); + heartbeat_commit(rq, &attr); GEM_BUG_ON(rq->sched.attr.priority < I915_PRIORITY_BARRIER); return 0; @@ -273,8 +285,12 @@ int 
intel_engine_pulse(struct intel_engine_cs *engine) int intel_engine_flush_barriers(struct intel_engine_cs *engine) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MIN), + }; + struct intel_context *ce = engine->kernel_context; struct i915_request *rq; - int err = 0; + int err; if (llist_empty(&engine->barrier_tasks)) return 0; @@ -282,15 +298,22 @@ int intel_engine_flush_barriers(struct intel_engine_cs *engine) if (!intel_engine_pm_get_if_awake(engine)) return 0; - rq = i915_request_create(engine->kernel_context); + if (mutex_lock_interruptible(&ce->timeline->mutex)) { + err = -EINTR; + goto out_rpm; + } + + rq = heartbeat_create(ce, GFP_KERNEL); if (IS_ERR(rq)) { err = PTR_ERR(rq); - goto out_rpm; + goto out_unlock; } - idle_pulse(engine, rq); - i915_request_add(rq); + heartbeat_commit(rq, &attr); + err = 0; +out_unlock: + mutex_unlock(&ce->timeline->mutex); out_rpm: intel_engine_pm_put(engine); return err; diff --git a/drivers/gpu/drm/i915/gt/intel_engine_pm.c b/drivers/gpu/drm/i915/gt/intel_engine_pm.c index 499b09cb4acf..2843db731b7d 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_engine_pm.c @@ -60,11 +60,19 @@ static int __engine_unpark(struct intel_wakeref *wf) /* Scrub the context image after our loss of control */ ce->ops->reset(ce); + + CE_TRACE(ce, "reset { seqno:%x, *hwsp:%x, ring:%x }\n", + ce->timeline->seqno, + READ_ONCE(*ce->timeline->hwsp_seqno), + ce->ring->emit); + GEM_BUG_ON(ce->timeline->seqno != + READ_ONCE(*ce->timeline->hwsp_seqno)); } if (engine->unpark) engine->unpark(engine); + intel_breadcrumbs_unpark(engine->breadcrumbs); intel_engine_unpark_heartbeat(engine); return 0; } @@ -136,7 +144,7 @@ __queue_and_release_pm(struct i915_request *rq, list_add_tail(&tl->link, &timelines->active_list); /* Hand the request over to HW and so engine_retire() */ - __i915_request_queue(rq, NULL); + __i915_request_queue_bh(rq); /* Let new submissions commence (and maybe retire this timeline) */ __intel_wakeref_defer_park(&engine->wakeref); diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h index ee6312601c56..df62e793e747 100644 --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h @@ -68,6 +68,7 @@ typedef u8 intel_engine_mask_t; #define ALL_ENGINES ((intel_engine_mask_t)~0ul) struct intel_hw_status_page { + struct list_head timelines; struct i915_vma *vma; u32 *addr; }; @@ -183,7 +184,8 @@ struct intel_engine_execlists { * Reserve the upper 16b for tracking internal errors. */ u32 error_interrupt; -#define ERROR_CSB BIT(31) +#define ERROR_CSB BIT(31) +#define ERROR_PREEMPT BIT(30) /** * @reset_ccid: Active CCID [EXECLISTS_STATUS_HI] at the time of reset @@ -237,16 +239,6 @@ struct intel_engine_execlists { unsigned int port_mask; /** - * @switch_priority_hint: Second context priority. - * - * We submit multiple contexts to the HW simultaneously and would - * like to occasionally switch between them to emulate timeslicing. - * To know when timeslicing is suitable, we track the priority of - * the context submitted second. - */ - int switch_priority_hint; - - /** * @queue_priority_hint: Highest pending priority. 
* * When we add requests into the queue, or adjust the priority of @@ -559,6 +551,8 @@ struct intel_engine_cs { unsigned long stop_timeout_ms; unsigned long timeslice_duration_ms; } props, defaults; + + I915_SELFTEST_DECLARE(struct fault_attr reset_timeout); }; static inline bool diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c new file mode 100644 index 000000000000..d7d5a58990bb --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c @@ -0,0 +1,3929 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2014 Intel Corporation + */ + +/** + * DOC: Logical Rings, Logical Ring Contexts and Execlists + * + * Motivation: + * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". + * These expanded contexts enable a number of new abilities, especially + * "Execlists" (also implemented in this file). + * + * One of the main differences with the legacy HW contexts is that logical + * ring contexts incorporate many more things to the context's state, like + * PDPs or ringbuffer control registers: + * + * The reason why PDPs are included in the context is straightforward: as + * PPGTTs (per-process GTTs) are actually per-context, having the PDPs + * contained there mean you don't need to do a ppgtt->switch_mm yourself, + * instead, the GPU will do it for you on the context switch. + * + * But, what about the ringbuffer control registers (head, tail, etc..)? + * shouldn't we just need a set of those per engine command streamer? This is + * where the name "Logical Rings" starts to make sense: by virtualizing the + * rings, the engine cs shifts to a new "ring buffer" with every context + * switch. When you want to submit a workload to the GPU you: A) choose your + * context, B) find its appropriate virtualized ring, C) write commands to it + * and then, finally, D) tell the GPU to switch to that context. + * + * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch + * to a contexts is via a context execution list, ergo "Execlists". + * + * LRC implementation: + * Regarding the creation of contexts, we have: + * + * - One global default context. + * - One local default context for each opened fd. + * - One local extra context for each context create ioctl call. + * + * Now that ringbuffers belong per-context (and not per-engine, like before) + * and that contexts are uniquely tied to a given engine (and not reusable, + * like before) we need: + * + * - One ringbuffer per-engine inside each context. + * - One backing object per-engine inside each context. + * + * The global default context starts its life with these new objects fully + * allocated and populated. The local default context for each opened fd is + * more complex, because we don't know at creation time which engine is going + * to use them. To handle this, we have implemented a deferred creation of LR + * contexts: + * + * The local context starts its life as a hollow or blank holder, that only + * gets populated for a given engine once we receive an execbuffer. If later + * on we receive another execbuffer ioctl for the same context but a different + * engine, we allocate/populate a new ringbuffer and context backing object and + * so on. + * + * Finally, regarding local contexts created using the ioctl call: as they are + * only allowed with the render ring, we can allocate & populate them right + * away (no need to defer anything, at least for now). 
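As a rough illustration of the deferred-creation scheme described above, the sketch below leaves each context's per-engine ring and backing state NULL until the first execbuffer targets that engine. The names (struct gem_context, struct engine_lrc, context_get_lrc()) and the buffer sizes are invented for illustration and are not the driver's structures.

/* Sketch: lazily populate per-engine logical-ring state on first use. */
#include <stdio.h>
#include <stdlib.h>

#define NUM_ENGINES 4

struct engine_lrc {                             /* ringbuffer + context backing object */
        void *ring;
        void *state;
};

struct gem_context {
        struct engine_lrc *lrc[NUM_ENGINES];    /* all NULL at creation time */
};

static struct engine_lrc *context_get_lrc(struct gem_context *ctx, int engine)
{
        if (!ctx->lrc[engine]) {                /* first execbuffer on this engine */
                struct engine_lrc *lrc = calloc(1, sizeof(*lrc));

                if (!lrc)
                        return NULL;
                lrc->ring  = malloc(16 * 1024); /* per-engine ringbuffer */
                lrc->state = malloc(80 * 1024); /* per-engine context image */
                ctx->lrc[engine] = lrc;
        }
        return ctx->lrc[engine];
}

int main(void)
{
        struct gem_context ctx = {0};

        context_get_lrc(&ctx, 0);               /* render engine used first */
        printf("engine0 %s, engine1 %s\n",
               ctx.lrc[0] ? "populated" : "still empty",
               ctx.lrc[1] ? "populated" : "still empty");
        return 0;
}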
+ * + * Execlists implementation: + * Execlists are the new method by which, on gen8+ hardware, workloads are + * submitted for execution (as opposed to the legacy, ringbuffer-based, method). + * This method works as follows: + * + * When a request is committed, its commands (the BB start and any leading or + * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer + * for the appropriate context. The tail pointer in the hardware context is not + * updated at this time, but instead, kept by the driver in the ringbuffer + * structure. A structure representing this request is added to a request queue + * for the appropriate engine: this structure contains a copy of the context's + * tail after the request was written to the ring buffer and a pointer to the + * context itself. + * + * If the engine's request queue was empty before the request was added, the + * queue is processed immediately. Otherwise the queue will be processed during + * a context switch interrupt. In any case, elements on the queue will get sent + * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a + * globally unique 20-bits submission ID. + * + * When execution of a request completes, the GPU updates the context status + * buffer with a context complete event and generates a context switch interrupt. + * During the interrupt handling, the driver examines the events in the buffer: + * for each context complete event, if the announced ID matches that on the head + * of the request queue, then that request is retired and removed from the queue. + * + * After processing, if any requests were retired and the queue is not empty + * then a new execution list can be submitted. The two requests at the front of + * the queue are next to be submitted but since a context may not occur twice in + * an execution list, if subsequent requests have the same ID as the first then + * the two requests must be combined. This is done simply by discarding requests + * at the head of the queue until either only one requests is left (in which case + * we use a NULL second context) or the first two requests have unique IDs. + * + * By always executing the first two requests in the queue the driver ensures + * that the GPU is kept as busy as possible. In the case where a single context + * completes but a second context is still executing, the request for this second + * context will be at the head of the queue when we remove the first one. This + * request will then be resubmitted along with a new request for a different context, + * which will cause the hardware to continue executing the second request and queue + * the new request (the GPU detects the condition of a context getting preempted + * with the same context and optimizes the context switch flow by not doing + * preemption, but just sampling the new tail pointer). 
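The queue-to-ELSP coalescing described above, combining head-of-queue requests that share a context so that a context never appears twice in one execution list, can be modelled in a few lines of plain C. struct request and build_elsp() below are invented names for illustration, not driver code.

/* Sketch: fill at most two ELSP slots from an ordered request queue,
 * merging consecutive requests that belong to the same context. */
#include <stdio.h>

struct request { int ctx_id; int seqno; };

/* Returns how many ports were filled (0, 1 or 2). */
static int build_elsp(const struct request *queue, int count,
                      const struct request *ports[2])
{
        int n = 0;

        for (int i = 0; i < count; i++) {
                if (n && ports[n - 1]->ctx_id == queue[i].ctx_id) {
                        ports[n - 1] = &queue[i];   /* same context: just advance the tail */
                        continue;
                }
                if (n == 2)
                        break;                      /* both ports taken, stop here */
                ports[n++] = &queue[i];
        }
        return n;
}

int main(void)
{
        const struct request queue[] = {
                { .ctx_id = 1, .seqno = 10 },
                { .ctx_id = 1, .seqno = 11 },       /* merged into port 0 */
                { .ctx_id = 2, .seqno = 7 },        /* takes port 1 */
                { .ctx_id = 3, .seqno = 4 },        /* left for the next dequeue */
        };
        const struct request *ports[2] = { NULL, NULL };
        int n = build_elsp(queue, 4, ports);

        for (int i = 0; i < n; i++)
                printf("ELSP[%d]: ctx %d up to seqno %d\n",
                       i, ports[i]->ctx_id, ports[i]->seqno);
        return 0;
}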
+ * + */ +#include <linux/interrupt.h> + +#include "i915_drv.h" +#include "i915_trace.h" +#include "i915_vgpu.h" +#include "gen8_engine_cs.h" +#include "intel_breadcrumbs.h" +#include "intel_context.h" +#include "intel_engine_pm.h" +#include "intel_execlists_submission.h" +#include "intel_gt.h" +#include "intel_gt_pm.h" +#include "intel_gt_requests.h" +#include "intel_lrc.h" +#include "intel_lrc_reg.h" +#include "intel_mocs.h" +#include "intel_reset.h" +#include "intel_ring.h" +#include "intel_workarounds.h" +#include "shmem_utils.h" + +#define RING_EXECLIST_QFULL (1 << 0x2) +#define RING_EXECLIST1_VALID (1 << 0x3) +#define RING_EXECLIST0_VALID (1 << 0x4) +#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) +#define RING_EXECLIST1_ACTIVE (1 << 0x11) +#define RING_EXECLIST0_ACTIVE (1 << 0x12) + +#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) +#define GEN8_CTX_STATUS_PREEMPTED (1 << 1) +#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) +#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) +#define GEN8_CTX_STATUS_COMPLETE (1 << 4) +#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) + +#define GEN8_CTX_STATUS_COMPLETED_MASK \ + (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) + +#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */ +#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */ +#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15) +#define GEN12_IDLE_CTX_ID 0x7FF +#define GEN12_CSB_CTX_VALID(csb_dw) \ + (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) + +/* Typical size of the average request (2 pipecontrols and a MI_BB) */ +#define EXECLISTS_REQUEST_SIZE 64 /* bytes */ + +struct virtual_engine { + struct intel_engine_cs base; + struct intel_context context; + struct rcu_work rcu; + + /* + * We allow only a single request through the virtual engine at a time + * (each request in the timeline waits for the completion fence of + * the previous before being submitted). By restricting ourselves to + * only submitting a single request, each request is placed on to a + * physical to maximise load spreading (by virtue of the late greedy + * scheduling -- each real engine takes the next available request + * upon idling). + */ + struct i915_request *request; + + /* + * We keep a rbtree of available virtual engines inside each physical + * engine, sorted by priority. Here we preallocate the nodes we need + * for the virtual engine, indexed by physical_engine->id. + */ + struct ve_node { + struct rb_node rb; + int prio; + } nodes[I915_NUM_ENGINES]; + + /* + * Keep track of bonded pairs -- restrictions upon on our selection + * of physical engines any particular request may be submitted to. + * If we receive a submit-fence from a master engine, we will only + * use one of sibling_mask physical engines. + */ + struct ve_bond { + const struct intel_engine_cs *master; + intel_engine_mask_t sibling_mask; + } *bonds; + unsigned int num_bonds; + + /* And finally, which physical engines this virtual engine maps onto. 
*/ + unsigned int num_siblings; + struct intel_engine_cs *siblings[]; +}; + +static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) +{ + GEM_BUG_ON(!intel_engine_is_virtual(engine)); + return container_of(engine, struct virtual_engine, base); +} + +static struct i915_request * +__active_request(const struct intel_timeline * const tl, + struct i915_request *rq, + int error) +{ + struct i915_request *active = rq; + + list_for_each_entry_from_reverse(rq, &tl->requests, link) { + if (__i915_request_is_complete(rq)) + break; + + if (error) { + i915_request_set_error_once(rq, error); + __i915_request_skip(rq); + } + active = rq; + } + + return active; +} + +static struct i915_request * +active_request(const struct intel_timeline * const tl, struct i915_request *rq) +{ + return __active_request(tl, rq, 0); +} + +static inline void +ring_set_paused(const struct intel_engine_cs *engine, int state) +{ + /* + * We inspect HWS_PREEMPT with a semaphore inside + * engine->emit_fini_breadcrumb. If the dword is true, + * the ring is paused as the semaphore will busywait + * until the dword is false. + */ + engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; + if (state) + wmb(); +} + +static inline struct i915_priolist *to_priolist(struct rb_node *rb) +{ + return rb_entry(rb, struct i915_priolist, node); +} + +static inline int rq_prio(const struct i915_request *rq) +{ + return READ_ONCE(rq->sched.attr.priority); +} + +static int effective_prio(const struct i915_request *rq) +{ + int prio = rq_prio(rq); + + /* + * If this request is special and must not be interrupted at any + * cost, so be it. Note we are only checking the most recent request + * in the context and so may be masking an earlier vip request. It + * is hoped that under the conditions where nopreempt is used, this + * will not matter (i.e. all requests to that context will be + * nopreempt for as long as desired). + */ + if (i915_request_has_nopreempt(rq)) + prio = I915_PRIORITY_UNPREEMPTABLE; + + return prio; +} + +static int queue_prio(const struct intel_engine_execlists *execlists) +{ + struct i915_priolist *p; + struct rb_node *rb; + + rb = rb_first_cached(&execlists->queue); + if (!rb) + return INT_MIN; + + /* + * As the priolist[] are inverted, with the highest priority in [0], + * we have to flip the index value to become priority. + */ + p = to_priolist(rb); + if (!I915_USER_PRIORITY_SHIFT) + return p->priority; + + return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); +} + +static int virtual_prio(const struct intel_engine_execlists *el) +{ + struct rb_node *rb = rb_first_cached(&el->virtual); + + return rb ? rb_entry(rb, struct ve_node, rb)->prio : INT_MIN; +} + +static inline bool need_preempt(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + int last_prio; + + if (!intel_engine_has_semaphores(engine)) + return false; + + /* + * Check if the current priority hint merits a preemption attempt. + * + * We record the highest value priority we saw during rescheduling + * prior to this dequeue, therefore we know that if it is strictly + * less than the current tail of ESLP[0], we do not need to force + * a preempt-to-idle cycle. + * + * However, the priority hint is a mere hint that we may need to + * preempt. If that hint is stale or we may be trying to preempt + * ourselves, ignore the request. 
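queue_prio() above folds a priolist's base priority and its 'used' sub-level bitmask into one comparable integer. A standalone illustration of that arithmetic follows, assuming a hypothetical shift of 2 bits; the real I915_USER_PRIORITY_SHIFT may differ and may be zero, in which case the bucket priority is used as-is.

/* Sketch: flatten (priority bucket, occupied sub-level bitmask) into a
 * single integer, mirroring the queue_prio() expression above. */
#include <stdio.h>
#include <strings.h>            /* ffs() */

#define SHIFT 2                 /* hypothetical I915_USER_PRIORITY_SHIFT */

static int flatten_prio(int bucket_prio, unsigned int used)
{
        if (!SHIFT)
                return bucket_prio;
        /* Sub-level 0 (bit 0) is the highest within a bucket, so the
         * lowest set bit of 'used' decides how much gets subtracted. */
        return ((bucket_prio + 1) << SHIFT) - ffs(used);
}

int main(void)
{
        printf("%d\n", flatten_prio(0, 0x1));   /* highest sub-level -> 3 */
        printf("%d\n", flatten_prio(0, 0x4));   /* lower sub-level   -> 1 */
        printf("%d\n", flatten_prio(1, 0x4));   /* higher bucket always wins -> 5 */
        return 0;
}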
+ * + * More naturally we would write + * prio >= max(0, last); + * except that we wish to prevent triggering preemption at the same + * priority level: the task that is running should remain running + * to preserve FIFO ordering of dependencies. + */ + last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); + if (engine->execlists.queue_priority_hint <= last_prio) + return false; + + /* + * Check against the first request in ELSP[1], it will, thanks to the + * power of PI, be the highest priority of that context. + */ + if (!list_is_last(&rq->sched.link, &engine->active.requests) && + rq_prio(list_next_entry(rq, sched.link)) > last_prio) + return true; + + /* + * If the inflight context did not trigger the preemption, then maybe + * it was the set of queued requests? Pick the highest priority in + * the queue (the first active priolist) and see if it deserves to be + * running instead of ELSP[0]. + * + * The highest priority request in the queue can not be either + * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same + * context, it's priority would not exceed ELSP[0] aka last_prio. + */ + return max(virtual_prio(&engine->execlists), + queue_prio(&engine->execlists)) > last_prio; +} + +__maybe_unused static inline bool +assert_priority_queue(const struct i915_request *prev, + const struct i915_request *next) +{ + /* + * Without preemption, the prev may refer to the still active element + * which we refuse to let go. + * + * Even with preemption, there are times when we think it is better not + * to preempt and leave an ostensibly lower priority request in flight. + */ + if (i915_request_is_active(prev)) + return true; + + return rq_prio(prev) >= rq_prio(next); +} + +static struct i915_request * +__unwind_incomplete_requests(struct intel_engine_cs *engine) +{ + struct i915_request *rq, *rn, *active = NULL; + struct list_head *pl; + int prio = I915_PRIORITY_INVALID; + + lockdep_assert_held(&engine->active.lock); + + list_for_each_entry_safe_reverse(rq, rn, + &engine->active.requests, + sched.link) { + if (__i915_request_is_complete(rq)) { + list_del_init(&rq->sched.link); + continue; + } + + __i915_request_unsubmit(rq); + + GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); + if (rq_prio(rq) != prio) { + prio = rq_prio(rq); + pl = i915_sched_lookup_priolist(engine, prio); + } + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + + list_move(&rq->sched.link, pl); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + + /* Check in case we rollback so far we wrap [size/2] */ + if (intel_ring_direction(rq->ring, + rq->tail, + rq->ring->tail + 8) > 0) + rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; + + active = rq; + } + + return active; +} + +struct i915_request * +execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) +{ + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + + return __unwind_incomplete_requests(engine); +} + +static inline void +execlists_context_status_change(struct i915_request *rq, unsigned long status) +{ + /* + * Only used when GVT-g is enabled now. When GVT-g is disabled, + * The compiler should eliminate this function as dead-code. 
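The preemption test performed by need_preempt() above reduces to a couple of integer comparisons. The function below is a simplified stand-in with invented parameters (hint, running_prio, best_queued_prio), not the driver's signature; it only captures the "never preempt at the same priority level" rule and the nopreempt escape hatch.

/* Sketch: the core "is a preempt-to-idle cycle worth it?" comparison. */
#include <stdbool.h>
#include <stdio.h>

#define PRIO_NORMAL        0
#define PRIO_UNPREEMPTABLE 0x7fffffff   /* stand-in for I915_PRIORITY_UNPREEMPTABLE */

static bool need_preempt(int hint, int running_prio, bool nopreempt,
                         int best_queued_prio)
{
        /* A nopreempt request is never worth interrupting. */
        int last = nopreempt ? PRIO_UNPREEMPTABLE : running_prio;

        /* Never preempt at the same priority level: preserve FIFO order. */
        if (last < PRIO_NORMAL - 1)
                last = PRIO_NORMAL - 1;
        if (hint <= last)
                return false;

        /* Only preempt if something queued actually beats what is running. */
        return best_queued_prio > last;
}

int main(void)
{
        printf("%d\n", need_preempt(5, 0, false, 5));   /* 1: higher priority queued */
        printf("%d\n", need_preempt(0, 0, false, 0));   /* 0: same level, keep FIFO */
        printf("%d\n", need_preempt(5, 0, true, 5));    /* 0: running rq is nopreempt */
        return 0;
}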
+ */ + if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) + return; + + atomic_notifier_call_chain(&rq->engine->context_status_notifier, + status, rq); +} + +static void intel_engine_context_in(struct intel_engine_cs *engine) +{ + unsigned long flags; + + if (atomic_add_unless(&engine->stats.active, 1, 0)) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + if (!atomic_add_unless(&engine->stats.active, 1, 0)) { + engine->stats.start = ktime_get(); + atomic_inc(&engine->stats.active); + } + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static void intel_engine_context_out(struct intel_engine_cs *engine) +{ + unsigned long flags; + + GEM_BUG_ON(!atomic_read(&engine->stats.active)); + + if (atomic_add_unless(&engine->stats.active, -1, 1)) + return; + + write_seqlock_irqsave(&engine->stats.lock, flags); + if (atomic_dec_and_test(&engine->stats.active)) { + engine->stats.total = + ktime_add(engine->stats.total, + ktime_sub(ktime_get(), engine->stats.start)); + } + write_sequnlock_irqrestore(&engine->stats.lock, flags); +} + +static void reset_active(struct i915_request *rq, + struct intel_engine_cs *engine) +{ + struct intel_context * const ce = rq->context; + u32 head; + + /* + * The executing context has been cancelled. We want to prevent + * further execution along this context and propagate the error on + * to anything depending on its results. + * + * In __i915_request_submit(), we apply the -EIO and remove the + * requests' payloads for any banned requests. But first, we must + * rewind the context back to the start of the incomplete request so + * that we do not jump back into the middle of the batch. + * + * We preserve the breadcrumbs and semaphores of the incomplete + * requests so that inter-timeline dependencies (i.e other timelines) + * remain correctly ordered. And we defer to __i915_request_submit() + * so that all asynchronous waits are correctly handled. 
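intel_engine_context_in()/intel_engine_context_out() above keep a per-engine busy clock: the timer starts when the first context lands on the hardware and the elapsed time is folded into stats.total when the last one leaves. Below is a single-threaded sketch of that bookkeeping; the real code uses atomics plus a seqlock so readers never block, and the timestamps here are plain numbers for clarity.

/* Sketch: per-engine busyness accounting driven by schedule-in/out events. */
#include <stdio.h>

struct engine_stats {
        int active;                     /* contexts currently on the engine */
        unsigned long long start;       /* when 'active' last went 0 -> 1 */
        unsigned long long total;       /* accumulated busy time */
};

static void context_in(struct engine_stats *s, unsigned long long now)
{
        if (s->active++ == 0)
                s->start = now;                 /* engine just became busy */
}

static void context_out(struct engine_stats *s, unsigned long long now)
{
        if (--s->active == 0)
                s->total += now - s->start;     /* engine just went idle */
}

int main(void)
{
        struct engine_stats s = {0};

        context_in(&s, 100);    /* ELSP[0] starts */
        context_in(&s, 120);    /* ELSP[1] joins: the clock keeps running */
        context_out(&s, 150);
        context_out(&s, 180);   /* last context out: 180 - 100 counted as busy */
        printf("busy=%llu\n", s.total);         /* 80 */
        return 0;
}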
+ */ + ENGINE_TRACE(engine, "{ reset rq=%llx:%lld }\n", + rq->fence.context, rq->fence.seqno); + + /* On resubmission of the active request, payload will be scrubbed */ + if (__i915_request_is_complete(rq)) + head = rq->tail; + else + head = __active_request(ce->timeline, rq, -EIO)->head; + head = intel_ring_wrap(ce->ring, head); + + /* Scrub the context image to prevent replaying the previous batch */ + lrc_init_regs(ce, engine, true); + + /* We've switched away, so this should be a no-op, but intent matters */ + ce->lrc.lrca = lrc_update_regs(ce, engine, head); +} + +static inline struct intel_engine_cs * +__execlists_schedule_in(struct i915_request *rq) +{ + struct intel_engine_cs * const engine = rq->engine; + struct intel_context * const ce = rq->context; + + intel_context_get(ce); + + if (unlikely(intel_context_is_closed(ce) && + !intel_engine_has_heartbeat(engine))) + intel_context_set_banned(ce); + + if (unlikely(intel_context_is_banned(ce))) + reset_active(rq, engine); + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + lrc_check_regs(ce, engine, "before"); + + if (ce->tag) { + /* Use a fixed tag for OA and friends */ + GEM_BUG_ON(ce->tag <= BITS_PER_LONG); + ce->lrc.ccid = ce->tag; + } else { + /* We don't need a strict matching tag, just different values */ + unsigned int tag = __ffs(engine->context_tag); + + GEM_BUG_ON(tag >= BITS_PER_LONG); + __clear_bit(tag, &engine->context_tag); + ce->lrc.ccid = (1 + tag) << (GEN11_SW_CTX_ID_SHIFT - 32); + + BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); + } + + ce->lrc.ccid |= engine->execlists.ccid; + + __intel_gt_pm_get(engine->gt); + if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active)) + intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); + intel_engine_context_in(engine); + + CE_TRACE(ce, "schedule-in, ccid:%x\n", ce->lrc.ccid); + + return engine; +} + +static inline void execlists_schedule_in(struct i915_request *rq, int idx) +{ + struct intel_context * const ce = rq->context; + struct intel_engine_cs *old; + + GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); + trace_i915_request_in(rq, idx); + + old = ce->inflight; + if (!old) + old = __execlists_schedule_in(rq); + WRITE_ONCE(ce->inflight, ptr_inc(old)); + + GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); +} + +static void +resubmit_virtual_request(struct i915_request *rq, struct virtual_engine *ve) +{ + struct intel_engine_cs *engine = rq->engine; + + spin_lock_irq(&engine->active.lock); + + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + WRITE_ONCE(rq->engine, &ve->base); + ve->base.submit_request(rq); + + spin_unlock_irq(&engine->active.lock); +} + +static void kick_siblings(struct i915_request *rq, struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + struct intel_engine_cs *engine = rq->engine; + + /* + * After this point, the rq may be transferred to a new sibling, so + * before we clear ce->inflight make sure that the context has been + * removed from the b->signalers and furthermore we need to make sure + * that the concurrent iterator in signal_irq_work is no longer + * following ce->signal_link. + */ + if (!list_empty(&ce->signals)) + intel_context_remove_breadcrumbs(ce, engine->breadcrumbs); + + /* + * This engine is now too busy to run this virtual request, so + * see if we can find an alternative engine for it to execute on. 
+ * Once a request has become bonded to this engine, we treat it the + * same as other native request. + */ + if (i915_request_in_priority_queue(rq) && + rq->execution_mask != engine->mask) + resubmit_virtual_request(rq, ve); + + if (READ_ONCE(ve->request)) + tasklet_hi_schedule(&ve->base.execlists.tasklet); +} + +static inline void __execlists_schedule_out(struct i915_request *rq) +{ + struct intel_context * const ce = rq->context; + struct intel_engine_cs * const engine = rq->engine; + unsigned int ccid; + + /* + * NB process_csb() is not under the engine->active.lock and hence + * schedule_out can race with schedule_in meaning that we should + * refrain from doing non-trivial work here. + */ + + CE_TRACE(ce, "schedule-out, ccid:%x\n", ce->lrc.ccid); + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + lrc_check_regs(ce, engine, "after"); + + /* + * If we have just completed this context, the engine may now be + * idle and we want to re-enter powersaving. + */ + if (intel_timeline_is_last(ce->timeline, rq) && + __i915_request_is_complete(rq)) + intel_engine_add_retire(engine, ce->timeline); + + ccid = ce->lrc.ccid; + ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; + ccid &= GEN12_MAX_CONTEXT_HW_ID; + if (ccid < BITS_PER_LONG) { + GEM_BUG_ON(ccid == 0); + GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); + __set_bit(ccid - 1, &engine->context_tag); + } + + lrc_update_runtime(ce); + intel_engine_context_out(engine); + execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); + if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)) + intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); + intel_gt_pm_put_async(engine->gt); + + /* + * If this is part of a virtual engine, its next request may + * have been blocked waiting for access to the active context. + * We have to kick all the siblings again in case we need to + * switch (e.g. the next request is not runnable on this + * engine). Hopefully, we will already have submitted the next + * request before the tasklet runs and do not need to rebuild + * each virtual tree and kick everyone again. + */ + if (ce->engine != engine) + kick_siblings(rq, ce); +} + +static inline void +execlists_schedule_out(struct i915_request *rq) +{ + struct intel_context * const ce = rq->context; + + trace_i915_request_out(rq); + + GEM_BUG_ON(!ce->inflight); + ce->inflight = ptr_dec(ce->inflight); + if (!__intel_context_inflight_count(ce->inflight)) { + GEM_BUG_ON(ce->inflight != rq->engine); + __execlists_schedule_out(rq); + WRITE_ONCE(ce->inflight, NULL); + intel_context_put(ce); + } + + i915_request_put(rq); +} + +static u64 execlists_update_context(struct i915_request *rq) +{ + struct intel_context *ce = rq->context; + u64 desc = ce->lrc.desc; + u32 tail, prev; + + /* + * WaIdleLiteRestore:bdw,skl + * + * We should never submit the context with the same RING_TAIL twice + * just in case we submit an empty ring, which confuses the HW. + * + * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of + * the normal request to be able to always advance the RING_TAIL on + * subsequent resubmissions (for lite restore). Should that fail us, + * and we try and submit the same tail again, force the context + * reload. + * + * If we need to return to a preempted context, we need to skip the + * lite-restore and force it to reload the RING_TAIL. Otherwise, the + * HW has a tendency to ignore us rewinding the TAIL to the end of + * an earlier request. 
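execlists_update_context() above forces a full context restore whenever the new RING_TAIL would not be a forward step from the previous one. The helper below is a self-contained, wrap-aware version of that direction test, for illustration only; the driver's intel_ring_direction() lives elsewhere and takes a ring, not a size.

/* Sketch: wrap-aware "did the tail move forward?" check for a power-of-two
 * sized ring. >0 means forward, <0 backward, 0 unchanged. */
#include <stdio.h>

static int ring_direction(unsigned int size, unsigned int next, unsigned int prev)
{
        /* Interpret the offset difference as a signed distance inside the
         * ring, so a small backwards step does not look like a huge
         * forward one after wrapping. */
        int space = (int)((next - prev) & (size - 1));

        if (space > (int)(size / 2))
                space -= (int)size;
        return space;
}

int main(void)
{
        const unsigned int size = 4096;

        printf("%d\n", ring_direction(size, 128, 64));   /*  64: advance */
        printf("%d\n", ring_direction(size, 16, 4032));  /*  80: advance across the wrap */
        printf("%d\n", ring_direction(size, 64, 128));   /* -64: rewind, so force a
                                                          *      full context restore */
        return 0;
}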
+ */ + GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); + prev = rq->ring->tail; + tail = intel_ring_set_tail(rq->ring, rq->tail); + if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) + desc |= CTX_DESC_FORCE_RESTORE; + ce->lrc_reg_state[CTX_RING_TAIL] = tail; + rq->tail = rq->wa_tail; + + /* + * Make sure the context image is complete before we submit it to HW. + * + * Ostensibly, writes (including the WCB) should be flushed prior to + * an uncached write such as our mmio register access, the empirical + * evidence (esp. on Braswell) suggests that the WC write into memory + * may not be visible to the HW prior to the completion of the UC + * register write and that we may begin execution from the context + * before its image is complete leading to invalid PD chasing. + */ + wmb(); + + ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; + return desc; +} + +static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) +{ + if (execlists->ctrl_reg) { + writel(lower_32_bits(desc), execlists->submit_reg + port * 2); + writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); + } else { + writel(upper_32_bits(desc), execlists->submit_reg); + writel(lower_32_bits(desc), execlists->submit_reg); + } +} + +static __maybe_unused char * +dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) +{ + if (!rq) + return ""; + + snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", + prefix, + rq->context->lrc.ccid, + rq->fence.context, rq->fence.seqno, + __i915_request_is_complete(rq) ? "!" : + __i915_request_has_started(rq) ? "*" : + "", + rq_prio(rq)); + + return buf; +} + +static __maybe_unused void +trace_ports(const struct intel_engine_execlists *execlists, + const char *msg, + struct i915_request * const *ports) +{ + const struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + char __maybe_unused p0[40], p1[40]; + + if (!ports[0]) + return; + + ENGINE_TRACE(engine, "%s { %s%s }\n", msg, + dump_port(p0, sizeof(p0), "", ports[0]), + dump_port(p1, sizeof(p1), ", ", ports[1])); +} + +static inline bool +reset_in_progress(const struct intel_engine_execlists *execlists) +{ + return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); +} + +static __maybe_unused bool +assert_pending_valid(const struct intel_engine_execlists *execlists, + const char *msg) +{ + struct intel_engine_cs *engine = + container_of(execlists, typeof(*engine), execlists); + struct i915_request * const *port, *rq; + struct intel_context *ce = NULL; + bool sentinel = false; + u32 ccid = -1; + + trace_ports(execlists, msg, execlists->pending); + + /* We may be messing around with the lists during reset, lalala */ + if (reset_in_progress(execlists)) + return true; + + if (!execlists->pending[0]) { + GEM_TRACE_ERR("%s: Nothing pending for promotion!\n", + engine->name); + return false; + } + + if (execlists->pending[execlists_num_ports(execlists)]) { + GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n", + engine->name, execlists_num_ports(execlists)); + return false; + } + + for (port = execlists->pending; (rq = *port); port++) { + unsigned long flags; + bool ok = true; + + GEM_BUG_ON(!kref_read(&rq->fence.refcount)); + GEM_BUG_ON(!i915_request_is_active(rq)); + + if (ce == rq->context) { + GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n", + engine->name, + ce->timeline->fence_context, + port - execlists->pending); + return false; + } + ce = rq->context; + + if (ccid == ce->lrc.ccid) { + GEM_TRACE_ERR("%s: Dup 
ccid:%x context:%llx in pending[%zd]\n", + engine->name, + ccid, ce->timeline->fence_context, + port - execlists->pending); + return false; + } + ccid = ce->lrc.ccid; + + /* + * Sentinels are supposed to be the last request so they flush + * the current execution off the HW. Check that they are the only + * request in the pending submission. + */ + if (sentinel) { + GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", + engine->name, + ce->timeline->fence_context, + port - execlists->pending); + return false; + } + sentinel = i915_request_has_sentinel(rq); + + /* + * We want virtual requests to only be in the first slot so + * that they are never stuck behind a hog and can be immediately + * transferred onto the next idle engine. + */ + if (rq->execution_mask != engine->mask && + port != execlists->pending) { + GEM_TRACE_ERR("%s: virtual engine:%llx not in prime position[%zd]\n", + engine->name, + ce->timeline->fence_context, + port - execlists->pending); + return false; + } + + /* Hold tightly onto the lock to prevent concurrent retires! */ + if (!spin_trylock_irqsave(&rq->lock, flags)) + continue; + + if (__i915_request_is_complete(rq)) + goto unlock; + + if (i915_active_is_idle(&ce->active) && + !intel_context_is_barrier(ce)) { + GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n", + engine->name, + ce->timeline->fence_context, + port - execlists->pending); + ok = false; + goto unlock; + } + + if (!i915_vma_is_pinned(ce->state)) { + GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n", + engine->name, + ce->timeline->fence_context, + port - execlists->pending); + ok = false; + goto unlock; + } + + if (!i915_vma_is_pinned(ce->ring->vma)) { + GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n", + engine->name, + ce->timeline->fence_context, + port - execlists->pending); + ok = false; + goto unlock; + } + +unlock: + spin_unlock_irqrestore(&rq->lock, flags); + if (!ok) + return false; + } + + return ce; +} + +static void execlists_submit_ports(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + unsigned int n; + + GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); + + /* + * We can skip acquiring intel_runtime_pm_get() here as it was taken + * on our behalf by the request (see i915_gem_mark_busy()) and it will + * not be relinquished until the device is idle (see + * i915_gem_idle_work_handler()). As a precaution, we make sure + * that all ELSP are drained i.e. we have processed the CSB, + * before allowing ourselves to idle and calling intel_runtime_pm_put(). + */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + + /* + * ELSQ note: the submit queue is not cleared after being submitted + * to the HW so we need to make sure we always clean it up. This is + * currently ensured by the fact that we always write the same number + * of elsq entries, keep this in mind before changing the loop below. + */ + for (n = execlists_num_ports(execlists); n--; ) { + struct i915_request *rq = execlists->pending[n]; + + write_desc(execlists, + rq ? 
execlists_update_context(rq) : 0, + n); + } + + /* we need to manually load the submit queue */ + if (execlists->ctrl_reg) + writel(EL_CTRL_LOAD, execlists->ctrl_reg); +} + +static bool ctx_single_port_submission(const struct intel_context *ce) +{ + return (IS_ENABLED(CONFIG_DRM_I915_GVT) && + intel_context_force_single_submission(ce)); +} + +static bool can_merge_ctx(const struct intel_context *prev, + const struct intel_context *next) +{ + if (prev != next) + return false; + + if (ctx_single_port_submission(prev)) + return false; + + return true; +} + +static unsigned long i915_request_flags(const struct i915_request *rq) +{ + return READ_ONCE(rq->fence.flags); +} + +static bool can_merge_rq(const struct i915_request *prev, + const struct i915_request *next) +{ + GEM_BUG_ON(prev == next); + GEM_BUG_ON(!assert_priority_queue(prev, next)); + + /* + * We do not submit known completed requests. Therefore if the next + * request is already completed, we can pretend to merge it in + * with the previous context (and we will skip updating the ELSP + * and tracking). Thus hopefully keeping the ELSP full with active + * contexts, despite the best efforts of preempt-to-busy to confuse + * us. + */ + if (__i915_request_is_complete(next)) + return true; + + if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) & + (BIT(I915_FENCE_FLAG_NOPREEMPT) | + BIT(I915_FENCE_FLAG_SENTINEL)))) + return false; + + if (!can_merge_ctx(prev->context, next->context)) + return false; + + GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno)); + return true; +} + +static bool virtual_matches(const struct virtual_engine *ve, + const struct i915_request *rq, + const struct intel_engine_cs *engine) +{ + const struct intel_engine_cs *inflight; + + if (!rq) + return false; + + if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ + return false; + + /* + * We track when the HW has completed saving the context image + * (i.e. when we have seen the final CS event switching out of + * the context) and must not overwrite the context image before + * then. This restricts us to only using the active engine + * while the previous virtualized request is inflight (so + * we reuse the register offsets). This is a very small + * hystersis on the greedy seelction algorithm. + */ + inflight = intel_context_inflight(&ve->context); + if (inflight && inflight != engine) + return false; + + return true; +} + +static struct virtual_engine * +first_virtual_engine(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *el = &engine->execlists; + struct rb_node *rb = rb_first_cached(&el->virtual); + + while (rb) { + struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + struct i915_request *rq = READ_ONCE(ve->request); + + /* lazily cleanup after another engine handled rq */ + if (!rq || !virtual_matches(ve, rq, engine)) { + rb_erase_cached(rb, &el->virtual); + RB_CLEAR_NODE(rb); + rb = rb_first_cached(&el->virtual); + continue; + } + + return ve; + } + + return NULL; +} + +static void virtual_xfer_context(struct virtual_engine *ve, + struct intel_engine_cs *engine) +{ + unsigned int n; + + if (likely(engine == ve->siblings[0])) + return; + + GEM_BUG_ON(READ_ONCE(ve->context.inflight)); + if (!intel_engine_has_relative_mmio(engine)) + lrc_update_offsets(&ve->context, engine); + + /* + * Move the bound engine to the top of the list for + * future execution. 
We then kick this tasklet first + * before checking others, so that we preferentially + * reuse this set of bound registers. + */ + for (n = 1; n < ve->num_siblings; n++) { + if (ve->siblings[n] == engine) { + swap(ve->siblings[n], ve->siblings[0]); + break; + } + } +} + +static void defer_request(struct i915_request *rq, struct list_head * const pl) +{ + LIST_HEAD(list); + + /* + * We want to move the interrupted request to the back of + * the round-robin list (i.e. its priority level), but + * in doing so, we must then move all requests that were in + * flight and were waiting for the interrupted request to + * be run after it again. + */ + do { + struct i915_dependency *p; + + GEM_BUG_ON(i915_request_is_active(rq)); + list_move_tail(&rq->sched.link, pl); + + for_each_waiter(p, rq) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + if (p->flags & I915_DEPENDENCY_WEAK) + continue; + + /* Leave semaphores spinning on the other engines */ + if (w->engine != rq->engine) + continue; + + /* No waiter should start before its signaler */ + GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && + __i915_request_has_started(w) && + !__i915_request_is_complete(rq)); + + GEM_BUG_ON(i915_request_is_active(w)); + if (!i915_request_is_ready(w)) + continue; + + if (rq_prio(w) < rq_prio(rq)) + continue; + + GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +static void defer_active(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = __unwind_incomplete_requests(engine); + if (!rq) + return; + + defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); +} + +static bool +timeslice_yield(const struct intel_engine_execlists *el, + const struct i915_request *rq) +{ + /* + * Once bitten, forever smitten! + * + * If the active context ever busy-waited on a semaphore, + * it will be treated as a hog until the end of its timeslice (i.e. + * until it is scheduled out and replaced by a new submission, + * possibly even its own lite-restore). The HW only sends an interrupt + * on the first miss, and we do know if that semaphore has been + * signaled, or even if it is now stuck on another semaphore. Play + * safe, yield if it might be stuck -- it will be given a fresh + * timeslice in the near future. 
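virtual_xfer_context() above rotates the just-used physical engine to siblings[0] so the next submission preferentially reuses the already-bound register set. The swap itself is trivial; here is a standalone version with made-up engine ids.

/* Sketch: promote the engine a virtual request just ran on to the front
 * of the sibling list. */
#include <stdio.h>

#define NUM_SIBLINGS 3

static void prefer_engine(int siblings[], int n, int engine)
{
        if (siblings[0] == engine)
                return;                         /* already the preferred one */

        for (int i = 1; i < n; i++) {
                if (siblings[i] == engine) {
                        int tmp = siblings[0];

                        siblings[0] = siblings[i];
                        siblings[i] = tmp;
                        break;
                }
        }
}

int main(void)
{
        int siblings[NUM_SIBLINGS] = { 2, 5, 7 };       /* hypothetical engine ids */

        prefer_engine(siblings, NUM_SIBLINGS, 7);       /* request just ran on engine 7 */
        printf("%d %d %d\n", siblings[0], siblings[1], siblings[2]);    /* 7 5 2 */
        return 0;
}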
+ */ + return rq->context->lrc.ccid == READ_ONCE(el->yield); +} + +static bool needs_timeslice(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + if (!intel_engine_has_timeslices(engine)) + return false; + + /* If not currently active, or about to switch, wait for next event */ + if (!rq || __i915_request_is_complete(rq)) + return false; + + /* We do not need to start the timeslice until after the ACK */ + if (READ_ONCE(engine->execlists.pending[0])) + return false; + + /* If ELSP[1] is occupied, always check to see if worth slicing */ + if (!list_is_last_rcu(&rq->sched.link, &engine->active.requests)) { + ENGINE_TRACE(engine, "timeslice required for second inflight context\n"); + return true; + } + + /* Otherwise, ELSP[0] is by itself, but may be waiting in the queue */ + if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)) { + ENGINE_TRACE(engine, "timeslice required for queue\n"); + return true; + } + + if (!RB_EMPTY_ROOT(&engine->execlists.virtual.rb_root)) { + ENGINE_TRACE(engine, "timeslice required for virtual\n"); + return true; + } + + return false; +} + +static bool +timeslice_expired(struct intel_engine_cs *engine, const struct i915_request *rq) +{ + const struct intel_engine_execlists *el = &engine->execlists; + + if (i915_request_has_nopreempt(rq) && __i915_request_has_started(rq)) + return false; + + if (!needs_timeslice(engine, rq)) + return false; + + return timer_expired(&el->timer) || timeslice_yield(el, rq); +} + +static unsigned long timeslice(const struct intel_engine_cs *engine) +{ + return READ_ONCE(engine->props.timeslice_duration_ms); +} + +static void start_timeslice(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists *el = &engine->execlists; + unsigned long duration; + + /* Disable the timer if there is nothing to switch to */ + duration = 0; + if (needs_timeslice(engine, *el->active)) { + /* Avoid continually prolonging an active timeslice */ + if (timer_active(&el->timer)) { + /* + * If we just submitted a new ELSP after an old + * context, that context may have already consumed + * its timeslice, so recheck. + */ + if (!timer_pending(&el->timer)) + tasklet_hi_schedule(&el->tasklet); + return; + } + + duration = timeslice(engine); + } + + set_timer_ms(&el->timer, duration); +} + +static void record_preemption(struct intel_engine_execlists *execlists) +{ + (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); +} + +static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + if (!rq) + return 0; + + /* Force a fast reset for terminated contexts (ignoring sysfs!) */ + if (unlikely(intel_context_is_banned(rq->context))) + return 1; + + return READ_ONCE(engine->props.preempt_timeout_ms); +} + +static void set_preempt_timeout(struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + if (!intel_engine_has_preempt_reset(engine)) + return; + + set_timer_ms(&engine->execlists.preempt, + active_preempt_timeout(engine, rq)); +} + +static void execlists_dequeue(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request **port = execlists->pending; + struct i915_request ** const last_port = port + execlists->port_mask; + struct i915_request *last = *execlists->active; + struct virtual_engine *ve; + struct rb_node *rb; + bool submit = false; + + /* + * Hardware submission is through 2 ports. Conceptually each port + * has a (RING_START, RING_HEAD, RING_TAIL) tuple. 
RING_START is + * static for a context, and unique to each, so we only execute + * requests belonging to a single context from each ring. RING_HEAD + * is maintained by the CS in the context image, it marks the place + * where it got up to last time, and through RING_TAIL we tell the CS + * where we want to execute up to this time. + * + * In this list the requests are in order of execution. Consecutive + * requests from the same context are adjacent in the ringbuffer. We + * can combine these requests into a single RING_TAIL update: + * + * RING_HEAD...req1...req2 + * ^- RING_TAIL + * since to execute req2 the CS must first execute req1. + * + * Our goal then is to point each port to the end of a consecutive + * sequence of requests as being the most optimal (fewest wake ups + * and context switches) submission. + */ + + spin_lock(&engine->active.lock); + + /* + * If the queue is higher priority than the last + * request in the currently active context, submit afresh. + * We will resubmit again afterwards in case we need to split + * the active context to interject the preemption request, + * i.e. we will retrigger preemption following the ack in case + * of trouble. + * + * In theory we can skip over completed contexts that have not + * yet been processed by events (as those events are in flight): + * + * while ((last = *active) && i915_request_completed(last)) + * active++; + * + * However, the GPU cannot handle this as it will ultimately + * find itself trying to jump back into a context it has just + * completed and barf. + */ + + if (last) { + if (__i915_request_is_complete(last)) { + goto check_secondary; + } else if (need_preempt(engine, last)) { + ENGINE_TRACE(engine, + "preempting last=%llx:%lld, prio=%d, hint=%d\n", + last->fence.context, + last->fence.seqno, + last->sched.attr.priority, + execlists->queue_priority_hint); + record_preemption(execlists); + + /* + * Don't let the RING_HEAD advance past the breadcrumb + * as we unwind (and until we resubmit) so that we do + * not accidentally tell it to go backwards. + */ + ring_set_paused(engine, 1); + + /* + * Note that we have not stopped the GPU at this point, + * so we are unwinding the incomplete requests as they + * remain inflight and so by the time we do complete + * the preemption, some of the unwound requests may + * complete! + */ + __unwind_incomplete_requests(engine); + + last = NULL; + } else if (timeslice_expired(engine, last)) { + ENGINE_TRACE(engine, + "expired:%s last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", + yesno(timer_expired(&execlists->timer)), + last->fence.context, last->fence.seqno, + rq_prio(last), + execlists->queue_priority_hint, + yesno(timeslice_yield(execlists, last))); + + /* + * Consume this timeslice; ensure we start a new one. + * + * The timeslice expired, and we will unwind the + * running contexts and recompute the next ELSP. + * If that submit will be the same pair of contexts + * (due to dependency ordering), we will skip the + * submission. If we don't cancel the timer now, + * we will see that the timer has expired and + * reschedule the tasklet; continually until the + * next context switch or other preeemption event. + * + * Since we have decided to reschedule based on + * consumption of this timeslice, if we submit the + * same context again, grant it a full timeslice. 
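needs_timeslice()/start_timeslice() earlier in this hunk arm the timer only when there is something to switch to and the last ELSP write has been acknowledged. That decision condenses to a few boolean checks; the sketch below uses invented field names and a hypothetical 5 ms slice rather than the sysfs-configurable timeslice_duration_ms.

/* Sketch: decide whether (and for how long) to arm the timeslice timer. */
#include <stdbool.h>
#include <stdio.h>

struct engine_state {
        bool has_timeslices;    /* engine supports preemption/timeslicing */
        bool awaiting_ack;      /* a fresh ELSP submission is still pending */
        bool second_inflight;   /* ELSP[1] is occupied */
        bool queue_nonempty;    /* runnable requests are waiting in the queue */
};

static unsigned int timeslice_ms(const struct engine_state *e, bool running)
{
        if (!e->has_timeslices || !running)
                return 0;               /* nothing to switch away from */
        if (e->awaiting_ack)
                return 0;               /* start counting only after the ACK */
        if (e->second_inflight || e->queue_nonempty)
                return 5;               /* hypothetical 5 ms slice */
        return 0;                       /* sole context, let it run */
}

int main(void)
{
        struct engine_state e = { .has_timeslices = true, .queue_nonempty = true };

        printf("%u ms\n", timeslice_ms(&e, true));      /* 5: someone is waiting */
        e.queue_nonempty = false;
        printf("%u ms\n", timeslice_ms(&e, true));      /* 0: no competition */
        return 0;
}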
+ */ + cancel_timer(&execlists->timer); + ring_set_paused(engine, 1); + defer_active(engine); + + /* + * Unlike for preemption, if we rewind and continue + * executing the same context as previously active, + * the order of execution will remain the same and + * the tail will only advance. We do not need to + * force a full context restore, as a lite-restore + * is sufficient to resample the monotonic TAIL. + * + * If we switch to any other context, similarly we + * will not rewind TAIL of current context, and + * normal save/restore will preserve state and allow + * us to later continue executing the same request. + */ + last = NULL; + } else { + /* + * Otherwise if we already have a request pending + * for execution after the current one, we can + * just wait until the next CS event before + * queuing more. In either case we will force a + * lite-restore preemption event, but if we wait + * we hopefully coalesce several updates into a single + * submission. + */ +check_secondary: + if (!list_is_last(&last->sched.link, + &engine->active.requests)) { + /* + * Even if ELSP[1] is occupied and not worthy + * of timeslices, our queue might be. + */ + spin_unlock(&engine->active.lock); + return; + } + } + } + + /* XXX virtual is always taking precedence */ + while ((ve = first_virtual_engine(engine))) { + struct i915_request *rq; + + spin_lock(&ve->base.active.lock); + + rq = ve->request; + if (unlikely(!virtual_matches(ve, rq, engine))) + goto unlock; /* lost the race to a sibling */ + + GEM_BUG_ON(rq->engine != &ve->base); + GEM_BUG_ON(rq->context != &ve->context); + + if (unlikely(rq_prio(rq) < queue_prio(execlists))) { + spin_unlock(&ve->base.active.lock); + break; + } + + if (last && !can_merge_rq(last, rq)) { + spin_unlock(&ve->base.active.lock); + spin_unlock(&engine->active.lock); + return; /* leave this for another sibling */ + } + + ENGINE_TRACE(engine, + "virtual rq=%llx:%lld%s, new engine? %s\n", + rq->fence.context, + rq->fence.seqno, + __i915_request_is_complete(rq) ? "!" : + __i915_request_has_started(rq) ? "*" : + "", + yesno(engine != ve->siblings[0])); + + WRITE_ONCE(ve->request, NULL); + WRITE_ONCE(ve->base.execlists.queue_priority_hint, INT_MIN); + + rb = &ve->nodes[engine->id].rb; + rb_erase_cached(rb, &execlists->virtual); + RB_CLEAR_NODE(rb); + + GEM_BUG_ON(!(rq->execution_mask & engine->mask)); + WRITE_ONCE(rq->engine, engine); + + if (__i915_request_submit(rq)) { + /* + * Only after we confirm that we will submit + * this request (i.e. it has not already + * completed), do we want to update the context. + * + * This serves two purposes. It avoids + * unnecessary work if we are resubmitting an + * already completed request after timeslicing. + * But more importantly, it prevents us altering + * ve->siblings[] on an idle context, where + * we may be using ve->siblings[] in + * virtual_context_enter / virtual_context_exit. + */ + virtual_xfer_context(ve, engine); + GEM_BUG_ON(ve->siblings[0] != engine); + + submit = true; + last = rq; + } + + i915_request_put(rq); +unlock: + spin_unlock(&ve->base.active.lock); + + /* + * Hmm, we have a bunch of virtual engine requests, + * but the first one was already completed (thanks + * preempt-to-busy!). Keep looking at the veng queue + * until we have no more relevant requests (i.e. + * the normal submit queue has higher priority). 
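+ * (If __i915_request_submit() refuses the request because it has already completed, submit stays false and we move on to the next virtual candidate.)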
+ */ + if (submit) + break; + } + + while ((rb = rb_first_cached(&execlists->queue))) { + struct i915_priolist *p = to_priolist(rb); + struct i915_request *rq, *rn; + int i; + + priolist_for_each_request_consume(rq, rn, p, i) { + bool merge = true; + + /* + * Can we combine this request with the current port? + * It has to be the same context/ringbuffer and not + * have any exceptions (e.g. GVT saying never to + * combine contexts). + * + * If we can combine the requests, we can execute both + * by updating the RING_TAIL to point to the end of the + * second request, and so we never need to tell the + * hardware about the first. + */ + if (last && !can_merge_rq(last, rq)) { + /* + * If we are on the second port and cannot + * combine this request with the last, then we + * are done. + */ + if (port == last_port) + goto done; + + /* + * We must not populate both ELSP[] with the + * same LRCA, i.e. we must submit 2 different + * contexts if we submit 2 ELSP. + */ + if (last->context == rq->context) + goto done; + + if (i915_request_has_sentinel(last)) + goto done; + + /* + * We avoid submitting virtual requests into + * the secondary ports so that we can migrate + * the request immediately to another engine + * rather than wait for the primary request. + */ + if (rq->execution_mask != engine->mask) + goto done; + + /* + * If GVT overrides us we only ever submit + * port[0], leaving port[1] empty. Note that we + * also have to be careful that we don't queue + * the same context (even though a different + * request) to the second port. + */ + if (ctx_single_port_submission(last->context) || + ctx_single_port_submission(rq->context)) + goto done; + + merge = false; + } + + if (__i915_request_submit(rq)) { + if (!merge) { + *port++ = i915_request_get(last); + last = NULL; + } + + GEM_BUG_ON(last && + !can_merge_ctx(last->context, + rq->context)); + GEM_BUG_ON(last && + i915_seqno_passed(last->fence.seqno, + rq->fence.seqno)); + + submit = true; + last = rq; + } + } + + rb_erase_cached(&p->node, &execlists->queue); + i915_priolist_free(p); + } +done: + *port++ = i915_request_get(last); + + /* + * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. + * + * We choose the priority hint such that if we add a request of greater + * priority than this, we kick the submission tasklet to decide on + * the right order of submitting the requests to hardware. We must + * also be prepared to reorder requests as they are in-flight on the + * HW. We derive the priority hint then as the first "hole" in + * the HW submission ports and if there are no available slots, + * the priority of the lowest executing request, i.e. last. + * + * When we do receive a higher priority request ready to run from the + * user, see queue_request(), the priority hint is bumped to that + * request triggering preemption on the next dequeue (or subsequent + * interrupt for secondary ports). + */ + execlists->queue_priority_hint = queue_prio(execlists); + spin_unlock(&engine->active.lock); + + /* + * We can skip poking the HW if we ended up with exactly the same set + * of requests as currently running, e.g. trying to timeslice a pair + * of ordered contexts. 
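+ * (The memcmp() below compares the pending[] ports just built against the ports currently in active[], so an identical resubmission is elided.)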
+ */ + if (submit && + memcmp(execlists->active, + execlists->pending, + (port - execlists->pending) * sizeof(*port))) { + *port = NULL; + while (port-- != execlists->pending) + execlists_schedule_in(*port, port - execlists->pending); + + WRITE_ONCE(execlists->yield, -1); + set_preempt_timeout(engine, *execlists->active); + execlists_submit_ports(engine); + } else { + ring_set_paused(engine, 0); + while (port-- != execlists->pending) + i915_request_put(*port); + *execlists->pending = NULL; + } +} + +static void execlists_dequeue_irq(struct intel_engine_cs *engine) +{ + local_irq_disable(); /* Suspend interrupts across request submission */ + execlists_dequeue(engine); + local_irq_enable(); /* flush irq_work (e.g. breadcrumb enabling) */ +} + +static inline void clear_ports(struct i915_request **ports, int count) +{ + memset_p((void **)ports, NULL, count); +} + +static inline void +copy_ports(struct i915_request **dst, struct i915_request **src, int count) +{ + /* A memcpy_p() would be very useful here! */ + while (count--) + WRITE_ONCE(*dst++, *src++); /* avoid write tearing */ +} + +static struct i915_request ** +cancel_port_requests(struct intel_engine_execlists * const execlists, + struct i915_request **inactive) +{ + struct i915_request * const *port; + + for (port = execlists->pending; *port; port++) + *inactive++ = *port; + clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)); + + /* Mark the end of active before we overwrite *active */ + for (port = xchg(&execlists->active, execlists->pending); *port; port++) + *inactive++ = *port; + clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); + + smp_wmb(); /* complete the seqlock for execlists_active() */ + WRITE_ONCE(execlists->active, execlists->inflight); + + /* Having cancelled all outstanding process_csb(), stop their timers */ + GEM_BUG_ON(execlists->pending[0]); + cancel_timer(&execlists->timer); + cancel_timer(&execlists->preempt); + + return inactive; +} + +static inline void +invalidate_csb_entries(const u64 *first, const u64 *last) +{ + clflush((void *)first); + clflush((void *)last); +} + +/* + * Starting with Gen12, the status has a new format: + * + * bit 0: switched to new queue + * bit 1: reserved + * bit 2: semaphore wait mode (poll or signal), only valid when + * switch detail is set to "wait on semaphore" + * bits 3-5: engine class + * bits 6-11: engine instance + * bits 12-14: reserved + * bits 15-25: sw context id of the lrc the GT switched to + * bits 26-31: sw counter of the lrc the GT switched to + * bits 32-35: context switch detail + * - 0: ctx complete + * - 1: wait on sync flip + * - 2: wait on vblank + * - 3: wait on scanline + * - 4: wait on semaphore + * - 5: context preempted (not on SEMAPHORE_WAIT or + * WAIT_FOR_EVENT) + * bit 36: reserved + * bits 37-43: wait detail (for switch detail 1 to 4) + * bits 44-46: reserved + * bits 47-57: sw context id of the lrc the GT switched away from + * bits 58-63: sw counter of the lrc the GT switched away from + */ +static inline bool gen12_csb_parse(const u64 csb) +{ + bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb)); + bool new_queue = + lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; + + /* + * The context switch detail is not guaranteed to be 5 when a preemption + * occurs, so we can't just check for that. The check below works for + * all the cases we care about, including preemptions of WAIT + * instructions and lite-restore. 
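(Per the layout above, new_queue is bit 0 of the low dword, while ctx_away_valid looks at the context id the GT switched away from in the high dword.)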
Preempt-to-idle via the CTRL register + * would require some extra handling, but we don't support that. + */ + if (!ctx_away_valid || new_queue) { + GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb))); + return true; + } + + /* + * switch detail = 5 is covered by the case above and we do not expect a + * context switch on an unsuccessful wait instruction since we always + * use polling mode. + */ + GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); + return false; +} + +static inline bool gen8_csb_parse(const u64 csb) +{ + return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); +} + +static noinline u64 +wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb) +{ + u64 entry; + + /* + * Reading from the HWSP has one particular advantage: we can detect + * a stale entry. Since the write into HWSP is broken, we have no reason + * to trust the HW at all, the mmio entry may equally be unordered, so + * we prefer the path that is self-checking and as a last resort, + * return the mmio value. + * + * tgl,dg1:HSDES#22011327657 + */ + preempt_disable(); + if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) { + int idx = csb - engine->execlists.csb_status; + int status; + + status = GEN8_EXECLISTS_STATUS_BUF; + if (idx >= 6) { + status = GEN11_EXECLISTS_STATUS_BUF2; + idx -= 6; + } + status += sizeof(u64) * idx; + + entry = intel_uncore_read64(engine->uncore, + _MMIO(engine->mmio_base + status)); + } + preempt_enable(); + + return entry; +} + +static inline u64 +csb_read(const struct intel_engine_cs *engine, u64 * const csb) +{ + u64 entry = READ_ONCE(*csb); + + /* + * Unfortunately, the GPU does not always serialise its write + * of the CSB entries before its write of the CSB pointer, at least + * from the perspective of the CPU, using what is known as a Global + * Observation Point. We may read a new CSB tail pointer, but then + * read the stale CSB entries, causing us to misinterpret the + * context-switch events, and eventually declare the GPU hung. + * + * icl:HSDES#1806554093 + * tgl:HSDES#22011248461 + */ + if (unlikely(entry == -1)) + entry = wa_csb_read(engine, csb); + + /* Consume this entry so that we can spot its future reuse. */ + WRITE_ONCE(*csb, -1); + + /* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */ + return entry; +} + +static void new_timeslice(struct intel_engine_execlists *el) +{ + /* By cancelling, we will start afresh in start_timeslice() */ + cancel_timer(&el->timer); +} + +static struct i915_request ** +process_csb(struct intel_engine_cs *engine, struct i915_request **inactive) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + u64 * const buf = execlists->csb_status; + const u8 num_entries = execlists->csb_size; + struct i915_request **prev; + u8 head, tail; + + /* + * As we modify our execlists state tracking we require exclusive + * access. Either we are inside the tasklet, or the tasklet is disabled + * and we assume that is only inside the reset paths and so serialised. + */ + GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && + !reset_in_progress(execlists)); + GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); + + /* + * Note that csb_write, csb_status may be either in HWSP or mmio. + * When reading from the csb_write mmio register, we have to be + * careful to only use the GEN8_CSB_WRITE_PTR portion, which is + * the low 4bits. 
As it happens we know the next 4bits are always
+ zero and so we can simply mask off the low u8 of the register
+ and treat it identically to reading from the HWSP (without having
+ to use explicit shifting and masking, and probably bifurcating
+ the code to handle the legacy mmio read).
+ */
+ head = execlists->csb_head;
+ tail = READ_ONCE(*execlists->csb_write);
+ if (unlikely(head == tail))
+ return inactive;
+
+ /*
+ * We will consume all events from HW, or at least pretend to.
+ *
+ * The sequence of events from the HW is deterministic, and derived
+ * from our writes to the ELSP, with a smidgen of variability for
+ * the arrival of the asynchronous requests wrt the inflight
+ * execution. If the HW sends an event that does not correspond with
+ * the one we are expecting, we have to abandon all hope as we lose
+ * all tracking of what the engine is actually executing. We will
+ * only detect we are out of sequence with the HW when we get an
+ * 'impossible' event because we have already drained our own
+ * preemption/promotion queue. If this occurs, we know that we likely
+ * lost track of execution earlier and must unwind and restart; the
+ * simplest way is to stop processing the event queue and force the
+ * engine to reset.
+ */
+ execlists->csb_head = tail;
+ ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail);
+
+ /*
+ * Hopefully paired with a wmb() in HW!
+ *
+ * We must complete the read of the write pointer before any reads
+ * from the CSB, so that we do not see stale values. Without an rmb
+ * (lfence) the HW may speculatively perform the CSB[] reads *before*
+ * we perform the READ_ONCE(*csb_write).
+ */
+ rmb();
+
+ /* Remember who was last running under the timer */
+ prev = inactive;
+ *prev = NULL;
+
+ do {
+ bool promote;
+ u64 csb;
+
+ if (++head == num_entries)
+ head = 0;
+
+ /*
+ * We are flying near dragons again.
+ *
+ * We hold a reference to the request in execlist_port[]
+ * but no more than that. We are operating in softirq
+ * context and so cannot hold any mutex or sleep. That
+ * prevents us stopping the requests we are processing
+ * in port[] from being retired simultaneously (the
+ * breadcrumb will be complete before we see the
+ * context-switch). As we only hold the reference to the
+ * request, any pointer chasing underneath the request
+ * is subject to a potential use-after-free. Thus we
+ * store all of the bookkeeping within port[] as
+ * required, and avoid using unguarded pointers beneath
+ * request itself. The same applies to the atomic
+ * status notifier.
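+ * (The request pointers themselves are simply transferred to the caller's inactive[] array and scheduled out later via post_process_csb().)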
+ */ + + csb = csb_read(engine, buf + head); + ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", + head, upper_32_bits(csb), lower_32_bits(csb)); + + if (INTEL_GEN(engine->i915) >= 12) + promote = gen12_csb_parse(csb); + else + promote = gen8_csb_parse(csb); + if (promote) { + struct i915_request * const *old = execlists->active; + + if (GEM_WARN_ON(!*execlists->pending)) { + execlists->error_interrupt |= ERROR_CSB; + break; + } + + ring_set_paused(engine, 0); + + /* Point active to the new ELSP; prevent overwriting */ + WRITE_ONCE(execlists->active, execlists->pending); + smp_wmb(); /* notify execlists_active() */ + + /* cancel old inflight, prepare for switch */ + trace_ports(execlists, "preempted", old); + while (*old) + *inactive++ = *old++; + + /* switch pending to inflight */ + GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); + copy_ports(execlists->inflight, + execlists->pending, + execlists_num_ports(execlists)); + smp_wmb(); /* complete the seqlock */ + WRITE_ONCE(execlists->active, execlists->inflight); + + /* XXX Magic delay for tgl */ + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + + WRITE_ONCE(execlists->pending[0], NULL); + } else { + if (GEM_WARN_ON(!*execlists->active)) { + execlists->error_interrupt |= ERROR_CSB; + break; + } + + /* port0 completed, advanced to port1 */ + trace_ports(execlists, "completed", execlists->active); + + /* + * We rely on the hardware being strongly + * ordered, that the breadcrumb write is + * coherent (visible from the CPU) before the + * user interrupt is processed. One might assume + * that the breadcrumb write being before the + * user interrupt and the CS event for the context + * switch would therefore be before the CS event + * itself... + */ + if (GEM_SHOW_DEBUG() && + !__i915_request_is_complete(*execlists->active)) { + struct i915_request *rq = *execlists->active; + const u32 *regs __maybe_unused = + rq->context->lrc_reg_state; + + ENGINE_TRACE(engine, + "context completed before request!\n"); + ENGINE_TRACE(engine, + "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n", + ENGINE_READ(engine, RING_START), + ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR, + ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR, + ENGINE_READ(engine, RING_CTL), + ENGINE_READ(engine, RING_MI_MODE)); + ENGINE_TRACE(engine, + "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ", + i915_ggtt_offset(rq->ring->vma), + rq->head, rq->tail, + rq->fence.context, + lower_32_bits(rq->fence.seqno), + hwsp_seqno(rq)); + ENGINE_TRACE(engine, + "ctx:{start:%08x, head:%04x, tail:%04x}, ", + regs[CTX_RING_START], + regs[CTX_RING_HEAD], + regs[CTX_RING_TAIL]); + } + + *inactive++ = *execlists->active++; + + GEM_BUG_ON(execlists->active - execlists->inflight > + execlists_num_ports(execlists)); + } + } while (head != tail); + + /* + * Gen11 has proven to fail wrt global observation point between + * entry and tail update, failing on the ordering and thus + * we see an old entry in the context status buffer. + * + * Forcibly evict out entries for the next gpu csb update, + * to increase the odds that we get a fresh entries with non + * working hardware. The cost for doing so comes out mostly with + * the wash as hardware, working or not, will need to do the + * invalidation before. + */ + invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); + + /* + * We assume that any event reflects a change in context flow + * and merits a fresh timeslice. We reinstall the timer after + * inspecting the queue to see if we need to resumbit. 
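+ * (new_timeslice() only cancels execlists->timer; start_timeslice() re-arms it from the submission tasklet after the next dequeue.)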
+ */ + if (*prev != *execlists->active) /* elide lite-restores */ + new_timeslice(execlists); + + return inactive; +} + +static void post_process_csb(struct i915_request **port, + struct i915_request **last) +{ + while (port != last) + execlists_schedule_out(*port++); +} + +static void __execlists_hold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + if (i915_request_is_active(rq)) + __i915_request_unsubmit(rq); + + clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + list_move_tail(&rq->sched.link, &rq->engine->active.hold); + i915_request_set_hold(rq); + RQ_TRACE(rq, "on hold\n"); + + for_each_waiter(p, rq) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + /* Leave semaphores spinning on the other engines */ + if (w->engine != rq->engine) + continue; + + if (!i915_request_is_ready(w)) + continue; + + if (__i915_request_is_complete(w)) + continue; + + if (i915_request_on_hold(w)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +static bool execlists_hold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + if (i915_request_on_hold(rq)) + return false; + + spin_lock_irq(&engine->active.lock); + + if (__i915_request_is_complete(rq)) { /* too late! */ + rq = NULL; + goto unlock; + } + + /* + * Transfer this request onto the hold queue to prevent it + * being resumbitted to HW (and potentially completed) before we have + * released it. Since we may have already submitted following + * requests, we need to remove those as well. + */ + GEM_BUG_ON(i915_request_on_hold(rq)); + GEM_BUG_ON(rq->engine != engine); + __execlists_hold(rq); + GEM_BUG_ON(list_empty(&engine->active.hold)); + +unlock: + spin_unlock_irq(&engine->active.lock); + return rq; +} + +static bool hold_request(const struct i915_request *rq) +{ + struct i915_dependency *p; + bool result = false; + + /* + * If one of our ancestors is on hold, we must also be on hold, + * otherwise we will bypass it and execute before it. 
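+ * (The walk below therefore only inspects signalers on the same engine, under rcu_read_lock().)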
+ */ + rcu_read_lock(); + for_each_signaler(p, rq) { + const struct i915_request *s = + container_of(p->signaler, typeof(*s), sched); + + if (s->engine != rq->engine) + continue; + + result = i915_request_on_hold(s); + if (result) + break; + } + rcu_read_unlock(); + + return result; +} + +static void __execlists_unhold(struct i915_request *rq) +{ + LIST_HEAD(list); + + do { + struct i915_dependency *p; + + RQ_TRACE(rq, "hold release\n"); + + GEM_BUG_ON(!i915_request_on_hold(rq)); + GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); + + i915_request_clear_hold(rq); + list_move_tail(&rq->sched.link, + i915_sched_lookup_priolist(rq->engine, + rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + + /* Also release any children on this engine that are ready */ + for_each_waiter(p, rq) { + struct i915_request *w = + container_of(p->waiter, typeof(*w), sched); + + /* Propagate any change in error status */ + if (rq->fence.error) + i915_request_set_error_once(w, rq->fence.error); + + if (w->engine != rq->engine) + continue; + + if (!i915_request_on_hold(w)) + continue; + + /* Check that no other parents are also on hold */ + if (hold_request(w)) + continue; + + list_move_tail(&w->sched.link, &list); + } + + rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); + } while (rq); +} + +static void execlists_unhold(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + spin_lock_irq(&engine->active.lock); + + /* + * Move this request back to the priority queue, and all of its + * children and grandchildren that were suspended along with it. + */ + __execlists_unhold(rq); + + if (rq_prio(rq) > engine->execlists.queue_priority_hint) { + engine->execlists.queue_priority_hint = rq_prio(rq); + tasklet_hi_schedule(&engine->execlists.tasklet); + } + + spin_unlock_irq(&engine->active.lock); +} + +struct execlists_capture { + struct work_struct work; + struct i915_request *rq; + struct i915_gpu_coredump *error; +}; + +static void execlists_capture_work(struct work_struct *work) +{ + struct execlists_capture *cap = container_of(work, typeof(*cap), work); + const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; + struct intel_engine_cs *engine = cap->rq->engine; + struct intel_gt_coredump *gt = cap->error->gt; + struct intel_engine_capture_vma *vma; + + /* Compress all the objects attached to the request, slow! 
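This runs from a workqueue, so the sleeping GFP_KERNEL allocations are fine here.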
*/ + vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); + if (vma) { + struct i915_vma_compress *compress = + i915_vma_capture_prepare(gt); + + intel_engine_coredump_add_vma(gt->engine, vma, compress); + i915_vma_capture_finish(gt, compress); + } + + gt->simulated = gt->engine->simulated; + cap->error->simulated = gt->simulated; + + /* Publish the error state, and announce it to the world */ + i915_error_state_store(cap->error); + i915_gpu_coredump_put(cap->error); + + /* Return this request and all that depend upon it for signaling */ + execlists_unhold(engine, cap->rq); + i915_request_put(cap->rq); + + kfree(cap); +} + +static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) +{ + const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; + struct execlists_capture *cap; + + cap = kmalloc(sizeof(*cap), gfp); + if (!cap) + return NULL; + + cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); + if (!cap->error) + goto err_cap; + + cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); + if (!cap->error->gt) + goto err_gpu; + + cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); + if (!cap->error->gt->engine) + goto err_gt; + + cap->error->gt->engine->hung = true; + + return cap; + +err_gt: + kfree(cap->error->gt); +err_gpu: + kfree(cap->error); +err_cap: + kfree(cap); + return NULL; +} + +static struct i915_request * +active_context(struct intel_engine_cs *engine, u32 ccid) +{ + const struct intel_engine_execlists * const el = &engine->execlists; + struct i915_request * const *port, *rq; + + /* + * Use the most recent result from process_csb(), but just in case + * we trigger an error (via interrupt) before the first CS event has + * been written, peek at the next submission. + */ + + for (port = el->active; (rq = *port); port++) { + if (rq->context->lrc.ccid == ccid) { + ENGINE_TRACE(engine, + "ccid:%x found at active:%zd\n", + ccid, port - el->active); + return rq; + } + } + + for (port = el->pending; (rq = *port); port++) { + if (rq->context->lrc.ccid == ccid) { + ENGINE_TRACE(engine, + "ccid:%x found at pending:%zd\n", + ccid, port - el->pending); + return rq; + } + } + + ENGINE_TRACE(engine, "ccid:%x not found\n", ccid); + return NULL; +} + +static u32 active_ccid(struct intel_engine_cs *engine) +{ + return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI); +} + +static void execlists_capture(struct intel_engine_cs *engine) +{ + struct execlists_capture *cap; + + if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) + return; + + /* + * We need to _quickly_ capture the engine state before we reset. + * We are inside an atomic section (softirq) here and we are delaying + * the forced preemption event. + */ + cap = capture_regs(engine); + if (!cap) + return; + + spin_lock_irq(&engine->active.lock); + cap->rq = active_context(engine, active_ccid(engine)); + if (cap->rq) { + cap->rq = active_request(cap->rq->context->timeline, cap->rq); + cap->rq = i915_request_get_rcu(cap->rq); + } + spin_unlock_irq(&engine->active.lock); + if (!cap->rq) + goto err_free; + + /* + * Remove the request from the execlists queue, and take ownership + * of the request. We pass it to our worker who will _slowly_ compress + * all the pages the _user_ requested for debugging their batch, after + * which we return it to the queue for signaling. + * + * By removing them from the execlists queue, we also remove the + * requests from being processed by __unwind_incomplete_requests() + * during the intel_engine_reset(), and so they will *not* be replayed + * afterwards. 
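+ * (They sit on engine->active.hold via execlists_hold() until execlists_unhold() releases them from the capture worker.)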
+ * + * Note that because we have not yet reset the engine at this point, + * it is possible for the request that we have identified as being + * guilty, did in fact complete and we will then hit an arbitration + * point allowing the outstanding preemption to succeed. The likelihood + * of that is very low (as capturing of the engine registers should be + * fast enough to run inside an irq-off atomic section!), so we will + * simply hold that request accountable for being non-preemptible + * long enough to force the reset. + */ + if (!execlists_hold(engine, cap->rq)) + goto err_rq; + + INIT_WORK(&cap->work, execlists_capture_work); + schedule_work(&cap->work); + return; + +err_rq: + i915_request_put(cap->rq); +err_free: + i915_gpu_coredump_put(cap->error); + kfree(cap); +} + +static void execlists_reset(struct intel_engine_cs *engine, const char *msg) +{ + const unsigned int bit = I915_RESET_ENGINE + engine->id; + unsigned long *lock = &engine->gt->reset.flags; + + if (!intel_has_reset_engine(engine->gt)) + return; + + if (test_and_set_bit(bit, lock)) + return; + + ENGINE_TRACE(engine, "reset for %s\n", msg); + + /* Mark this tasklet as disabled to avoid waiting for it to complete */ + tasklet_disable_nosync(&engine->execlists.tasklet); + + ring_set_paused(engine, 1); /* Freeze the current request in place */ + execlists_capture(engine); + intel_engine_reset(engine, msg); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); +} + +static bool preempt_timeout(const struct intel_engine_cs *const engine) +{ + const struct timer_list *t = &engine->execlists.preempt; + + if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) + return false; + + if (!timer_expired(t)) + return false; + + return engine->execlists.pending[0]; +} + +/* + * Check the unread Context Status Buffers and manage the submission of new + * contexts to the ELSP accordingly. + */ +static void execlists_submission_tasklet(unsigned long data) +{ + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + struct i915_request *post[2 * EXECLIST_MAX_PORTS]; + struct i915_request **inactive; + + rcu_read_lock(); + inactive = process_csb(engine, post); + GEM_BUG_ON(inactive - post > ARRAY_SIZE(post)); + + if (unlikely(preempt_timeout(engine))) { + cancel_timer(&engine->execlists.preempt); + engine->execlists.error_interrupt |= ERROR_PREEMPT; + } + + if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { + const char *msg; + + /* Generate the error message in priority wrt to the user! 
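User-raised CS errors in the low 16 bits take precedence over our internal ERROR_CSB and ERROR_PREEMPT flags.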
*/ + if (engine->execlists.error_interrupt & GENMASK(15, 0)) + msg = "CS error"; /* thrown by a user payload */ + else if (engine->execlists.error_interrupt & ERROR_CSB) + msg = "invalid CSB event"; + else if (engine->execlists.error_interrupt & ERROR_PREEMPT) + msg = "preemption time out"; + else + msg = "internal error"; + + engine->execlists.error_interrupt = 0; + execlists_reset(engine, msg); + } + + if (!engine->execlists.pending[0]) { + execlists_dequeue_irq(engine); + start_timeslice(engine); + } + + post_process_csb(post, inactive); + rcu_read_unlock(); +} + +static void __execlists_kick(struct intel_engine_execlists *execlists) +{ + /* Kick the tasklet for some interrupt coalescing and reset handling */ + tasklet_hi_schedule(&execlists->tasklet); +} + +#define execlists_kick(t, member) \ + __execlists_kick(container_of(t, struct intel_engine_execlists, member)) + +static void execlists_timeslice(struct timer_list *timer) +{ + execlists_kick(timer, timer); +} + +static void execlists_preempt(struct timer_list *timer) +{ + execlists_kick(timer, preempt); +} + +static void queue_request(struct intel_engine_cs *engine, + struct i915_request *rq) +{ + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(engine, rq_prio(rq))); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + +static bool submit_queue(struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + struct intel_engine_execlists *execlists = &engine->execlists; + + if (rq_prio(rq) <= execlists->queue_priority_hint) + return false; + + execlists->queue_priority_hint = rq_prio(rq); + return true; +} + +static bool ancestor_on_hold(const struct intel_engine_cs *engine, + const struct i915_request *rq) +{ + GEM_BUG_ON(i915_request_on_hold(rq)); + return !list_empty(&engine->active.hold) && hold_request(rq); +} + +static void execlists_submit_request(struct i915_request *request) +{ + struct intel_engine_cs *engine = request->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. 
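Hence the irqsave variant of the engine->active.lock below.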
*/ + spin_lock_irqsave(&engine->active.lock, flags); + + if (unlikely(ancestor_on_hold(engine, request))) { + RQ_TRACE(request, "ancestor on hold\n"); + list_add_tail(&request->sched.link, &engine->active.hold); + i915_request_set_hold(request); + } else { + queue_request(engine, request); + + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(list_empty(&request->sched.link)); + + if (submit_queue(engine, request)) + __execlists_kick(&engine->execlists); + } + + spin_unlock_irqrestore(&engine->active.lock, flags); +} + +static int execlists_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **vaddr) +{ + return lrc_pre_pin(ce, ce->engine, ww, vaddr); +} + +static int execlists_context_pin(struct intel_context *ce, void *vaddr) +{ + return lrc_pin(ce, ce->engine, vaddr); +} + +static int execlists_context_alloc(struct intel_context *ce) +{ + return lrc_alloc(ce, ce->engine); +} + +static const struct intel_context_ops execlists_context_ops = { + .flags = COPS_HAS_INFLIGHT, + + .alloc = execlists_context_alloc, + + .pre_pin = execlists_context_pre_pin, + .pin = execlists_context_pin, + .unpin = lrc_unpin, + .post_unpin = lrc_post_unpin, + + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + + .reset = lrc_reset, + .destroy = lrc_destroy, +}; + +static int emit_pdps(struct i915_request *rq) +{ + const struct intel_engine_cs * const engine = rq->engine; + struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); + int err, i; + u32 *cs; + + GEM_BUG_ON(intel_vgpu_active(rq->engine->i915)); + + /* + * Beware ye of the dragons, this sequence is magic! + * + * Small changes to this sequence can cause anything from + * GPU hangs to forcewake errors and machine lockups! + */ + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + /* Flush any residual operations from the context load */ + err = engine->emit_flush(rq, EMIT_FLUSH); + if (err) + return err; + + /* Magic required to prevent forcewake errors! */ + err = engine->emit_flush(rq, EMIT_INVALIDATE); + if (err) + return err; + + cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + /* Ensure the LRI have landed before we invalidate & continue */ + *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; + for (i = GEN8_3LVL_PDPES; i--; ) { + const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); + u32 base = engine->mmio_base; + + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); + *cs++ = upper_32_bits(pd_daddr); + *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); + *cs++ = lower_32_bits(pd_daddr); + } + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + intel_ring_advance(rq, cs); + + intel_ring_advance(rq, cs); + + return 0; +} + +static int execlists_request_alloc(struct i915_request *request) +{ + int ret; + + GEM_BUG_ON(!intel_context_is_pinned(request->context)); + + /* + * Flush enough space to reduce the likelihood of waiting after + * we start building the request - in which case we will just + * have to repeat work. + */ + request->reserved_space += EXECLISTS_REQUEST_SIZE; + + /* + * Note that after this point, we have committed to using + * this request as it is being used to both track the + * state of engine initialisation and liveness of the + * golden renderstate above. Think twice before you try + * to cancel/unwind this request now. 
+ */ + + if (!i915_vm_is_4lvl(request->context->vm)) { + ret = emit_pdps(request); + if (ret) + return ret; + } + + /* Unconditionally invalidate GPU caches and TLBs. */ + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); + if (ret) + return ret; + + request->reserved_space -= EXECLISTS_REQUEST_SIZE; + return 0; +} + +static void reset_csb_pointers(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + const unsigned int reset_value = execlists->csb_size - 1; + + ring_set_paused(engine, 0); + + /* + * Sometimes Icelake forgets to reset its pointers on a GPU reset. + * Bludgeon them with a mmio update to be sure. + */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + 0xffff << 16 | reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + + /* + * After a reset, the HW starts writing into CSB entry [0]. We + * therefore have to set our HEAD pointer back one entry so that + * the *first* entry we check is entry 0. To complicate this further, + * as we don't wait for the first interrupt after reset, we have to + * fake the HW write to point back to the last entry so that our + * inline comparison of our cached head position against the last HW + * write works even before the first interrupt. + */ + execlists->csb_head = reset_value; + WRITE_ONCE(*execlists->csb_write, reset_value); + wmb(); /* Make sure this is visible to HW (paranoia?) */ + + /* Check that the GPU does indeed update the CSB entries! */ + memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64)); + invalidate_csb_entries(&execlists->csb_status[0], + &execlists->csb_status[reset_value]); + + /* Once more for luck and our trusty paranoia */ + ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, + 0xffff << 16 | reset_value << 8 | reset_value); + ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); + + GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value); +} + +static void sanitize_hwsp(struct intel_engine_cs *engine) +{ + struct intel_timeline *tl; + + list_for_each_entry(tl, &engine->status_page.timelines, engine_link) + intel_timeline_reset_seqno(tl); +} + +static void execlists_sanitize(struct intel_engine_cs *engine) +{ + GEM_BUG_ON(execlists_active(&engine->execlists)); + + /* + * Poison residual state on resume, in case the suspend didn't! + * + * We have to assume that across suspend/resume (or other loss + * of control) that the contents of our pinned buffers has been + * lost, replaced by garbage. Since this doesn't always happen, + * let's poison such state so that we more quickly spot when + * we falsely assume it has been preserved. + */ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); + + reset_csb_pointers(engine); + + /* + * The kernel_context HWSP is stored in the status_page. As above, + * that may be lost on resume/initialisation, and so we need to + * reset the value in the HWSP. 
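+ * (sanitize_hwsp() below walks every timeline attached to the status page and resets its seqno.)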
+ */ + sanitize_hwsp(engine); + + /* And scrub the dirty cachelines for the HWSP */ + clflush_cache_range(engine->status_page.addr, PAGE_SIZE); +} + +static void enable_error_interrupt(struct intel_engine_cs *engine) +{ + u32 status; + + engine->execlists.error_interrupt = 0; + ENGINE_WRITE(engine, RING_EMR, ~0u); + ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */ + + status = ENGINE_READ(engine, RING_ESR); + if (unlikely(status)) { + drm_err(&engine->i915->drm, + "engine '%s' resumed still in error: %08x\n", + engine->name, status); + __intel_gt_reset(engine->gt, engine->mask); + } + + /* + * On current gen8+, we have 2 signals to play with + * + * - I915_ERROR_INSTUCTION (bit 0) + * + * Generate an error if the command parser encounters an invalid + * instruction + * + * This is a fatal error. + * + * - CP_PRIV (bit 2) + * + * Generate an error on privilege violation (where the CP replaces + * the instruction with a no-op). This also fires for writes into + * read-only scratch pages. + * + * This is a non-fatal error, parsing continues. + * + * * there are a few others defined for odd HW that we do not use + * + * Since CP_PRIV fires for cases where we have chosen to ignore the + * error (as the HW is validating and suppressing the mistakes), we + * only unmask the instruction error bit. + */ + ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION); +} + +static void enable_execlists(struct intel_engine_cs *engine) +{ + u32 mode; + + assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); + + intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ + + if (INTEL_GEN(engine->i915) >= 11) + mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); + else + mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); + ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode); + + ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); + + ENGINE_WRITE_FW(engine, + RING_HWS_PGA, + i915_ggtt_offset(engine->status_page.vma)); + ENGINE_POSTING_READ(engine, RING_HWS_PGA); + + enable_error_interrupt(engine); +} + +static bool unexpected_starting_state(struct intel_engine_cs *engine) +{ + bool unexpected = false; + + if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) { + drm_dbg(&engine->i915->drm, + "STOP_RING still set in RING_MI_MODE\n"); + unexpected = true; + } + + return unexpected; +} + +static int execlists_resume(struct intel_engine_cs *engine) +{ + intel_mocs_init_engine(engine); + + intel_breadcrumbs_reset(engine->breadcrumbs); + + if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) { + struct drm_printer p = drm_debug_printer(__func__); + + intel_engine_dump(engine, &p, NULL); + } + + enable_execlists(engine); + + return 0; +} + +static void execlists_reset_prepare(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + ENGINE_TRACE(engine, "depth<-%d\n", + atomic_read(&execlists->tasklet.count)); + + /* + * Prevent request submission to the hardware until we have + * completed the reset in i915_gem_reset_finish(). If a request + * is completed by one engine, it may then queue a request + * to a second via its execlists->tasklet *just* as we are + * calling engine->resume() and also writing the ELSP. + * Turning off the execlists->tasklet until the reset is over + * prevents the race. + */ + __tasklet_disable_sync_once(&execlists->tasklet); + GEM_BUG_ON(!reset_in_progress(execlists)); + + /* + * We stop engines, otherwise we might get failed reset and a + * dead gpu (on elk). 
Also as modern gpu as kbl can suffer + * from system hang if batchbuffer is progressing when + * the reset is issued, regardless of READY_TO_RESET ack. + * Thus assume it is best to stop engines on all gens + * where we have a gpu reset. + * + * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) + * + * FIXME: Wa for more modern gens needs to be validated + */ + ring_set_paused(engine, 1); + intel_engine_stop_cs(engine); + + engine->execlists.reset_ccid = active_ccid(engine); +} + +static struct i915_request ** +reset_csb(struct intel_engine_cs *engine, struct i915_request **inactive) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + mb(); /* paranoia: read the CSB pointers from after the reset */ + clflush(execlists->csb_write); + mb(); + + inactive = process_csb(engine, inactive); /* drain preemption events */ + + /* Following the reset, we need to reload the CSB read/write pointers */ + reset_csb_pointers(engine); + + return inactive; +} + +static void +execlists_reset_active(struct intel_engine_cs *engine, bool stalled) +{ + struct intel_context *ce; + struct i915_request *rq; + u32 head; + + /* + * Save the currently executing context, even if we completed + * its request, it was still running at the time of the + * reset and will have been clobbered. + */ + rq = active_context(engine, engine->execlists.reset_ccid); + if (!rq) + return; + + ce = rq->context; + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); + + if (__i915_request_is_complete(rq)) { + /* Idle context; tidy up the ring so we can restart afresh */ + head = intel_ring_wrap(ce->ring, rq->tail); + goto out_replay; + } + + /* We still have requests in-flight; the engine should be active */ + GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); + + /* Context has requests still in-flight; it should not be idle! */ + GEM_BUG_ON(i915_active_is_idle(&ce->active)); + + rq = active_request(ce->timeline, rq); + head = intel_ring_wrap(ce->ring, rq->head); + GEM_BUG_ON(head == ce->ring->tail); + + /* + * If this request hasn't started yet, e.g. it is waiting on a + * semaphore, we need to avoid skipping the request or else we + * break the signaling chain. However, if the context is corrupt + * the request will not restart and we will be stuck with a wedged + * device. It is quite often the case that if we issue a reset + * while the GPU is loading the context image, that the context + * image becomes corrupt. + * + * Otherwise, if we have not started yet, the request should replay + * perfectly and we do not need to flag the result as being erroneous. + */ + if (!__i915_request_has_started(rq)) + goto out_replay; + + /* + * If the request was innocent, we leave the request in the ELSP + * and will try to replay it on restarting. The context image may + * have been corrupted by the reset, in which case we may have + * to service a new GPU hang, but more likely we can continue on + * without impact. + * + * If the request was guilty, we presume the context is corrupt + * and have to at least restore the RING register in the context + * image back to the expected values to skip over the guilty request. + */ + __i915_request_reset(rq, stalled); + + /* + * We want a simple context + ring to execute the breadcrumb update. + * We cannot rely on the context being intact across the GPU hang, + * so clear it and rebuild just what we need for the breadcrumb. + * All pending requests for this context will be zapped, and any + * future request will be after userspace has had the opportunity + * to recreate its own state. 
+ */ +out_replay: + ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", + head, ce->ring->tail); + lrc_reset_regs(ce, engine); + ce->lrc.lrca = lrc_update_regs(ce, engine, head); +} + +static void execlists_reset_csb(struct intel_engine_cs *engine, bool stalled) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *post[2 * EXECLIST_MAX_PORTS]; + struct i915_request **inactive; + + rcu_read_lock(); + inactive = reset_csb(engine, post); + + execlists_reset_active(engine, true); + + inactive = cancel_port_requests(execlists, inactive); + post_process_csb(post, inactive); + rcu_read_unlock(); +} + +static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) +{ + unsigned long flags; + + ENGINE_TRACE(engine, "\n"); + + /* Process the csb, find the guilty context and throw away */ + execlists_reset_csb(engine, stalled); + + /* Push back any incomplete requests for replay after the reset. */ + rcu_read_lock(); + spin_lock_irqsave(&engine->active.lock, flags); + __unwind_incomplete_requests(engine); + spin_unlock_irqrestore(&engine->active.lock, flags); + rcu_read_unlock(); +} + +static void nop_submission_tasklet(unsigned long data) +{ + struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; + + /* The driver is wedged; don't process any more events. */ + WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); +} + +static void execlists_reset_cancel(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct i915_request *rq, *rn; + struct rb_node *rb; + unsigned long flags; + + ENGINE_TRACE(engine, "\n"); + + /* + * Before we call engine->cancel_requests(), we should have exclusive + * access to the submission state. This is arranged for us by the + * caller disabling the interrupt generation, the tasklet and other + * threads that may then access the same state, giving us a free hand + * to reset state. However, we still need to let lockdep be aware that + * we know this state may be accessed in hardirq context, so we + * disable the irq around this manipulation and we want to keep + * the spinlock focused on its duties and not accidentally conflate + * coverage to the submission's irq state. (Similarly, although we + * shouldn't need to disable irq around the manipulation of the + * submission's irq state, we also wish to remind ourselves that + * it is irq state.) + */ + execlists_reset_csb(engine, true); + + rcu_read_lock(); + spin_lock_irqsave(&engine->active.lock, flags); + + /* Mark all executing requests as skipped. */ + list_for_each_entry(rq, &engine->active.requests, sched.link) + i915_request_mark_eio(rq); + intel_engine_signal_breadcrumbs(engine); + + /* Flush the queued requests to the timeline list (for retiring). 
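Each one is marked with -EIO and submitted so that it is flushed onto its timeline and can be retired.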
*/ + while ((rb = rb_first_cached(&execlists->queue))) { + struct i915_priolist *p = to_priolist(rb); + int i; + + priolist_for_each_request_consume(rq, rn, p, i) { + i915_request_mark_eio(rq); + __i915_request_submit(rq); + } + + rb_erase_cached(&p->node, &execlists->queue); + i915_priolist_free(p); + } + + /* On-hold requests will be flushed to timeline upon their release */ + list_for_each_entry(rq, &engine->active.hold, sched.link) + i915_request_mark_eio(rq); + + /* Cancel all attached virtual engines */ + while ((rb = rb_first_cached(&execlists->virtual))) { + struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + + rb_erase_cached(rb, &execlists->virtual); + RB_CLEAR_NODE(rb); + + spin_lock(&ve->base.active.lock); + rq = fetch_and_zero(&ve->request); + if (rq) { + i915_request_mark_eio(rq); + + rq->engine = engine; + __i915_request_submit(rq); + i915_request_put(rq); + + ve->base.execlists.queue_priority_hint = INT_MIN; + } + spin_unlock(&ve->base.active.lock); + } + + /* Remaining _unready_ requests will be nop'ed when submitted */ + + execlists->queue_priority_hint = INT_MIN; + execlists->queue = RB_ROOT_CACHED; + + GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); + execlists->tasklet.func = nop_submission_tasklet; + + spin_unlock_irqrestore(&engine->active.lock, flags); + rcu_read_unlock(); +} + +static void execlists_reset_finish(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + + /* + * After a GPU reset, we may have requests to replay. Do so now while + * we still have the forcewake to be sure that the GPU is not allowed + * to sleep before we restart and reload a context. + * + * If the GPU reset fails, the engine may still be alive with requests + * inflight. We expect those to complete, or for the device to be + * reset as the next level of recovery, and as a final resort we + * will declare the device wedged. + */ + GEM_BUG_ON(!reset_in_progress(execlists)); + + /* And kick in case we missed a new request submission. 
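Only once the tasklet is fully re-enabled, i.e. when __tasklet_enable() drops the disable count to zero.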
*/ + if (__tasklet_enable(&execlists->tasklet)) + __execlists_kick(execlists); + + ENGINE_TRACE(engine, "depth->%d\n", + atomic_read(&execlists->tasklet.count)); +} + +static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, + ~(engine->irq_enable_mask | engine->irq_keep_mask)); + ENGINE_POSTING_READ(engine, RING_IMR); +} + +static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) +{ + ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); +} + +static void execlists_park(struct intel_engine_cs *engine) +{ + cancel_timer(&engine->execlists.timer); + cancel_timer(&engine->execlists.preempt); +} + +static bool can_preempt(struct intel_engine_cs *engine) +{ + if (INTEL_GEN(engine->i915) > 8) + return true; + + /* GPGPU on bdw requires extra w/a; not implemented */ + return engine->class != RENDER_CLASS; +} + +static void execlists_set_default_submission(struct intel_engine_cs *engine) +{ + engine->submit_request = execlists_submit_request; + engine->schedule = i915_schedule; + engine->execlists.tasklet.func = execlists_submission_tasklet; + + engine->reset.prepare = execlists_reset_prepare; + engine->reset.rewind = execlists_reset_rewind; + engine->reset.cancel = execlists_reset_cancel; + engine->reset.finish = execlists_reset_finish; + + engine->park = execlists_park; + engine->unpark = NULL; + + engine->flags |= I915_ENGINE_SUPPORTS_STATS; + if (!intel_vgpu_active(engine->i915)) { + engine->flags |= I915_ENGINE_HAS_SEMAPHORES; + if (can_preempt(engine)) { + engine->flags |= I915_ENGINE_HAS_PREEMPTION; + if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + engine->flags |= I915_ENGINE_HAS_TIMESLICES; + } + } + + if (intel_engine_has_preemption(engine)) + engine->emit_bb_start = gen8_emit_bb_start; + else + engine->emit_bb_start = gen8_emit_bb_start_noarb; +} + +static void execlists_shutdown(struct intel_engine_cs *engine) +{ + /* Synchronise with residual timers and any softirq they raise */ + del_timer_sync(&engine->execlists.timer); + del_timer_sync(&engine->execlists.preempt); + tasklet_kill(&engine->execlists.tasklet); +} + +static void execlists_release(struct intel_engine_cs *engine) +{ + engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ + + execlists_shutdown(engine); + + intel_engine_cleanup_common(engine); + lrc_fini_wa_ctx(engine); +} + +static void +logical_ring_default_vfuncs(struct intel_engine_cs *engine) +{ + /* Default vfuncs which can be overriden by each engine. */ + + engine->resume = execlists_resume; + + engine->cops = &execlists_context_ops; + engine->request_alloc = execlists_request_alloc; + + engine->emit_flush = gen8_emit_flush_xcs; + engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; + if (INTEL_GEN(engine->i915) >= 12) { + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; + engine->emit_flush = gen12_emit_flush_xcs; + } + engine->set_default_submission = execlists_set_default_submission; + + if (INTEL_GEN(engine->i915) < 11) { + engine->irq_enable = gen8_logical_ring_enable_irq; + engine->irq_disable = gen8_logical_ring_disable_irq; + } else { + /* + * TODO: On Gen11 interrupt masks need to be clear + * to allow C6 entry. Keep interrupts enabled at + * and take the hit of generating extra interrupts + * until a more refined solution exists. 
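+ * (So for now we leave irq_enable/irq_disable unset on gen11+.)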
+ */ + } +} + +static inline void +logical_ring_default_irqs(struct intel_engine_cs *engine) +{ + unsigned int shift = 0; + + if (INTEL_GEN(engine->i915) < 11) { + const u8 irq_shifts[] = { + [RCS0] = GEN8_RCS_IRQ_SHIFT, + [BCS0] = GEN8_BCS_IRQ_SHIFT, + [VCS0] = GEN8_VCS0_IRQ_SHIFT, + [VCS1] = GEN8_VCS1_IRQ_SHIFT, + [VECS0] = GEN8_VECS_IRQ_SHIFT, + }; + + shift = irq_shifts[engine->id]; + } + + engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; + engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; + engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; + engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; +} + +static void rcs_submission_override(struct intel_engine_cs *engine) +{ + switch (INTEL_GEN(engine->i915)) { + case 12: + engine->emit_flush = gen12_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; + break; + case 11: + engine->emit_flush = gen11_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; + break; + default: + engine->emit_flush = gen8_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; + break; + } +} + +int intel_execlists_submission_setup(struct intel_engine_cs *engine) +{ + struct intel_engine_execlists * const execlists = &engine->execlists; + struct drm_i915_private *i915 = engine->i915; + struct intel_uncore *uncore = engine->uncore; + u32 base = engine->mmio_base; + + tasklet_init(&engine->execlists.tasklet, + execlists_submission_tasklet, (unsigned long)engine); + timer_setup(&engine->execlists.timer, execlists_timeslice, 0); + timer_setup(&engine->execlists.preempt, execlists_preempt, 0); + + logical_ring_default_vfuncs(engine); + logical_ring_default_irqs(engine); + + if (engine->class == RENDER_CLASS) + rcs_submission_override(engine); + + lrc_init_wa_ctx(engine); + + if (HAS_LOGICAL_RING_ELSQ(i915)) { + execlists->submit_reg = uncore->regs + + i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); + execlists->ctrl_reg = uncore->regs + + i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); + } else { + execlists->submit_reg = uncore->regs + + i915_mmio_reg_offset(RING_ELSP(base)); + } + + execlists->csb_status = + (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; + + execlists->csb_write = + &engine->status_page.addr[intel_hws_csb_write_index(i915)]; + + if (INTEL_GEN(i915) < 11) + execlists->csb_size = GEN8_CSB_ENTRIES; + else + execlists->csb_size = GEN11_CSB_ENTRIES; + + engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); + if (INTEL_GEN(engine->i915) >= 11) { + execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); + execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); + } + + /* Finally, take ownership and responsibility for cleanup! */ + engine->sanitize = execlists_sanitize; + engine->release = execlists_release; + + return 0; +} + +static struct list_head *virtual_queue(struct virtual_engine *ve) +{ + return &ve->base.execlists.default_priolist.requests[0]; +} + +static void rcu_virtual_context_destroy(struct work_struct *wrk) +{ + struct virtual_engine *ve = + container_of(wrk, typeof(*ve), rcu.work); + unsigned int n; + + GEM_BUG_ON(ve->context.inflight); + + /* Preempt-to-busy may leave a stale request behind. 
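If so, it must already be completed; we just flush it through submission and drop the reference.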
*/ + if (unlikely(ve->request)) { + struct i915_request *old; + + spin_lock_irq(&ve->base.active.lock); + + old = fetch_and_zero(&ve->request); + if (old) { + GEM_BUG_ON(!i915_request_completed(old)); + __i915_request_submit(old); + i915_request_put(old); + } + + spin_unlock_irq(&ve->base.active.lock); + } + + /* + * Flush the tasklet in case it is still running on another core. + * + * This needs to be done before we remove ourselves from the siblings' + * rbtrees as in the case it is running in parallel, it may reinsert + * the rb_node into a sibling. + */ + tasklet_kill(&ve->base.execlists.tasklet); + + /* Decouple ourselves from the siblings, no more access allowed. */ + for (n = 0; n < ve->num_siblings; n++) { + struct intel_engine_cs *sibling = ve->siblings[n]; + struct rb_node *node = &ve->nodes[sibling->id].rb; + + if (RB_EMPTY_NODE(node)) + continue; + + spin_lock_irq(&sibling->active.lock); + + /* Detachment is lazily performed in the execlists tasklet */ + if (!RB_EMPTY_NODE(node)) + rb_erase_cached(node, &sibling->execlists.virtual); + + spin_unlock_irq(&sibling->active.lock); + } + GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); + GEM_BUG_ON(!list_empty(virtual_queue(ve))); + + lrc_fini(&ve->context); + intel_context_fini(&ve->context); + + intel_breadcrumbs_free(ve->base.breadcrumbs); + intel_engine_free_request_pool(&ve->base); + + kfree(ve->bonds); + kfree(ve); +} + +static void virtual_context_destroy(struct kref *kref) +{ + struct virtual_engine *ve = + container_of(kref, typeof(*ve), context.ref); + + GEM_BUG_ON(!list_empty(&ve->context.signals)); + + /* + * When destroying the virtual engine, we have to be aware that + * it may still be in use from an hardirq/softirq context causing + * the resubmission of a completed request (background completion + * due to preempt-to-busy). Before we can free the engine, we need + * to flush the submission code and tasklets that are still potentially + * accessing the engine. Flushing the tasklets requires process context, + * and since we can guard the resubmit onto the engine with an RCU read + * lock, we can delegate the free of the engine to an RCU worker. + */ + INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy); + queue_rcu_work(system_wq, &ve->rcu); +} + +static void virtual_engine_initial_hint(struct virtual_engine *ve) +{ + int swp; + + /* + * Pick a random sibling on starting to help spread the load around. + * + * New contexts are typically created with exactly the same order + * of siblings, and often started in batches. Due to the way we iterate + * the array of sibling when submitting requests, sibling[0] is + * prioritised for dequeuing. If we make sure that sibling[0] is fairly + * randomised across the system, we also help spread the load by the + * first engine we inspect being different each time. + * + * NB This does not force us to execute on this engine, it will just + * typically be the first we inspect for submission. 
+ */ + swp = prandom_u32_max(ve->num_siblings); + if (swp) + swap(ve->siblings[swp], ve->siblings[0]); +} + +static int virtual_context_alloc(struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + + return lrc_alloc(ce, ve->siblings[0]); +} + +static int virtual_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **vaddr) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + + /* Note: we must use a real engine class for setting up reg state */ + return lrc_pre_pin(ce, ve->siblings[0], ww, vaddr); +} + +static int virtual_context_pin(struct intel_context *ce, void *vaddr) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + + return lrc_pin(ce, ve->siblings[0], vaddr); +} + +static void virtual_context_enter(struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + unsigned int n; + + for (n = 0; n < ve->num_siblings; n++) + intel_engine_pm_get(ve->siblings[n]); + + intel_timeline_enter(ce->timeline); +} + +static void virtual_context_exit(struct intel_context *ce) +{ + struct virtual_engine *ve = container_of(ce, typeof(*ve), context); + unsigned int n; + + intel_timeline_exit(ce->timeline); + + for (n = 0; n < ve->num_siblings; n++) + intel_engine_pm_put(ve->siblings[n]); +} + +static const struct intel_context_ops virtual_context_ops = { + .flags = COPS_HAS_INFLIGHT, + + .alloc = virtual_context_alloc, + + .pre_pin = virtual_context_pre_pin, + .pin = virtual_context_pin, + .unpin = lrc_unpin, + .post_unpin = lrc_post_unpin, + + .enter = virtual_context_enter, + .exit = virtual_context_exit, + + .destroy = virtual_context_destroy, +}; + +static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) +{ + struct i915_request *rq; + intel_engine_mask_t mask; + + rq = READ_ONCE(ve->request); + if (!rq) + return 0; + + /* The rq is ready for submission; rq->execution_mask is now stable. */ + mask = rq->execution_mask; + if (unlikely(!mask)) { + /* Invalid selection, submit to a random engine in error */ + i915_request_set_error_once(rq, -ENODEV); + mask = ve->siblings[0]->mask; + } + + ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", + rq->fence.context, rq->fence.seqno, + mask, ve->base.execlists.queue_priority_hint); + + return mask; +} + +static void virtual_submission_tasklet(unsigned long data) +{ + struct virtual_engine * const ve = (struct virtual_engine *)data; + const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); + intel_engine_mask_t mask; + unsigned int n; + + rcu_read_lock(); + mask = virtual_submission_mask(ve); + rcu_read_unlock(); + if (unlikely(!mask)) + return; + + for (n = 0; n < ve->num_siblings; n++) { + struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]); + struct ve_node * const node = &ve->nodes[sibling->id]; + struct rb_node **parent, *rb; + bool first; + + if (!READ_ONCE(ve->request)) + break; /* already handled by a sibling's tasklet */ + + spin_lock_irq(&sibling->active.lock); + + if (unlikely(!(mask & sibling->mask))) { + if (!RB_EMPTY_NODE(&node->rb)) { + rb_erase_cached(&node->rb, + &sibling->execlists.virtual); + RB_CLEAR_NODE(&node->rb); + } + + goto unlock_engine; + } + + if (unlikely(!RB_EMPTY_NODE(&node->rb))) { + /* + * Cheat and avoid rebalancing the tree if we can + * reuse this node in situ. 
+ */ + first = rb_first_cached(&sibling->execlists.virtual) == + &node->rb; + if (prio == node->prio || (prio > node->prio && first)) + goto submit_engine; + + rb_erase_cached(&node->rb, &sibling->execlists.virtual); + } + + rb = NULL; + first = true; + parent = &sibling->execlists.virtual.rb_root.rb_node; + while (*parent) { + struct ve_node *other; + + rb = *parent; + other = rb_entry(rb, typeof(*other), rb); + if (prio > other->prio) { + parent = &rb->rb_left; + } else { + parent = &rb->rb_right; + first = false; + } + } + + rb_link_node(&node->rb, rb, parent); + rb_insert_color_cached(&node->rb, + &sibling->execlists.virtual, + first); + +submit_engine: + GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); + node->prio = prio; + if (first && prio > sibling->execlists.queue_priority_hint) + tasklet_hi_schedule(&sibling->execlists.tasklet); + +unlock_engine: + spin_unlock_irq(&sibling->active.lock); + + if (intel_context_inflight(&ve->context)) + break; + } +} + +static void virtual_submit_request(struct i915_request *rq) +{ + struct virtual_engine *ve = to_virtual_engine(rq->engine); + unsigned long flags; + + ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n", + rq->fence.context, + rq->fence.seqno); + + GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); + + spin_lock_irqsave(&ve->base.active.lock, flags); + + /* By the time we resubmit a request, it may be completed */ + if (__i915_request_is_complete(rq)) { + __i915_request_submit(rq); + goto unlock; + } + + if (ve->request) { /* background completion from preempt-to-busy */ + GEM_BUG_ON(!i915_request_completed(ve->request)); + __i915_request_submit(ve->request); + i915_request_put(ve->request); + } + + ve->base.execlists.queue_priority_hint = rq_prio(rq); + ve->request = i915_request_get(rq); + + GEM_BUG_ON(!list_empty(virtual_queue(ve))); + list_move_tail(&rq->sched.link, virtual_queue(ve)); + + tasklet_hi_schedule(&ve->base.execlists.tasklet); + +unlock: + spin_unlock_irqrestore(&ve->base.active.lock, flags); +} + +static struct ve_bond * +virtual_find_bond(struct virtual_engine *ve, + const struct intel_engine_cs *master) +{ + int i; + + for (i = 0; i < ve->num_bonds; i++) { + if (ve->bonds[i].master == master) + return &ve->bonds[i]; + } + + return NULL; +} + +static void +virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) +{ + struct virtual_engine *ve = to_virtual_engine(rq->engine); + intel_engine_mask_t allowed, exec; + struct ve_bond *bond; + + allowed = ~to_request(signal)->engine->mask; + + bond = virtual_find_bond(ve, to_request(signal)->engine); + if (bond) + allowed &= bond->sibling_mask; + + /* Restrict the bonded request to run on only the available engines */ + exec = READ_ONCE(rq->execution_mask); + while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) + ; + + /* Prevent the master from being re-run on the bonded engines */ + to_request(signal)->execution_mask &= ~allowed; +} + +struct intel_context * +intel_execlists_create_virtual(struct intel_engine_cs **siblings, + unsigned int count) +{ + struct virtual_engine *ve; + unsigned int n; + int err; + + if (count == 0) + return ERR_PTR(-EINVAL); + + if (count == 1) + return intel_context_create(siblings[0]); + + ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); + if (!ve) + return ERR_PTR(-ENOMEM); + + ve->base.i915 = siblings[0]->i915; + ve->base.gt = siblings[0]->gt; + ve->base.uncore = siblings[0]->uncore; + ve->base.id = -1; + + ve->base.class = OTHER_CLASS; + ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; + ve->base.instance = 
I915_ENGINE_CLASS_INVALID_VIRTUAL; + ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; + + /* + * The decision on whether to submit a request using semaphores + * depends on the saturated state of the engine. We only compute + * this during HW submission of the request, and we need for this + * state to be globally applied to all requests being submitted + * to this engine. Virtual engines encompass more than one physical + * engine and so we cannot accurately tell in advance if one of those + * engines is already saturated and so cannot afford to use a semaphore + * and be pessimized in priority for doing so -- if we are the only + * context using semaphores after all other clients have stopped, we + * will be starved on the saturated system. Such a global switch for + * semaphores is less than ideal, but alas is the current compromise. + */ + ve->base.saturated = ALL_ENGINES; + + snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); + + intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); + intel_engine_init_execlists(&ve->base); + + ve->base.cops = &virtual_context_ops; + ve->base.request_alloc = execlists_request_alloc; + + ve->base.schedule = i915_schedule; + ve->base.submit_request = virtual_submit_request; + ve->base.bond_execute = virtual_bond_execute; + + INIT_LIST_HEAD(virtual_queue(ve)); + ve->base.execlists.queue_priority_hint = INT_MIN; + tasklet_init(&ve->base.execlists.tasklet, + virtual_submission_tasklet, + (unsigned long)ve); + + intel_context_init(&ve->context, &ve->base); + + ve->base.breadcrumbs = intel_breadcrumbs_create(NULL); + if (!ve->base.breadcrumbs) { + err = -ENOMEM; + goto err_put; + } + + for (n = 0; n < count; n++) { + struct intel_engine_cs *sibling = siblings[n]; + + GEM_BUG_ON(!is_power_of_2(sibling->mask)); + if (sibling->mask & ve->base.mask) { + DRM_DEBUG("duplicate %s entry in load balancer\n", + sibling->name); + err = -EINVAL; + goto err_put; + } + + /* + * The virtual engine implementation is tightly coupled to + * the execlists backend -- we push out request directly + * into a tree inside each physical engine. We could support + * layering if we handle cloning of the requests and + * submitting a copy into each backend. + */ + if (sibling->execlists.tasklet.func != + execlists_submission_tasklet) { + err = -ENODEV; + goto err_put; + } + + GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb)); + RB_CLEAR_NODE(&ve->nodes[sibling->id].rb); + + ve->siblings[ve->num_siblings++] = sibling; + ve->base.mask |= sibling->mask; + + /* + * All physical engines must be compatible for their emission + * functions (as we build the instructions during request + * construction and do not alter them before submission + * on the physical engine). We use the engine class as a guide + * here, although that could be refined. 
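A self-contained sketch of the sibling bookkeeping in the loop above: each physical engine contributes a single-bit mask, the virtual engine's mask is their union, and a repeated sibling is rejected because its bit is already set. The names and example masks here are illustrative only.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool is_power_of_2(uint32_t x)
{
        return x && !(x & (x - 1));
}

/* Union of single-bit sibling masks; returns 0 on a bad or duplicate entry. */
static uint32_t build_virtual_mask(const uint32_t *sibling_mask, unsigned int count)
{
        uint32_t mask = 0;
        unsigned int n;

        for (n = 0; n < count; n++) {
                if (!is_power_of_2(sibling_mask[n]))
                        return 0;       /* each physical engine owns exactly one bit */
                if (sibling_mask[n] & mask)
                        return 0;       /* duplicate entry in the load balancer */
                mask |= sibling_mask[n];
        }

        return mask;
}

int main(void)
{
        const uint32_t vcs[] = { 1u << 2, 1u << 3 };   /* two distinct engines */
        const uint32_t dup[] = { 1u << 2, 1u << 2 };   /* same engine twice */

        printf("union=%#x duplicate=%#x\n",
               (unsigned)build_virtual_mask(vcs, 2),
               (unsigned)build_virtual_mask(dup, 2));
        return 0;
}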
+ */ + if (ve->base.class != OTHER_CLASS) { + if (ve->base.class != sibling->class) { + DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", + sibling->class, ve->base.class); + err = -EINVAL; + goto err_put; + } + continue; + } + + ve->base.class = sibling->class; + ve->base.uabi_class = sibling->uabi_class; + snprintf(ve->base.name, sizeof(ve->base.name), + "v%dx%d", ve->base.class, count); + ve->base.context_size = sibling->context_size; + + ve->base.emit_bb_start = sibling->emit_bb_start; + ve->base.emit_flush = sibling->emit_flush; + ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb; + ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb; + ve->base.emit_fini_breadcrumb_dw = + sibling->emit_fini_breadcrumb_dw; + + ve->base.flags = sibling->flags; + } + + ve->base.flags |= I915_ENGINE_IS_VIRTUAL; + + virtual_engine_initial_hint(ve); + return &ve->context; + +err_put: + intel_context_put(&ve->context); + return ERR_PTR(err); +} + +struct intel_context * +intel_execlists_clone_virtual(struct intel_engine_cs *src) +{ + struct virtual_engine *se = to_virtual_engine(src); + struct intel_context *dst; + + dst = intel_execlists_create_virtual(se->siblings, + se->num_siblings); + if (IS_ERR(dst)) + return dst; + + if (se->num_bonds) { + struct virtual_engine *de = to_virtual_engine(dst->engine); + + de->bonds = kmemdup(se->bonds, + sizeof(*se->bonds) * se->num_bonds, + GFP_KERNEL); + if (!de->bonds) { + intel_context_put(dst); + return ERR_PTR(-ENOMEM); + } + + de->num_bonds = se->num_bonds; + } + + return dst; +} + +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, + const struct intel_engine_cs *master, + const struct intel_engine_cs *sibling) +{ + struct virtual_engine *ve = to_virtual_engine(engine); + struct ve_bond *bond; + int n; + + /* Sanity check the sibling is part of the virtual engine */ + for (n = 0; n < ve->num_siblings; n++) + if (sibling == ve->siblings[n]) + break; + if (n == ve->num_siblings) + return -EINVAL; + + bond = virtual_find_bond(ve, master); + if (bond) { + bond->sibling_mask |= sibling->mask; + return 0; + } + + bond = krealloc(ve->bonds, + sizeof(*bond) * (ve->num_bonds + 1), + GFP_KERNEL); + if (!bond) + return -ENOMEM; + + bond[ve->num_bonds].master = master; + bond[ve->num_bonds].sibling_mask = sibling->mask; + + ve->bonds = bond; + ve->num_bonds++; + + return 0; +} + +void intel_execlists_show_requests(struct intel_engine_cs *engine, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent), + unsigned int max) +{ + const struct intel_engine_execlists *execlists = &engine->execlists; + struct i915_request *rq, *last; + unsigned long flags; + unsigned int count; + struct rb_node *rb; + + spin_lock_irqsave(&engine->active.lock, flags); + + last = NULL; + count = 0; + list_for_each_entry(rq, &engine->active.requests, sched.link) { + if (count++ < max - 1) + show_request(m, rq, "\t\t", 0); + else + last = rq; + } + if (last) { + if (count > max) { + drm_printf(m, + "\t\t...skipping %d executing requests...\n", + count - max); + } + show_request(m, last, "\t\t", 0); + } + + if (execlists->queue_priority_hint != INT_MIN) + drm_printf(m, "\t\tQueue priority hint: %d\n", + READ_ONCE(execlists->queue_priority_hint)); + + last = NULL; + count = 0; + for (rb = rb_first_cached(&execlists->queue); rb; rb = rb_next(rb)) { + struct i915_priolist *p = rb_entry(rb, typeof(*p), node); + int i; + + priolist_for_each_request(rq, p, i) { + if 
(count++ < max - 1) + show_request(m, rq, "\t\t", 0); + else + last = rq; + } + } + if (last) { + if (count > max) { + drm_printf(m, + "\t\t...skipping %d queued requests...\n", + count - max); + } + show_request(m, last, "\t\t", 0); + } + + last = NULL; + count = 0; + for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) { + struct virtual_engine *ve = + rb_entry(rb, typeof(*ve), nodes[engine->id].rb); + struct i915_request *rq = READ_ONCE(ve->request); + + if (rq) { + if (count++ < max - 1) + show_request(m, rq, "\t\t", 0); + else + last = rq; + } + } + if (last) { + if (count > max) { + drm_printf(m, + "\t\t...skipping %d virtual requests...\n", + count - max); + } + show_request(m, last, "\t\t", 0); + } + + spin_unlock_irqrestore(&engine->active.lock, flags); +} + +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) +{ + return engine->set_default_submission == + execlists_set_default_submission; +} + +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) +#include "selftest_execlists.c" +#endif diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.h b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h new file mode 100644 index 000000000000..a8fd7adefd82 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2014 Intel Corporation + */ + +#ifndef __INTEL_EXECLISTS_SUBMISSION_H__ +#define __INTEL_EXECLISTS_SUBMISSION_H__ + +#include <linux/types.h> + +struct drm_printer; + +struct i915_request; +struct intel_context; +struct intel_engine_cs; + +enum { + INTEL_CONTEXT_SCHEDULE_IN = 0, + INTEL_CONTEXT_SCHEDULE_OUT, + INTEL_CONTEXT_SCHEDULE_PREEMPTED, +}; + +int intel_execlists_submission_setup(struct intel_engine_cs *engine); + +void intel_execlists_show_requests(struct intel_engine_cs *engine, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent), + unsigned int max); + +struct intel_context * +intel_execlists_create_virtual(struct intel_engine_cs **siblings, + unsigned int count); + +struct intel_context * +intel_execlists_clone_virtual(struct intel_engine_cs *src); + +int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, + const struct intel_engine_cs *master, + const struct intel_engine_cs *sibling); + +bool +intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); + +#endif /* __INTEL_EXECLISTS_SUBMISSION_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c index cf94525be2c1..eece0844fbe9 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c @@ -101,7 +101,16 @@ static bool needs_idle_maps(struct drm_i915_private *i915) * Query intel_iommu to see if we need the workaround. Presumably that * was loaded first. 
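The request-dump loops in intel_execlists_show_requests() above cap output at max entries: the first max - 1 are printed as they are walked, the last one seen is held back, and a skip notice is emitted only when the walk overran the cap. A standalone sketch of that pattern, with plain integers standing in for requests.

#include <stdio.h>

/* Print at most max items: the first max - 1 inline, then a skip notice + the last. */
static void show_bounded(const int *items, unsigned int count, unsigned int max)
{
        const int *last = NULL;
        unsigned int i, seen = 0;

        for (i = 0; i < count; i++) {
                if (seen++ < max - 1)
                        printf("\t%d\n", items[i]);
                else
                        last = &items[i];
        }

        if (last) {
                if (seen > max)
                        printf("\t...skipping %u items...\n", seen - max);
                printf("\t%d\n", *last);
        }
}

int main(void)
{
        const int reqs[] = { 1, 2, 3, 4, 5, 6, 7, 8 };

        show_bounded(reqs, 8, 4);       /* prints 1..3, "...skipping 4...", then 8 */
        return 0;
}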
*/ - return IS_GEN(i915, 5) && IS_MOBILE(i915) && intel_vtd_active(); + if (!intel_vtd_active()) + return false; + + if (IS_GEN(i915, 5) && IS_MOBILE(i915)) + return true; + + if (IS_GEN(i915, 12)) + return true; /* XXX DMAR fault reason 7 */ + + return false; } void i915_ggtt_suspend(struct i915_ggtt *ggtt) @@ -1050,7 +1059,12 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) ggtt->vm.alloc_pt_dma = alloc_pt_dma; - ggtt->do_idle_maps = needs_idle_maps(i915); + if (needs_idle_maps(i915)) { + drm_notice(&i915->drm, + "Flushing DMA requests before IOMMU unmaps; performance may be degraded\n"); + ggtt->do_idle_maps = true; + } + ggtt->vm.insert_page = i915_ggtt_insert_page; ggtt->vm.insert_entries = i915_ggtt_insert_entries; ggtt->vm.clear_range = i915_ggtt_clear_range; diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c index 7fb36b12fe7a..a357bb431815 100644 --- a/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c +++ b/drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c @@ -320,13 +320,31 @@ void i915_vma_revoke_fence(struct i915_vma *vma) fence_write(fence); } +static bool fence_is_active(const struct i915_fence_reg *fence) +{ + return fence->vma && i915_vma_is_active(fence->vma); +} + static struct i915_fence_reg *fence_find(struct i915_ggtt *ggtt) { - struct i915_fence_reg *fence; + struct i915_fence_reg *active = NULL; + struct i915_fence_reg *fence, *fn; - list_for_each_entry(fence, &ggtt->fence_list, link) { + list_for_each_entry_safe(fence, fn, &ggtt->fence_list, link) { GEM_BUG_ON(fence->vma && fence->vma->fence != fence); + if (fence == active) /* now seen this fence twice */ + active = ERR_PTR(-EAGAIN); + + /* Prefer idle fences so we do not have to wait on the GPU */ + if (active != ERR_PTR(-EAGAIN) && fence_is_active(fence)) { + if (!active) + active = fence; + + list_move_tail(&fence->link, &ggtt->fence_list); + continue; + } + if (atomic_read(&fence->pin_count)) continue; diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c index 44f1d51e5ae5..d8e1ab412634 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt.c +++ b/drivers/gpu/drm/i915/gt/intel_gt.c @@ -46,6 +46,8 @@ void intel_gt_init_hw_early(struct intel_gt *gt, struct i915_ggtt *ggtt) int intel_gt_init_mmio(struct intel_gt *gt) { + intel_gt_init_clock_frequency(gt); + intel_uc_init_mmio(>->uc); intel_sseu_info_init(gt); @@ -546,8 +548,6 @@ int intel_gt_init(struct intel_gt *gt) */ intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); - intel_gt_init_clock_frequency(gt); - err = intel_gt_init_scratch(gt, IS_GEN(gt->i915, 2) ? 
SZ_256K : SZ_4K); if (err) goto out_fw; diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c index 999079686846..a4242ca8dcd7 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.c @@ -7,34 +7,146 @@ #include "intel_gt.h" #include "intel_gt_clock_utils.h" -#define MHZ_12 12000000 /* 12MHz (24MHz/2), 83.333ns */ -#define MHZ_12_5 12500000 /* 12.5MHz (25MHz/2), 80ns */ -#define MHZ_19_2 19200000 /* 19.2MHz, 52.083ns */ +static u32 read_reference_ts_freq(struct intel_uncore *uncore) +{ + u32 ts_override = intel_uncore_read(uncore, GEN9_TIMESTAMP_OVERRIDE); + u32 base_freq, frac_freq; + + base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; + base_freq *= 1000000; + + frac_freq = ((ts_override & + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> + GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); + frac_freq = 1000000 / (frac_freq + 1); + + return base_freq + frac_freq; +} + +static u32 gen10_get_crystal_clock_freq(struct intel_uncore *uncore, + u32 rpm_config_reg) +{ + u32 f19_2_mhz = 19200000; + u32 f24_mhz = 24000000; + u32 crystal_clock = + (rpm_config_reg & GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; -static u32 read_clock_frequency(const struct intel_gt *gt) + switch (crystal_clock) { + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + return f19_2_mhz; + case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + return f24_mhz; + default: + MISSING_CASE(crystal_clock); + return 0; + } +} + +static u32 gen11_get_crystal_clock_freq(struct intel_uncore *uncore, + u32 rpm_config_reg) { - if (INTEL_GEN(gt->i915) >= 11) { - u32 config; - - config = intel_uncore_read(gt->uncore, RPM_CONFIG0); - config &= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK; - config >>= GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; - - switch (config) { - case 0: return MHZ_12; - case 1: - case 2: return MHZ_19_2; - default: - case 3: return MHZ_12_5; + u32 f19_2_mhz = 19200000; + u32 f24_mhz = 24000000; + u32 f25_mhz = 25000000; + u32 f38_4_mhz = 38400000; + u32 crystal_clock = + (rpm_config_reg & GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> + GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; + + switch (crystal_clock) { + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: + return f24_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: + return f19_2_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: + return f38_4_mhz; + case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: + return f25_mhz; + default: + MISSING_CASE(crystal_clock); + return 0; + } +} + +static u32 read_clock_frequency(struct intel_uncore *uncore) +{ + u32 f12_5_mhz = 12500000; + u32 f19_2_mhz = 19200000; + u32 f24_mhz = 24000000; + + if (INTEL_GEN(uncore->i915) <= 4) { + /* + * PRMs say: + * + * "The value in this register increments once every 16 + * hclks." (through the “Clocking Configuration” + * (“CLKCFG”) MCHBAR register) + */ + return RUNTIME_INFO(uncore->i915)->rawclk_freq * 1000 / 16; + } else if (INTEL_GEN(uncore->i915) <= 8) { + /* + * PRMs say: + * + * "The PCU TSC counts 10ns increments; this timestamp + * reflects bits 38:3 of the TSC (i.e. 80ns granularity, + * rolling over every 1.5 hours). 
+ */ + return f12_5_mhz; + } else if (INTEL_GEN(uncore->i915) <= 9) { + u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); + u32 freq = 0; + + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(uncore); + } else { + freq = IS_GEN9_LP(uncore->i915) ? f19_2_mhz : f24_mhz; + + /* + * Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). + */ + freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >> + CTC_SHIFT_PARAMETER_SHIFT); } - } else if (INTEL_GEN(gt->i915) >= 9) { - if (IS_GEN9_LP(gt->i915)) - return MHZ_19_2; - else - return MHZ_12; - } else { - return MHZ_12_5; + + return freq; + } else if (INTEL_GEN(uncore->i915) <= 12) { + u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); + u32 freq = 0; + + /* + * First figure out the reference frequency. There are 2 ways + * we can compute the frequency, either through the + * TIMESTAMP_OVERRIDE register or through RPM_CONFIG. CTC_MODE + * tells us which one we should use. + */ + if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { + freq = read_reference_ts_freq(uncore); + } else { + u32 c0 = intel_uncore_read(uncore, RPM_CONFIG0); + + if (INTEL_GEN(uncore->i915) <= 10) + freq = gen10_get_crystal_clock_freq(uncore, c0); + else + freq = gen11_get_crystal_clock_freq(uncore, c0); + + /* + * Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). + */ + freq >>= 3 - ((c0 & GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + } + + return freq; } + + MISSING_CASE("Unknown gen, unable to read command streamer timestamp frequency\n"); + return 0; } void intel_gt_init_clock_frequency(struct intel_gt *gt) @@ -43,20 +155,27 @@ void intel_gt_init_clock_frequency(struct intel_gt *gt) * Note that on gen11+, the clock frequency may be reconfigured. * We do not, and we assume nobody else does. 
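A worked example of the arithmetic in read_clock_frequency() above: the command streamer's timestamp frequency is the reference (crystal) frequency scaled down by the shift parameter, and a clock period in nanoseconds follows by round-up division. The crystal value and shift below are just sample encodings from the code above, not measured values.

#include <stdint.h>
#include <stdio.h>

/* Reference (crystal) frequency scaled down by the shift parameter (0..3 here). */
static uint32_t cs_timestamp_freq(uint32_t crystal_hz, uint32_t shift_param)
{
        return crystal_hz >> (3 - shift_param);
}

int main(void)
{
        const uint32_t crystal_hz = 38400000;   /* 38.4 MHz, one of the encodings above */
        const uint32_t shift = 1;               /* example shift parameter */
        uint32_t freq = cs_timestamp_freq(crystal_hz, shift);

        /* Round the period up: a one-tick interval cannot be shorter than 1/freq. */
        uint64_t period_ns = (1000000000ull + freq - 1) / freq;

        printf("freq=%u Hz, period=%llu ns\n",
               (unsigned)freq, (unsigned long long)period_ns);
        return 0;
}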
*/ - gt->clock_frequency = read_clock_frequency(gt); + gt->clock_frequency = read_clock_frequency(gt->uncore); + if (gt->clock_frequency) + gt->clock_period_ns = intel_gt_clock_interval_to_ns(gt, 1); + GT_TRACE(gt, - "Using clock frequency: %dkHz\n", - gt->clock_frequency / 1000); + "Using clock frequency: %dkHz, period: %dns, wrap: %lldms\n", + gt->clock_frequency / 1000, + gt->clock_period_ns, + div_u64(mul_u32_u32(gt->clock_period_ns, S32_MAX), + USEC_PER_SEC)); + } #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) void intel_gt_check_clock_frequency(const struct intel_gt *gt) { - if (gt->clock_frequency != read_clock_frequency(gt)) { + if (gt->clock_frequency != read_clock_frequency(gt->uncore)) { dev_err(gt->i915->drm.dev, "GT clock frequency changed, was %uHz, now %uHz!\n", gt->clock_frequency, - read_clock_frequency(gt)); + read_clock_frequency(gt->uncore)); } } #endif @@ -66,26 +185,24 @@ static u64 div_u64_roundup(u64 nom, u32 den) return div_u64(nom + den - 1, den); } -u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count) +u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count) { - return div_u64_roundup(mul_u32_u32(count, 1000 * 1000 * 1000), - gt->clock_frequency); + return div_u64_roundup(count * NSEC_PER_SEC, gt->clock_frequency); } -u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count) +u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count) { return intel_gt_clock_interval_to_ns(gt, 16 * count); } -u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns) +u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns) { - return div_u64_roundup(mul_u32_u32(gt->clock_frequency, ns), - 1000 * 1000 * 1000); + return div_u64_roundup(gt->clock_frequency * ns, NSEC_PER_SEC); } -u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns) +u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns) { - u32 val; + u64 val; /* * Make these a multiple of magic 25 to avoid SNB (eg. Dell XPS @@ -94,9 +211,9 @@ u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns) * EI/thresholds are "bad", leading to a very sluggish or even * frozen machine. 
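A compact standalone version of the widened conversion helpers above, plus the wrap figure that the GT_TRACE line prints (clock period times S32_MAX, expressed in milliseconds). The constants mirror the shown code; the example frequency is arbitrary.

#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ull
#define USEC_PER_SEC 1000000ull
#define S32_MAX 0x7fffffffull

static uint64_t div_u64_roundup(uint64_t nom, uint32_t den)
{
        return (nom + den - 1) / den;
}

static uint64_t clock_interval_to_ns(uint32_t freq, uint64_t count)
{
        return div_u64_roundup(count * NSEC_PER_SEC, freq);
}

static uint64_t ns_to_clock_interval(uint32_t freq, uint64_t ns)
{
        return div_u64_roundup((uint64_t)freq * ns, NSEC_PER_SEC);
}

/* PM timers tick at 1/16th of the clock; gen6 additionally wants a multiple of 25. */
static uint64_t ns_to_pm_interval(uint32_t freq, uint64_t ns, int is_gen6)
{
        uint64_t val = div_u64_roundup(ns_to_clock_interval(freq, ns), 16);

        if (is_gen6)
                val = div_u64_roundup(val, 25) * 25;
        return val;
}

int main(void)
{
        const uint32_t freq = 19200000;         /* example clock frequency */
        uint64_t period_ns = clock_interval_to_ns(freq, 1);

        printf("period=%llu ns, wrap=%llu ms, pm(10us)=%llu\n",
               (unsigned long long)period_ns,
               (unsigned long long)(period_ns * S32_MAX / USEC_PER_SEC),
               (unsigned long long)ns_to_pm_interval(freq, 10000, 1));
        return 0;
}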
*/ - val = DIV_ROUND_UP(intel_gt_ns_to_clock_interval(gt, ns), 16); + val = div_u64_roundup(intel_gt_ns_to_clock_interval(gt, ns), 16); if (IS_GEN(gt->i915, 6)) - val = roundup(val, 25); + val = div_u64_roundup(val, 25) * 25; return val; } diff --git a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h index f793c89f2cbd..8b03e97a85df 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_clock_utils.h @@ -18,10 +18,10 @@ void intel_gt_check_clock_frequency(const struct intel_gt *gt); static inline void intel_gt_check_clock_frequency(const struct intel_gt *gt) {} #endif -u32 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u32 count); -u32 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u32 count); +u64 intel_gt_clock_interval_to_ns(const struct intel_gt *gt, u64 count); +u64 intel_gt_pm_interval_to_ns(const struct intel_gt *gt, u64 count); -u32 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u32 ns); -u32 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u32 ns); +u64 intel_gt_ns_to_clock_interval(const struct intel_gt *gt, u64 ns); +u64 intel_gt_ns_to_pm_interval(const struct intel_gt *gt, u64 ns); #endif /* __INTEL_GT_CLOCK_UTILS_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_gt_irq.c b/drivers/gpu/drm/i915/gt/intel_gt_irq.c index 257063a57101..9830342aa6f4 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_irq.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_irq.c @@ -11,6 +11,7 @@ #include "intel_breadcrumbs.h" #include "intel_gt.h" #include "intel_gt_irq.h" +#include "intel_lrc_reg.h" #include "intel_uncore.h" #include "intel_rps.h" diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c index 274aa0dd7050..c94e8ac884eb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c @@ -39,6 +39,28 @@ static void user_forcewake(struct intel_gt *gt, bool suspend) intel_gt_pm_put(gt); } +static void runtime_begin(struct intel_gt *gt) +{ + local_irq_disable(); + write_seqcount_begin(>->stats.lock); + gt->stats.start = ktime_get(); + gt->stats.active = true; + write_seqcount_end(>->stats.lock); + local_irq_enable(); +} + +static void runtime_end(struct intel_gt *gt) +{ + local_irq_disable(); + write_seqcount_begin(>->stats.lock); + gt->stats.active = false; + gt->stats.total = + ktime_add(gt->stats.total, + ktime_sub(ktime_get(), gt->stats.start)); + write_seqcount_end(>->stats.lock); + local_irq_enable(); +} + static int __gt_unpark(struct intel_wakeref *wf) { struct intel_gt *gt = container_of(wf, typeof(*gt), wakeref); @@ -67,6 +89,7 @@ static int __gt_unpark(struct intel_wakeref *wf) i915_pmu_gt_unparked(i915); intel_gt_unpark_requests(gt); + runtime_begin(gt); return 0; } @@ -79,6 +102,7 @@ static int __gt_park(struct intel_wakeref *wf) GT_TRACE(gt, "\n"); + runtime_end(gt); intel_gt_park_requests(gt); i915_vma_parked(gt); @@ -106,6 +130,7 @@ static const struct intel_wakeref_ops wf_ops = { void intel_gt_pm_init_early(struct intel_gt *gt) { intel_wakeref_init(>->wakeref, gt->uncore->rpm, &wf_ops); + seqcount_mutex_init(>->stats.lock, >->wakeref.mutex); } void intel_gt_pm_init(struct intel_gt *gt) @@ -339,6 +364,30 @@ int intel_gt_runtime_resume(struct intel_gt *gt) return intel_uc_runtime_resume(>->uc); } +static ktime_t __intel_gt_get_awake_time(const struct intel_gt *gt) +{ + ktime_t total = gt->stats.total; + + if (gt->stats.active) + total = ktime_add(total, + ktime_sub(ktime_get(), gt->stats.start)); + + 
return total; +} + +ktime_t intel_gt_get_awake_time(const struct intel_gt *gt) +{ + unsigned int seq; + ktime_t total; + + do { + seq = read_seqcount_begin(>->stats.lock); + total = __intel_gt_get_awake_time(gt); + } while (read_seqcount_retry(>->stats.lock, seq)); + + return total; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftest_gt_pm.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.h b/drivers/gpu/drm/i915/gt/intel_gt_pm.h index 60f0e2fbe55c..63846a856e7e 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_pm.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.h @@ -58,6 +58,8 @@ int intel_gt_resume(struct intel_gt *gt); void intel_gt_runtime_suspend(struct intel_gt *gt); int intel_gt_runtime_resume(struct intel_gt *gt); +ktime_t intel_gt_get_awake_time(const struct intel_gt *gt); + static inline bool is_mock_gt(const struct intel_gt *gt) { return I915_SELFTEST_ONLY(gt->awake == -ENODEV); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_requests.c b/drivers/gpu/drm/i915/gt/intel_gt_requests.c index 66fcbf9d0fdd..dc06c78c9eeb 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_requests.c +++ b/drivers/gpu/drm/i915/gt/intel_gt_requests.c @@ -135,13 +135,8 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) struct intel_gt_timelines *timelines = >->timelines; struct intel_timeline *tl, *tn; unsigned long active_count = 0; - bool interruptible; LIST_HEAD(free); - interruptible = true; - if (unlikely(timeout < 0)) - timeout = -timeout, interruptible = false; - flush_submission(gt, timeout); /* kick the ksoftirqd tasklets */ spin_lock(&timelines->lock); list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { @@ -163,7 +158,7 @@ long intel_gt_retire_requests_timeout(struct intel_gt *gt, long timeout) mutex_unlock(&tl->mutex); timeout = dma_fence_wait_timeout(fence, - interruptible, + true, timeout); dma_fence_put(fence); diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h b/drivers/gpu/drm/i915/gt/intel_gt_types.h index 6d39a4a11bf3..a83d3e18254d 100644 --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h @@ -75,6 +75,7 @@ struct intel_gt { intel_wakeref_t awake; u32 clock_frequency; + u32 clock_period_ns; struct intel_llc llc; struct intel_rc6 rc6; @@ -87,6 +88,30 @@ struct intel_gt { u32 pm_guc_events; + struct { + bool active; + + /** + * @lock: Lock protecting the below fields. + */ + seqcount_mutex_t lock; + + /** + * @total: Total time this engine was busy. + * + * Accumulated time not counting the most recent block in cases + * where engine is currently busy (active > 0). + */ + ktime_t total; + + /** + * @start: Timestamp of the last idle to active transition. + * + * Idle is defined as active == 0, active is active > 0. 
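The awake-time accounting added above reduces to: accumulate the elapsed slice when the GT parks, and have readers add the in-flight slice while it is unparked. A standalone sketch of just that bookkeeping, using a monotonic clock and leaving out the seqcount guard the real code wraps around both the update and the read.

#include <stdbool.h>
#include <stdint.h>
#include <time.h>

struct awake_stats {
        bool active;            /* currently unparked? */
        uint64_t start_ns;      /* timestamp of the last park -> unpark transition */
        uint64_t total_ns;      /* time accumulated over completed awake periods */
};

static uint64_t now_ns(void)
{
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return (uint64_t)ts.tv_sec * 1000000000ull + (uint64_t)ts.tv_nsec;
}

static void runtime_begin(struct awake_stats *s)
{
        s->start_ns = now_ns();
        s->active = true;
}

static void runtime_end(struct awake_stats *s)
{
        s->total_ns += now_ns() - s->start_ns;
        s->active = false;
}

/* Readers fold in the currently running awake period, if any. */
static uint64_t get_awake_time(const struct awake_stats *s)
{
        uint64_t total = s->total_ns;

        if (s->active)
                total += now_ns() - s->start_ns;
        return total;
}

int main(void)
{
        struct awake_stats s = { 0 };

        runtime_begin(&s);
        /* ... the GT is "awake" here ... */
        runtime_end(&s);

        (void)get_awake_time(&s);
        return 0;
}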
+ */ + ktime_t start; + } stats; + struct intel_engine_cs *engine[I915_NUM_ENGINES]; struct intel_engine_cs *engine_class[MAX_ENGINE_CLASS + 1] [MAX_ENGINE_INSTANCE + 1]; diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c index 7bfe9072be9a..04aa6601e984 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.c +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c @@ -422,6 +422,35 @@ void setup_private_pat(struct intel_uncore *uncore) bdw_setup_private_ppat(uncore); } +struct i915_vma * +__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(vm->i915, PAGE_ALIGN(size)); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, + i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER); + if (err) { + i915_vma_put(vma); + return ERR_PTR(err); + } + + return vma; +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_gtt.c" #endif diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h index 8a33940a71f3..29c10fde8ce3 100644 --- a/drivers/gpu/drm/i915/gt/intel_gtt.h +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h @@ -573,6 +573,9 @@ int i915_vm_pin_pt_stash(struct i915_address_space *vm, void i915_vm_free_pt_stash(struct i915_address_space *vm, struct i915_vm_pt_stash *stash); +struct i915_vma * +__vm_create_scratch_for_read(struct i915_address_space *vm, unsigned long size); + static inline struct sgt_dma { struct scatterlist *sg; dma_addr_t dma, max; diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.c b/drivers/gpu/drm/i915/gt/intel_lrc.c index 7614a3d24fca..a0fc78c89b61 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.c +++ b/drivers/gpu/drm/i915/gt/intel_lrc.c @@ -1,599 +1,23 @@ +// SPDX-License-Identifier: MIT /* * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Ben Widawsky <ben@bwidawsk.net> - * Michel Thierry <michel.thierry@intel.com> - * Thomas Daniel <thomas.daniel@intel.com> - * Oscar Mateo <oscar.mateo@intel.com> - * - */ - -/** - * DOC: Logical Rings, Logical Ring Contexts and Execlists - * - * Motivation: - * GEN8 brings an expansion of the HW contexts: "Logical Ring Contexts". 
- * These expanded contexts enable a number of new abilities, especially - * "Execlists" (also implemented in this file). - * - * One of the main differences with the legacy HW contexts is that logical - * ring contexts incorporate many more things to the context's state, like - * PDPs or ringbuffer control registers: - * - * The reason why PDPs are included in the context is straightforward: as - * PPGTTs (per-process GTTs) are actually per-context, having the PDPs - * contained there mean you don't need to do a ppgtt->switch_mm yourself, - * instead, the GPU will do it for you on the context switch. - * - * But, what about the ringbuffer control registers (head, tail, etc..)? - * shouldn't we just need a set of those per engine command streamer? This is - * where the name "Logical Rings" starts to make sense: by virtualizing the - * rings, the engine cs shifts to a new "ring buffer" with every context - * switch. When you want to submit a workload to the GPU you: A) choose your - * context, B) find its appropriate virtualized ring, C) write commands to it - * and then, finally, D) tell the GPU to switch to that context. - * - * Instead of the legacy MI_SET_CONTEXT, the way you tell the GPU to switch - * to a contexts is via a context execution list, ergo "Execlists". - * - * LRC implementation: - * Regarding the creation of contexts, we have: - * - * - One global default context. - * - One local default context for each opened fd. - * - One local extra context for each context create ioctl call. - * - * Now that ringbuffers belong per-context (and not per-engine, like before) - * and that contexts are uniquely tied to a given engine (and not reusable, - * like before) we need: - * - * - One ringbuffer per-engine inside each context. - * - One backing object per-engine inside each context. - * - * The global default context starts its life with these new objects fully - * allocated and populated. The local default context for each opened fd is - * more complex, because we don't know at creation time which engine is going - * to use them. To handle this, we have implemented a deferred creation of LR - * contexts: - * - * The local context starts its life as a hollow or blank holder, that only - * gets populated for a given engine once we receive an execbuffer. If later - * on we receive another execbuffer ioctl for the same context but a different - * engine, we allocate/populate a new ringbuffer and context backing object and - * so on. - * - * Finally, regarding local contexts created using the ioctl call: as they are - * only allowed with the render ring, we can allocate & populate them right - * away (no need to defer anything, at least for now). - * - * Execlists implementation: - * Execlists are the new method by which, on gen8+ hardware, workloads are - * submitted for execution (as opposed to the legacy, ringbuffer-based, method). - * This method works as follows: - * - * When a request is committed, its commands (the BB start and any leading or - * trailing commands, like the seqno breadcrumbs) are placed in the ringbuffer - * for the appropriate context. The tail pointer in the hardware context is not - * updated at this time, but instead, kept by the driver in the ringbuffer - * structure. A structure representing this request is added to a request queue - * for the appropriate engine: this structure contains a copy of the context's - * tail after the request was written to the ring buffer and a pointer to the - * context itself. 
- * - * If the engine's request queue was empty before the request was added, the - * queue is processed immediately. Otherwise the queue will be processed during - * a context switch interrupt. In any case, elements on the queue will get sent - * (in pairs) to the GPU's ExecLists Submit Port (ELSP, for short) with a - * globally unique 20-bits submission ID. - * - * When execution of a request completes, the GPU updates the context status - * buffer with a context complete event and generates a context switch interrupt. - * During the interrupt handling, the driver examines the events in the buffer: - * for each context complete event, if the announced ID matches that on the head - * of the request queue, then that request is retired and removed from the queue. - * - * After processing, if any requests were retired and the queue is not empty - * then a new execution list can be submitted. The two requests at the front of - * the queue are next to be submitted but since a context may not occur twice in - * an execution list, if subsequent requests have the same ID as the first then - * the two requests must be combined. This is done simply by discarding requests - * at the head of the queue until either only one requests is left (in which case - * we use a NULL second context) or the first two requests have unique IDs. - * - * By always executing the first two requests in the queue the driver ensures - * that the GPU is kept as busy as possible. In the case where a single context - * completes but a second context is still executing, the request for this second - * context will be at the head of the queue when we remove the first one. This - * request will then be resubmitted along with a new request for a different context, - * which will cause the hardware to continue executing the second request and queue - * the new request (the GPU detects the condition of a context getting preempted - * with the same context and optimizes the context switch flow by not doing - * preemption, but just sampling the new tail pointer). 
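A simplified sketch of the ELSP pairing rule described in the paragraphs above: the context at the head of the queue takes port 0 (absorbing immediately following requests from the same context, since a context may not appear twice in one execution list), and port 1 goes to the next distinct context if any. Integer context IDs stand in for requests.

#include <stdio.h>

/*
 * Pick the two ELSP ports from a FIFO of per-request context IDs.
 * Requests at the head that share the first context are folded into port 0;
 * port 1 is the next distinct context, or -1 when the queue runs dry.
 */
static void pick_elsp_pair(const int *ctx_queue, unsigned int count,
                           int *port0, int *port1)
{
        unsigned int i = 0;

        *port0 = count ? ctx_queue[0] : -1;
        while (i < count && ctx_queue[i] == *port0)
                i++;
        *port1 = (i < count) ? ctx_queue[i] : -1;
}

int main(void)
{
        const int queue[] = { 7, 7, 7, 3, 7 };
        int p0, p1;

        pick_elsp_pair(queue, 5, &p0, &p1);
        printf("port0=ctx%d port1=ctx%d\n", p0, p1);    /* port0=ctx7 port1=ctx3 */
        return 0;
}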
- * */ -#include <linux/interrupt.h> +#include "gen8_engine_cs.h" #include "i915_drv.h" #include "i915_perf.h" -#include "i915_trace.h" -#include "i915_vgpu.h" -#include "intel_breadcrumbs.h" -#include "intel_context.h" -#include "intel_engine_pm.h" +#include "intel_engine.h" +#include "intel_gpu_commands.h" #include "intel_gt.h" -#include "intel_gt_pm.h" -#include "intel_gt_requests.h" +#include "intel_lrc.h" #include "intel_lrc_reg.h" -#include "intel_mocs.h" -#include "intel_reset.h" #include "intel_ring.h" -#include "intel_workarounds.h" #include "shmem_utils.h" -#define RING_EXECLIST_QFULL (1 << 0x2) -#define RING_EXECLIST1_VALID (1 << 0x3) -#define RING_EXECLIST0_VALID (1 << 0x4) -#define RING_EXECLIST_ACTIVE_STATUS (3 << 0xE) -#define RING_EXECLIST1_ACTIVE (1 << 0x11) -#define RING_EXECLIST0_ACTIVE (1 << 0x12) - -#define GEN8_CTX_STATUS_IDLE_ACTIVE (1 << 0) -#define GEN8_CTX_STATUS_PREEMPTED (1 << 1) -#define GEN8_CTX_STATUS_ELEMENT_SWITCH (1 << 2) -#define GEN8_CTX_STATUS_ACTIVE_IDLE (1 << 3) -#define GEN8_CTX_STATUS_COMPLETE (1 << 4) -#define GEN8_CTX_STATUS_LITE_RESTORE (1 << 15) - -#define GEN8_CTX_STATUS_COMPLETED_MASK \ - (GEN8_CTX_STATUS_COMPLETE | GEN8_CTX_STATUS_PREEMPTED) - -#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) - -#define GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE (0x1) /* lower csb dword */ -#define GEN12_CTX_SWITCH_DETAIL(csb_dw) ((csb_dw) & 0xF) /* upper csb dword */ -#define GEN12_CSB_SW_CTX_ID_MASK GENMASK(25, 15) -#define GEN12_IDLE_CTX_ID 0x7FF -#define GEN12_CSB_CTX_VALID(csb_dw) \ - (FIELD_GET(GEN12_CSB_SW_CTX_ID_MASK, csb_dw) != GEN12_IDLE_CTX_ID) - -/* Typical size of the average request (2 pipecontrols and a MI_BB) */ -#define EXECLISTS_REQUEST_SIZE 64 /* bytes */ - -struct virtual_engine { - struct intel_engine_cs base; - struct intel_context context; - struct rcu_work rcu; - - /* - * We allow only a single request through the virtual engine at a time - * (each request in the timeline waits for the completion fence of - * the previous before being submitted). By restricting ourselves to - * only submitting a single request, each request is placed on to a - * physical to maximise load spreading (by virtue of the late greedy - * scheduling -- each real engine takes the next available request - * upon idling). - */ - struct i915_request *request; - - /* - * We keep a rbtree of available virtual engines inside each physical - * engine, sorted by priority. Here we preallocate the nodes we need - * for the virtual engine, indexed by physical_engine->id. - */ - struct ve_node { - struct rb_node rb; - int prio; - } nodes[I915_NUM_ENGINES]; - - /* - * Keep track of bonded pairs -- restrictions upon on our selection - * of physical engines any particular request may be submitted to. - * If we receive a submit-fence from a master engine, we will only - * use one of sibling_mask physical engines. - */ - struct ve_bond { - const struct intel_engine_cs *master; - intel_engine_mask_t sibling_mask; - } *bonds; - unsigned int num_bonds; - - /* And finally, which physical engines this virtual engine maps onto. 
*/ - unsigned int num_siblings; - struct intel_engine_cs *siblings[]; -}; - -static struct virtual_engine *to_virtual_engine(struct intel_engine_cs *engine) -{ - GEM_BUG_ON(!intel_engine_is_virtual(engine)); - return container_of(engine, struct virtual_engine, base); -} - -static int __execlists_context_alloc(struct intel_context *ce, - struct intel_engine_cs *engine); - -static void execlists_init_reg_state(u32 *reg_state, - const struct intel_context *ce, - const struct intel_engine_cs *engine, - const struct intel_ring *ring, - bool close); -static void -__execlists_update_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine, - u32 head); - -static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) -{ - if (INTEL_GEN(engine->i915) >= 12) - return 0x60; - else if (INTEL_GEN(engine->i915) >= 9) - return 0x54; - else if (engine->class == RENDER_CLASS) - return 0x58; - else - return -1; -} - -static int lrc_ring_gpr0(const struct intel_engine_cs *engine) -{ - if (INTEL_GEN(engine->i915) >= 12) - return 0x74; - else if (INTEL_GEN(engine->i915) >= 9) - return 0x68; - else if (engine->class == RENDER_CLASS) - return 0xd8; - else - return -1; -} - -static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine) -{ - if (INTEL_GEN(engine->i915) >= 12) - return 0x12; - else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS) - return 0x18; - else - return -1; -} - -static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine) -{ - int x; - - x = lrc_ring_wa_bb_per_ctx(engine); - if (x < 0) - return x; - - return x + 2; -} - -static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) -{ - int x; - - x = lrc_ring_indirect_ptr(engine); - if (x < 0) - return x; - - return x + 2; -} - -static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) -{ - if (engine->class != RENDER_CLASS) - return -1; - - if (INTEL_GEN(engine->i915) >= 12) - return 0xb6; - else if (INTEL_GEN(engine->i915) >= 11) - return 0xaa; - else - return -1; -} - -static u32 -lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) -{ - switch (INTEL_GEN(engine->i915)) { - default: - MISSING_CASE(INTEL_GEN(engine->i915)); - fallthrough; - case 12: - return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 11: - return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 10: - return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 9: - return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - case 8: - return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; - } -} - -static void -lrc_ring_setup_indirect_ctx(u32 *regs, - const struct intel_engine_cs *engine, - u32 ctx_bb_ggtt_addr, - u32 size) -{ - GEM_BUG_ON(!size); - GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES)); - GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1); - regs[lrc_ring_indirect_ptr(engine) + 1] = - ctx_bb_ggtt_addr | (size / CACHELINE_BYTES); - - GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1); - regs[lrc_ring_indirect_offset(engine) + 1] = - lrc_ring_indirect_offset_default(engine) << 6; -} - -static u32 intel_context_get_runtime(const struct intel_context *ce) -{ - /* - * We can use either ppHWSP[16] which is recorded before the context - * switch (and so excludes the cost of context switches) or use the - * value from the context image itself, which is saved/restored earlier - * and so includes the cost of the save. 
- */ - return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); -} - -static void mark_eio(struct i915_request *rq) -{ - if (i915_request_completed(rq)) - return; - - GEM_BUG_ON(i915_request_signaled(rq)); - - i915_request_set_error_once(rq, -EIO); - i915_request_mark_complete(rq); -} - -static struct i915_request * -active_request(const struct intel_timeline * const tl, struct i915_request *rq) -{ - struct i915_request *active = rq; - - rcu_read_lock(); - list_for_each_entry_continue_reverse(rq, &tl->requests, link) { - if (i915_request_completed(rq)) - break; - - active = rq; - } - rcu_read_unlock(); - - return active; -} - -static inline u32 intel_hws_preempt_address(struct intel_engine_cs *engine) -{ - return (i915_ggtt_offset(engine->status_page.vma) + - I915_GEM_HWS_PREEMPT_ADDR); -} - -static inline void -ring_set_paused(const struct intel_engine_cs *engine, int state) -{ - /* - * We inspect HWS_PREEMPT with a semaphore inside - * engine->emit_fini_breadcrumb. If the dword is true, - * the ring is paused as the semaphore will busywait - * until the dword is false. - */ - engine->status_page.addr[I915_GEM_HWS_PREEMPT] = state; - if (state) - wmb(); -} - -static inline struct i915_priolist *to_priolist(struct rb_node *rb) -{ - return rb_entry(rb, struct i915_priolist, node); -} - -static inline int rq_prio(const struct i915_request *rq) -{ - return READ_ONCE(rq->sched.attr.priority); -} - -static int effective_prio(const struct i915_request *rq) -{ - int prio = rq_prio(rq); - - /* - * If this request is special and must not be interrupted at any - * cost, so be it. Note we are only checking the most recent request - * in the context and so may be masking an earlier vip request. It - * is hoped that under the conditions where nopreempt is used, this - * will not matter (i.e. all requests to that context will be - * nopreempt for as long as desired). - */ - if (i915_request_has_nopreempt(rq)) - prio = I915_PRIORITY_UNPREEMPTABLE; - - return prio; -} - -static int queue_prio(const struct intel_engine_execlists *execlists) -{ - struct i915_priolist *p; - struct rb_node *rb; - - rb = rb_first_cached(&execlists->queue); - if (!rb) - return INT_MIN; - - /* - * As the priolist[] are inverted, with the highest priority in [0], - * we have to flip the index value to become priority. - */ - p = to_priolist(rb); - if (!I915_USER_PRIORITY_SHIFT) - return p->priority; - - return ((p->priority + 1) << I915_USER_PRIORITY_SHIFT) - ffs(p->used); -} - -static inline bool need_preempt(const struct intel_engine_cs *engine, - const struct i915_request *rq, - struct rb_node *rb) -{ - int last_prio; - - if (!intel_engine_has_semaphores(engine)) - return false; - - /* - * Check if the current priority hint merits a preemption attempt. - * - * We record the highest value priority we saw during rescheduling - * prior to this dequeue, therefore we know that if it is strictly - * less than the current tail of ESLP[0], we do not need to force - * a preempt-to-idle cycle. - * - * However, the priority hint is a mere hint that we may need to - * preempt. If that hint is stale or we may be trying to preempt - * ourselves, ignore the request. - * - * More naturally we would write - * prio >= max(0, last); - * except that we wish to prevent triggering preemption at the same - * priority level: the task that is running should remain running - * to preserve FIFO ordering of dependencies. 
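As a standalone illustration of the comparison described in the comment above: preemption is considered only when the queue's priority hint is strictly greater than the running request's effective priority (clamped to just below the normal level), so equal-priority work keeps its FIFO order. The priority constants here are stand-ins, not the driver's values.

#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define PRIO_NORMAL        0
#define PRIO_UNPREEMPTABLE INT_MAX      /* stand-in for a nopreempt request */

static int max_int(int a, int b)
{
        return a > b ? a : b;
}

/* Strictly-greater check: equal priority never triggers a preempt cycle. */
static bool need_preempt(int queue_priority_hint, int running_prio, bool nopreempt)
{
        int effective = nopreempt ? PRIO_UNPREEMPTABLE : running_prio;
        int last_prio = max_int(effective, PRIO_NORMAL - 1);

        return queue_priority_hint > last_prio;
}

int main(void)
{
        printf("%d %d %d\n",
               need_preempt(1, 0, false),       /* higher prio queued -> 1 */
               need_preempt(0, 0, false),       /* equal prio -> 0 (FIFO preserved) */
               need_preempt(5, 0, true));       /* running request is nopreempt -> 0 */
        return 0;
}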
- */ - last_prio = max(effective_prio(rq), I915_PRIORITY_NORMAL - 1); - if (engine->execlists.queue_priority_hint <= last_prio) - return false; - - /* - * Check against the first request in ELSP[1], it will, thanks to the - * power of PI, be the highest priority of that context. - */ - if (!list_is_last(&rq->sched.link, &engine->active.requests) && - rq_prio(list_next_entry(rq, sched.link)) > last_prio) - return true; - - if (rb) { - struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - bool preempt = false; - - if (engine == ve->siblings[0]) { /* only preempt one sibling */ - struct i915_request *next; - - rcu_read_lock(); - next = READ_ONCE(ve->request); - if (next) - preempt = rq_prio(next) > last_prio; - rcu_read_unlock(); - } - - if (preempt) - return preempt; - } - - /* - * If the inflight context did not trigger the preemption, then maybe - * it was the set of queued requests? Pick the highest priority in - * the queue (the first active priolist) and see if it deserves to be - * running instead of ELSP[0]. - * - * The highest priority request in the queue can not be either - * ELSP[0] or ELSP[1] as, thanks again to PI, if it was the same - * context, it's priority would not exceed ELSP[0] aka last_prio. - */ - return queue_prio(&engine->execlists) > last_prio; -} - -__maybe_unused static inline bool -assert_priority_queue(const struct i915_request *prev, - const struct i915_request *next) -{ - /* - * Without preemption, the prev may refer to the still active element - * which we refuse to let go. - * - * Even with preemption, there are times when we think it is better not - * to preempt and leave an ostensibly lower priority request in flight. - */ - if (i915_request_is_active(prev)) - return true; - - return rq_prio(prev) >= rq_prio(next); -} - -/* - * The context descriptor encodes various attributes of a context, - * including its GTT address and some flags. Because it's fairly - * expensive to calculate, we'll just do it once and cache the result, - * which remains valid until the context is unpinned. - * - * This is what a descriptor looks like, from LSB to MSB:: - * - * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) - * bits 12-31: LRCA, GTT address of (the HWSP of) this context - * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC) - * bits 53-54: mbz, reserved for use by hardware - * bits 55-63: group ID, currently unused and set to 0 - * - * Starting from Gen11, the upper dword of the descriptor has a new format: - * - * bits 32-36: reserved - * bits 37-47: SW context ID - * bits 48:53: engine instance - * bit 54: mbz, reserved for use by hardware - * bits 55-60: SW counter - * bits 61-63: engine class - * - * engine info, SW context ID and SW counter need to form a unique number - * (Context ID) per lrc. 
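A self-contained sketch that packs the Gen11+ upper-dword fields exactly as the layout above describes (SW context ID in bits 37-47, engine instance in 48-53, SW counter in 55-60, engine class in 61-63). The field positions come from that comment; the sample values are arbitrary.

#include <stdint.h>
#include <stdio.h>

/* Pack value v into bits [lo, hi] of a 64-bit descriptor. */
static uint64_t put_field(unsigned int lo, unsigned int hi, uint64_t v)
{
        return (v & ((1ull << (hi - lo + 1)) - 1)) << lo;
}

static uint64_t gen11_desc_upper(uint32_t sw_ctx_id, uint32_t instance,
                                 uint32_t sw_counter, uint32_t engine_class)
{
        return put_field(37, 47, sw_ctx_id) |   /* SW context ID   */
               put_field(48, 53, instance)  |   /* engine instance */
               put_field(55, 60, sw_counter)|   /* SW counter      */
               put_field(61, 63, engine_class); /* engine class    */
}

int main(void)
{
        /* e.g. SW context ID 42 on instance 0 of class 1 (values are illustrative) */
        printf("desc[63:32] = %#llx\n",
               (unsigned long long)(gen11_desc_upper(42, 0, 0, 1) >> 32));
        return 0;
}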
- */ -static u32 -lrc_descriptor(struct intel_context *ce, struct intel_engine_cs *engine) -{ - u32 desc; - - desc = INTEL_LEGACY_32B_CONTEXT; - if (i915_vm_is_4lvl(ce->vm)) - desc = INTEL_LEGACY_64B_CONTEXT; - desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; - - desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; - if (IS_GEN(engine->i915, 8)) - desc |= GEN8_CTX_L3LLC_COHERENT; - - return i915_ggtt_offset(ce->state) | desc; -} - -static inline unsigned int dword_in_page(void *addr) -{ - return offset_in_page(addr) / sizeof(u32); -} - static void set_offsets(u32 *regs, const u8 *data, const struct intel_engine_cs *engine, - bool clear) + bool close) #define NOP(x) (BIT(7) | (x)) #define LRI(count, flags) ((flags) << 6 | (count) | BUILD_BUG_ON_ZERO(count >= BIT(6))) #define POSTED BIT(0) @@ -601,7 +25,7 @@ static void set_offsets(u32 *regs, #define REG16(x) \ (((x) >> 9) | BIT(7) | BUILD_BUG_ON_ZERO(x >= 0x10000)), \ (((x) >> 2) & 0x7f) -#define END(total_state_size) 0, (total_state_size) +#define END 0 { const u32 base = engine->mmio_base; @@ -610,8 +34,6 @@ static void set_offsets(u32 *regs, if (*data & BIT(7)) { /* skip */ count = *data++ & ~BIT(7); - if (clear) - memset32(regs, MI_NOOP, count); regs += count; continue; } @@ -639,19 +61,11 @@ static void set_offsets(u32 *regs, } while (v & BIT(7)); regs[0] = base + (offset << 2); - if (clear) - regs[1] = 0; regs += 2; } while (--count); } - if (clear) { - u8 count = *++data; - - /* Clear past the tail for HW access */ - GEM_BUG_ON(dword_in_page(regs) > count); - memset32(regs, MI_NOOP, count - dword_in_page(regs)); - + if (close) { /* Close the batch; used mainly by live_lrc_layout() */ *regs = MI_BATCH_BUFFER_END; if (INTEL_GEN(engine->i915) >= 10) @@ -691,7 +105,7 @@ static const u8 gen8_xcs_offsets[] = { REG16(0x200), REG(0x028), - END(80) + END }; static const u8 gen9_xcs_offsets[] = { @@ -775,7 +189,7 @@ static const u8 gen9_xcs_offsets[] = { REG16(0x67c), REG(0x068), - END(176) + END }; static const u8 gen12_xcs_offsets[] = { @@ -807,7 +221,7 @@ static const u8 gen12_xcs_offsets[] = { REG16(0x274), REG16(0x270), - END(80) + END }; static const u8 gen8_rcs_offsets[] = { @@ -844,7 +258,7 @@ static const u8 gen8_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(80) + END }; static const u8 gen9_rcs_offsets[] = { @@ -928,7 +342,7 @@ static const u8 gen9_rcs_offsets[] = { REG16(0x67c), REG(0x68), - END(176) + END }; static const u8 gen11_rcs_offsets[] = { @@ -969,7 +383,7 @@ static const u8 gen11_rcs_offsets[] = { LRI(1, 0), REG(0x0c8), - END(80) + END }; static const u8 gen12_rcs_offsets[] = { @@ -1065,7 +479,7 @@ static const u8 gen12_rcs_offsets[] = { REG(0x084), NOP(1), - END(192) + END }; #undef END @@ -1104,2284 +518,440 @@ static const u8 *reg_offsets(const struct intel_engine_cs *engine) } } -static struct i915_request * -__unwind_incomplete_requests(struct intel_engine_cs *engine) -{ - struct i915_request *rq, *rn, *active = NULL; - struct list_head *pl; - int prio = I915_PRIORITY_INVALID; - - lockdep_assert_held(&engine->active.lock); - - list_for_each_entry_safe_reverse(rq, rn, - &engine->active.requests, - sched.link) { - if (i915_request_completed(rq)) - continue; /* XXX */ - - __i915_request_unsubmit(rq); - - /* - * Push the request back into the queue for later resubmission. - * If this request is not native to this physical engine (i.e. - * it came from a virtual source), push it back onto the virtual - * engine so that it can be moved across onto another physical - * engine as load dictates. 
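/*
 * Illustrative sketch (not part of the patch): the variable-length
 * offset encoding that the REG()/REG16() macros above feed into
 * set_offsets() - seven payload bits per byte, BIT(7) marking a
 * continuation byte, and the register offset stored as a dword index
 * (offset >> 2).  decode_offset() and main() are invented names.
 */
#include <stdint.h>
#include <stdio.h>

static uint32_t decode_offset(const uint8_t **data)
{
	uint32_t offset = 0;
	uint8_t v;

	do {
		v = *(*data)++;
		offset <<= 7;
		offset |= v & 0x7f;
	} while (v & 0x80);

	return offset << 2;			/* back to a byte offset */
}

int main(void)
{
	/* REG16(0x244) encodes as (0x244 >> 9) | BIT(7), (0x244 >> 2) & 0x7f */
	const uint8_t table[] = { (0x244 >> 9) | 0x80, (0x244 >> 2) & 0x7f };
	const uint8_t *p = table;

	printf("0x%x\n", decode_offset(&p));	/* prints 0x244 */
	return 0;
}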
- */ - if (likely(rq->execution_mask == engine->mask)) { - GEM_BUG_ON(rq_prio(rq) == I915_PRIORITY_INVALID); - if (rq_prio(rq) != prio) { - prio = rq_prio(rq); - pl = i915_sched_lookup_priolist(engine, prio); - } - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - - list_move(&rq->sched.link, pl); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - - /* Check in case we rollback so far we wrap [size/2] */ - if (intel_ring_direction(rq->ring, - rq->tail, - rq->ring->tail + 8) > 0) - rq->context->lrc.desc |= CTX_DESC_FORCE_RESTORE; - - active = rq; - } else { - struct intel_engine_cs *owner = rq->context->engine; - - WRITE_ONCE(rq->engine, owner); - owner->submit_request(rq); - active = NULL; - } - } - - return active; -} - -struct i915_request * -execlists_unwind_incomplete_requests(struct intel_engine_execlists *execlists) -{ - struct intel_engine_cs *engine = - container_of(execlists, typeof(*engine), execlists); - - return __unwind_incomplete_requests(engine); -} - -static inline void -execlists_context_status_change(struct i915_request *rq, unsigned long status) +static int lrc_ring_mi_mode(const struct intel_engine_cs *engine) { - /* - * Only used when GVT-g is enabled now. When GVT-g is disabled, - * The compiler should eliminate this function as dead-code. - */ - if (!IS_ENABLED(CONFIG_DRM_I915_GVT)) - return; - - atomic_notifier_call_chain(&rq->engine->context_status_notifier, - status, rq); + if (INTEL_GEN(engine->i915) >= 12) + return 0x60; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x54; + else if (engine->class == RENDER_CLASS) + return 0x58; + else + return -1; } -static void intel_engine_context_in(struct intel_engine_cs *engine) +static int lrc_ring_gpr0(const struct intel_engine_cs *engine) { - unsigned long flags; - - if (atomic_add_unless(&engine->stats.active, 1, 0)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - if (!atomic_add_unless(&engine->stats.active, 1, 0)) { - engine->stats.start = ktime_get(); - atomic_inc(&engine->stats.active); - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); + if (INTEL_GEN(engine->i915) >= 12) + return 0x74; + else if (INTEL_GEN(engine->i915) >= 9) + return 0x68; + else if (engine->class == RENDER_CLASS) + return 0xd8; + else + return -1; } -static void intel_engine_context_out(struct intel_engine_cs *engine) +static int lrc_ring_wa_bb_per_ctx(const struct intel_engine_cs *engine) { - unsigned long flags; - - GEM_BUG_ON(!atomic_read(&engine->stats.active)); - - if (atomic_add_unless(&engine->stats.active, -1, 1)) - return; - - write_seqlock_irqsave(&engine->stats.lock, flags); - if (atomic_dec_and_test(&engine->stats.active)) { - engine->stats.total = - ktime_add(engine->stats.total, - ktime_sub(ktime_get(), engine->stats.start)); - } - write_sequnlock_irqrestore(&engine->stats.lock, flags); + if (INTEL_GEN(engine->i915) >= 12) + return 0x12; + else if (INTEL_GEN(engine->i915) >= 9 || engine->class == RENDER_CLASS) + return 0x18; + else + return -1; } -static void -execlists_check_context(const struct intel_context *ce, - const struct intel_engine_cs *engine, - const char *when) +static int lrc_ring_indirect_ptr(const struct intel_engine_cs *engine) { - const struct intel_ring *ring = ce->ring; - u32 *regs = ce->lrc_reg_state; - bool valid = true; int x; - if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { - pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", - engine->name, - regs[CTX_RING_START], - i915_ggtt_offset(ring->vma)); - 
regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); - valid = false; - } - - if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != - (RING_CTL_SIZE(ring->size) | RING_VALID)) { - pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", - engine->name, - regs[CTX_RING_CTL], - (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); - regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; - valid = false; - } - - x = lrc_ring_mi_mode(engine); - if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { - pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", - engine->name, regs[x + 1]); - regs[x + 1] &= ~STOP_RING; - regs[x + 1] |= STOP_RING << 16; - valid = false; - } + x = lrc_ring_wa_bb_per_ctx(engine); + if (x < 0) + return x; - WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when); + return x + 2; } -static void restore_default_state(struct intel_context *ce, - struct intel_engine_cs *engine) +static int lrc_ring_indirect_offset(const struct intel_engine_cs *engine) { - u32 *regs; + int x; - regs = memset(ce->lrc_reg_state, 0, engine->context_size - PAGE_SIZE); - execlists_init_reg_state(regs, ce, engine, ce->ring, true); + x = lrc_ring_indirect_ptr(engine); + if (x < 0) + return x; - ce->runtime.last = intel_context_get_runtime(ce); + return x + 2; } -static void reset_active(struct i915_request *rq, - struct intel_engine_cs *engine) +static int lrc_ring_cmd_buf_cctl(const struct intel_engine_cs *engine) { - struct intel_context * const ce = rq->context; - u32 head; - - /* - * The executing context has been cancelled. We want to prevent - * further execution along this context and propagate the error on - * to anything depending on its results. - * - * In __i915_request_submit(), we apply the -EIO and remove the - * requests' payloads for any banned requests. But first, we must - * rewind the context back to the start of the incomplete request so - * that we do not jump back into the middle of the batch. - * - * We preserve the breadcrumbs and semaphores of the incomplete - * requests so that inter-timeline dependencies (i.e other timelines) - * remain correctly ordered. And we defer to __i915_request_submit() - * so that all asynchronous waits are correctly handled. 
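/*
 * Illustrative sketch (not part of the patch): the convention shared by
 * the lrc_ring_*() helpers above - each returns the dword index of a
 * register's slot in the context image, or -1 when that engine/gen does
 * not carry it, and callers touch the value at regs[x + 1] because
 * regs[x] holds the MI_LOAD_REGISTER_IMM header.  The gen numbers below
 * are copied from lrc_ring_mi_mode(); the function names are invented.
 */
#include <stdint.h>

static int mi_mode_slot(int gen, int is_render)
{
	if (gen >= 12)
		return 0x60;
	else if (gen >= 9)
		return 0x54;
	else if (is_render)
		return 0x58;
	else
		return -1;
}

static void write_mi_mode(uint32_t *regs, int gen, int is_render, uint32_t val)
{
	int x = mi_mode_slot(gen, is_render);

	if (x < 0)
		return;			/* register absent from this layout */

	regs[x + 1] = val;		/* slot x is the LRI header dword */
}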
- */ - ENGINE_TRACE(engine, "{ rq=%llx:%lld }\n", - rq->fence.context, rq->fence.seqno); + if (engine->class != RENDER_CLASS) + return -1; - /* On resubmission of the active request, payload will be scrubbed */ - if (i915_request_completed(rq)) - head = rq->tail; + if (INTEL_GEN(engine->i915) >= 12) + return 0xb6; + else if (INTEL_GEN(engine->i915) >= 11) + return 0xaa; else - head = active_request(ce->timeline, rq)->head; - head = intel_ring_wrap(ce->ring, head); - - /* Scrub the context image to prevent replaying the previous batch */ - restore_default_state(ce, engine); - __execlists_update_reg_state(ce, engine, head); - - /* We've switched away, so this should be a no-op, but intent matters */ - ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; -} - -static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) -{ -#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) - ce->runtime.num_underflow += dt < 0; - ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); -#endif + return -1; } -static void intel_context_update_runtime(struct intel_context *ce) +static u32 +lrc_ring_indirect_offset_default(const struct intel_engine_cs *engine) { - u32 old; - s32 dt; - - if (intel_context_is_barrier(ce)) - return; - - old = ce->runtime.last; - ce->runtime.last = intel_context_get_runtime(ce); - dt = ce->runtime.last - old; - - if (unlikely(dt <= 0)) { - CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", - old, ce->runtime.last, dt); - st_update_runtime_underflow(ce, dt); - return; + switch (INTEL_GEN(engine->i915)) { + default: + MISSING_CASE(INTEL_GEN(engine->i915)); + fallthrough; + case 12: + return GEN12_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 11: + return GEN11_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 10: + return GEN10_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 9: + return GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; + case 8: + return GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT; } - - ewma_runtime_add(&ce->runtime.avg, dt); - ce->runtime.total += dt; } -static inline struct intel_engine_cs * -__execlists_schedule_in(struct i915_request *rq) +static void +lrc_setup_indirect_ctx(u32 *regs, + const struct intel_engine_cs *engine, + u32 ctx_bb_ggtt_addr, + u32 size) { - struct intel_engine_cs * const engine = rq->engine; - struct intel_context * const ce = rq->context; - - intel_context_get(ce); - - if (unlikely(intel_context_is_banned(ce))) - reset_active(rq, engine); - - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - execlists_check_context(ce, engine, "before"); - - if (ce->tag) { - /* Use a fixed tag for OA and friends */ - GEM_BUG_ON(ce->tag <= BITS_PER_LONG); - ce->lrc.ccid = ce->tag; - } else { - /* We don't need a strict matching tag, just different values */ - unsigned int tag = ffs(READ_ONCE(engine->context_tag)); - - GEM_BUG_ON(tag == 0 || tag >= BITS_PER_LONG); - clear_bit(tag - 1, &engine->context_tag); - ce->lrc.ccid = tag << (GEN11_SW_CTX_ID_SHIFT - 32); - - BUILD_BUG_ON(BITS_PER_LONG > GEN12_MAX_CONTEXT_HW_ID); - } - - ce->lrc.ccid |= engine->execlists.ccid; - - __intel_gt_pm_get(engine->gt); - if (engine->fw_domain && !atomic_fetch_inc(&engine->fw_active)) - intel_uncore_forcewake_get(engine->uncore, engine->fw_domain); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN); - intel_engine_context_in(engine); + GEM_BUG_ON(!size); + GEM_BUG_ON(!IS_ALIGNED(size, CACHELINE_BYTES)); + GEM_BUG_ON(lrc_ring_indirect_ptr(engine) == -1); + regs[lrc_ring_indirect_ptr(engine) + 1] = + ctx_bb_ggtt_addr | (size / CACHELINE_BYTES); - return engine; + 
GEM_BUG_ON(lrc_ring_indirect_offset(engine) == -1); + regs[lrc_ring_indirect_offset(engine) + 1] = + lrc_ring_indirect_offset_default(engine) << 6; } -static inline struct i915_request * -execlists_schedule_in(struct i915_request *rq, int idx) +static void init_common_regs(u32 * const regs, + const struct intel_context *ce, + const struct intel_engine_cs *engine, + bool inhibit) { - struct intel_context * const ce = rq->context; - struct intel_engine_cs *old; - - GEM_BUG_ON(!intel_engine_pm_is_awake(rq->engine)); - trace_i915_request_in(rq, idx); - - old = READ_ONCE(ce->inflight); - do { - if (!old) { - WRITE_ONCE(ce->inflight, __execlists_schedule_in(rq)); - break; - } - } while (!try_cmpxchg(&ce->inflight, &old, ptr_inc(old))); - - GEM_BUG_ON(intel_context_inflight(ce) != rq->engine); - return i915_request_get(rq); -} + u32 ctl; -static void kick_siblings(struct i915_request *rq, struct intel_context *ce) -{ - struct virtual_engine *ve = container_of(ce, typeof(*ve), context); - struct i915_request *next = READ_ONCE(ve->request); + ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + if (inhibit) + ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; + if (INTEL_GEN(engine->i915) < 11) + ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | + CTX_CTRL_RS_CTX_ENABLE); + regs[CTX_CONTEXT_CONTROL] = ctl; - if (next == rq || (next && next->execution_mask & ~rq->execution_mask)) - tasklet_hi_schedule(&ve->base.execlists.tasklet); + regs[CTX_TIMESTAMP] = ce->runtime.last; } -static inline void -__execlists_schedule_out(struct i915_request *rq, - struct intel_engine_cs * const engine, - unsigned int ccid) +static void init_wa_bb_regs(u32 * const regs, + const struct intel_engine_cs *engine) { - struct intel_context * const ce = rq->context; - - /* - * NB process_csb() is not under the engine->active.lock and hence - * schedule_out can race with schedule_in meaning that we should - * refrain from doing non-trivial work here. - */ + const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - execlists_check_context(ce, engine, "after"); + if (wa_ctx->per_ctx.size) { + const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - /* - * If we have just completed this context, the engine may now be - * idle and we want to re-enter powersaving. - */ - if (list_is_last_rcu(&rq->link, &ce->timeline->requests) && - i915_request_completed(rq)) - intel_engine_add_retire(engine, ce->timeline); - - ccid >>= GEN11_SW_CTX_ID_SHIFT - 32; - ccid &= GEN12_MAX_CONTEXT_HW_ID; - if (ccid < BITS_PER_LONG) { - GEM_BUG_ON(ccid == 0); - GEM_BUG_ON(test_bit(ccid - 1, &engine->context_tag)); - set_bit(ccid - 1, &engine->context_tag); + GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); + regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = + (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; } - intel_context_update_runtime(ce); - intel_engine_context_out(engine); - execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT); - if (engine->fw_domain && !atomic_dec_return(&engine->fw_active)) - intel_uncore_forcewake_put(engine->uncore, engine->fw_domain); - intel_gt_pm_put_async(engine->gt); - - /* - * If this is part of a virtual engine, its next request may - * have been blocked waiting for access to the active context. - * We have to kick all the siblings again in case we need to - * switch (e.g. the next request is not runnable on this - * engine). 
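/*
 * Illustrative sketch (not part of the patch): the packing performed by
 * lrc_setup_indirect_ctx() above - because the workaround batch address
 * and its size are both cacheline aligned, the low bits freed by that
 * alignment carry the size in cachelines.  A 64-byte cacheline and the
 * names below are assumptions of this example.
 */
#include <assert.h>
#include <stdint.h>

#define CACHELINE_SZ 64u

static uint32_t pack_indirect_ctx(uint32_t ggtt_addr, uint32_t size)
{
	assert(size && (size % CACHELINE_SZ) == 0);
	assert((ggtt_addr % CACHELINE_SZ) == 0);

	return ggtt_addr | (size / CACHELINE_SZ);
}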
Hopefully, we will already have submitted the next - * request before the tasklet runs and do not need to rebuild - * each virtual tree and kick everyone again. - */ - if (ce->engine != engine) - kick_siblings(rq, ce); - - intel_context_put(ce); -} - -static inline void -execlists_schedule_out(struct i915_request *rq) -{ - struct intel_context * const ce = rq->context; - struct intel_engine_cs *cur, *old; - u32 ccid; - - trace_i915_request_out(rq); - - ccid = rq->context->lrc.ccid; - old = READ_ONCE(ce->inflight); - do - cur = ptr_unmask_bits(old, 2) ? ptr_dec(old) : NULL; - while (!try_cmpxchg(&ce->inflight, &old, cur)); - if (!cur) - __execlists_schedule_out(rq, old, ccid); - - i915_request_put(rq); -} - -static u64 execlists_update_context(struct i915_request *rq) -{ - struct intel_context *ce = rq->context; - u64 desc = ce->lrc.desc; - u32 tail, prev; - - /* - * WaIdleLiteRestore:bdw,skl - * - * We should never submit the context with the same RING_TAIL twice - * just in case we submit an empty ring, which confuses the HW. - * - * We append a couple of NOOPs (gen8_emit_wa_tail) after the end of - * the normal request to be able to always advance the RING_TAIL on - * subsequent resubmissions (for lite restore). Should that fail us, - * and we try and submit the same tail again, force the context - * reload. - * - * If we need to return to a preempted context, we need to skip the - * lite-restore and force it to reload the RING_TAIL. Otherwise, the - * HW has a tendency to ignore us rewinding the TAIL to the end of - * an earlier request. - */ - GEM_BUG_ON(ce->lrc_reg_state[CTX_RING_TAIL] != rq->ring->tail); - prev = rq->ring->tail; - tail = intel_ring_set_tail(rq->ring, rq->tail); - if (unlikely(intel_ring_direction(rq->ring, tail, prev) <= 0)) - desc |= CTX_DESC_FORCE_RESTORE; - ce->lrc_reg_state[CTX_RING_TAIL] = tail; - rq->tail = rq->wa_tail; - - /* - * Make sure the context image is complete before we submit it to HW. - * - * Ostensibly, writes (including the WCB) should be flushed prior to - * an uncached write such as our mmio register access, the empirical - * evidence (esp. on Braswell) suggests that the WC write into memory - * may not be visible to the HW prior to the completion of the UC - * register write and that we may begin execution from the context - * before its image is complete leading to invalid PD chasing. - */ - wmb(); - - ce->lrc.desc &= ~CTX_DESC_FORCE_RESTORE; - return desc; -} - -static inline void write_desc(struct intel_engine_execlists *execlists, u64 desc, u32 port) -{ - if (execlists->ctrl_reg) { - writel(lower_32_bits(desc), execlists->submit_reg + port * 2); - writel(upper_32_bits(desc), execlists->submit_reg + port * 2 + 1); - } else { - writel(upper_32_bits(desc), execlists->submit_reg); - writel(lower_32_bits(desc), execlists->submit_reg); + if (wa_ctx->indirect_ctx.size) { + lrc_setup_indirect_ctx(regs, engine, + i915_ggtt_offset(wa_ctx->vma) + + wa_ctx->indirect_ctx.offset, + wa_ctx->indirect_ctx.size); } } -static __maybe_unused char * -dump_port(char *buf, int buflen, const char *prefix, struct i915_request *rq) +static void init_ppgtt_regs(u32 *regs, const struct i915_ppgtt *ppgtt) { - if (!rq) - return ""; - - snprintf(buf, buflen, "%sccid:%x %llx:%lld%s prio %d", - prefix, - rq->context->lrc.ccid, - rq->fence.context, rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? 
"*" : - "", - rq_prio(rq)); - - return buf; -} - -static __maybe_unused void -trace_ports(const struct intel_engine_execlists *execlists, - const char *msg, - struct i915_request * const *ports) -{ - const struct intel_engine_cs *engine = - container_of(execlists, typeof(*engine), execlists); - char __maybe_unused p0[40], p1[40]; - - if (!ports[0]) - return; - - ENGINE_TRACE(engine, "%s { %s%s }\n", msg, - dump_port(p0, sizeof(p0), "", ports[0]), - dump_port(p1, sizeof(p1), ", ", ports[1])); -} - -static inline bool -reset_in_progress(const struct intel_engine_execlists *execlists) -{ - return unlikely(!__tasklet_is_enabled(&execlists->tasklet)); -} - -static __maybe_unused bool -assert_pending_valid(const struct intel_engine_execlists *execlists, - const char *msg) -{ - struct intel_engine_cs *engine = - container_of(execlists, typeof(*engine), execlists); - struct i915_request * const *port, *rq; - struct intel_context *ce = NULL; - bool sentinel = false; - u32 ccid = -1; - - trace_ports(execlists, msg, execlists->pending); - - /* We may be messing around with the lists during reset, lalala */ - if (reset_in_progress(execlists)) - return true; - - if (!execlists->pending[0]) { - GEM_TRACE_ERR("%s: Nothing pending for promotion!\n", - engine->name); - return false; - } - - if (execlists->pending[execlists_num_ports(execlists)]) { - GEM_TRACE_ERR("%s: Excess pending[%d] for promotion!\n", - engine->name, execlists_num_ports(execlists)); - return false; - } - - for (port = execlists->pending; (rq = *port); port++) { - unsigned long flags; - bool ok = true; - - GEM_BUG_ON(!kref_read(&rq->fence.refcount)); - GEM_BUG_ON(!i915_request_is_active(rq)); - - if (ce == rq->context) { - GEM_TRACE_ERR("%s: Dup context:%llx in pending[%zd]\n", - engine->name, - ce->timeline->fence_context, - port - execlists->pending); - return false; - } - ce = rq->context; - - if (ccid == ce->lrc.ccid) { - GEM_TRACE_ERR("%s: Dup ccid:%x context:%llx in pending[%zd]\n", - engine->name, - ccid, ce->timeline->fence_context, - port - execlists->pending); - return false; - } - ccid = ce->lrc.ccid; - - /* - * Sentinels are supposed to be the last request so they flush - * the current execution off the HW. Check that they are the only - * request in the pending submission. + if (i915_vm_is_4lvl(&ppgtt->vm)) { + /* 64b PPGTT (48bit canonical) + * PDP0_DESCRIPTOR contains the base address to PML4 and + * other PDP Descriptors are ignored. */ - if (sentinel) { - GEM_TRACE_ERR("%s: context:%llx after sentinel in pending[%zd]\n", - engine->name, - ce->timeline->fence_context, - port - execlists->pending); - return false; - } - sentinel = i915_request_has_sentinel(rq); - - /* Hold tightly onto the lock to prevent concurrent retires! 
*/ - if (!spin_trylock_irqsave(&rq->lock, flags)) - continue; - - if (i915_request_completed(rq)) - goto unlock; - - if (i915_active_is_idle(&ce->active) && - !intel_context_is_barrier(ce)) { - GEM_TRACE_ERR("%s: Inactive context:%llx in pending[%zd]\n", - engine->name, - ce->timeline->fence_context, - port - execlists->pending); - ok = false; - goto unlock; - } - - if (!i915_vma_is_pinned(ce->state)) { - GEM_TRACE_ERR("%s: Unpinned context:%llx in pending[%zd]\n", - engine->name, - ce->timeline->fence_context, - port - execlists->pending); - ok = false; - goto unlock; - } - - if (!i915_vma_is_pinned(ce->ring->vma)) { - GEM_TRACE_ERR("%s: Unpinned ring:%llx in pending[%zd]\n", - engine->name, - ce->timeline->fence_context, - port - execlists->pending); - ok = false; - goto unlock; - } - -unlock: - spin_unlock_irqrestore(&rq->lock, flags); - if (!ok) - return false; - } - - return ce; -} - -static void execlists_submit_ports(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *execlists = &engine->execlists; - unsigned int n; - - GEM_BUG_ON(!assert_pending_valid(execlists, "submit")); - - /* - * We can skip acquiring intel_runtime_pm_get() here as it was taken - * on our behalf by the request (see i915_gem_mark_busy()) and it will - * not be relinquished until the device is idle (see - * i915_gem_idle_work_handler()). As a precaution, we make sure - * that all ELSP are drained i.e. we have processed the CSB, - * before allowing ourselves to idle and calling intel_runtime_pm_put(). - */ - GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); - - /* - * ELSQ note: the submit queue is not cleared after being submitted - * to the HW so we need to make sure we always clean it up. This is - * currently ensured by the fact that we always write the same number - * of elsq entries, keep this in mind before changing the loop below. - */ - for (n = execlists_num_ports(execlists); n--; ) { - struct i915_request *rq = execlists->pending[n]; - - write_desc(execlists, - rq ? execlists_update_context(rq) : 0, - n); - } - - /* we need to manually load the submit queue */ - if (execlists->ctrl_reg) - writel(EL_CTRL_LOAD, execlists->ctrl_reg); -} - -static bool ctx_single_port_submission(const struct intel_context *ce) -{ - return (IS_ENABLED(CONFIG_DRM_I915_GVT) && - intel_context_force_single_submission(ce)); -} - -static bool can_merge_ctx(const struct intel_context *prev, - const struct intel_context *next) -{ - if (prev != next) - return false; - - if (ctx_single_port_submission(prev)) - return false; - - return true; -} - -static unsigned long i915_request_flags(const struct i915_request *rq) -{ - return READ_ONCE(rq->fence.flags); -} - -static bool can_merge_rq(const struct i915_request *prev, - const struct i915_request *next) -{ - GEM_BUG_ON(prev == next); - GEM_BUG_ON(!assert_priority_queue(prev, next)); - - /* - * We do not submit known completed requests. Therefore if the next - * request is already completed, we can pretend to merge it in - * with the previous context (and we will skip updating the ELSP - * and tracking). Thus hopefully keeping the ELSP full with active - * contexts, despite the best efforts of preempt-to-busy to confuse - * us. 
- */ - if (i915_request_completed(next)) - return true; - - if (unlikely((i915_request_flags(prev) ^ i915_request_flags(next)) & - (BIT(I915_FENCE_FLAG_NOPREEMPT) | - BIT(I915_FENCE_FLAG_SENTINEL)))) - return false; - - if (!can_merge_ctx(prev->context, next->context)) - return false; - - GEM_BUG_ON(i915_seqno_passed(prev->fence.seqno, next->fence.seqno)); - return true; -} - -static void virtual_update_register_offsets(u32 *regs, - struct intel_engine_cs *engine) -{ - set_offsets(regs, reg_offsets(engine), engine, false); -} - -static bool virtual_matches(const struct virtual_engine *ve, - const struct i915_request *rq, - const struct intel_engine_cs *engine) -{ - const struct intel_engine_cs *inflight; - - if (!(rq->execution_mask & engine->mask)) /* We peeked too soon! */ - return false; - - /* - * We track when the HW has completed saving the context image - * (i.e. when we have seen the final CS event switching out of - * the context) and must not overwrite the context image before - * then. This restricts us to only using the active engine - * while the previous virtualized request is inflight (so - * we reuse the register offsets). This is a very small - * hystersis on the greedy seelction algorithm. - */ - inflight = intel_context_inflight(&ve->context); - if (inflight && inflight != engine) - return false; - - return true; -} - -static void virtual_xfer_context(struct virtual_engine *ve, - struct intel_engine_cs *engine) -{ - unsigned int n; - - if (likely(engine == ve->siblings[0])) - return; - - GEM_BUG_ON(READ_ONCE(ve->context.inflight)); - if (!intel_engine_has_relative_mmio(engine)) - virtual_update_register_offsets(ve->context.lrc_reg_state, - engine); - - /* - * Move the bound engine to the top of the list for - * future execution. We then kick this tasklet first - * before checking others, so that we preferentially - * reuse this set of bound registers. - */ - for (n = 1; n < ve->num_siblings; n++) { - if (ve->siblings[n] == engine) { - swap(ve->siblings[n], ve->siblings[0]); - break; - } + ASSIGN_CTX_PML4(ppgtt, regs); + } else { + ASSIGN_CTX_PDP(ppgtt, regs, 3); + ASSIGN_CTX_PDP(ppgtt, regs, 2); + ASSIGN_CTX_PDP(ppgtt, regs, 1); + ASSIGN_CTX_PDP(ppgtt, regs, 0); } } -#define for_each_waiter(p__, rq__) \ - list_for_each_entry_lockless(p__, \ - &(rq__)->sched.waiters_list, \ - wait_link) - -#define for_each_signaler(p__, rq__) \ - list_for_each_entry_rcu(p__, \ - &(rq__)->sched.signalers_list, \ - signal_link) - -static void defer_request(struct i915_request *rq, struct list_head * const pl) -{ - LIST_HEAD(list); - - /* - * We want to move the interrupted request to the back of - * the round-robin list (i.e. its priority level), but - * in doing so, we must then move all requests that were in - * flight and were waiting for the interrupted request to - * be run after it again. 
- */ - do { - struct i915_dependency *p; - - GEM_BUG_ON(i915_request_is_active(rq)); - list_move_tail(&rq->sched.link, pl); - - for_each_waiter(p, rq) { - struct i915_request *w = - container_of(p->waiter, typeof(*w), sched); - - if (p->flags & I915_DEPENDENCY_WEAK) - continue; - - /* Leave semaphores spinning on the other engines */ - if (w->engine != rq->engine) - continue; - - /* No waiter should start before its signaler */ - GEM_BUG_ON(i915_request_has_initial_breadcrumb(w) && - i915_request_started(w) && - !i915_request_completed(rq)); - - GEM_BUG_ON(i915_request_is_active(w)); - if (!i915_request_is_ready(w)) - continue; - - if (rq_prio(w) < rq_prio(rq)) - continue; - - GEM_BUG_ON(rq_prio(w) > rq_prio(rq)); - list_move_tail(&w->sched.link, &list); - } - - rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); - } while (rq); -} - -static void defer_active(struct intel_engine_cs *engine) +static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) { - struct i915_request *rq; - - rq = __unwind_incomplete_requests(engine); - if (!rq) - return; - - defer_request(rq, i915_sched_lookup_priolist(engine, rq_prio(rq))); + if (i915_is_ggtt(vm)) + return i915_vm_to_ggtt(vm)->alias; + else + return i915_vm_to_ppgtt(vm); } -static bool -need_timeslice(const struct intel_engine_cs *engine, - const struct i915_request *rq, - const struct rb_node *rb) +static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) { - int hint; - - if (!intel_engine_has_timeslices(engine)) - return false; - - hint = engine->execlists.queue_priority_hint; - - if (rb) { - const struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - const struct intel_engine_cs *inflight = - intel_context_inflight(&ve->context); - - if (!inflight || inflight == engine) { - struct i915_request *next; + int x; - rcu_read_lock(); - next = READ_ONCE(ve->request); - if (next) - hint = max(hint, rq_prio(next)); - rcu_read_unlock(); - } + x = lrc_ring_mi_mode(engine); + if (x != -1) { + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; } - - if (!list_is_last(&rq->sched.link, &engine->active.requests)) - hint = max(hint, rq_prio(list_next_entry(rq, sched.link))); - - GEM_BUG_ON(hint >= I915_PRIORITY_UNPREEMPTABLE); - return hint >= effective_prio(rq); } -static bool -timeslice_yield(const struct intel_engine_execlists *el, - const struct i915_request *rq) +static void __lrc_init_regs(u32 *regs, + const struct intel_context *ce, + const struct intel_engine_cs *engine, + bool inhibit) { /* - * Once bitten, forever smitten! + * A context is actually a big batch buffer with several + * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The + * values we are setting here are only for the first context restore: + * on a subsequent save, the GPU will recreate this batchbuffer with new + * values (including all the missing MI_LOAD_REGISTER_IMM commands that + * we are not initializing here). * - * If the active context ever busy-waited on a semaphore, - * it will be treated as a hog until the end of its timeslice (i.e. - * until it is scheduled out and replaced by a new submission, - * possibly even its own lite-restore). The HW only sends an interrupt - * on the first miss, and we do know if that semaphore has been - * signaled, or even if it is now stuck on another semaphore. Play - * safe, yield if it might be stuck -- it will be given a fresh - * timeslice in the near future. + * Must keep consistent with virtual_update_register_offsets(). 
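/*
 * Illustrative sketch (not part of the patch): the masked-register
 * convention behind the STOP_RING manipulation in __reset_stop_ring()
 * above - the upper 16 bits of the written value select which of the
 * lower 16 bits may change, so one bit can be set or cleared without
 * touching its neighbours.  Macro and function names are invented here.
 */
#include <stdint.h>

#define MASKED_ENABLE(bit)	(((bit) << 16) | (bit))	/* set the bit */
#define MASKED_DISABLE(bit)	((bit) << 16)		/* clear the bit */

static uint32_t apply_masked_write(uint32_t reg, uint32_t write)
{
	uint32_t mask = write >> 16;

	return (reg & ~mask) | (write & mask);
}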
*/ - return rq->context->lrc.ccid == READ_ONCE(el->yield); -} - -static bool -timeslice_expired(const struct intel_engine_execlists *el, - const struct i915_request *rq) -{ - return timer_expired(&el->timer) || timeslice_yield(el, rq); -} - -static int -switch_prio(struct intel_engine_cs *engine, const struct i915_request *rq) -{ - if (list_is_last(&rq->sched.link, &engine->active.requests)) - return engine->execlists.queue_priority_hint; - - return rq_prio(list_next_entry(rq, sched.link)); -} - -static inline unsigned long -timeslice(const struct intel_engine_cs *engine) -{ - return READ_ONCE(engine->props.timeslice_duration_ms); -} -static unsigned long active_timeslice(const struct intel_engine_cs *engine) -{ - const struct intel_engine_execlists *execlists = &engine->execlists; - const struct i915_request *rq = *execlists->active; - - if (!rq || i915_request_completed(rq)) - return 0; - - if (READ_ONCE(execlists->switch_priority_hint) < effective_prio(rq)) - return 0; - - return timeslice(engine); -} + if (inhibit) + memset(regs, 0, PAGE_SIZE); -static void set_timeslice(struct intel_engine_cs *engine) -{ - unsigned long duration; + set_offsets(regs, reg_offsets(engine), engine, inhibit); - if (!intel_engine_has_timeslices(engine)) - return; + init_common_regs(regs, ce, engine, inhibit); + init_ppgtt_regs(regs, vm_alias(ce->vm)); - duration = active_timeslice(engine); - ENGINE_TRACE(engine, "bump timeslicing, interval:%lu", duration); + init_wa_bb_regs(regs, engine); - set_timer_ms(&engine->execlists.timer, duration); + __reset_stop_ring(regs, engine); } -static void start_timeslice(struct intel_engine_cs *engine, int prio) +void lrc_init_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + bool inhibit) { - struct intel_engine_execlists *execlists = &engine->execlists; - unsigned long duration; - - if (!intel_engine_has_timeslices(engine)) - return; - - WRITE_ONCE(execlists->switch_priority_hint, prio); - if (prio == INT_MIN) - return; - - if (timer_pending(&execlists->timer)) - return; - - duration = timeslice(engine); - ENGINE_TRACE(engine, - "start timeslicing, prio:%d, interval:%lu", - prio, duration); - - set_timer_ms(&execlists->timer, duration); + __lrc_init_regs(ce->lrc_reg_state, ce, engine, inhibit); } -static void record_preemption(struct intel_engine_execlists *execlists) +void lrc_reset_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine) { - (void)I915_SELFTEST_ONLY(execlists->preempt_hang.count++); + __reset_stop_ring(ce->lrc_reg_state, engine); } -static unsigned long active_preempt_timeout(struct intel_engine_cs *engine, - const struct i915_request *rq) +static void +set_redzone(void *vaddr, const struct intel_engine_cs *engine) { - if (!rq) - return 0; + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + return; - /* Force a fast reset for terminated contexts (ignoring sysfs!) 
*/ - if (unlikely(intel_context_is_banned(rq->context))) - return 1; + vaddr += engine->context_size; - return READ_ONCE(engine->props.preempt_timeout_ms); + memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE); } -static void set_preempt_timeout(struct intel_engine_cs *engine, - const struct i915_request *rq) +static void +check_redzone(const void *vaddr, const struct intel_engine_cs *engine) { - if (!intel_engine_has_preempt_reset(engine)) + if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) return; - set_timer_ms(&engine->execlists.preempt, - active_preempt_timeout(engine, rq)); -} - -static inline void clear_ports(struct i915_request **ports, int count) -{ - memset_p((void **)ports, NULL, count); -} + vaddr += engine->context_size; -static inline void -copy_ports(struct i915_request **dst, struct i915_request **src, int count) -{ - /* A memcpy_p() would be very useful here! */ - while (count--) - WRITE_ONCE(*dst++, *src++); /* avoid write tearing */ + if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) + drm_err_once(&engine->i915->drm, + "%s context redzone overwritten!\n", + engine->name); } -static void execlists_dequeue(struct intel_engine_cs *engine) +void lrc_init_state(struct intel_context *ce, + struct intel_engine_cs *engine, + void *state) { - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request **port = execlists->pending; - struct i915_request ** const last_port = port + execlists->port_mask; - struct i915_request * const *active; - struct i915_request *last; - struct rb_node *rb; - bool submit = false; - - /* - * Hardware submission is through 2 ports. Conceptually each port - * has a (RING_START, RING_HEAD, RING_TAIL) tuple. RING_START is - * static for a context, and unique to each, so we only execute - * requests belonging to a single context from each ring. RING_HEAD - * is maintained by the CS in the context image, it marks the place - * where it got up to last time, and through RING_TAIL we tell the CS - * where we want to execute up to this time. - * - * In this list the requests are in order of execution. Consecutive - * requests from the same context are adjacent in the ringbuffer. We - * can combine these requests into a single RING_TAIL update: - * - * RING_HEAD...req1...req2 - * ^- RING_TAIL - * since to execute req2 the CS must first execute req1. - * - * Our goal then is to point each port to the end of a consecutive - * sequence of requests as being the most optimal (fewest wake ups - * and context switches) submission. - */ - - for (rb = rb_first_cached(&execlists->virtual); rb; ) { - struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - struct i915_request *rq = READ_ONCE(ve->request); - - if (!rq) { /* lazily cleanup after another engine handled rq */ - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - rb = rb_first_cached(&execlists->virtual); - continue; - } - - if (!virtual_matches(ve, rq, engine)) { - rb = rb_next(rb); - continue; - } - - break; - } - - /* - * If the queue is higher priority than the last - * request in the currently active context, submit afresh. - * We will resubmit again afterwards in case we need to split - * the active context to interject the preemption request, - * i.e. we will retrigger preemption following the ack in case - * of trouble. 
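/*
 * Illustrative sketch (not part of the patch): the coalescing rule the
 * execlists_dequeue() comment above describes - consecutive requests
 * from the same context share one port (only RING_TAIL advances), a
 * context change moves to the next port, and dequeue stops once both
 * ELSP slots are taken.  The types and names below are invented.
 */
#include <stdio.h>

struct req {
	int ctx;
	int seqno;
};

static int fill_ports(const struct req *q, int count, const struct req *ports[2])
{
	int used = 0;

	for (int i = 0; i < count; i++) {
		if (used && ports[used - 1]->ctx == q[i].ctx) {
			ports[used - 1] = &q[i];	/* advance RING_TAIL only */
			continue;
		}
		if (used == 2)
			break;				/* both ELSP slots in use */
		ports[used++] = &q[i];
	}

	return used;
}

int main(void)
{
	const struct req queue[] = { { 1, 10 }, { 1, 11 }, { 2, 20 }, { 3, 30 } };
	const struct req *ports[2] = { 0 };
	int n = fill_ports(queue, 4, ports);

	for (int i = 0; i < n; i++)
		printf("port%d: ctx %d, tail at seqno %d\n",
		       i, ports[i]->ctx, ports[i]->seqno);
	return 0;
}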
- */ - active = READ_ONCE(execlists->active); - - /* - * In theory we can skip over completed contexts that have not - * yet been processed by events (as those events are in flight): - * - * while ((last = *active) && i915_request_completed(last)) - * active++; - * - * However, the GPU cannot handle this as it will ultimately - * find itself trying to jump back into a context it has just - * completed and barf. - */ - - if ((last = *active)) { - if (need_preempt(engine, last, rb)) { - if (i915_request_completed(last)) { - tasklet_hi_schedule(&execlists->tasklet); - return; - } - - ENGINE_TRACE(engine, - "preempting last=%llx:%lld, prio=%d, hint=%d\n", - last->fence.context, - last->fence.seqno, - last->sched.attr.priority, - execlists->queue_priority_hint); - record_preemption(execlists); - - /* - * Don't let the RING_HEAD advance past the breadcrumb - * as we unwind (and until we resubmit) so that we do - * not accidentally tell it to go backwards. - */ - ring_set_paused(engine, 1); - - /* - * Note that we have not stopped the GPU at this point, - * so we are unwinding the incomplete requests as they - * remain inflight and so by the time we do complete - * the preemption, some of the unwound requests may - * complete! - */ - __unwind_incomplete_requests(engine); - - last = NULL; - } else if (need_timeslice(engine, last, rb) && - timeslice_expired(execlists, last)) { - if (i915_request_completed(last)) { - tasklet_hi_schedule(&execlists->tasklet); - return; - } - - ENGINE_TRACE(engine, - "expired last=%llx:%lld, prio=%d, hint=%d, yield?=%s\n", - last->fence.context, - last->fence.seqno, - last->sched.attr.priority, - execlists->queue_priority_hint, - yesno(timeslice_yield(execlists, last))); - - ring_set_paused(engine, 1); - defer_active(engine); - - /* - * Unlike for preemption, if we rewind and continue - * executing the same context as previously active, - * the order of execution will remain the same and - * the tail will only advance. We do not need to - * force a full context restore, as a lite-restore - * is sufficient to resample the monotonic TAIL. - * - * If we switch to any other context, similarly we - * will not rewind TAIL of current context, and - * normal save/restore will preserve state and allow - * us to later continue executing the same request. - */ - last = NULL; - } else { - /* - * Otherwise if we already have a request pending - * for execution after the current one, we can - * just wait until the next CS event before - * queuing more. In either case we will force a - * lite-restore preemption event, but if we wait - * we hopefully coalesce several updates into a single - * submission. - */ - if (!list_is_last(&last->sched.link, - &engine->active.requests)) { - /* - * Even if ELSP[1] is occupied and not worthy - * of timeslices, our queue might be. 
- */ - start_timeslice(engine, queue_prio(execlists)); - return; - } - } - } - - while (rb) { /* XXX virtual is always taking precedence */ - struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - struct i915_request *rq; - - spin_lock(&ve->base.active.lock); - - rq = ve->request; - if (unlikely(!rq)) { /* lost the race to a sibling */ - spin_unlock(&ve->base.active.lock); - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - rb = rb_first_cached(&execlists->virtual); - continue; - } + bool inhibit = true; - GEM_BUG_ON(rq != ve->request); - GEM_BUG_ON(rq->engine != &ve->base); - GEM_BUG_ON(rq->context != &ve->context); - - if (rq_prio(rq) >= queue_prio(execlists)) { - if (!virtual_matches(ve, rq, engine)) { - spin_unlock(&ve->base.active.lock); - rb = rb_next(rb); - continue; - } - - if (last && !can_merge_rq(last, rq)) { - spin_unlock(&ve->base.active.lock); - start_timeslice(engine, rq_prio(rq)); - return; /* leave this for another sibling */ - } - - ENGINE_TRACE(engine, - "virtual rq=%llx:%lld%s, new engine? %s\n", - rq->fence.context, - rq->fence.seqno, - i915_request_completed(rq) ? "!" : - i915_request_started(rq) ? "*" : - "", - yesno(engine != ve->siblings[0])); - - WRITE_ONCE(ve->request, NULL); - WRITE_ONCE(ve->base.execlists.queue_priority_hint, - INT_MIN); - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - - GEM_BUG_ON(!(rq->execution_mask & engine->mask)); - WRITE_ONCE(rq->engine, engine); - - if (__i915_request_submit(rq)) { - /* - * Only after we confirm that we will submit - * this request (i.e. it has not already - * completed), do we want to update the context. - * - * This serves two purposes. It avoids - * unnecessary work if we are resubmitting an - * already completed request after timeslicing. - * But more importantly, it prevents us altering - * ve->siblings[] on an idle context, where - * we may be using ve->siblings[] in - * virtual_context_enter / virtual_context_exit. - */ - virtual_xfer_context(ve, engine); - GEM_BUG_ON(ve->siblings[0] != engine); - - submit = true; - last = rq; - } - i915_request_put(rq); - - /* - * Hmm, we have a bunch of virtual engine requests, - * but the first one was already completed (thanks - * preempt-to-busy!). Keep looking at the veng queue - * until we have no more relevant requests (i.e. - * the normal submit queue has higher priority). - */ - if (!submit) { - spin_unlock(&ve->base.active.lock); - rb = rb_first_cached(&execlists->virtual); - continue; - } - } + set_redzone(state, engine); - spin_unlock(&ve->base.active.lock); - break; + if (engine->default_state) { + shmem_read(engine->default_state, 0, + state, engine->context_size); + __set_bit(CONTEXT_VALID_BIT, &ce->flags); + inhibit = false; } - while ((rb = rb_first_cached(&execlists->queue))) { - struct i915_priolist *p = to_priolist(rb); - struct i915_request *rq, *rn; - int i; - - priolist_for_each_request_consume(rq, rn, p, i) { - bool merge = true; - - /* - * Can we combine this request with the current port? - * It has to be the same context/ringbuffer and not - * have any exceptions (e.g. GVT saying never to - * combine contexts). - * - * If we can combine the requests, we can execute both - * by updating the RING_TAIL to point to the end of the - * second request, and so we never need to tell the - * hardware about the first. - */ - if (last && !can_merge_rq(last, rq)) { - /* - * If we are on the second port and cannot - * combine this request with the last, then we - * are done. 
- */ - if (port == last_port) - goto done; - - /* - * We must not populate both ELSP[] with the - * same LRCA, i.e. we must submit 2 different - * contexts if we submit 2 ELSP. - */ - if (last->context == rq->context) - goto done; - - if (i915_request_has_sentinel(last)) - goto done; - - /* - * If GVT overrides us we only ever submit - * port[0], leaving port[1] empty. Note that we - * also have to be careful that we don't queue - * the same context (even though a different - * request) to the second port. - */ - if (ctx_single_port_submission(last->context) || - ctx_single_port_submission(rq->context)) - goto done; - - merge = false; - } - - if (__i915_request_submit(rq)) { - if (!merge) { - *port = execlists_schedule_in(last, port - execlists->pending); - port++; - last = NULL; - } - - GEM_BUG_ON(last && - !can_merge_ctx(last->context, - rq->context)); - GEM_BUG_ON(last && - i915_seqno_passed(last->fence.seqno, - rq->fence.seqno)); - - submit = true; - last = rq; - } - } - - rb_erase_cached(&p->node, &execlists->queue); - i915_priolist_free(p); - } + /* Clear the ppHWSP (inc. per-context counters) */ + memset(state, 0, PAGE_SIZE); -done: /* - * Here be a bit of magic! Or sleight-of-hand, whichever you prefer. - * - * We choose the priority hint such that if we add a request of greater - * priority than this, we kick the submission tasklet to decide on - * the right order of submitting the requests to hardware. We must - * also be prepared to reorder requests as they are in-flight on the - * HW. We derive the priority hint then as the first "hole" in - * the HW submission ports and if there are no available slots, - * the priority of the lowest executing request, i.e. last. - * - * When we do receive a higher priority request ready to run from the - * user, see queue_request(), the priority hint is bumped to that - * request triggering preemption on the next dequeue (or subsequent - * interrupt for secondary ports). + * The second page of the context object contains some registers which + * must be set up prior to the first execution. */ - execlists->queue_priority_hint = queue_prio(execlists); - - if (submit) { - *port = execlists_schedule_in(last, port - execlists->pending); - execlists->switch_priority_hint = - switch_prio(engine, *execlists->pending); - - /* - * Skip if we ended up with exactly the same set of requests, - * e.g. 
trying to timeslice a pair of ordered contexts - */ - if (!memcmp(active, execlists->pending, - (port - execlists->pending + 1) * sizeof(*port))) { - do - execlists_schedule_out(fetch_and_zero(port)); - while (port-- != execlists->pending); - - goto skip_submit; - } - clear_ports(port + 1, last_port - port); - - WRITE_ONCE(execlists->yield, -1); - set_preempt_timeout(engine, *active); - execlists_submit_ports(engine); - } else { - start_timeslice(engine, execlists->queue_priority_hint); -skip_submit: - ring_set_paused(engine, 0); - } + __lrc_init_regs(state + LRC_STATE_OFFSET, ce, engine, inhibit); } -static void -cancel_port_requests(struct intel_engine_execlists * const execlists) +static struct i915_vma * +__lrc_alloc_state(struct intel_context *ce, struct intel_engine_cs *engine) { - struct i915_request * const *port; - - for (port = execlists->pending; *port; port++) - execlists_schedule_out(*port); - clear_ports(execlists->pending, ARRAY_SIZE(execlists->pending)); - - /* Mark the end of active before we overwrite *active */ - for (port = xchg(&execlists->active, execlists->pending); *port; port++) - execlists_schedule_out(*port); - clear_ports(execlists->inflight, ARRAY_SIZE(execlists->inflight)); - - smp_wmb(); /* complete the seqlock for execlists_active() */ - WRITE_ONCE(execlists->active, execlists->inflight); -} + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 context_size; -static inline void -invalidate_csb_entries(const u64 *first, const u64 *last) -{ - clflush((void *)first); - clflush((void *)last); -} + context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); -/* - * Starting with Gen12, the status has a new format: - * - * bit 0: switched to new queue - * bit 1: reserved - * bit 2: semaphore wait mode (poll or signal), only valid when - * switch detail is set to "wait on semaphore" - * bits 3-5: engine class - * bits 6-11: engine instance - * bits 12-14: reserved - * bits 15-25: sw context id of the lrc the GT switched to - * bits 26-31: sw counter of the lrc the GT switched to - * bits 32-35: context switch detail - * - 0: ctx complete - * - 1: wait on sync flip - * - 2: wait on vblank - * - 3: wait on scanline - * - 4: wait on semaphore - * - 5: context preempted (not on SEMAPHORE_WAIT or - * WAIT_FOR_EVENT) - * bit 36: reserved - * bits 37-43: wait detail (for switch detail 1 to 4) - * bits 44-46: reserved - * bits 47-57: sw context id of the lrc the GT switched away from - * bits 58-63: sw counter of the lrc the GT switched away from - */ -static inline bool gen12_csb_parse(const u64 csb) -{ - bool ctx_away_valid = GEN12_CSB_CTX_VALID(upper_32_bits(csb)); - bool new_queue = - lower_32_bits(csb) & GEN12_CTX_STATUS_SWITCHED_TO_NEW_QUEUE; + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + context_size += I915_GTT_PAGE_SIZE; /* for redzone */ - /* - * The context switch detail is not guaranteed to be 5 when a preemption - * occurs, so we can't just check for that. The check below works for - * all the cases we care about, including preemptions of WAIT - * instructions and lite-restore. Preempt-to-idle via the CTRL register - * would require some extra handling, but we don't support that. 
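/*
 * Illustrative sketch (not part of the patch): extracting the fields of
 * a Gen12 CSB event using the bit layout documented in the comment
 * above.  The structure and function names are invented for the example.
 */
#include <stdint.h>

struct csb_fields {
	unsigned int new_queue;		/* bit 0: switched to new queue */
	unsigned int ctx_to;		/* bits 15-25: sw ctx id switched to */
	unsigned int switch_detail;	/* bits 32-35: context switch detail */
	unsigned int ctx_away;		/* bits 47-57: sw ctx id switched from */
};

static struct csb_fields decode_csb(uint64_t csb)
{
	struct csb_fields f;

	f.new_queue     = csb & 1;
	f.ctx_to        = (csb >> 15) & 0x7ff;
	f.switch_detail = (csb >> 32) & 0xf;
	f.ctx_away      = (csb >> 47) & 0x7ff;

	return f;
}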
- */ - if (!ctx_away_valid || new_queue) { - GEM_BUG_ON(!GEN12_CSB_CTX_VALID(lower_32_bits(csb))); - return true; + if (INTEL_GEN(engine->i915) == 12) { + ce->wa_bb_page = context_size / PAGE_SIZE; + context_size += PAGE_SIZE; } - /* - * switch detail = 5 is covered by the case above and we do not expect a - * context switch on an unsuccessful wait instruction since we always - * use polling mode. - */ - GEM_BUG_ON(GEN12_CTX_SWITCH_DETAIL(upper_32_bits(csb))); - return false; -} - -static inline bool gen8_csb_parse(const u64 csb) -{ - return csb & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED); -} - -static noinline u64 -wa_csb_read(const struct intel_engine_cs *engine, u64 * const csb) -{ - u64 entry; - - /* - * Reading from the HWSP has one particular advantage: we can detect - * a stale entry. Since the write into HWSP is broken, we have no reason - * to trust the HW at all, the mmio entry may equally be unordered, so - * we prefer the path that is self-checking and as a last resort, - * return the mmio value. - * - * tgl,dg1:HSDES#22011327657 - */ - preempt_disable(); - if (wait_for_atomic_us((entry = READ_ONCE(*csb)) != -1, 10)) { - int idx = csb - engine->execlists.csb_status; - int status; - - status = GEN8_EXECLISTS_STATUS_BUF; - if (idx >= 6) { - status = GEN11_EXECLISTS_STATUS_BUF2; - idx -= 6; - } - status += sizeof(u64) * idx; + obj = i915_gem_object_create_shmem(engine->i915, context_size); + if (IS_ERR(obj)) + return ERR_CAST(obj); - entry = intel_uncore_read64(engine->uncore, - _MMIO(engine->mmio_base + status)); + vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; } - preempt_enable(); - - return entry; -} -static inline u64 -csb_read(const struct intel_engine_cs *engine, u64 * const csb) -{ - u64 entry = READ_ONCE(*csb); - - /* - * Unfortunately, the GPU does not always serialise its write - * of the CSB entries before its write of the CSB pointer, at least - * from the perspective of the CPU, using what is known as a Global - * Observation Point. We may read a new CSB tail pointer, but then - * read the stale CSB entries, causing us to misinterpret the - * context-switch events, and eventually declare the GPU hung. - * - * icl:HSDES#1806554093 - * tgl:HSDES#22011248461 - */ - if (unlikely(entry == -1)) - entry = wa_csb_read(engine, csb); - - /* Consume this entry so that we can spot its future reuse. */ - WRITE_ONCE(*csb, -1); - - /* ELSP is an implicit wmb() before the GPU wraps and overwrites csb */ - return entry; + return vma; } -static void process_csb(struct intel_engine_cs *engine) +static struct intel_timeline * +pinned_timeline(struct intel_context *ce, struct intel_engine_cs *engine) { - struct intel_engine_execlists * const execlists = &engine->execlists; - u64 * const buf = execlists->csb_status; - const u8 num_entries = execlists->csb_size; - u8 head, tail; - - /* - * As we modify our execlists state tracking we require exclusive - * access. Either we are inside the tasklet, or the tasklet is disabled - * and we assume that is only inside the reset paths and so serialised. - */ - GEM_BUG_ON(!tasklet_is_locked(&execlists->tasklet) && - !reset_in_progress(execlists)); - GEM_BUG_ON(!intel_engine_in_execlists_submission_mode(engine)); - - /* - * Note that csb_write, csb_status may be either in HWSP or mmio. - * When reading from the csb_write mmio register, we have to be - * careful to only use the GEN8_CSB_WRITE_PTR portion, which is - * the low 4bits. 
As it happens we know the next 4bits are always - * zero and so we can simply masked off the low u8 of the register - * and treat it identically to reading from the HWSP (without having - * to use explicit shifting and masking, and probably bifurcating - * the code to handle the legacy mmio read). - */ - head = execlists->csb_head; - tail = READ_ONCE(*execlists->csb_write); - if (unlikely(head == tail)) - return; - - /* - * We will consume all events from HW, or at least pretend to. - * - * The sequence of events from the HW is deterministic, and derived - * from our writes to the ELSP, with a smidgen of variability for - * the arrival of the asynchronous requests wrt to the inflight - * execution. If the HW sends an event that does not correspond with - * the one we are expecting, we have to abandon all hope as we lose - * all tracking of what the engine is actually executing. We will - * only detect we are out of sequence with the HW when we get an - * 'impossible' event because we have already drained our own - * preemption/promotion queue. If this occurs, we know that we likely - * lost track of execution earlier and must unwind and restart, the - * simplest way is by stop processing the event queue and force the - * engine to reset. - */ - execlists->csb_head = tail; - ENGINE_TRACE(engine, "cs-irq head=%d, tail=%d\n", head, tail); - - /* - * Hopefully paired with a wmb() in HW! - * - * We must complete the read of the write pointer before any reads - * from the CSB, so that we do not see stale values. Without an rmb - * (lfence) the HW may speculatively perform the CSB[] reads *before* - * we perform the READ_ONCE(*csb_write). - */ - rmb(); - do { - bool promote; - u64 csb; - - if (++head == num_entries) - head = 0; - - /* - * We are flying near dragons again. - * - * We hold a reference to the request in execlist_port[] - * but no more than that. We are operating in softirq - * context and so cannot hold any mutex or sleep. That - * prevents us stopping the requests we are processing - * in port[] from being retired simultaneously (the - * breadcrumb will be complete before we see the - * context-switch). As we only hold the reference to the - * request, any pointer chasing underneath the request - * is subject to a potential use-after-free. Thus we - * store all of the bookkeeping within port[] as - * required, and avoid using unguarded pointers beneath - * request itself. The same applies to the atomic - * status notifier. 
- */ - - csb = csb_read(engine, buf + head); - ENGINE_TRACE(engine, "csb[%d]: status=0x%08x:0x%08x\n", - head, upper_32_bits(csb), lower_32_bits(csb)); - - if (INTEL_GEN(engine->i915) >= 12) - promote = gen12_csb_parse(csb); - else - promote = gen8_csb_parse(csb); - if (promote) { - struct i915_request * const *old = execlists->active; - - if (GEM_WARN_ON(!*execlists->pending)) { - execlists->error_interrupt |= ERROR_CSB; - break; - } - - ring_set_paused(engine, 0); - - /* Point active to the new ELSP; prevent overwriting */ - WRITE_ONCE(execlists->active, execlists->pending); - smp_wmb(); /* notify execlists_active() */ - - /* cancel old inflight, prepare for switch */ - trace_ports(execlists, "preempted", old); - while (*old) - execlists_schedule_out(*old++); - - /* switch pending to inflight */ - GEM_BUG_ON(!assert_pending_valid(execlists, "promote")); - copy_ports(execlists->inflight, - execlists->pending, - execlists_num_ports(execlists)); - smp_wmb(); /* complete the seqlock */ - WRITE_ONCE(execlists->active, execlists->inflight); - - /* XXX Magic delay for tgl */ - ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); - - WRITE_ONCE(execlists->pending[0], NULL); - } else { - if (GEM_WARN_ON(!*execlists->active)) { - execlists->error_interrupt |= ERROR_CSB; - break; - } - - /* port0 completed, advanced to port1 */ - trace_ports(execlists, "completed", execlists->active); - - /* - * We rely on the hardware being strongly - * ordered, that the breadcrumb write is - * coherent (visible from the CPU) before the - * user interrupt is processed. One might assume - * that the breadcrumb write being before the - * user interrupt and the CS event for the context - * switch would therefore be before the CS event - * itself... - */ - if (GEM_SHOW_DEBUG() && - !i915_request_completed(*execlists->active)) { - struct i915_request *rq = *execlists->active; - const u32 *regs __maybe_unused = - rq->context->lrc_reg_state; - - ENGINE_TRACE(engine, - "context completed before request!\n"); - ENGINE_TRACE(engine, - "ring:{start:0x%08x, head:%04x, tail:%04x, ctl:%08x, mode:%08x}\n", - ENGINE_READ(engine, RING_START), - ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR, - ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR, - ENGINE_READ(engine, RING_CTL), - ENGINE_READ(engine, RING_MI_MODE)); - ENGINE_TRACE(engine, - "rq:{start:%08x, head:%04x, tail:%04x, seqno:%llx:%d, hwsp:%d}, ", - i915_ggtt_offset(rq->ring->vma), - rq->head, rq->tail, - rq->fence.context, - lower_32_bits(rq->fence.seqno), - hwsp_seqno(rq)); - ENGINE_TRACE(engine, - "ctx:{start:%08x, head:%04x, tail:%04x}, ", - regs[CTX_RING_START], - regs[CTX_RING_HEAD], - regs[CTX_RING_TAIL]); - } - - execlists_schedule_out(*execlists->active++); - - GEM_BUG_ON(execlists->active - execlists->inflight > - execlists_num_ports(execlists)); - } - } while (head != tail); - - set_timeslice(engine); - - /* - * Gen11 has proven to fail wrt global observation point between - * entry and tail update, failing on the ordering and thus - * we see an old entry in the context status buffer. - * - * Forcibly evict out entries for the next gpu csb update, - * to increase the odds that we get a fresh entries with non - * working hardware. The cost for doing so comes out mostly with - * the wash as hardware, working or not, will need to do the - * invalidation before. 
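/*
 * Illustrative sketch (not part of the patch): the head/tail walk that
 * process_csb() above performs over the circular status buffer; the
 * wrap mirrors the "if (++head == num_entries) head = 0" step of the
 * real loop.  The callback and function names are invented here.
 */
#include <stdint.h>

static void consume_csb(const uint64_t *buf, uint8_t num_entries,
			uint8_t *head, uint8_t tail,
			void (*handle)(uint64_t entry))
{
	if (*head == tail)
		return;			/* nothing new written by the HW */

	do {
		if (++(*head) == num_entries)
			*head = 0;	/* wrap around the ring of entries */

		handle(buf[*head]);
	} while (*head != tail);
}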
- */ - invalidate_csb_entries(&buf[0], &buf[num_entries - 1]); -} + struct intel_timeline *tl = fetch_and_zero(&ce->timeline); -static void __execlists_submission_tasklet(struct intel_engine_cs *const engine) -{ - lockdep_assert_held(&engine->active.lock); - if (!READ_ONCE(engine->execlists.pending[0])) { - rcu_read_lock(); /* protect peeking at execlists->active */ - execlists_dequeue(engine); - rcu_read_unlock(); - } + return intel_timeline_create_from_engine(engine, page_unmask_bits(tl)); } -static void __execlists_hold(struct i915_request *rq) +int lrc_alloc(struct intel_context *ce, struct intel_engine_cs *engine) { - LIST_HEAD(list); - - do { - struct i915_dependency *p; - - if (i915_request_is_active(rq)) - __i915_request_unsubmit(rq); - - clear_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - list_move_tail(&rq->sched.link, &rq->engine->active.hold); - i915_request_set_hold(rq); - RQ_TRACE(rq, "on hold\n"); - - for_each_waiter(p, rq) { - struct i915_request *w = - container_of(p->waiter, typeof(*w), sched); - - /* Leave semaphores spinning on the other engines */ - if (w->engine != rq->engine) - continue; - - if (!i915_request_is_ready(w)) - continue; - - if (i915_request_completed(w)) - continue; - - if (i915_request_on_hold(w)) - continue; - - list_move_tail(&w->sched.link, &list); - } - - rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); - } while (rq); -} + struct intel_ring *ring; + struct i915_vma *vma; + int err; -static bool execlists_hold(struct intel_engine_cs *engine, - struct i915_request *rq) -{ - if (i915_request_on_hold(rq)) - return false; + GEM_BUG_ON(ce->state); - spin_lock_irq(&engine->active.lock); + vma = __lrc_alloc_state(ce, engine); + if (IS_ERR(vma)) + return PTR_ERR(vma); - if (i915_request_completed(rq)) { /* too late! */ - rq = NULL; - goto unlock; + ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); + if (IS_ERR(ring)) { + err = PTR_ERR(ring); + goto err_vma; } - if (rq->engine != engine) { /* preempted virtual engine */ - struct virtual_engine *ve = to_virtual_engine(rq->engine); - - /* - * intel_context_inflight() is only protected by virtue - * of process_csb() being called only by the tasklet (or - * directly from inside reset while the tasklet is suspended). - * Assert that neither of those are allowed to run while we - * poke at the request queues. - */ - GEM_BUG_ON(!reset_in_progress(&engine->execlists)); + if (!page_mask_bits(ce->timeline)) { + struct intel_timeline *tl; /* - * An unsubmitted request along a virtual engine will - * remain on the active (this) engine until we are able - * to process the context switch away (and so mark the - * context as no longer in flight). That cannot have happened - * yet, otherwise we would not be hanging! + * Use the static global HWSP for the kernel context, and + * a dynamically allocated cacheline for everyone else. */ - spin_lock(&ve->base.active.lock); - GEM_BUG_ON(intel_context_inflight(rq->context) != engine); - GEM_BUG_ON(ve->request != rq); - ve->request = NULL; - spin_unlock(&ve->base.active.lock); - i915_request_put(rq); - - rq->engine = engine; - } - - /* - * Transfer this request onto the hold queue to prevent it - * being resumbitted to HW (and potentially completed) before we have - * released it. Since we may have already submitted following - * requests, we need to remove those as well. 
- */ - GEM_BUG_ON(i915_request_on_hold(rq)); - GEM_BUG_ON(rq->engine != engine); - __execlists_hold(rq); - GEM_BUG_ON(list_empty(&engine->active.hold)); - -unlock: - spin_unlock_irq(&engine->active.lock); - return rq; -} - -static bool hold_request(const struct i915_request *rq) -{ - struct i915_dependency *p; - bool result = false; - - /* - * If one of our ancestors is on hold, we must also be on hold, - * otherwise we will bypass it and execute before it. - */ - rcu_read_lock(); - for_each_signaler(p, rq) { - const struct i915_request *s = - container_of(p->signaler, typeof(*s), sched); - - if (s->engine != rq->engine) - continue; - - result = i915_request_on_hold(s); - if (result) - break; - } - rcu_read_unlock(); - - return result; -} - -static void __execlists_unhold(struct i915_request *rq) -{ - LIST_HEAD(list); - - do { - struct i915_dependency *p; - - RQ_TRACE(rq, "hold release\n"); - - GEM_BUG_ON(!i915_request_on_hold(rq)); - GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); - - i915_request_clear_hold(rq); - list_move_tail(&rq->sched.link, - i915_sched_lookup_priolist(rq->engine, - rq_prio(rq))); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); - - /* Also release any children on this engine that are ready */ - for_each_waiter(p, rq) { - struct i915_request *w = - container_of(p->waiter, typeof(*w), sched); - - /* Propagate any change in error status */ - if (rq->fence.error) - i915_request_set_error_once(w, rq->fence.error); - - if (w->engine != rq->engine) - continue; - - if (!i915_request_on_hold(w)) - continue; - - /* Check that no other parents are also on hold */ - if (hold_request(w)) - continue; - - list_move_tail(&w->sched.link, &list); - } - - rq = list_first_entry_or_null(&list, typeof(*rq), sched.link); - } while (rq); -} - -static void execlists_unhold(struct intel_engine_cs *engine, - struct i915_request *rq) -{ - spin_lock_irq(&engine->active.lock); - - /* - * Move this request back to the priority queue, and all of its - * children and grandchildren that were suspended along with it. - */ - __execlists_unhold(rq); - - if (rq_prio(rq) > engine->execlists.queue_priority_hint) { - engine->execlists.queue_priority_hint = rq_prio(rq); - tasklet_hi_schedule(&engine->execlists.tasklet); - } - - spin_unlock_irq(&engine->active.lock); -} - -struct execlists_capture { - struct work_struct work; - struct i915_request *rq; - struct i915_gpu_coredump *error; -}; - -static void execlists_capture_work(struct work_struct *work) -{ - struct execlists_capture *cap = container_of(work, typeof(*cap), work); - const gfp_t gfp = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN; - struct intel_engine_cs *engine = cap->rq->engine; - struct intel_gt_coredump *gt = cap->error->gt; - struct intel_engine_capture_vma *vma; - - /* Compress all the objects attached to the request, slow! 
*/ - vma = intel_engine_coredump_add_request(gt->engine, cap->rq, gfp); - if (vma) { - struct i915_vma_compress *compress = - i915_vma_capture_prepare(gt); - - intel_engine_coredump_add_vma(gt->engine, vma, compress); - i915_vma_capture_finish(gt, compress); - } - - gt->simulated = gt->engine->simulated; - cap->error->simulated = gt->simulated; - - /* Publish the error state, and announce it to the world */ - i915_error_state_store(cap->error); - i915_gpu_coredump_put(cap->error); - - /* Return this request and all that depend upon it for signaling */ - execlists_unhold(engine, cap->rq); - i915_request_put(cap->rq); - - kfree(cap); -} - -static struct execlists_capture *capture_regs(struct intel_engine_cs *engine) -{ - const gfp_t gfp = GFP_ATOMIC | __GFP_NOWARN; - struct execlists_capture *cap; - - cap = kmalloc(sizeof(*cap), gfp); - if (!cap) - return NULL; - - cap->error = i915_gpu_coredump_alloc(engine->i915, gfp); - if (!cap->error) - goto err_cap; - - cap->error->gt = intel_gt_coredump_alloc(engine->gt, gfp); - if (!cap->error->gt) - goto err_gpu; - - cap->error->gt->engine = intel_engine_coredump_alloc(engine, gfp); - if (!cap->error->gt->engine) - goto err_gt; - - cap->error->gt->engine->hung = true; - - return cap; - -err_gt: - kfree(cap->error->gt); -err_gpu: - kfree(cap->error); -err_cap: - kfree(cap); - return NULL; -} - -static struct i915_request * -active_context(struct intel_engine_cs *engine, u32 ccid) -{ - const struct intel_engine_execlists * const el = &engine->execlists; - struct i915_request * const *port, *rq; - - /* - * Use the most recent result from process_csb(), but just in case - * we trigger an error (via interrupt) before the first CS event has - * been written, peek at the next submission. - */ - - for (port = el->active; (rq = *port); port++) { - if (rq->context->lrc.ccid == ccid) { - ENGINE_TRACE(engine, - "ccid found at active:%zd\n", - port - el->active); - return rq; - } - } - - for (port = el->pending; (rq = *port); port++) { - if (rq->context->lrc.ccid == ccid) { - ENGINE_TRACE(engine, - "ccid found at pending:%zd\n", - port - el->pending); - return rq; + if (unlikely(ce->timeline)) + tl = pinned_timeline(ce, engine); + else + tl = intel_timeline_create(engine->gt); + if (IS_ERR(tl)) { + err = PTR_ERR(tl); + goto err_ring; } - } - - ENGINE_TRACE(engine, "ccid:%x not found\n", ccid); - return NULL; -} - -static u32 active_ccid(struct intel_engine_cs *engine) -{ - return ENGINE_READ_FW(engine, RING_EXECLIST_STATUS_HI); -} - -static void execlists_capture(struct intel_engine_cs *engine) -{ - struct execlists_capture *cap; - - if (!IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)) - return; - - /* - * We need to _quickly_ capture the engine state before we reset. - * We are inside an atomic section (softirq) here and we are delaying - * the forced preemption event. - */ - cap = capture_regs(engine); - if (!cap) - return; - spin_lock_irq(&engine->active.lock); - cap->rq = active_context(engine, active_ccid(engine)); - if (cap->rq) { - cap->rq = active_request(cap->rq->context->timeline, cap->rq); - cap->rq = i915_request_get_rcu(cap->rq); + ce->timeline = tl; } - spin_unlock_irq(&engine->active.lock); - if (!cap->rq) - goto err_free; - - /* - * Remove the request from the execlists queue, and take ownership - * of the request. We pass it to our worker who will _slowly_ compress - * all the pages the _user_ requested for debugging their batch, after - * which we return it to the queue for signaling. 
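The two-stage shape spelled out here (grab what you can while still in softirq, hand the slow compression to process context) is a common pattern; a stripped-down sketch follows. Everything below is illustrative only: the snapshot layout and the empty "fill"/"publish" steps are placeholders, while the allocation and workqueue calls are the stock kernel APIs already used by execlists_capture_work() above.

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

struct capture_snapshot {
	struct work_struct work;
	u32 regs[16];			/* cheap mmio snapshot, placeholder size */
};

static void capture_worker(struct work_struct *work)
{
	struct capture_snapshot *cap =
		container_of(work, typeof(*cap), work);

	/* Slow part: compress, publish the error state, etc. (omitted). */

	kfree(cap);
}

static bool capture_from_softirq(void)
{
	/* Atomic context: no sleeping allocation, no heavy work here. */
	struct capture_snapshot *cap =
		kzalloc(sizeof(*cap), GFP_ATOMIC | __GFP_NOWARN);

	if (!cap)
		return false;

	/* Fast part only: snapshot the registers (omitted). */

	INIT_WORK(&cap->work, capture_worker);
	schedule_work(&cap->work);
	return true;
}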
- * - * By removing them from the execlists queue, we also remove the - * requests from being processed by __unwind_incomplete_requests() - * during the intel_engine_reset(), and so they will *not* be replayed - * afterwards. - * - * Note that because we have not yet reset the engine at this point, - * it is possible for the request that we have identified as being - * guilty, did in fact complete and we will then hit an arbitration - * point allowing the outstanding preemption to succeed. The likelihood - * of that is very low (as capturing of the engine registers should be - * fast enough to run inside an irq-off atomic section!), so we will - * simply hold that request accountable for being non-preemptible - * long enough to force the reset. - */ - if (!execlists_hold(engine, cap->rq)) - goto err_rq; - - INIT_WORK(&cap->work, execlists_capture_work); - schedule_work(&cap->work); - return; - -err_rq: - i915_request_put(cap->rq); -err_free: - i915_gpu_coredump_put(cap->error); - kfree(cap); -} - -static void execlists_reset(struct intel_engine_cs *engine, const char *msg) -{ - const unsigned int bit = I915_RESET_ENGINE + engine->id; - unsigned long *lock = &engine->gt->reset.flags; - if (!intel_has_reset_engine(engine->gt)) - return; - - if (test_and_set_bit(bit, lock)) - return; - - ENGINE_TRACE(engine, "reset for %s\n", msg); - - /* Mark this tasklet as disabled to avoid waiting for it to complete */ - tasklet_disable_nosync(&engine->execlists.tasklet); + ce->ring = ring; + ce->state = vma; - ring_set_paused(engine, 1); /* Freeze the current request in place */ - execlists_capture(engine); - intel_engine_reset(engine, msg); + return 0; - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(bit, lock); +err_ring: + intel_ring_put(ring); +err_vma: + i915_vma_put(vma); + return err; } -static bool preempt_timeout(const struct intel_engine_cs *const engine) +void lrc_reset(struct intel_context *ce) { - const struct timer_list *t = &engine->execlists.preempt; - - if (!CONFIG_DRM_I915_PREEMPT_TIMEOUT) - return false; + GEM_BUG_ON(!intel_context_is_pinned(ce)); - if (!timer_expired(t)) - return false; + intel_ring_reset(ce->ring, ce->ring->emit); - return READ_ONCE(engine->execlists.pending[0]); + /* Scrub away the garbage */ + lrc_init_regs(ce, ce->engine, true); + ce->lrc.lrca = lrc_update_regs(ce, ce->engine, ce->ring->tail); } -/* - * Check the unread Context Status Buffers and manage the submission of new - * contexts to the ELSP accordingly. - */ -static void execlists_submission_tasklet(unsigned long data) +int +lrc_pre_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + struct i915_gem_ww_ctx *ww, + void **vaddr) { - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - bool timeout = preempt_timeout(engine); - - process_csb(engine); - - if (unlikely(READ_ONCE(engine->execlists.error_interrupt))) { - const char *msg; - - /* Generate the error message in priority wrt to the user! 
*/ - if (engine->execlists.error_interrupt & GENMASK(15, 0)) - msg = "CS error"; /* thrown by a user payload */ - else if (engine->execlists.error_interrupt & ERROR_CSB) - msg = "invalid CSB event"; - else - msg = "internal error"; - - engine->execlists.error_interrupt = 0; - execlists_reset(engine, msg); - } - - if (!READ_ONCE(engine->execlists.pending[0]) || timeout) { - unsigned long flags; + GEM_BUG_ON(!ce->state); + GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - spin_lock_irqsave(&engine->active.lock, flags); - __execlists_submission_tasklet(engine); - spin_unlock_irqrestore(&engine->active.lock, flags); + *vaddr = i915_gem_object_pin_map(ce->state->obj, + i915_coherent_map_type(ce->engine->i915) | + I915_MAP_OVERRIDE); - /* Recheck after serialising with direct-submission */ - if (unlikely(timeout && preempt_timeout(engine))) { - cancel_timer(&engine->execlists.preempt); - execlists_reset(engine, "preemption time out"); - } - } + return PTR_ERR_OR_ZERO(*vaddr); } -static void __execlists_kick(struct intel_engine_execlists *execlists) +int +lrc_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + void *vaddr) { - /* Kick the tasklet for some interrupt coalescing and reset handling */ - tasklet_hi_schedule(&execlists->tasklet); -} + ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET; -#define execlists_kick(t, member) \ - __execlists_kick(container_of(t, struct intel_engine_execlists, member)) + if (!__test_and_set_bit(CONTEXT_INIT_BIT, &ce->flags)) + lrc_init_state(ce, engine, vaddr); -static void execlists_timeslice(struct timer_list *timer) -{ - execlists_kick(timer, timer); -} - -static void execlists_preempt(struct timer_list *timer) -{ - execlists_kick(timer, preempt); + ce->lrc.lrca = lrc_update_regs(ce, engine, ce->ring->tail); + return 0; } -static void queue_request(struct intel_engine_cs *engine, - struct i915_request *rq) +void lrc_unpin(struct intel_context *ce) { - GEM_BUG_ON(!list_empty(&rq->sched.link)); - list_add_tail(&rq->sched.link, - i915_sched_lookup_priolist(engine, rq_prio(rq))); - set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); + check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, + ce->engine); } -static void __submit_queue_imm(struct intel_engine_cs *engine) +void lrc_post_unpin(struct intel_context *ce) { - struct intel_engine_execlists * const execlists = &engine->execlists; - - if (reset_in_progress(execlists)) - return; /* defer until we restart the engine following reset */ - - __execlists_submission_tasklet(engine); + i915_gem_object_unpin_map(ce->state->obj); } -static void submit_queue(struct intel_engine_cs *engine, - const struct i915_request *rq) +void lrc_fini(struct intel_context *ce) { - struct intel_engine_execlists *execlists = &engine->execlists; - - if (rq_prio(rq) <= execlists->queue_priority_hint) + if (!ce->state) return; - execlists->queue_priority_hint = rq_prio(rq); - __submit_queue_imm(engine); + intel_ring_put(fetch_and_zero(&ce->ring)); + i915_vma_put(fetch_and_zero(&ce->state)); } -static bool ancestor_on_hold(const struct intel_engine_cs *engine, - const struct i915_request *rq) -{ - GEM_BUG_ON(i915_request_on_hold(rq)); - return !list_empty(&engine->active.hold) && hold_request(rq); -} - -static void flush_csb(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists *el = &engine->execlists; - - if (READ_ONCE(el->pending[0]) && tasklet_trylock(&el->tasklet)) { - if (!reset_in_progress(el)) - process_csb(engine); - tasklet_unlock(&el->tasklet); - } -} - -static void execlists_submit_request(struct 
i915_request *request) -{ - struct intel_engine_cs *engine = request->engine; - unsigned long flags; - - /* Hopefully we clear execlists->pending[] to let us through */ - flush_csb(engine); - - /* Will be called from irq-context when using foreign fences. */ - spin_lock_irqsave(&engine->active.lock, flags); - - if (unlikely(ancestor_on_hold(engine, request))) { - RQ_TRACE(request, "ancestor on hold\n"); - list_add_tail(&request->sched.link, &engine->active.hold); - i915_request_set_hold(request); - } else { - queue_request(engine, request); - - GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); - GEM_BUG_ON(list_empty(&request->sched.link)); - - submit_queue(engine, request); - } - - spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static void __execlists_context_fini(struct intel_context *ce) -{ - intel_ring_put(ce->ring); - i915_vma_put(ce->state); -} - -static void execlists_context_destroy(struct kref *kref) +void lrc_destroy(struct kref *kref) { struct intel_context *ce = container_of(kref, typeof(*ce), ref); GEM_BUG_ON(!i915_active_is_idle(&ce->active)); GEM_BUG_ON(intel_context_is_pinned(ce)); - if (ce->state) - __execlists_context_fini(ce); + lrc_fini(ce); intel_context_fini(ce); intel_context_free(ce); } -static void -set_redzone(void *vaddr, const struct intel_engine_cs *engine) -{ - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - return; - - vaddr += engine->context_size; - - memset(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE); -} - -static void -check_redzone(const void *vaddr, const struct intel_engine_cs *engine) -{ - if (!IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - return; - - vaddr += engine->context_size; - - if (memchr_inv(vaddr, CONTEXT_REDZONE, I915_GTT_PAGE_SIZE)) - drm_err_once(&engine->i915->drm, - "%s context redzone overwritten!\n", - engine->name); -} - -static void execlists_context_unpin(struct intel_context *ce) -{ - check_redzone((void *)ce->lrc_reg_state - LRC_STATE_OFFSET, - ce->engine); -} - -static void execlists_context_post_unpin(struct intel_context *ce) -{ - i915_gem_object_unpin_map(ce->state->obj); -} - static u32 * gen12_emit_timestamp_wa(const struct intel_context *ce, u32 *cs) { @@ -3496,16 +1066,57 @@ setup_indirect_ctx_bb(const struct intel_context *ce, while ((unsigned long)cs % CACHELINE_BYTES) *cs++ = MI_NOOP; - lrc_ring_setup_indirect_ctx(ce->lrc_reg_state, engine, - i915_ggtt_offset(ce->state) + - context_wa_bb_offset(ce), - (cs - start) * sizeof(*cs)); + lrc_setup_indirect_ctx(ce->lrc_reg_state, engine, + i915_ggtt_offset(ce->state) + + context_wa_bb_offset(ce), + (cs - start) * sizeof(*cs)); } -static void -__execlists_update_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine, - u32 head) +/* + * The context descriptor encodes various attributes of a context, + * including its GTT address and some flags. Because it's fairly + * expensive to calculate, we'll just do it once and cache the result, + * which remains valid until the context is unpinned. 
+ * + * This is what a descriptor looks like, from LSB to MSB:: + * + * bits 0-11: flags, GEN8_CTX_* (cached in ctx->desc_template) + * bits 12-31: LRCA, GTT address of (the HWSP of) this context + * bits 32-52: ctx ID, a globally unique tag (highest bit used by GuC) + * bits 53-54: mbz, reserved for use by hardware + * bits 55-63: group ID, currently unused and set to 0 + * + * Starting from Gen11, the upper dword of the descriptor has a new format: + * + * bits 32-36: reserved + * bits 37-47: SW context ID + * bits 48:53: engine instance + * bit 54: mbz, reserved for use by hardware + * bits 55-60: SW counter + * bits 61-63: engine class + * + * engine info, SW context ID and SW counter need to form a unique number + * (Context ID) per lrc. + */ +static inline u32 lrc_descriptor(const struct intel_context *ce) +{ + u32 desc; + + desc = INTEL_LEGACY_32B_CONTEXT; + if (i915_vm_is_4lvl(ce->vm)) + desc = INTEL_LEGACY_64B_CONTEXT; + desc <<= GEN8_CTX_ADDRESSING_MODE_SHIFT; + + desc |= GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE; + if (IS_GEN(ce->vm->i915, 8)) + desc |= GEN8_CTX_L3LLC_COHERENT; + + return i915_ggtt_offset(ce->state) | desc; +} + +u32 lrc_update_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 head) { struct intel_ring *ring = ce->ring; u32 *regs = ce->lrc_reg_state; @@ -3537,205 +1148,54 @@ __execlists_update_reg_state(const struct intel_context *ce, GEM_BUG_ON(engine->wa_ctx.indirect_ctx.size); setup_indirect_ctx_bb(ce, engine, fn); } -} -static int -execlists_context_pre_pin(struct intel_context *ce, - struct i915_gem_ww_ctx *ww, void **vaddr) -{ - GEM_BUG_ON(!ce->state); - GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - - *vaddr = i915_gem_object_pin_map(ce->state->obj, - i915_coherent_map_type(ce->engine->i915) | - I915_MAP_OVERRIDE); - - return PTR_ERR_OR_ZERO(*vaddr); -} - -static int -__execlists_context_pin(struct intel_context *ce, - struct intel_engine_cs *engine, - void *vaddr) -{ - ce->lrc.lrca = lrc_descriptor(ce, engine) | CTX_DESC_FORCE_RESTORE; - ce->lrc_reg_state = vaddr + LRC_STATE_OFFSET; - __execlists_update_reg_state(ce, engine, ce->ring->tail); - - return 0; -} - -static int execlists_context_pin(struct intel_context *ce, void *vaddr) -{ - return __execlists_context_pin(ce, ce->engine, vaddr); + return lrc_descriptor(ce) | CTX_DESC_FORCE_RESTORE; } -static int execlists_context_alloc(struct intel_context *ce) +void lrc_update_offsets(struct intel_context *ce, + struct intel_engine_cs *engine) { - return __execlists_context_alloc(ce, ce->engine); + set_offsets(ce->lrc_reg_state, reg_offsets(engine), engine, false); } -static void execlists_context_reset(struct intel_context *ce) +void lrc_check_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + const char *when) { - CE_TRACE(ce, "reset\n"); - GEM_BUG_ON(!intel_context_is_pinned(ce)); - - intel_ring_reset(ce->ring, ce->ring->emit); - - /* Scrub away the garbage */ - execlists_init_reg_state(ce->lrc_reg_state, - ce, ce->engine, ce->ring, true); - __execlists_update_reg_state(ce, ce->engine, ce->ring->tail); - - ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; -} - -static const struct intel_context_ops execlists_context_ops = { - .alloc = execlists_context_alloc, - - .pre_pin = execlists_context_pre_pin, - .pin = execlists_context_pin, - .unpin = execlists_context_unpin, - .post_unpin = execlists_context_post_unpin, - - .enter = intel_context_enter_engine, - .exit = intel_context_exit_engine, - - .reset = execlists_context_reset, - .destroy = 
execlists_context_destroy, -}; - -static u32 hwsp_offset(const struct i915_request *rq) -{ - const struct intel_timeline_cacheline *cl; - - /* Before the request is executed, the timeline/cachline is fixed */ - - cl = rcu_dereference_protected(rq->hwsp_cacheline, 1); - if (cl) - return cl->ggtt_offset; - - return rcu_dereference_protected(rq->timeline, 1)->hwsp_offset; -} - -static int gen8_emit_init_breadcrumb(struct i915_request *rq) -{ - u32 *cs; - - GEM_BUG_ON(i915_request_has_initial_breadcrumb(rq)); - if (!i915_request_timeline(rq)->has_initial_breadcrumb) - return 0; - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* - * Check if we have been preempted before we even get started. - * - * After this point i915_request_started() reports true, even if - * we get preempted and so are no longer running. - */ - *cs++ = MI_ARB_CHECK; - *cs++ = MI_NOOP; - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = hwsp_offset(rq); - *cs++ = 0; - *cs++ = rq->fence.seqno - 1; - - intel_ring_advance(rq, cs); - - /* Record the updated position of the request's payload */ - rq->infix = intel_ring_offset(rq, cs); - - __set_bit(I915_FENCE_FLAG_INITIAL_BREADCRUMB, &rq->fence.flags); - - return 0; -} - -static int emit_pdps(struct i915_request *rq) -{ - const struct intel_engine_cs * const engine = rq->engine; - struct i915_ppgtt * const ppgtt = i915_vm_to_ppgtt(rq->context->vm); - int err, i; - u32 *cs; - - GEM_BUG_ON(intel_vgpu_active(rq->engine->i915)); - - /* - * Beware ye of the dragons, this sequence is magic! - * - * Small changes to this sequence can cause anything from - * GPU hangs to forcewake errors and machine lockups! - */ - - /* Flush any residual operations from the context load */ - err = engine->emit_flush(rq, EMIT_FLUSH); - if (err) - return err; + const struct intel_ring *ring = ce->ring; + u32 *regs = ce->lrc_reg_state; + bool valid = true; + int x; - /* Magic required to prevent forcewake errors! */ - err = engine->emit_flush(rq, EMIT_INVALIDATE); - if (err) - return err; - - cs = intel_ring_begin(rq, 4 * GEN8_3LVL_PDPES + 2); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* Ensure the LRI have landed before we invalidate & continue */ - *cs++ = MI_LOAD_REGISTER_IMM(2 * GEN8_3LVL_PDPES) | MI_LRI_FORCE_POSTED; - for (i = GEN8_3LVL_PDPES; i--; ) { - const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i); - u32 base = engine->mmio_base; - - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(base, i)); - *cs++ = upper_32_bits(pd_daddr); - *cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(base, i)); - *cs++ = lower_32_bits(pd_daddr); + if (regs[CTX_RING_START] != i915_ggtt_offset(ring->vma)) { + pr_err("%s: context submitted with incorrect RING_START [%08x], expected %08x\n", + engine->name, + regs[CTX_RING_START], + i915_ggtt_offset(ring->vma)); + regs[CTX_RING_START] = i915_ggtt_offset(ring->vma); + valid = false; } - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - return 0; -} - -static int execlists_request_alloc(struct i915_request *request) -{ - int ret; - - GEM_BUG_ON(!intel_context_is_pinned(request->context)); - - /* - * Flush enough space to reduce the likelihood of waiting after - * we start building the request - in which case we will just - * have to repeat work. - */ - request->reserved_space += EXECLISTS_REQUEST_SIZE; - - /* - * Note that after this point, we have committed to using - * this request as it is being used to both track the - * state of engine initialisation and liveness of the - * golden renderstate above. 
Think twice before you try - * to cancel/unwind this request now. - */ - - if (!i915_vm_is_4lvl(request->context->vm)) { - ret = emit_pdps(request); - if (ret) - return ret; + if ((regs[CTX_RING_CTL] & ~(RING_WAIT | RING_WAIT_SEMAPHORE)) != + (RING_CTL_SIZE(ring->size) | RING_VALID)) { + pr_err("%s: context submitted with incorrect RING_CTL [%08x], expected %08x\n", + engine->name, + regs[CTX_RING_CTL], + (u32)(RING_CTL_SIZE(ring->size) | RING_VALID)); + regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; + valid = false; } - /* Unconditionally invalidate GPU caches and TLBs. */ - ret = request->engine->emit_flush(request, EMIT_INVALIDATE); - if (ret) - return ret; + x = lrc_ring_mi_mode(engine); + if (x != -1 && regs[x + 1] & (regs[x + 1] >> 16) & STOP_RING) { + pr_err("%s: context submitted with STOP_RING [%08x] in RING_MI_MODE\n", + engine->name, regs[x + 1]); + regs[x + 1] &= ~STOP_RING; + regs[x + 1] |= STOP_RING << 16; + valid = false; + } - request->reserved_space -= EXECLISTS_REQUEST_SIZE; - return 0; + WARN_ONCE(!valid, "Invalid lrc state found %s submission\n", when); } /* @@ -3955,7 +1415,7 @@ gen10_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch) return batch; } -#define CTX_WA_BB_OBJ_SIZE (PAGE_SIZE) +#define CTX_WA_BB_SIZE (PAGE_SIZE) static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) { @@ -3963,7 +1423,7 @@ static int lrc_setup_wa_ctx(struct intel_engine_cs *engine) struct i915_vma *vma; int err; - obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_OBJ_SIZE); + obj = i915_gem_object_create_shmem(engine->i915, CTX_WA_BB_SIZE); if (IS_ERR(obj)) return PTR_ERR(obj); @@ -3985,30 +1445,34 @@ err: return err; } -static void lrc_destroy_wa_ctx(struct intel_engine_cs *engine) +void lrc_fini_wa_ctx(struct intel_engine_cs *engine) { i915_vma_unpin_and_release(&engine->wa_ctx.vma, 0); + + /* Called on error unwind, clear all flags to prevent further use */ + memset(&engine->wa_ctx, 0, sizeof(engine->wa_ctx)); } typedef u32 *(*wa_bb_func_t)(struct intel_engine_cs *engine, u32 *batch); -static int intel_init_workaround_bb(struct intel_engine_cs *engine) +void lrc_init_wa_ctx(struct intel_engine_cs *engine) { struct i915_ctx_workarounds *wa_ctx = &engine->wa_ctx; - struct i915_wa_ctx_bb *wa_bb[2] = { &wa_ctx->indirect_ctx, - &wa_ctx->per_ctx }; - wa_bb_func_t wa_bb_fn[2]; + struct i915_wa_ctx_bb *wa_bb[] = { + &wa_ctx->indirect_ctx, &wa_ctx->per_ctx + }; + wa_bb_func_t wa_bb_fn[ARRAY_SIZE(wa_bb)]; void *batch, *batch_ptr; unsigned int i; - int ret; + int err; if (engine->class != RENDER_CLASS) - return 0; + return; switch (INTEL_GEN(engine->i915)) { case 12: case 11: - return 0; + return; case 10: wa_bb_fn[0] = gen10_init_indirectctx_bb; wa_bb_fn[1] = NULL; @@ -4023,14 +1487,20 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) break; default: MISSING_CASE(INTEL_GEN(engine->i915)); - return 0; + return; } - ret = lrc_setup_wa_ctx(engine); - if (ret) { - drm_dbg(&engine->i915->drm, - "Failed to setup context WA page: %d\n", ret); - return ret; + err = lrc_setup_wa_ctx(engine); + if (err) { + /* + * We continue even if we fail to initialize WA batch + * because we only expect rare glitches but nothing + * critical to prevent us from using GPU + */ + drm_err(&engine->i915->drm, + "Ignoring context switch w/a allocation error:%d\n", + err); + return; } batch = i915_gem_object_pin_map(wa_ctx->vma->obj, I915_MAP_WB); @@ -4045,2104 +1515,52 @@ static int intel_init_workaround_bb(struct intel_engine_cs *engine) wa_bb[i]->offset = 
batch_ptr - batch; if (GEM_DEBUG_WARN_ON(!IS_ALIGNED(wa_bb[i]->offset, CACHELINE_BYTES))) { - ret = -EINVAL; + err = -EINVAL; break; } if (wa_bb_fn[i]) batch_ptr = wa_bb_fn[i](engine, batch_ptr); wa_bb[i]->size = batch_ptr - (batch + wa_bb[i]->offset); } - GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_OBJ_SIZE); + GEM_BUG_ON(batch_ptr - batch > CTX_WA_BB_SIZE); __i915_gem_object_flush_map(wa_ctx->vma->obj, 0, batch_ptr - batch); __i915_gem_object_release_map(wa_ctx->vma->obj); - if (ret) - lrc_destroy_wa_ctx(engine); - - return ret; -} - -static void reset_csb_pointers(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - const unsigned int reset_value = execlists->csb_size - 1; - - ring_set_paused(engine, 0); - - /* - * Sometimes Icelake forgets to reset its pointers on a GPU reset. - * Bludgeon them with a mmio update to be sure. - */ - ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, - 0xffff << 16 | reset_value << 8 | reset_value); - ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); - - /* - * After a reset, the HW starts writing into CSB entry [0]. We - * therefore have to set our HEAD pointer back one entry so that - * the *first* entry we check is entry 0. To complicate this further, - * as we don't wait for the first interrupt after reset, we have to - * fake the HW write to point back to the last entry so that our - * inline comparison of our cached head position against the last HW - * write works even before the first interrupt. - */ - execlists->csb_head = reset_value; - WRITE_ONCE(*execlists->csb_write, reset_value); - wmb(); /* Make sure this is visible to HW (paranoia?) */ - - /* Check that the GPU does indeed update the CSB entries! */ - memset(execlists->csb_status, -1, (reset_value + 1) * sizeof(u64)); - invalidate_csb_entries(&execlists->csb_status[0], - &execlists->csb_status[reset_value]); - - /* Once more for luck and our trusty paranoia */ - ENGINE_WRITE(engine, RING_CONTEXT_STATUS_PTR, - 0xffff << 16 | reset_value << 8 | reset_value); - ENGINE_POSTING_READ(engine, RING_CONTEXT_STATUS_PTR); - - GEM_BUG_ON(READ_ONCE(*execlists->csb_write) != reset_value); -} - -static void execlists_sanitize(struct intel_engine_cs *engine) -{ - GEM_BUG_ON(execlists_active(&engine->execlists)); - - /* - * Poison residual state on resume, in case the suspend didn't! - * - * We have to assume that across suspend/resume (or other loss - * of control) that the contents of our pinned buffers has been - * lost, replaced by garbage. Since this doesn't always happen, - * let's poison such state so that we more quickly spot when - * we falsely assume it has been preserved. - */ - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); - - reset_csb_pointers(engine); - - /* - * The kernel_context HWSP is stored in the status_page. As above, - * that may be lost on resume/initialisation, and so we need to - * reset the value in the HWSP. 
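The pointer arithmetic in reset_csb_pointers() above reduces to simple modular arithmetic: because process_csb() pre-increments the head before reading a slot, parking both the head and the faked write pointer at csb_size - 1 makes slot 0 the first one examined once the HW publishes its first post-reset event. A standalone check of just that arithmetic, with 12 used only as a stand-in entry count:

#include <assert.h>

int main(void)
{
	const unsigned int num_entries = 12;	/* stand-in CSB size */
	unsigned int head = num_entries - 1;	/* reset_value */
	unsigned int tail = num_entries - 1;	/* faked HW write pointer */

	assert(head == tail);		/* nothing to consume right after reset */

	tail = 0;			/* HW writes its first event into slot [0] */
	if (++head == num_entries)	/* same pre-increment as the CSB loop */
		head = 0;

	assert(head == 0);		/* first slot examined is indeed [0] */
	return 0;
}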
- */ - intel_timeline_reset_seqno(engine->kernel_context->timeline); - - /* And scrub the dirty cachelines for the HWSP */ - clflush_cache_range(engine->status_page.addr, PAGE_SIZE); -} - -static void enable_error_interrupt(struct intel_engine_cs *engine) -{ - u32 status; - - engine->execlists.error_interrupt = 0; - ENGINE_WRITE(engine, RING_EMR, ~0u); - ENGINE_WRITE(engine, RING_EIR, ~0u); /* clear all existing errors */ - - status = ENGINE_READ(engine, RING_ESR); - if (unlikely(status)) { - drm_err(&engine->i915->drm, - "engine '%s' resumed still in error: %08x\n", - engine->name, status); - __intel_gt_reset(engine->gt, engine->mask); - } - - /* - * On current gen8+, we have 2 signals to play with - * - * - I915_ERROR_INSTUCTION (bit 0) - * - * Generate an error if the command parser encounters an invalid - * instruction - * - * This is a fatal error. - * - * - CP_PRIV (bit 2) - * - * Generate an error on privilege violation (where the CP replaces - * the instruction with a no-op). This also fires for writes into - * read-only scratch pages. - * - * This is a non-fatal error, parsing continues. - * - * * there are a few others defined for odd HW that we do not use - * - * Since CP_PRIV fires for cases where we have chosen to ignore the - * error (as the HW is validating and suppressing the mistakes), we - * only unmask the instruction error bit. - */ - ENGINE_WRITE(engine, RING_EMR, ~I915_ERROR_INSTRUCTION); -} - -static void enable_execlists(struct intel_engine_cs *engine) -{ - u32 mode; - - assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); - - intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ - - if (INTEL_GEN(engine->i915) >= 11) - mode = _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE); - else - mode = _MASKED_BIT_ENABLE(GFX_RUN_LIST_ENABLE); - ENGINE_WRITE_FW(engine, RING_MODE_GEN7, mode); - - ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); - - ENGINE_WRITE_FW(engine, - RING_HWS_PGA, - i915_ggtt_offset(engine->status_page.vma)); - ENGINE_POSTING_READ(engine, RING_HWS_PGA); - - enable_error_interrupt(engine); - - engine->context_tag = GENMASK(BITS_PER_LONG - 2, 0); -} - -static bool unexpected_starting_state(struct intel_engine_cs *engine) -{ - bool unexpected = false; - - if (ENGINE_READ_FW(engine, RING_MI_MODE) & STOP_RING) { - drm_dbg(&engine->i915->drm, - "STOP_RING still set in RING_MI_MODE\n"); - unexpected = true; - } - - return unexpected; -} - -static int execlists_resume(struct intel_engine_cs *engine) -{ - intel_mocs_init_engine(engine); - - intel_breadcrumbs_reset(engine->breadcrumbs); - - if (GEM_SHOW_DEBUG() && unexpected_starting_state(engine)) { - struct drm_printer p = drm_debug_printer(__func__); - - intel_engine_dump(engine, &p, NULL); - } - - enable_execlists(engine); - - return 0; -} -static void execlists_reset_prepare(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - unsigned long flags; - - ENGINE_TRACE(engine, "depth<-%d\n", - atomic_read(&execlists->tasklet.count)); - - /* - * Prevent request submission to the hardware until we have - * completed the reset in i915_gem_reset_finish(). If a request - * is completed by one engine, it may then queue a request - * to a second via its execlists->tasklet *just* as we are - * calling engine->resume() and also writing the ELSP. - * Turning off the execlists->tasklet until the reset is over - * prevents the race. 
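The empty lock/unlock pair that follows this comment is the usual idiom for letting any submitter already inside the critical section drain out before the reset proceeds. A minimal illustration, using a hypothetical lock and stop flag rather than the engine's own state:

#include <linux/spinlock.h>
#include <linux/types.h>

/* Illustrative only: quiesce concurrent submitters before touching HW. */
static void quiesce_submission(spinlock_t *lock, bool *stop)
{
	unsigned long flags;

	WRITE_ONCE(*stop, true);	/* newcomers see the flag and back off */

	/*
	 * Anyone who sampled *stop before we set it may still be inside
	 * the locked region; taking and dropping the lock waits for them
	 * to leave, after which no further submission reaches the HW.
	 */
	spin_lock_irqsave(lock, flags);
	spin_unlock_irqrestore(lock, flags);
}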
- */ - __tasklet_disable_sync_once(&execlists->tasklet); - GEM_BUG_ON(!reset_in_progress(execlists)); - - /* And flush any current direct submission. */ - spin_lock_irqsave(&engine->active.lock, flags); - spin_unlock_irqrestore(&engine->active.lock, flags); - - /* - * We stop engines, otherwise we might get failed reset and a - * dead gpu (on elk). Also as modern gpu as kbl can suffer - * from system hang if batchbuffer is progressing when - * the reset is issued, regardless of READY_TO_RESET ack. - * Thus assume it is best to stop engines on all gens - * where we have a gpu reset. - * - * WaKBLVECSSemaphoreWaitPoll:kbl (on ALL_ENGINES) - * - * FIXME: Wa for more modern gens needs to be validated - */ - ring_set_paused(engine, 1); - intel_engine_stop_cs(engine); - - engine->execlists.reset_ccid = active_ccid(engine); -} - -static void __reset_stop_ring(u32 *regs, const struct intel_engine_cs *engine) -{ - int x; - - x = lrc_ring_mi_mode(engine); - if (x != -1) { - regs[x + 1] &= ~STOP_RING; - regs[x + 1] |= STOP_RING << 16; - } -} - -static void __execlists_reset_reg_state(const struct intel_context *ce, - const struct intel_engine_cs *engine) -{ - u32 *regs = ce->lrc_reg_state; - - __reset_stop_ring(regs, engine); + /* Verify that we can handle failure to setup the wa_ctx */ + if (err || i915_inject_probe_error(engine->i915, -ENODEV)) + lrc_fini_wa_ctx(engine); } -static void __execlists_reset(struct intel_engine_cs *engine, bool stalled) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct intel_context *ce; - struct i915_request *rq; - u32 head; - - mb(); /* paranoia: read the CSB pointers from after the reset */ - clflush(execlists->csb_write); - mb(); - - process_csb(engine); /* drain preemption events */ - - /* Following the reset, we need to reload the CSB read/write pointers */ - reset_csb_pointers(engine); - - /* - * Save the currently executing context, even if we completed - * its request, it was still running at the time of the - * reset and will have been clobbered. - */ - rq = active_context(engine, engine->execlists.reset_ccid); - if (!rq) - goto unwind; - - ce = rq->context; - GEM_BUG_ON(!i915_vma_is_pinned(ce->state)); - - if (i915_request_completed(rq)) { - /* Idle context; tidy up the ring so we can restart afresh */ - head = intel_ring_wrap(ce->ring, rq->tail); - goto out_replay; - } - - /* We still have requests in-flight; the engine should be active */ - GEM_BUG_ON(!intel_engine_pm_is_awake(engine)); - - /* Context has requests still in-flight; it should not be idle! */ - GEM_BUG_ON(i915_active_is_idle(&ce->active)); - - rq = active_request(ce->timeline, rq); - head = intel_ring_wrap(ce->ring, rq->head); - GEM_BUG_ON(head == ce->ring->tail); - - /* - * If this request hasn't started yet, e.g. it is waiting on a - * semaphore, we need to avoid skipping the request or else we - * break the signaling chain. However, if the context is corrupt - * the request will not restart and we will be stuck with a wedged - * device. It is quite often the case that if we issue a reset - * while the GPU is loading the context image, that the context - * image becomes corrupt. - * - * Otherwise, if we have not started yet, the request should replay - * perfectly and we do not need to flag the result as being erroneous. - */ - if (!i915_request_started(rq)) - goto out_replay; - - /* - * If the request was innocent, we leave the request in the ELSP - * and will try to replay it on restarting. 
The context image may - * have been corrupted by the reset, in which case we may have - * to service a new GPU hang, but more likely we can continue on - * without impact. - * - * If the request was guilty, we presume the context is corrupt - * and have to at least restore the RING register in the context - * image back to the expected values to skip over the guilty request. - */ - __i915_request_reset(rq, stalled); - - /* - * We want a simple context + ring to execute the breadcrumb update. - * We cannot rely on the context being intact across the GPU hang, - * so clear it and rebuild just what we need for the breadcrumb. - * All pending requests for this context will be zapped, and any - * future request will be after userspace has had the opportunity - * to recreate its own state. - */ -out_replay: - ENGINE_TRACE(engine, "replay {head:%04x, tail:%04x}\n", - head, ce->ring->tail); - __execlists_reset_reg_state(ce, engine); - __execlists_update_reg_state(ce, engine, head); - ce->lrc.desc |= CTX_DESC_FORCE_RESTORE; /* paranoid: GPU was reset! */ - -unwind: - /* Push back any incomplete requests for replay after the reset. */ - cancel_port_requests(execlists); - __unwind_incomplete_requests(engine); -} - -static void execlists_reset_rewind(struct intel_engine_cs *engine, bool stalled) -{ - unsigned long flags; - - ENGINE_TRACE(engine, "\n"); - - spin_lock_irqsave(&engine->active.lock, flags); - - __execlists_reset(engine, stalled); - - spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static void nop_submission_tasklet(unsigned long data) -{ - struct intel_engine_cs * const engine = (struct intel_engine_cs *)data; - - /* The driver is wedged; don't process any more events. */ - WRITE_ONCE(engine->execlists.queue_priority_hint, INT_MIN); -} - -static void execlists_reset_cancel(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct i915_request *rq, *rn; - struct rb_node *rb; - unsigned long flags; - - ENGINE_TRACE(engine, "\n"); - - /* - * Before we call engine->cancel_requests(), we should have exclusive - * access to the submission state. This is arranged for us by the - * caller disabling the interrupt generation, the tasklet and other - * threads that may then access the same state, giving us a free hand - * to reset state. However, we still need to let lockdep be aware that - * we know this state may be accessed in hardirq context, so we - * disable the irq around this manipulation and we want to keep - * the spinlock focused on its duties and not accidentally conflate - * coverage to the submission's irq state. (Similarly, although we - * shouldn't need to disable irq around the manipulation of the - * submission's irq state, we also wish to remind ourselves that - * it is irq state.) - */ - spin_lock_irqsave(&engine->active.lock, flags); - - __execlists_reset(engine, true); - - /* Mark all executing requests as skipped. */ - list_for_each_entry(rq, &engine->active.requests, sched.link) - mark_eio(rq); - intel_engine_signal_breadcrumbs(engine); - - /* Flush the queued requests to the timeline list (for retiring). 
*/ - while ((rb = rb_first_cached(&execlists->queue))) { - struct i915_priolist *p = to_priolist(rb); - int i; - - priolist_for_each_request_consume(rq, rn, p, i) { - mark_eio(rq); - __i915_request_submit(rq); - } - - rb_erase_cached(&p->node, &execlists->queue); - i915_priolist_free(p); - } - - /* On-hold requests will be flushed to timeline upon their release */ - list_for_each_entry(rq, &engine->active.hold, sched.link) - mark_eio(rq); - - /* Cancel all attached virtual engines */ - while ((rb = rb_first_cached(&execlists->virtual))) { - struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - - rb_erase_cached(rb, &execlists->virtual); - RB_CLEAR_NODE(rb); - - spin_lock(&ve->base.active.lock); - rq = fetch_and_zero(&ve->request); - if (rq) { - mark_eio(rq); - - rq->engine = engine; - __i915_request_submit(rq); - i915_request_put(rq); - - ve->base.execlists.queue_priority_hint = INT_MIN; - } - spin_unlock(&ve->base.active.lock); - } - - /* Remaining _unready_ requests will be nop'ed when submitted */ - - execlists->queue_priority_hint = INT_MIN; - execlists->queue = RB_ROOT_CACHED; - - GEM_BUG_ON(__tasklet_is_enabled(&execlists->tasklet)); - execlists->tasklet.func = nop_submission_tasklet; - - spin_unlock_irqrestore(&engine->active.lock, flags); -} - -static void execlists_reset_finish(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - - /* - * After a GPU reset, we may have requests to replay. Do so now while - * we still have the forcewake to be sure that the GPU is not allowed - * to sleep before we restart and reload a context. - */ - GEM_BUG_ON(!reset_in_progress(execlists)); - if (!RB_EMPTY_ROOT(&execlists->queue.rb_root)) - execlists->tasklet.func(execlists->tasklet.data); - - if (__tasklet_enable(&execlists->tasklet)) - /* And kick in case we missed a new request submission. */ - tasklet_hi_schedule(&execlists->tasklet); - ENGINE_TRACE(engine, "depth->%d\n", - atomic_read(&execlists->tasklet.count)); -} - -static int gen8_emit_bb_start_noarb(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* - * WaDisableCtxRestoreArbitration:bdw,chv - * - * We don't need to perform MI_ARB_ENABLE as often as we do (in - * particular all the gen that do not need the w/a at all!), if we - * took care to make sure that on every switch into this context - * (both ordinary and for preemption) that arbitrartion was enabled - * we would be fine. However, for gen8 there is another w/a that - * requires us to not preempt inside GPGPU execution, so we keep - * arbitration disabled for gen8 batches. Arbitration will be - * re-enabled before we close the request - * (engine->emit_fini_breadcrumb). - */ - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - - /* FIXME(BDW+): Address space and security selectors. */ - *cs++ = MI_BATCH_BUFFER_START_GEN8 | - (flags & I915_DISPATCH_SECURE ? 0 : BIT(8)); - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); - - intel_ring_advance(rq, cs); - - return 0; -} - -static int gen8_emit_bb_start(struct i915_request *rq, - u64 offset, u32 len, - const unsigned int flags) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - *cs++ = MI_BATCH_BUFFER_START_GEN8 | - (flags & I915_DISPATCH_SECURE ? 
0 : BIT(8)); - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); - - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - *cs++ = MI_NOOP; - - intel_ring_advance(rq, cs); - - return 0; -} - -static void gen8_logical_ring_enable_irq(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, - ~(engine->irq_enable_mask | engine->irq_keep_mask)); - ENGINE_POSTING_READ(engine, RING_IMR); -} - -static void gen8_logical_ring_disable_irq(struct intel_engine_cs *engine) -{ - ENGINE_WRITE(engine, RING_IMR, ~engine->irq_keep_mask); -} - -static int gen8_emit_flush(struct i915_request *request, u32 mode) -{ - u32 cmd, *cs; - - cs = intel_ring_begin(request, 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - cmd = MI_FLUSH_DW + 1; - - /* We always require a command barrier so that subsequent - * commands, such as breadcrumb interrupts, are strictly ordered - * wrt the contents of the write cache being flushed to memory - * (and thus being coherent from the CPU). - */ - cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - - if (mode & EMIT_INVALIDATE) { - cmd |= MI_INVALIDATE_TLB; - if (request->engine->class == VIDEO_DECODE_CLASS) - cmd |= MI_INVALIDATE_BSD; - } - - *cs++ = cmd; - *cs++ = LRC_PPHWSP_SCRATCH_ADDR; - *cs++ = 0; /* upper addr */ - *cs++ = 0; /* value */ - intel_ring_advance(request, cs); - - return 0; -} - -static int gen8_emit_flush_render(struct i915_request *request, - u32 mode) -{ - bool vf_flush_wa = false, dc_flush_wa = false; - u32 *cs, flags = 0; - int len; - - flags |= PIPE_CONTROL_CS_STALL; - - if (mode & EMIT_FLUSH) { - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; - } - - if (mode & EMIT_INVALIDATE) { - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - - /* - * On GEN9: before VF_CACHE_INVALIDATE we need to emit a NULL - * pipe control. 
- */ - if (IS_GEN(request->engine->i915, 9)) - vf_flush_wa = true; - - /* WaForGAMHang:kbl */ - if (IS_KBL_GT_REVID(request->engine->i915, 0, KBL_REVID_B0)) - dc_flush_wa = true; - } - - len = 6; - - if (vf_flush_wa) - len += 6; - - if (dc_flush_wa) - len += 12; - - cs = intel_ring_begin(request, len); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - if (vf_flush_wa) - cs = gen8_emit_pipe_control(cs, 0, 0); - - if (dc_flush_wa) - cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_DC_FLUSH_ENABLE, - 0); - - cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); - - if (dc_flush_wa) - cs = gen8_emit_pipe_control(cs, PIPE_CONTROL_CS_STALL, 0); - - intel_ring_advance(request, cs); - - return 0; -} - -static int gen11_emit_flush_render(struct i915_request *request, - u32 mode) -{ - if (mode & EMIT_FLUSH) { - u32 *cs; - u32 flags = 0; - - flags |= PIPE_CONTROL_CS_STALL; - - flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - - cs = intel_ring_begin(request, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); - intel_ring_advance(request, cs); - } - - if (mode & EMIT_INVALIDATE) { - u32 *cs; - u32 flags = 0; - - flags |= PIPE_CONTROL_CS_STALL; - - flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_QW_WRITE; - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - - cs = intel_ring_begin(request, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); - intel_ring_advance(request, cs); - } - - return 0; -} - -static u32 preparser_disable(bool state) -{ - return MI_ARB_CHECK | 1 << 8 | state; -} - -static i915_reg_t aux_inv_reg(const struct intel_engine_cs *engine) -{ - static const i915_reg_t vd[] = { - GEN12_VD0_AUX_NV, - GEN12_VD1_AUX_NV, - GEN12_VD2_AUX_NV, - GEN12_VD3_AUX_NV, - }; - - static const i915_reg_t ve[] = { - GEN12_VE0_AUX_NV, - GEN12_VE1_AUX_NV, - }; - - if (engine->class == VIDEO_DECODE_CLASS) - return vd[engine->instance]; - - if (engine->class == VIDEO_ENHANCEMENT_CLASS) - return ve[engine->instance]; - - GEM_BUG_ON("unknown aux_inv_reg\n"); - - return INVALID_MMIO_REG; -} - -static u32 * -gen12_emit_aux_table_inv(const i915_reg_t inv_reg, u32 *cs) -{ - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = i915_mmio_reg_offset(inv_reg); - *cs++ = AUX_INV; - *cs++ = MI_NOOP; - - return cs; -} - -static int gen12_emit_flush_render(struct i915_request *request, - u32 mode) -{ - if (mode & EMIT_FLUSH) { - u32 flags = 0; - u32 *cs; - - flags |= PIPE_CONTROL_TILE_CACHE_FLUSH; - flags |= PIPE_CONTROL_FLUSH_L3; - flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH; - flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH; - /* Wa_1409600907:tgl */ - flags |= PIPE_CONTROL_DEPTH_STALL; - flags |= PIPE_CONTROL_DC_FLUSH_ENABLE; - flags |= PIPE_CONTROL_FLUSH_ENABLE; - - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - flags |= PIPE_CONTROL_QW_WRITE; - - flags |= PIPE_CONTROL_CS_STALL; - - cs = intel_ring_begin(request, 6); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - cs = 
gen12_emit_pipe_control(cs, - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - flags, LRC_PPHWSP_SCRATCH_ADDR); - intel_ring_advance(request, cs); - } - - if (mode & EMIT_INVALIDATE) { - u32 flags = 0; - u32 *cs; - - flags |= PIPE_CONTROL_COMMAND_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TLB_INVALIDATE; - flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE; - flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE; - - flags |= PIPE_CONTROL_STORE_DATA_INDEX; - flags |= PIPE_CONTROL_QW_WRITE; - - flags |= PIPE_CONTROL_CS_STALL; - - cs = intel_ring_begin(request, 8 + 4); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - /* - * Prevent the pre-parser from skipping past the TLB - * invalidate and loading a stale page for the batch - * buffer / request payload. - */ - *cs++ = preparser_disable(true); - - cs = gen8_emit_pipe_control(cs, flags, LRC_PPHWSP_SCRATCH_ADDR); - - /* hsdes: 1809175790 */ - cs = gen12_emit_aux_table_inv(GEN12_GFX_CCS_AUX_NV, cs); - - *cs++ = preparser_disable(false); - intel_ring_advance(request, cs); - } - - return 0; -} - -static int gen12_emit_flush(struct i915_request *request, u32 mode) -{ - intel_engine_mask_t aux_inv = 0; - u32 cmd, *cs; - - cmd = 4; - if (mode & EMIT_INVALIDATE) - cmd += 2; - if (mode & EMIT_INVALIDATE) - aux_inv = request->engine->mask & ~BIT(BCS0); - if (aux_inv) - cmd += 2 * hweight8(aux_inv) + 2; - - cs = intel_ring_begin(request, cmd); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - if (mode & EMIT_INVALIDATE) - *cs++ = preparser_disable(true); - - cmd = MI_FLUSH_DW + 1; - - /* We always require a command barrier so that subsequent - * commands, such as breadcrumb interrupts, are strictly ordered - * wrt the contents of the write cache being flushed to memory - * (and thus being coherent from the CPU). - */ - cmd |= MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW; - - if (mode & EMIT_INVALIDATE) { - cmd |= MI_INVALIDATE_TLB; - if (request->engine->class == VIDEO_DECODE_CLASS) - cmd |= MI_INVALIDATE_BSD; - } - - *cs++ = cmd; - *cs++ = LRC_PPHWSP_SCRATCH_ADDR; - *cs++ = 0; /* upper addr */ - *cs++ = 0; /* value */ - - if (aux_inv) { /* hsdes: 1809175790 */ - struct intel_engine_cs *engine; - unsigned int tmp; - - *cs++ = MI_LOAD_REGISTER_IMM(hweight8(aux_inv)); - for_each_engine_masked(engine, request->engine->gt, - aux_inv, tmp) { - *cs++ = i915_mmio_reg_offset(aux_inv_reg(engine)); - *cs++ = AUX_INV; - } - *cs++ = MI_NOOP; - } - - if (mode & EMIT_INVALIDATE) - *cs++ = preparser_disable(false); - - intel_ring_advance(request, cs); - - return 0; -} - -static void assert_request_valid(struct i915_request *rq) -{ - struct intel_ring *ring __maybe_unused = rq->ring; - - /* Can we unwind this request without appearing to go forwards? */ - GEM_BUG_ON(intel_ring_direction(ring, rq->wa_tail, rq->head) <= 0); -} - -/* - * Reserve space for 2 NOOPs at the end of each request to be - * used as a workaround for not being allowed to do lite - * restore with HEAD==TAIL (WaIdleLiteRestore). - */ -static u32 *gen8_emit_wa_tail(struct i915_request *request, u32 *cs) -{ - /* Ensure there's always at least one preemption point per-request. 
*/ - *cs++ = MI_ARB_CHECK; - *cs++ = MI_NOOP; - request->wa_tail = intel_ring_offset(request, cs); - - /* Check that entire request is less than half the ring */ - assert_request_valid(request); - - return cs; -} - -static u32 *emit_preempt_busywait(struct i915_request *request, u32 *cs) -{ - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_EQ_SDD; - *cs++ = 0; - *cs++ = intel_hws_preempt_address(request->engine); - *cs++ = 0; - - return cs; -} - -static __always_inline u32* -gen8_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) -{ - *cs++ = MI_USER_INTERRUPT; - - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - if (intel_engine_has_semaphores(request->engine)) - cs = emit_preempt_busywait(request, cs); - - request->tail = intel_ring_offset(request, cs); - assert_ring_tail_valid(request->ring, request->tail); - - return gen8_emit_wa_tail(request, cs); -} - -static u32 *emit_xcs_breadcrumb(struct i915_request *rq, u32 *cs) -{ - return gen8_emit_ggtt_write(cs, rq->fence.seqno, hwsp_offset(rq), 0); -} - -static u32 *gen8_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) -{ - return gen8_emit_fini_breadcrumb_tail(rq, emit_xcs_breadcrumb(rq, cs)); -} - -static u32 *gen8_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) -{ - cs = gen8_emit_pipe_control(cs, - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE, - 0); - - /* XXX flush+write+CS_STALL all in one upsets gem_concurrent_blt:kbl */ - cs = gen8_emit_ggtt_write_rcs(cs, - request->fence.seqno, - hwsp_offset(request), - PIPE_CONTROL_FLUSH_ENABLE | - PIPE_CONTROL_CS_STALL); - - return gen8_emit_fini_breadcrumb_tail(request, cs); -} - -static u32 * -gen11_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) -{ - cs = gen8_emit_ggtt_write_rcs(cs, - request->fence.seqno, - hwsp_offset(request), - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); - - return gen8_emit_fini_breadcrumb_tail(request, cs); -} - -/* - * Note that the CS instruction pre-parser will not stall on the breadcrumb - * flush and will continue pre-fetching the instructions after it before the - * memory sync is completed. On pre-gen12 HW, the pre-parser will stop at - * BB_START/END instructions, so, even though we might pre-fetch the pre-amble - * of the next request before the memory has been flushed, we're guaranteed that - * we won't access the batch itself too early. - * However, on gen12+ the parser can pre-fetch across the BB_START/END commands, - * so, if the current request is modifying an instruction in the next request on - * the same intel_context, we might pre-fetch and then execute the pre-update - * instruction. To avoid this, the users of self-modifying code should either - * disable the parser around the code emitting the memory writes, via a new flag - * added to MI_ARB_CHECK, or emit the writes from a different intel_context. For - * the in-kernel use-cases we've opted to use a separate context, see - * reloc_gpu() as an example. - * All the above applies only to the instructions themselves. Non-inline data - * used by the instructions is not pre-fetched. 
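On gen12+ the advice above (disable the pre-parser around code that patches instructions a later request will execute) maps onto bracketing the emitted payload with the preparser_disable() toggle defined earlier in this file. The sketch below shows only the shape of that bracketing; emit_selfmod_writes() is a placeholder for whatever actually emits the patching stores, not a real function in the driver:

/* Sketch: guard self-modifying writes with the pre-parser toggle. */
static u32 *emit_guarded_selfmod(struct i915_request *rq, u32 *cs)
{
	*cs++ = preparser_disable(true);   /* stop pre-fetch past this point */

	cs = emit_selfmod_writes(rq, cs);  /* placeholder: the patching writes */

	*cs++ = preparser_disable(false);  /* let the pre-parser run ahead again */

	return cs;
}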
- */ - -static u32 *gen12_emit_preempt_busywait(struct i915_request *request, u32 *cs) -{ - *cs++ = MI_SEMAPHORE_WAIT_TOKEN | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_EQ_SDD; - *cs++ = 0; - *cs++ = intel_hws_preempt_address(request->engine); - *cs++ = 0; - *cs++ = 0; - *cs++ = MI_NOOP; - - return cs; -} - -static __always_inline u32* -gen12_emit_fini_breadcrumb_tail(struct i915_request *request, u32 *cs) -{ - *cs++ = MI_USER_INTERRUPT; - - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - if (intel_engine_has_semaphores(request->engine)) - cs = gen12_emit_preempt_busywait(request, cs); - - request->tail = intel_ring_offset(request, cs); - assert_ring_tail_valid(request->ring, request->tail); - - return gen8_emit_wa_tail(request, cs); -} - -static u32 *gen12_emit_fini_breadcrumb(struct i915_request *rq, u32 *cs) -{ - /* XXX Stalling flush before seqno write; post-sync not */ - cs = emit_xcs_breadcrumb(rq, __gen8_emit_flush_dw(cs, 0, 0, 0)); - return gen12_emit_fini_breadcrumb_tail(rq, cs); -} - -static u32 * -gen12_emit_fini_breadcrumb_rcs(struct i915_request *request, u32 *cs) -{ - cs = gen12_emit_ggtt_write_rcs(cs, - request->fence.seqno, - hwsp_offset(request), - PIPE_CONTROL0_HDC_PIPELINE_FLUSH, - PIPE_CONTROL_CS_STALL | - PIPE_CONTROL_TILE_CACHE_FLUSH | - PIPE_CONTROL_FLUSH_L3 | - PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH | - PIPE_CONTROL_DEPTH_CACHE_FLUSH | - /* Wa_1409600907:tgl */ - PIPE_CONTROL_DEPTH_STALL | - PIPE_CONTROL_DC_FLUSH_ENABLE | - PIPE_CONTROL_FLUSH_ENABLE); - - return gen12_emit_fini_breadcrumb_tail(request, cs); -} - -static void execlists_park(struct intel_engine_cs *engine) -{ - cancel_timer(&engine->execlists.timer); - cancel_timer(&engine->execlists.preempt); -} - -void intel_execlists_set_default_submission(struct intel_engine_cs *engine) -{ - engine->submit_request = execlists_submit_request; - engine->schedule = i915_schedule; - engine->execlists.tasklet.func = execlists_submission_tasklet; - - engine->reset.prepare = execlists_reset_prepare; - engine->reset.rewind = execlists_reset_rewind; - engine->reset.cancel = execlists_reset_cancel; - engine->reset.finish = execlists_reset_finish; - - engine->park = execlists_park; - engine->unpark = NULL; - - engine->flags |= I915_ENGINE_SUPPORTS_STATS; - if (!intel_vgpu_active(engine->i915)) { - engine->flags |= I915_ENGINE_HAS_SEMAPHORES; - if (HAS_LOGICAL_RING_PREEMPTION(engine->i915)) { - engine->flags |= I915_ENGINE_HAS_PREEMPTION; - if (IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - engine->flags |= I915_ENGINE_HAS_TIMESLICES; - } - } - - if (INTEL_GEN(engine->i915) >= 12) - engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; - - if (intel_engine_has_preemption(engine)) - engine->emit_bb_start = gen8_emit_bb_start; - else - engine->emit_bb_start = gen8_emit_bb_start_noarb; -} - -static void execlists_shutdown(struct intel_engine_cs *engine) -{ - /* Synchronise with residual timers and any softirq they raise */ - del_timer_sync(&engine->execlists.timer); - del_timer_sync(&engine->execlists.preempt); - tasklet_kill(&engine->execlists.tasklet); -} - -static void execlists_release(struct intel_engine_cs *engine) -{ - engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ - - execlists_shutdown(engine); - - intel_engine_cleanup_common(engine); - lrc_destroy_wa_ctx(engine); -} - -static void -logical_ring_default_vfuncs(struct intel_engine_cs *engine) -{ - /* Default vfuncs which can be overriden by each engine. 
*/ - - engine->resume = execlists_resume; - - engine->cops = &execlists_context_ops; - engine->request_alloc = execlists_request_alloc; - - engine->emit_flush = gen8_emit_flush; - engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; - engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb; - if (INTEL_GEN(engine->i915) >= 12) { - engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb; - engine->emit_flush = gen12_emit_flush; - } - engine->set_default_submission = intel_execlists_set_default_submission; - - if (INTEL_GEN(engine->i915) < 11) { - engine->irq_enable = gen8_logical_ring_enable_irq; - engine->irq_disable = gen8_logical_ring_disable_irq; - } else { - /* - * TODO: On Gen11 interrupt masks need to be clear - * to allow C6 entry. Keep interrupts enabled at - * and take the hit of generating extra interrupts - * until a more refined solution exists. - */ - } -} - -static inline void -logical_ring_default_irqs(struct intel_engine_cs *engine) -{ - unsigned int shift = 0; - - if (INTEL_GEN(engine->i915) < 11) { - const u8 irq_shifts[] = { - [RCS0] = GEN8_RCS_IRQ_SHIFT, - [BCS0] = GEN8_BCS_IRQ_SHIFT, - [VCS0] = GEN8_VCS0_IRQ_SHIFT, - [VCS1] = GEN8_VCS1_IRQ_SHIFT, - [VECS0] = GEN8_VECS_IRQ_SHIFT, - }; - - shift = irq_shifts[engine->id]; - } - - engine->irq_enable_mask = GT_RENDER_USER_INTERRUPT << shift; - engine->irq_keep_mask = GT_CONTEXT_SWITCH_INTERRUPT << shift; - engine->irq_keep_mask |= GT_CS_MASTER_ERROR_INTERRUPT << shift; - engine->irq_keep_mask |= GT_WAIT_SEMAPHORE_INTERRUPT << shift; -} - -static void rcs_submission_override(struct intel_engine_cs *engine) -{ - switch (INTEL_GEN(engine->i915)) { - case 12: - engine->emit_flush = gen12_emit_flush_render; - engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; - break; - case 11: - engine->emit_flush = gen11_emit_flush_render; - engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; - break; - default: - engine->emit_flush = gen8_emit_flush_render; - engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; - break; - } -} - -int intel_execlists_submission_setup(struct intel_engine_cs *engine) -{ - struct intel_engine_execlists * const execlists = &engine->execlists; - struct drm_i915_private *i915 = engine->i915; - struct intel_uncore *uncore = engine->uncore; - u32 base = engine->mmio_base; - - tasklet_init(&engine->execlists.tasklet, - execlists_submission_tasklet, (unsigned long)engine); - timer_setup(&engine->execlists.timer, execlists_timeslice, 0); - timer_setup(&engine->execlists.preempt, execlists_preempt, 0); - - logical_ring_default_vfuncs(engine); - logical_ring_default_irqs(engine); - - if (engine->class == RENDER_CLASS) - rcs_submission_override(engine); - - if (intel_init_workaround_bb(engine)) - /* - * We continue even if we fail to initialize WA batch - * because we only expect rare glitches but nothing - * critical to prevent us from using GPU - */ - drm_err(&i915->drm, "WA batch buffer initialization failed\n"); - - if (HAS_LOGICAL_RING_ELSQ(i915)) { - execlists->submit_reg = uncore->regs + - i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(base)); - execlists->ctrl_reg = uncore->regs + - i915_mmio_reg_offset(RING_EXECLIST_CONTROL(base)); - } else { - execlists->submit_reg = uncore->regs + - i915_mmio_reg_offset(RING_ELSP(base)); - } - - execlists->csb_status = - (u64 *)&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; - - execlists->csb_write = - &engine->status_page.addr[intel_hws_csb_write_index(i915)]; - - if (INTEL_GEN(i915) < 11) - execlists->csb_size = GEN8_CSB_ENTRIES; - 
else - execlists->csb_size = GEN11_CSB_ENTRIES; - - if (INTEL_GEN(engine->i915) >= 11) { - execlists->ccid |= engine->instance << (GEN11_ENGINE_INSTANCE_SHIFT - 32); - execlists->ccid |= engine->class << (GEN11_ENGINE_CLASS_SHIFT - 32); - } - - /* Finally, take ownership and responsibility for cleanup! */ - engine->sanitize = execlists_sanitize; - engine->release = execlists_release; - - return 0; -} - -static void init_common_reg_state(u32 * const regs, - const struct intel_engine_cs *engine, - const struct intel_ring *ring, - bool inhibit) -{ - u32 ctl; - - ctl = _MASKED_BIT_ENABLE(CTX_CTRL_INHIBIT_SYN_CTX_SWITCH); - ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - if (inhibit) - ctl |= CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT; - if (INTEL_GEN(engine->i915) < 11) - ctl |= _MASKED_BIT_DISABLE(CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT | - CTX_CTRL_RS_CTX_ENABLE); - regs[CTX_CONTEXT_CONTROL] = ctl; - - regs[CTX_RING_CTL] = RING_CTL_SIZE(ring->size) | RING_VALID; - regs[CTX_TIMESTAMP] = 0; -} - -static void init_wa_bb_reg_state(u32 * const regs, - const struct intel_engine_cs *engine) -{ - const struct i915_ctx_workarounds * const wa_ctx = &engine->wa_ctx; - - if (wa_ctx->per_ctx.size) { - const u32 ggtt_offset = i915_ggtt_offset(wa_ctx->vma); - - GEM_BUG_ON(lrc_ring_wa_bb_per_ctx(engine) == -1); - regs[lrc_ring_wa_bb_per_ctx(engine) + 1] = - (ggtt_offset + wa_ctx->per_ctx.offset) | 0x01; - } - - if (wa_ctx->indirect_ctx.size) { - lrc_ring_setup_indirect_ctx(regs, engine, - i915_ggtt_offset(wa_ctx->vma) + - wa_ctx->indirect_ctx.offset, - wa_ctx->indirect_ctx.size); - } -} - -static void init_ppgtt_reg_state(u32 *regs, const struct i915_ppgtt *ppgtt) -{ - if (i915_vm_is_4lvl(&ppgtt->vm)) { - /* 64b PPGTT (48bit canonical) - * PDP0_DESCRIPTOR contains the base address to PML4 and - * other PDP Descriptors are ignored. - */ - ASSIGN_CTX_PML4(ppgtt, regs); - } else { - ASSIGN_CTX_PDP(ppgtt, regs, 3); - ASSIGN_CTX_PDP(ppgtt, regs, 2); - ASSIGN_CTX_PDP(ppgtt, regs, 1); - ASSIGN_CTX_PDP(ppgtt, regs, 0); - } -} - -static struct i915_ppgtt *vm_alias(struct i915_address_space *vm) -{ - if (i915_is_ggtt(vm)) - return i915_vm_to_ggtt(vm)->alias; - else - return i915_vm_to_ppgtt(vm); -} - -static void execlists_init_reg_state(u32 *regs, - const struct intel_context *ce, - const struct intel_engine_cs *engine, - const struct intel_ring *ring, - bool inhibit) -{ - /* - * A context is actually a big batch buffer with several - * MI_LOAD_REGISTER_IMM commands followed by (reg, value) pairs. The - * values we are setting here are only for the first context restore: - * on a subsequent save, the GPU will recreate this batchbuffer with new - * values (including all the missing MI_LOAD_REGISTER_IMM commands that - * we are not initializing here). - * - * Must keep consistent with virtual_update_register_offsets(). 
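The comment above treats the context image as a pre-built batch of MI_LOAD_REGISTER_IMM pairs; an illustrative (not engine-exact) shape of what execlists_init_reg_state() fills in, with the real offsets coming from reg_offsets()/set_offsets():

	/*
	 * regs[...]                 = MI_LOAD_REGISTER_IMM(N)
	 * regs[CTX_CONTEXT_CONTROL] = masked-bit enables from init_common_reg_state()
	 * regs[CTX_RING_HEAD]       = head
	 * regs[CTX_RING_TAIL]       = tail
	 * regs[CTX_RING_CTL]        = RING_CTL_SIZE(ring->size) | RING_VALID
	 * regs[CTX_TIMESTAMP]       = 0
	 * ... PDP/PML4 descriptors and WA batch-buffer pointers follow ...
	 */
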
- */ - set_offsets(regs, reg_offsets(engine), engine, inhibit); - - init_common_reg_state(regs, engine, ring, inhibit); - init_ppgtt_reg_state(regs, vm_alias(ce->vm)); - - init_wa_bb_reg_state(regs, engine); - - __reset_stop_ring(regs, engine); -} - -static int -populate_lr_context(struct intel_context *ce, - struct drm_i915_gem_object *ctx_obj, - struct intel_engine_cs *engine, - struct intel_ring *ring) -{ - bool inhibit = true; - void *vaddr; - - vaddr = i915_gem_object_pin_map(ctx_obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - drm_dbg(&engine->i915->drm, "Could not map object pages!\n"); - return PTR_ERR(vaddr); - } - - set_redzone(vaddr, engine); - - if (engine->default_state) { - shmem_read(engine->default_state, 0, - vaddr, engine->context_size); - __set_bit(CONTEXT_VALID_BIT, &ce->flags); - inhibit = false; - } - - /* Clear the ppHWSP (inc. per-context counters) */ - memset(vaddr, 0, PAGE_SIZE); - - /* - * The second page of the context object contains some registers which - * must be set up prior to the first execution. - */ - execlists_init_reg_state(vaddr + LRC_STATE_OFFSET, - ce, engine, ring, inhibit); - - __i915_gem_object_flush_map(ctx_obj, 0, engine->context_size); - i915_gem_object_unpin_map(ctx_obj); - return 0; -} - -static struct intel_timeline *pinned_timeline(struct intel_context *ce) -{ - struct intel_timeline *tl = fetch_and_zero(&ce->timeline); - - return intel_timeline_create_from_engine(ce->engine, - page_unmask_bits(tl)); -} - -static int __execlists_context_alloc(struct intel_context *ce, - struct intel_engine_cs *engine) -{ - struct drm_i915_gem_object *ctx_obj; - struct intel_ring *ring; - struct i915_vma *vma; - u32 context_size; - int ret; - - GEM_BUG_ON(ce->state); - context_size = round_up(engine->context_size, I915_GTT_PAGE_SIZE); - - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - context_size += I915_GTT_PAGE_SIZE; /* for redzone */ - - if (INTEL_GEN(engine->i915) == 12) { - ce->wa_bb_page = context_size / PAGE_SIZE; - context_size += PAGE_SIZE; - } - - ctx_obj = i915_gem_object_create_shmem(engine->i915, context_size); - if (IS_ERR(ctx_obj)) - return PTR_ERR(ctx_obj); - - vma = i915_vma_instance(ctx_obj, &engine->gt->ggtt->vm, NULL); - if (IS_ERR(vma)) { - ret = PTR_ERR(vma); - goto error_deref_obj; - } - - if (!page_mask_bits(ce->timeline)) { - struct intel_timeline *tl; - - /* - * Use the static global HWSP for the kernel context, and - * a dynamically allocated cacheline for everyone else. - */ - if (unlikely(ce->timeline)) - tl = pinned_timeline(ce); - else - tl = intel_timeline_create(engine->gt); - if (IS_ERR(tl)) { - ret = PTR_ERR(tl); - goto error_deref_obj; - } - - ce->timeline = tl; - } - - ring = intel_engine_create_ring(engine, (unsigned long)ce->ring); - if (IS_ERR(ring)) { - ret = PTR_ERR(ring); - goto error_deref_obj; - } - - ret = populate_lr_context(ce, ctx_obj, engine, ring); - if (ret) { - drm_dbg(&engine->i915->drm, - "Failed to populate LRC: %d\n", ret); - goto error_ring_free; - } - - ce->ring = ring; - ce->state = vma; - - return 0; - -error_ring_free: - intel_ring_put(ring); -error_deref_obj: - i915_gem_object_put(ctx_obj); - return ret; -} - -static struct list_head *virtual_queue(struct virtual_engine *ve) -{ - return &ve->base.execlists.default_priolist.requests[0]; -} - -static void rcu_virtual_context_destroy(struct work_struct *wrk) -{ - struct virtual_engine *ve = - container_of(wrk, typeof(*ve), rcu.work); - unsigned int n; - - GEM_BUG_ON(ve->context.inflight); - - /* Preempt-to-busy may leave a stale request behind. 
*/ - if (unlikely(ve->request)) { - struct i915_request *old; - - spin_lock_irq(&ve->base.active.lock); - - old = fetch_and_zero(&ve->request); - if (old) { - GEM_BUG_ON(!i915_request_completed(old)); - __i915_request_submit(old); - i915_request_put(old); - } - - spin_unlock_irq(&ve->base.active.lock); - } - - /* - * Flush the tasklet in case it is still running on another core. - * - * This needs to be done before we remove ourselves from the siblings' - * rbtrees as in the case it is running in parallel, it may reinsert - * the rb_node into a sibling. - */ - tasklet_kill(&ve->base.execlists.tasklet); - - /* Decouple ourselves from the siblings, no more access allowed. */ - for (n = 0; n < ve->num_siblings; n++) { - struct intel_engine_cs *sibling = ve->siblings[n]; - struct rb_node *node = &ve->nodes[sibling->id].rb; - - if (RB_EMPTY_NODE(node)) - continue; - - spin_lock_irq(&sibling->active.lock); - - /* Detachment is lazily performed in the execlists tasklet */ - if (!RB_EMPTY_NODE(node)) - rb_erase_cached(node, &sibling->execlists.virtual); - - spin_unlock_irq(&sibling->active.lock); - } - GEM_BUG_ON(__tasklet_is_scheduled(&ve->base.execlists.tasklet)); - GEM_BUG_ON(!list_empty(virtual_queue(ve))); - - if (ve->context.state) - __execlists_context_fini(&ve->context); - intel_context_fini(&ve->context); - - intel_breadcrumbs_free(ve->base.breadcrumbs); - intel_engine_free_request_pool(&ve->base); - - kfree(ve->bonds); - kfree(ve); -} - -static void virtual_context_destroy(struct kref *kref) -{ - struct virtual_engine *ve = - container_of(kref, typeof(*ve), context.ref); - - GEM_BUG_ON(!list_empty(&ve->context.signals)); - - /* - * When destroying the virtual engine, we have to be aware that - * it may still be in use from an hardirq/softirq context causing - * the resubmission of a completed request (background completion - * due to preempt-to-busy). Before we can free the engine, we need - * to flush the submission code and tasklets that are still potentially - * accessing the engine. Flushing the tasklets requires process context, - * and since we can guard the resubmit onto the engine with an RCU read - * lock, we can delegate the free of the engine to an RCU worker. - */ - INIT_RCU_WORK(&ve->rcu, rcu_virtual_context_destroy); - queue_rcu_work(system_wq, &ve->rcu); -} - -static void virtual_engine_initial_hint(struct virtual_engine *ve) -{ - int swp; - - /* - * Pick a random sibling on starting to help spread the load around. - * - * New contexts are typically created with exactly the same order - * of siblings, and often started in batches. Due to the way we iterate - * the array of sibling when submitting requests, sibling[0] is - * prioritised for dequeuing. If we make sure that sibling[0] is fairly - * randomised across the system, we also help spread the load by the - * first engine we inspect being different each time. - * - * NB This does not force us to execute on this engine, it will just - * typically be the first we inspect for submission. 
- */ - swp = prandom_u32_max(ve->num_siblings); - if (swp) - swap(ve->siblings[swp], ve->siblings[0]); -} - -static int virtual_context_alloc(struct intel_context *ce) -{ - struct virtual_engine *ve = container_of(ce, typeof(*ve), context); - - return __execlists_context_alloc(ce, ve->siblings[0]); -} - -static int virtual_context_pin(struct intel_context *ce, void *vaddr) -{ - struct virtual_engine *ve = container_of(ce, typeof(*ve), context); - - /* Note: we must use a real engine class for setting up reg state */ - return __execlists_context_pin(ce, ve->siblings[0], vaddr); -} - -static void virtual_context_enter(struct intel_context *ce) -{ - struct virtual_engine *ve = container_of(ce, typeof(*ve), context); - unsigned int n; - - for (n = 0; n < ve->num_siblings; n++) - intel_engine_pm_get(ve->siblings[n]); - - intel_timeline_enter(ce->timeline); -} - -static void virtual_context_exit(struct intel_context *ce) +static void st_update_runtime_underflow(struct intel_context *ce, s32 dt) { - struct virtual_engine *ve = container_of(ce, typeof(*ve), context); - unsigned int n; - - intel_timeline_exit(ce->timeline); - - for (n = 0; n < ve->num_siblings; n++) - intel_engine_pm_put(ve->siblings[n]); +#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) + ce->runtime.num_underflow++; + ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt); +#endif } -static const struct intel_context_ops virtual_context_ops = { - .alloc = virtual_context_alloc, - - .pre_pin = execlists_context_pre_pin, - .pin = virtual_context_pin, - .unpin = execlists_context_unpin, - .post_unpin = execlists_context_post_unpin, - - .enter = virtual_context_enter, - .exit = virtual_context_exit, - - .destroy = virtual_context_destroy, -}; - -static intel_engine_mask_t virtual_submission_mask(struct virtual_engine *ve) +void lrc_update_runtime(struct intel_context *ce) { - struct i915_request *rq; - intel_engine_mask_t mask; - - rq = READ_ONCE(ve->request); - if (!rq) - return 0; - - /* The rq is ready for submission; rq->execution_mask is now stable. 
*/ - mask = rq->execution_mask; - if (unlikely(!mask)) { - /* Invalid selection, submit to a random engine in error */ - i915_request_set_error_once(rq, -ENODEV); - mask = ve->siblings[0]->mask; - } - - ENGINE_TRACE(&ve->base, "rq=%llx:%lld, mask=%x, prio=%d\n", - rq->fence.context, rq->fence.seqno, - mask, ve->base.execlists.queue_priority_hint); - - return mask; -} + u32 old; + s32 dt; -static void virtual_submission_tasklet(unsigned long data) -{ - struct virtual_engine * const ve = (struct virtual_engine *)data; - const int prio = READ_ONCE(ve->base.execlists.queue_priority_hint); - intel_engine_mask_t mask; - unsigned int n; - - rcu_read_lock(); - mask = virtual_submission_mask(ve); - rcu_read_unlock(); - if (unlikely(!mask)) + if (intel_context_is_barrier(ce)) return; - local_irq_disable(); - for (n = 0; n < ve->num_siblings; n++) { - struct intel_engine_cs *sibling = READ_ONCE(ve->siblings[n]); - struct ve_node * const node = &ve->nodes[sibling->id]; - struct rb_node **parent, *rb; - bool first; - - if (!READ_ONCE(ve->request)) - break; /* already handled by a sibling's tasklet */ - - if (unlikely(!(mask & sibling->mask))) { - if (!RB_EMPTY_NODE(&node->rb)) { - spin_lock(&sibling->active.lock); - rb_erase_cached(&node->rb, - &sibling->execlists.virtual); - RB_CLEAR_NODE(&node->rb); - spin_unlock(&sibling->active.lock); - } - continue; - } - - spin_lock(&sibling->active.lock); - - if (!RB_EMPTY_NODE(&node->rb)) { - /* - * Cheat and avoid rebalancing the tree if we can - * reuse this node in situ. - */ - first = rb_first_cached(&sibling->execlists.virtual) == - &node->rb; - if (prio == node->prio || (prio > node->prio && first)) - goto submit_engine; - - rb_erase_cached(&node->rb, &sibling->execlists.virtual); - } - - rb = NULL; - first = true; - parent = &sibling->execlists.virtual.rb_root.rb_node; - while (*parent) { - struct ve_node *other; - - rb = *parent; - other = rb_entry(rb, typeof(*other), rb); - if (prio > other->prio) { - parent = &rb->rb_left; - } else { - parent = &rb->rb_right; - first = false; - } - } - - rb_link_node(&node->rb, rb, parent); - rb_insert_color_cached(&node->rb, - &sibling->execlists.virtual, - first); - -submit_engine: - GEM_BUG_ON(RB_EMPTY_NODE(&node->rb)); - node->prio = prio; - if (first && prio > sibling->execlists.queue_priority_hint) - tasklet_hi_schedule(&sibling->execlists.tasklet); - - spin_unlock(&sibling->active.lock); - } - local_irq_enable(); -} - -static void virtual_submit_request(struct i915_request *rq) -{ - struct virtual_engine *ve = to_virtual_engine(rq->engine); - struct i915_request *old; - unsigned long flags; - - ENGINE_TRACE(&ve->base, "rq=%llx:%lld\n", - rq->fence.context, - rq->fence.seqno); - - GEM_BUG_ON(ve->base.submit_request != virtual_submit_request); - - spin_lock_irqsave(&ve->base.active.lock, flags); - - old = ve->request; - if (old) { /* background completion event from preempt-to-busy */ - GEM_BUG_ON(!i915_request_completed(old)); - __i915_request_submit(old); - i915_request_put(old); - } - - if (i915_request_completed(rq)) { - __i915_request_submit(rq); - - ve->base.execlists.queue_priority_hint = INT_MIN; - ve->request = NULL; - } else { - ve->base.execlists.queue_priority_hint = rq_prio(rq); - ve->request = i915_request_get(rq); - - GEM_BUG_ON(!list_empty(virtual_queue(ve))); - list_move_tail(&rq->sched.link, virtual_queue(ve)); - - tasklet_hi_schedule(&ve->base.execlists.tasklet); - } - - spin_unlock_irqrestore(&ve->base.active.lock, flags); -} - -static struct ve_bond * -virtual_find_bond(struct virtual_engine 
*ve, - const struct intel_engine_cs *master) -{ - int i; - - for (i = 0; i < ve->num_bonds; i++) { - if (ve->bonds[i].master == master) - return &ve->bonds[i]; - } - - return NULL; -} - -static void -virtual_bond_execute(struct i915_request *rq, struct dma_fence *signal) -{ - struct virtual_engine *ve = to_virtual_engine(rq->engine); - intel_engine_mask_t allowed, exec; - struct ve_bond *bond; - - allowed = ~to_request(signal)->engine->mask; - - bond = virtual_find_bond(ve, to_request(signal)->engine); - if (bond) - allowed &= bond->sibling_mask; - - /* Restrict the bonded request to run on only the available engines */ - exec = READ_ONCE(rq->execution_mask); - while (!try_cmpxchg(&rq->execution_mask, &exec, exec & allowed)) - ; - - /* Prevent the master from being re-run on the bonded engines */ - to_request(signal)->execution_mask &= ~allowed; -} - -struct intel_context * -intel_execlists_create_virtual(struct intel_engine_cs **siblings, - unsigned int count) -{ - struct virtual_engine *ve; - unsigned int n; - int err; - - if (count == 0) - return ERR_PTR(-EINVAL); - - if (count == 1) - return intel_context_create(siblings[0]); - - ve = kzalloc(struct_size(ve, siblings, count), GFP_KERNEL); - if (!ve) - return ERR_PTR(-ENOMEM); - - ve->base.i915 = siblings[0]->i915; - ve->base.gt = siblings[0]->gt; - ve->base.uncore = siblings[0]->uncore; - ve->base.id = -1; - - ve->base.class = OTHER_CLASS; - ve->base.uabi_class = I915_ENGINE_CLASS_INVALID; - ve->base.instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; - ve->base.uabi_instance = I915_ENGINE_CLASS_INVALID_VIRTUAL; - - /* - * The decision on whether to submit a request using semaphores - * depends on the saturated state of the engine. We only compute - * this during HW submission of the request, and we need for this - * state to be globally applied to all requests being submitted - * to this engine. Virtual engines encompass more than one physical - * engine and so we cannot accurately tell in advance if one of those - * engines is already saturated and so cannot afford to use a semaphore - * and be pessimized in priority for doing so -- if we are the only - * context using semaphores after all other clients have stopped, we - * will be starved on the saturated system. Such a global switch for - * semaphores is less than ideal, but alas is the current compromise. 
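For reference, a hypothetical caller of the load-balancing API whose implementation is shown above; the sibling lookups are placeholders and error handling is trimmed to the essentials:

	struct intel_engine_cs *siblings[] = {
		gt->engine_class[VIDEO_DECODE_CLASS][0],   /* placeholder sibling */
		gt->engine_class[VIDEO_DECODE_CLASS][1],   /* placeholder sibling */
	};
	struct intel_context *ce;

	ce = intel_execlists_create_virtual(siblings, ARRAY_SIZE(siblings));
	if (IS_ERR(ce))
		return PTR_ERR(ce);
	/* requests submitted on ce may now run on either sibling */
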
- */ - ve->base.saturated = ALL_ENGINES; - - snprintf(ve->base.name, sizeof(ve->base.name), "virtual"); - - intel_engine_init_active(&ve->base, ENGINE_VIRTUAL); - intel_engine_init_execlists(&ve->base); - - ve->base.cops = &virtual_context_ops; - ve->base.request_alloc = execlists_request_alloc; - - ve->base.schedule = i915_schedule; - ve->base.submit_request = virtual_submit_request; - ve->base.bond_execute = virtual_bond_execute; - - INIT_LIST_HEAD(virtual_queue(ve)); - ve->base.execlists.queue_priority_hint = INT_MIN; - tasklet_init(&ve->base.execlists.tasklet, - virtual_submission_tasklet, - (unsigned long)ve); - - intel_context_init(&ve->context, &ve->base); - - ve->base.breadcrumbs = intel_breadcrumbs_create(NULL); - if (!ve->base.breadcrumbs) { - err = -ENOMEM; - goto err_put; - } - - for (n = 0; n < count; n++) { - struct intel_engine_cs *sibling = siblings[n]; - - GEM_BUG_ON(!is_power_of_2(sibling->mask)); - if (sibling->mask & ve->base.mask) { - DRM_DEBUG("duplicate %s entry in load balancer\n", - sibling->name); - err = -EINVAL; - goto err_put; - } - - /* - * The virtual engine implementation is tightly coupled to - * the execlists backend -- we push out request directly - * into a tree inside each physical engine. We could support - * layering if we handle cloning of the requests and - * submitting a copy into each backend. - */ - if (sibling->execlists.tasklet.func != - execlists_submission_tasklet) { - err = -ENODEV; - goto err_put; - } - - GEM_BUG_ON(RB_EMPTY_NODE(&ve->nodes[sibling->id].rb)); - RB_CLEAR_NODE(&ve->nodes[sibling->id].rb); - - ve->siblings[ve->num_siblings++] = sibling; - ve->base.mask |= sibling->mask; - - /* - * All physical engines must be compatible for their emission - * functions (as we build the instructions during request - * construction and do not alter them before submission - * on the physical engine). We use the engine class as a guide - * here, although that could be refined. 
- */ - if (ve->base.class != OTHER_CLASS) { - if (ve->base.class != sibling->class) { - DRM_DEBUG("invalid mixing of engine class, sibling %d, already %d\n", - sibling->class, ve->base.class); - err = -EINVAL; - goto err_put; - } - continue; - } - - ve->base.class = sibling->class; - ve->base.uabi_class = sibling->uabi_class; - snprintf(ve->base.name, sizeof(ve->base.name), - "v%dx%d", ve->base.class, count); - ve->base.context_size = sibling->context_size; - - ve->base.emit_bb_start = sibling->emit_bb_start; - ve->base.emit_flush = sibling->emit_flush; - ve->base.emit_init_breadcrumb = sibling->emit_init_breadcrumb; - ve->base.emit_fini_breadcrumb = sibling->emit_fini_breadcrumb; - ve->base.emit_fini_breadcrumb_dw = - sibling->emit_fini_breadcrumb_dw; - - ve->base.flags = sibling->flags; - } - - ve->base.flags |= I915_ENGINE_IS_VIRTUAL; - - virtual_engine_initial_hint(ve); - return &ve->context; - -err_put: - intel_context_put(&ve->context); - return ERR_PTR(err); -} - -struct intel_context * -intel_execlists_clone_virtual(struct intel_engine_cs *src) -{ - struct virtual_engine *se = to_virtual_engine(src); - struct intel_context *dst; - - dst = intel_execlists_create_virtual(se->siblings, - se->num_siblings); - if (IS_ERR(dst)) - return dst; - - if (se->num_bonds) { - struct virtual_engine *de = to_virtual_engine(dst->engine); - - de->bonds = kmemdup(se->bonds, - sizeof(*se->bonds) * se->num_bonds, - GFP_KERNEL); - if (!de->bonds) { - intel_context_put(dst); - return ERR_PTR(-ENOMEM); - } - - de->num_bonds = se->num_bonds; - } - - return dst; -} - -int intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, - const struct intel_engine_cs *master, - const struct intel_engine_cs *sibling) -{ - struct virtual_engine *ve = to_virtual_engine(engine); - struct ve_bond *bond; - int n; - - /* Sanity check the sibling is part of the virtual engine */ - for (n = 0; n < ve->num_siblings; n++) - if (sibling == ve->siblings[n]) - break; - if (n == ve->num_siblings) - return -EINVAL; - - bond = virtual_find_bond(ve, master); - if (bond) { - bond->sibling_mask |= sibling->mask; - return 0; - } - - bond = krealloc(ve->bonds, - sizeof(*bond) * (ve->num_bonds + 1), - GFP_KERNEL); - if (!bond) - return -ENOMEM; - - bond[ve->num_bonds].master = master; - bond[ve->num_bonds].sibling_mask = sibling->mask; - - ve->bonds = bond; - ve->num_bonds++; - - return 0; -} - -void intel_execlists_show_requests(struct intel_engine_cs *engine, - struct drm_printer *m, - void (*show_request)(struct drm_printer *m, - struct i915_request *rq, - const char *prefix), - unsigned int max) -{ - const struct intel_engine_execlists *execlists = &engine->execlists; - struct i915_request *rq, *last; - unsigned long flags; - unsigned int count; - struct rb_node *rb; - - spin_lock_irqsave(&engine->active.lock, flags); - - last = NULL; - count = 0; - list_for_each_entry(rq, &engine->active.requests, sched.link) { - if (count++ < max - 1) - show_request(m, rq, "\t\tE "); - else - last = rq; - } - if (last) { - if (count > max) { - drm_printf(m, - "\t\t...skipping %d executing requests...\n", - count - max); - } - show_request(m, last, "\t\tE "); - } - - if (execlists->switch_priority_hint != INT_MIN) - drm_printf(m, "\t\tSwitch priority hint: %d\n", - READ_ONCE(execlists->switch_priority_hint)); - if (execlists->queue_priority_hint != INT_MIN) - drm_printf(m, "\t\tQueue priority hint: %d\n", - READ_ONCE(execlists->queue_priority_hint)); - - last = NULL; - count = 0; - for (rb = rb_first_cached(&execlists->queue); rb; rb = 
rb_next(rb)) { - struct i915_priolist *p = rb_entry(rb, typeof(*p), node); - int i; - - priolist_for_each_request(rq, p, i) { - if (count++ < max - 1) - show_request(m, rq, "\t\tQ "); - else - last = rq; - } - } - if (last) { - if (count > max) { - drm_printf(m, - "\t\t...skipping %d queued requests...\n", - count - max); - } - show_request(m, last, "\t\tQ "); - } + old = ce->runtime.last; + ce->runtime.last = lrc_get_runtime(ce); + dt = ce->runtime.last - old; - last = NULL; - count = 0; - for (rb = rb_first_cached(&execlists->virtual); rb; rb = rb_next(rb)) { - struct virtual_engine *ve = - rb_entry(rb, typeof(*ve), nodes[engine->id].rb); - struct i915_request *rq = READ_ONCE(ve->request); - - if (rq) { - if (count++ < max - 1) - show_request(m, rq, "\t\tV "); - else - last = rq; - } - } - if (last) { - if (count > max) { - drm_printf(m, - "\t\t...skipping %d virtual requests...\n", - count - max); - } - show_request(m, last, "\t\tV "); + if (unlikely(dt < 0)) { + CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n", + old, ce->runtime.last, dt); + st_update_runtime_underflow(ce, dt); + return; } - spin_unlock_irqrestore(&engine->active.lock, flags); -} - -void intel_lr_context_reset(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 head, - bool scrub) -{ - GEM_BUG_ON(!intel_context_is_pinned(ce)); - - /* - * We want a simple context + ring to execute the breadcrumb update. - * We cannot rely on the context being intact across the GPU hang, - * so clear it and rebuild just what we need for the breadcrumb. - * All pending requests for this context will be zapped, and any - * future request will be after userspace has had the opportunity - * to recreate its own state. - */ - if (scrub) - restore_default_state(ce, engine); - - /* Rerun the request; its payload has been neutered (if guilty). */ - __execlists_update_reg_state(ce, engine, head); -} - -bool -intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine) -{ - return engine->set_default_submission == - intel_execlists_set_default_submission; + ewma_runtime_add(&ce->runtime.avg, dt); + ce->runtime.total += dt; } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/gt/intel_lrc.h b/drivers/gpu/drm/i915/gt/intel_lrc.h index c2d287f25497..7f697845c4cf 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc.h @@ -1,90 +1,20 @@ +/* SPDX-License-Identifier: MIT */ /* * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
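Because the runtime-accounting additions to intel_lrc.c are interleaved with deletions in the hunks above, the new helpers reassemble (verbatim from the added lines) to the following; lrc_get_runtime() is the CTX_TIMESTAMP accessor added to intel_lrc.h below:

	static void st_update_runtime_underflow(struct intel_context *ce, s32 dt)
	{
	#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
		ce->runtime.num_underflow++;
		ce->runtime.max_underflow = max_t(u32, ce->runtime.max_underflow, -dt);
	#endif
	}

	void lrc_update_runtime(struct intel_context *ce)
	{
		u32 old;
		s32 dt;

		if (intel_context_is_barrier(ce))
			return;

		old = ce->runtime.last;
		ce->runtime.last = lrc_get_runtime(ce);
		dt = ce->runtime.last - old;

		if (unlikely(dt < 0)) {
			CE_TRACE(ce, "runtime underflow: last=%u, new=%u, delta=%d\n",
				 old, ce->runtime.last, dt);
			st_update_runtime_underflow(ce, dt);
			return;
		}

		ewma_runtime_add(&ce->runtime.avg, dt);
		ce->runtime.total += dt;
	}
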
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. */ -#ifndef _INTEL_LRC_H_ -#define _INTEL_LRC_H_ +#ifndef __INTEL_LRC_H__ +#define __INTEL_LRC_H__ #include <linux/types.h> -struct drm_printer; +#include "intel_context.h" +#include "intel_lrc_reg.h" -struct drm_i915_private; -struct i915_gem_context; -struct i915_request; -struct intel_context; +struct drm_i915_gem_object; struct intel_engine_cs; +struct intel_ring; -/* Execlists regs */ -#define RING_ELSP(base) _MMIO((base) + 0x230) -#define RING_EXECLIST_STATUS_LO(base) _MMIO((base) + 0x234) -#define RING_EXECLIST_STATUS_HI(base) _MMIO((base) + 0x234 + 4) -#define RING_CONTEXT_CONTROL(base) _MMIO((base) + 0x244) -#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH (1 << 3) -#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT (1 << 0) -#define CTX_CTRL_RS_CTX_ENABLE (1 << 1) -#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT (1 << 2) -#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE (1 << 8) -#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) -#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) -#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) - -#define EL_CTRL_LOAD (1 << 0) - -/* The docs specify that the write pointer wraps around after 5h, "After status - * is written out to the last available status QW at offset 5h, this pointer - * wraps to 0." - * - * Therefore, one must infer than even though there are 3 bits available, 6 and - * 7 appear to be * reserved. - */ -#define GEN8_CSB_ENTRIES 6 -#define GEN8_CSB_PTR_MASK 0x7 -#define GEN8_CSB_READ_PTR_MASK (GEN8_CSB_PTR_MASK << 8) -#define GEN8_CSB_WRITE_PTR_MASK (GEN8_CSB_PTR_MASK << 0) - -#define GEN11_CSB_ENTRIES 12 -#define GEN11_CSB_PTR_MASK 0xf -#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8) -#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) - -#define MAX_CONTEXT_HW_ID (1<<21) /* exclusive */ -#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ -#define GEN11_MAX_CONTEXT_HW_ID (1<<11) /* exclusive */ -/* in Gen12 ID 0x7FF is reserved to indicate idle */ -#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) - -enum { - INTEL_CONTEXT_SCHEDULE_IN = 0, - INTEL_CONTEXT_SCHEDULE_OUT, - INTEL_CONTEXT_SCHEDULE_PREEMPTED, -}; - -/* Logical Rings */ -void intel_logical_ring_cleanup(struct intel_engine_cs *engine); - -int intel_execlists_submission_setup(struct intel_engine_cs *engine); - -/* Logical Ring Contexts */ /* At the start of the context image is its per-process HWS page */ #define LRC_PPHWSP_PN (0) #define LRC_PPHWSP_SZ (1) @@ -96,32 +26,57 @@ int intel_execlists_submission_setup(struct intel_engine_cs *engine); #define LRC_PPHWSP_SCRATCH 0x34 #define LRC_PPHWSP_SCRATCH_ADDR (LRC_PPHWSP_SCRATCH * sizeof(u32)) -void intel_execlists_set_default_submission(struct intel_engine_cs *engine); - -void intel_lr_context_reset(struct intel_engine_cs *engine, - struct intel_context *ce, - u32 head, - bool scrub); - -void intel_execlists_show_requests(struct intel_engine_cs *engine, - struct drm_printer *m, - void (*show_request)(struct drm_printer *m, - struct i915_request *rq, - const char *prefix), - unsigned int max); - -struct intel_context * -intel_execlists_create_virtual(struct intel_engine_cs **siblings, - unsigned int count); - -struct intel_context * -intel_execlists_clone_virtual(struct intel_engine_cs *src); - -int 
intel_virtual_engine_attach_bond(struct intel_engine_cs *engine, - const struct intel_engine_cs *master, - const struct intel_engine_cs *sibling); - -bool -intel_engine_in_execlists_submission_mode(const struct intel_engine_cs *engine); - -#endif /* _INTEL_LRC_H_ */ +void lrc_init_wa_ctx(struct intel_engine_cs *engine); +void lrc_fini_wa_ctx(struct intel_engine_cs *engine); + +int lrc_alloc(struct intel_context *ce, + struct intel_engine_cs *engine); +void lrc_reset(struct intel_context *ce); +void lrc_fini(struct intel_context *ce); +void lrc_destroy(struct kref *kref); + +int +lrc_pre_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + struct i915_gem_ww_ctx *ww, + void **vaddr); +int +lrc_pin(struct intel_context *ce, + struct intel_engine_cs *engine, + void *vaddr); +void lrc_unpin(struct intel_context *ce); +void lrc_post_unpin(struct intel_context *ce); + +void lrc_init_state(struct intel_context *ce, + struct intel_engine_cs *engine, + void *state); + +void lrc_init_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + bool clear); +void lrc_reset_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine); + +u32 lrc_update_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + u32 head); +void lrc_update_offsets(struct intel_context *ce, + struct intel_engine_cs *engine); + +void lrc_check_regs(const struct intel_context *ce, + const struct intel_engine_cs *engine, + const char *when); + +void lrc_update_runtime(struct intel_context *ce); +static inline u32 lrc_get_runtime(const struct intel_context *ce) +{ + /* + * We can use either ppHWSP[16] which is recorded before the context + * switch (and so excludes the cost of context switches) or use the + * value from the context image itself, which is saved/restored earlier + * and so includes the cost of the save. + */ + return READ_ONCE(ce->lrc_reg_state[CTX_TIMESTAMP]); +} + +#endif /* __INTEL_LRC_H__ */ diff --git a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h index 1b51f7b9a5c3..65fe76738335 100644 --- a/drivers/gpu/drm/i915/gt/intel_lrc_reg.h +++ b/drivers/gpu/drm/i915/gt/intel_lrc_reg.h @@ -9,6 +9,8 @@ #include <linux/types.h> +#define CTX_DESC_FORCE_RESTORE BIT_ULL(2) + /* GEN8 to GEN12 Reg State Context */ #define CTX_CONTEXT_CONTROL (0x02 + 1) #define CTX_RING_HEAD (0x04 + 1) @@ -52,4 +54,43 @@ #define GEN8_EXECLISTS_STATUS_BUF 0x370 #define GEN11_EXECLISTS_STATUS_BUF2 0x3c0 +/* Execlists regs */ +#define RING_ELSP(base) _MMIO((base) + 0x230) +#define RING_EXECLIST_STATUS_LO(base) _MMIO((base) + 0x234) +#define RING_EXECLIST_STATUS_HI(base) _MMIO((base) + 0x234 + 4) +#define RING_CONTEXT_CONTROL(base) _MMIO((base) + 0x244) +#define CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT REG_BIT(0) +#define CTX_CTRL_RS_CTX_ENABLE REG_BIT(1) +#define CTX_CTRL_ENGINE_CTX_SAVE_INHIBIT REG_BIT(2) +#define CTX_CTRL_INHIBIT_SYN_CTX_SWITCH REG_BIT(3) +#define GEN12_CTX_CTRL_OAR_CONTEXT_ENABLE REG_BIT(8) +#define RING_CONTEXT_STATUS_PTR(base) _MMIO((base) + 0x3a0) +#define RING_EXECLIST_SQ_CONTENTS(base) _MMIO((base) + 0x510) +#define RING_EXECLIST_CONTROL(base) _MMIO((base) + 0x550) +#define EL_CTRL_LOAD REG_BIT(0) + +/* + * The docs specify that the write pointer wraps around after 5h, "After status + * is written out to the last available status QW at offset 5h, this pointer + * wraps to 0." + * + * Therefore, one must infer than even though there are 3 bits available, 6 and + * 7 appear to be * reserved. 
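A minimal decode sketch for the CSB pointer masks carried over into intel_lrc_reg.h here; ptr is assumed to have been read from RING_CONTEXT_STATUS_PTR(base) on a gen11+ part:

	u32 wr = ptr & GEN11_CSB_WRITE_PTR_MASK;        /* write pointer, bits 3:0 */
	u32 rd = (ptr & GEN11_CSB_READ_PTR_MASK) >> 8;  /* read pointer, bits 11:8 */
	/* entries rd..wr-1 (wrapping at GEN11_CSB_ENTRIES) are the new events */
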
+ */ +#define GEN8_CSB_ENTRIES 6 +#define GEN8_CSB_PTR_MASK 0x7 +#define GEN8_CSB_READ_PTR_MASK (GEN8_CSB_PTR_MASK << 8) +#define GEN8_CSB_WRITE_PTR_MASK (GEN8_CSB_PTR_MASK << 0) + +#define GEN11_CSB_ENTRIES 12 +#define GEN11_CSB_PTR_MASK 0xf +#define GEN11_CSB_READ_PTR_MASK (GEN11_CSB_PTR_MASK << 8) +#define GEN11_CSB_WRITE_PTR_MASK (GEN11_CSB_PTR_MASK << 0) + +#define MAX_CONTEXT_HW_ID (1 << 21) /* exclusive */ +#define MAX_GUC_CONTEXT_HW_ID (1 << 20) /* exclusive */ +#define GEN11_MAX_CONTEXT_HW_ID (1 << 11) /* exclusive */ +/* in Gen12 ID 0x7FF is reserved to indicate idle */ +#define GEN12_MAX_CONTEXT_HW_ID (GEN11_MAX_CONTEXT_HW_ID - 1) + #endif /* _INTEL_LRC_REG_H_ */ diff --git a/drivers/gpu/drm/i915/gt/intel_mocs.c b/drivers/gpu/drm/i915/gt/intel_mocs.c index ab6870242e18..c4512ee4daf2 100644 --- a/drivers/gpu/drm/i915/gt/intel_mocs.c +++ b/drivers/gpu/drm/i915/gt/intel_mocs.c @@ -24,8 +24,8 @@ #include "intel_engine.h" #include "intel_gt.h" +#include "intel_lrc_reg.h" #include "intel_mocs.h" -#include "intel_lrc.h" #include "intel_ring.h" /* structures required */ diff --git a/drivers/gpu/drm/i915/intel_region_lmem.c b/drivers/gpu/drm/i915/gt/intel_region_lmem.c index 40d8f1a95df6..83992312a34b 100644 --- a/drivers/gpu/drm/i915/intel_region_lmem.c +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.c @@ -95,7 +95,7 @@ region_lmem_init(struct intel_memory_region *mem) return ret; } -const struct intel_memory_region_ops intel_region_lmem_ops = { +static const struct intel_memory_region_ops intel_region_lmem_ops = { .init = region_lmem_init, .release = region_lmem_release, .create_object = __i915_gem_lmem_object_create, diff --git a/drivers/gpu/drm/i915/intel_region_lmem.h b/drivers/gpu/drm/i915/gt/intel_region_lmem.h index 213def7c7b8a..8ea43e538dab 100644 --- a/drivers/gpu/drm/i915/intel_region_lmem.h +++ b/drivers/gpu/drm/i915/gt/intel_region_lmem.h @@ -8,8 +8,6 @@ struct drm_i915_private; -extern const struct intel_memory_region_ops intel_region_lmem_ops; - struct intel_memory_region * intel_setup_fake_lmem(struct drm_i915_private *i915); diff --git a/drivers/gpu/drm/i915/gt/intel_renderstate.c b/drivers/gpu/drm/i915/gt/intel_renderstate.c index ea2a77c7b469..ca816ba22197 100644 --- a/drivers/gpu/drm/i915/gt/intel_renderstate.c +++ b/drivers/gpu/drm/i915/gt/intel_renderstate.c @@ -27,7 +27,8 @@ #include "i915_drv.h" #include "intel_renderstate.h" -#include "gt/intel_context.h" +#include "intel_context.h" +#include "intel_gpu_commands.h" #include "intel_ring.h" static const struct intel_renderstate_rodata * diff --git a/drivers/gpu/drm/i915/gt/intel_reset.c b/drivers/gpu/drm/i915/gt/intel_reset.c index 3654c955e6be..9d177297db79 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.c +++ b/drivers/gpu/drm/i915/gt/intel_reset.c @@ -40,20 +40,19 @@ static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr) intel_uncore_rmw_fw(uncore, reg, clr, 0); } -static void engine_skip_context(struct i915_request *rq) +static void skip_context(struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; struct intel_context *hung_ctx = rq->context; - if (!i915_request_is_active(rq)) - return; + list_for_each_entry_from_rcu(rq, &hung_ctx->timeline->requests, link) { + if (!i915_request_is_active(rq)) + return; - lockdep_assert_held(&engine->active.lock); - list_for_each_entry_continue(rq, &engine->active.requests, sched.link) if (rq->context == hung_ctx) { i915_request_set_error_once(rq, -EIO); __i915_request_skip(rq); } + } } static void client_mark_guilty(struct 
i915_gem_context *ctx, bool banned) @@ -160,7 +159,7 @@ void __i915_request_reset(struct i915_request *rq, bool guilty) i915_request_set_error_once(rq, -EIO); __i915_request_skip(rq); if (mark_guilty(rq)) - engine_skip_context(rq); + skip_context(rq); } else { i915_request_set_error_once(rq, -EAGAIN); mark_innocent(rq); @@ -231,7 +230,7 @@ static int g4x_do_reset(struct intel_gt *gt, GRDOM_MEDIA | GRDOM_RESET_ENABLE); ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { - drm_dbg(>->i915->drm, "Wait for media reset failed\n"); + GT_TRACE(gt, "Wait for media reset failed\n"); goto out; } @@ -239,7 +238,7 @@ static int g4x_do_reset(struct intel_gt *gt, GRDOM_RENDER | GRDOM_RESET_ENABLE); ret = wait_for_atomic(g4x_reset_complete(pdev), 50); if (ret) { - drm_dbg(>->i915->drm, "Wait for render reset failed\n"); + GT_TRACE(gt, "Wait for render reset failed\n"); goto out; } @@ -265,7 +264,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, 5000, 0, NULL); if (ret) { - drm_dbg(>->i915->drm, "Wait for render reset failed\n"); + GT_TRACE(gt, "Wait for render reset failed\n"); goto out; } @@ -276,7 +275,7 @@ static int ilk_do_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask, 5000, 0, NULL); if (ret) { - drm_dbg(>->i915->drm, "Wait for media reset failed\n"); + GT_TRACE(gt, "Wait for media reset failed\n"); goto out; } @@ -305,9 +304,9 @@ static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask) 500, 0, NULL); if (err) - drm_dbg(>->i915->drm, - "Wait for 0x%08x engines reset failed\n", - hw_domain_mask); + GT_TRACE(gt, + "Wait for 0x%08x engines reset failed\n", + hw_domain_mask); return err; } @@ -407,8 +406,7 @@ static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask) return 0; if (ret) { - drm_dbg(&engine->i915->drm, - "Wait for SFC forced lock ack failed\n"); + ENGINE_TRACE(engine, "Wait for SFC forced lock ack failed\n"); return ret; } @@ -499,6 +497,9 @@ static int gen8_engine_reset_prepare(struct intel_engine_cs *engine) u32 request, mask, ack; int ret; + if (I915_SELFTEST_ONLY(should_fail(&engine->reset_timeout, 1))) + return -ETIMEDOUT; + ack = intel_uncore_read_fw(uncore, reg); if (ack & RESET_CTL_CAT_ERROR) { /* @@ -754,8 +755,10 @@ static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) if (err) return err; + local_bh_disable(); for_each_engine(engine, gt, id) __intel_engine_reset(engine, stalled_mask & engine->mask); + local_bh_enable(); intel_ggtt_restore_fences(gt->ggtt); @@ -833,9 +836,11 @@ static void __intel_gt_set_wedged(struct intel_gt *gt) set_bit(I915_WEDGED, >->reset.flags); /* Mark all executing requests as skipped */ + local_bh_disable(); for_each_engine(engine, gt, id) if (engine->reset.cancel) engine->reset.cancel(engine); + local_bh_enable(); reset_finish(gt, awake); @@ -1110,20 +1115,7 @@ static inline int intel_gt_reset_engine(struct intel_engine_cs *engine) return __intel_gt_reset(engine->gt, engine->mask); } -/** - * intel_engine_reset - reset GPU engine to recover from a hang - * @engine: engine to reset - * @msg: reason for GPU reset; or NULL for no drm_notice() - * - * Reset a specific GPU engine. Useful if a hang is detected. - * Returns zero on successful reset or otherwise an error code. 
- * - * Procedure is: - * - identifies the request that caused the hang and it is dropped - * - reset engine (which will force the engine to idle) - * - re-init/configure engine - */ -int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) +int __intel_engine_reset_bh(struct intel_engine_cs *engine, const char *msg) { struct intel_gt *gt = engine->gt; bool uses_guc = intel_engine_in_guc_submission_mode(engine); @@ -1148,8 +1140,7 @@ int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine); if (ret) { /* If we fail here, we expect to fallback to a global reset */ - drm_dbg(>->i915->drm, "%sFailed to reset %s, ret=%d\n", - uses_guc ? "GuC " : "", engine->name, ret); + ENGINE_TRACE(engine, "Failed to reset, err: %d\n", ret); goto out; } @@ -1174,6 +1165,30 @@ out: return ret; } +/** + * intel_engine_reset - reset GPU engine to recover from a hang + * @engine: engine to reset + * @msg: reason for GPU reset; or NULL for no drm_notice() + * + * Reset a specific GPU engine. Useful if a hang is detected. + * Returns zero on successful reset or otherwise an error code. + * + * Procedure is: + * - identifies the request that caused the hang and it is dropped + * - reset engine (which will force the engine to idle) + * - re-init/configure engine + */ +int intel_engine_reset(struct intel_engine_cs *engine, const char *msg) +{ + int err; + + local_bh_disable(); + err = __intel_engine_reset_bh(engine, msg); + local_bh_enable(); + + return err; +} + static void intel_gt_reset_global(struct intel_gt *gt, u32 engine_mask, const char *reason) @@ -1186,7 +1201,7 @@ static void intel_gt_reset_global(struct intel_gt *gt, kobject_uevent_env(kobj, KOBJ_CHANGE, error_event); - drm_dbg(>->i915->drm, "resetting chip, engines=%x\n", engine_mask); + GT_TRACE(gt, "resetting chip, engines=%x\n", engine_mask); kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event); /* Use a watchdog to ensure that our reset completes */ @@ -1260,18 +1275,20 @@ void intel_gt_handle_error(struct intel_gt *gt, * single reset fails. */ if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) { + local_bh_disable(); for_each_engine_masked(engine, gt, engine_mask, tmp) { BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE); if (test_and_set_bit(I915_RESET_ENGINE + engine->id, >->reset.flags)) continue; - if (intel_engine_reset(engine, msg) == 0) + if (__intel_engine_reset_bh(engine, msg) == 0) engine_mask &= ~engine->mask; clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); } + local_bh_enable(); } if (!engine_mask) @@ -1380,6 +1397,17 @@ void intel_gt_init_reset(struct intel_gt *gt) mutex_init(>->reset.mutex); init_srcu_struct(>->reset.backoff_srcu); + /* + * While undesirable to wait inside the shrinker, complain anyway. + * + * If we have to wait during shrinking, we guarantee forward progress + * by forcing the reset. Therefore during the reset we must not + * re-enter the shrinker. By declaring that we take the reset mutex + * within the shrinker, we forbid ourselves from performing any + * fs-reclaim or taking related locks during reset. + */ + i915_gem_shrinker_taints_mutex(gt->i915, >->reset.mutex); + /* no GPU until we are ready! 
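A sketch of the calling convention implied by the __intel_engine_reset_bh() split above (the msg strings are placeholders): process-context callers keep using intel_engine_reset(), which masks softirqs itself, while callers already running with bottom halves disabled call the _bh variant directly, as intel_gt_handle_error() now does:

	/* process context: the public wrapper brackets the reset itself */
	err = intel_engine_reset(engine, "reset requested");

	/* already under local_bh_disable(), e.g. alongside the submission tasklet */
	local_bh_disable();
	err = __intel_engine_reset_bh(engine, NULL);
	local_bh_enable();
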
*/ __set_bit(I915_WEDGED, >->reset.flags); } diff --git a/drivers/gpu/drm/i915/gt/intel_reset.h b/drivers/gpu/drm/i915/gt/intel_reset.h index a0eec7c11c0c..7dbf5cc8a333 100644 --- a/drivers/gpu/drm/i915/gt/intel_reset.h +++ b/drivers/gpu/drm/i915/gt/intel_reset.h @@ -34,6 +34,8 @@ void intel_gt_reset(struct intel_gt *gt, const char *reason); int intel_engine_reset(struct intel_engine_cs *engine, const char *reason); +int __intel_engine_reset_bh(struct intel_engine_cs *engine, + const char *reason); void __i915_request_reset(struct i915_request *rq, bool guilty); diff --git a/drivers/gpu/drm/i915/gt/intel_ring.c b/drivers/gpu/drm/i915/gt/intel_ring.c index 4034a4bac7f0..06385550450c 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring.c +++ b/drivers/gpu/drm/i915/gt/intel_ring.c @@ -5,9 +5,11 @@ */ #include "gem/i915_gem_object.h" + #include "i915_drv.h" #include "i915_vma.h" #include "intel_engine.h" +#include "intel_gpu_commands.h" #include "intel_ring.h" #include "intel_timeline.h" diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c index a41b43f445b8..20f42722be8b 100644 --- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c +++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c @@ -32,6 +32,7 @@ #include "gen6_ppgtt.h" #include "gen7_renderclear.h" #include "i915_drv.h" +#include "i915_mitigations.h" #include "intel_breadcrumbs.h" #include "intel_context.h" #include "intel_gt.h" @@ -158,30 +159,7 @@ static void ring_setup_status_page(struct intel_engine_cs *engine) static bool stop_ring(struct intel_engine_cs *engine) { - struct drm_i915_private *dev_priv = engine->i915; - - if (INTEL_GEN(dev_priv) > 2) { - ENGINE_WRITE(engine, - RING_MI_MODE, _MASKED_BIT_ENABLE(STOP_RING)); - if (intel_wait_for_register(engine->uncore, - RING_MI_MODE(engine->mmio_base), - MODE_IDLE, - MODE_IDLE, - 1000)) { - drm_err(&dev_priv->drm, - "%s : timed out trying to stop ring\n", - engine->name); - - /* - * Sometimes we observe that the idle flag is not - * set even though the ring is empty. So double - * check before giving up. - */ - if (ENGINE_READ(engine, RING_HEAD) != - ENGINE_READ(engine, RING_TAIL)) - return false; - } - } + intel_engine_stop_cs(engine); ENGINE_WRITE(engine, RING_HEAD, ENGINE_READ(engine, RING_TAIL)); @@ -321,6 +299,39 @@ out: return ret; } +static void sanitize_hwsp(struct intel_engine_cs *engine) +{ + struct intel_timeline *tl; + + list_for_each_entry(tl, &engine->status_page.timelines, engine_link) + intel_timeline_reset_seqno(tl); +} + +static void xcs_sanitize(struct intel_engine_cs *engine) +{ + /* + * Poison residual state on resume, in case the suspend didn't! + * + * We have to assume that across suspend/resume (or other loss + * of control) that the contents of our pinned buffers has been + * lost, replaced by garbage. Since this doesn't always happen, + * let's poison such state so that we more quickly spot when + * we falsely assume it has been preserved. + */ + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); + + /* + * The kernel_context HWSP is stored in the status_page. As above, + * that may be lost on resume/initialisation, and so we need to + * reset the value in the HWSP. 
+ */ + sanitize_hwsp(engine); + + /* And scrub the dirty cachelines for the HWSP */ + clflush_cache_range(engine->status_page.addr, PAGE_SIZE); +} + static void reset_prepare(struct intel_engine_cs *engine) { struct intel_uncore *uncore = engine->uncore; @@ -440,10 +451,8 @@ static void reset_cancel(struct intel_engine_cs *engine) spin_lock_irqsave(&engine->active.lock, flags); /* Mark all submitted requests as skipped. */ - list_for_each_entry(request, &engine->active.requests, sched.link) { - i915_request_set_error_once(request, -EIO); - i915_request_mark_complete(request); - } + list_for_each_entry(request, &engine->active.requests, sched.link) + i915_request_mark_eio(request); intel_engine_signal_breadcrumbs(engine); /* Remaining _unready_ requests will be nop'ed when submitted */ @@ -602,6 +611,7 @@ static int ring_context_pin(struct intel_context *ce, void *unused) static void ring_context_reset(struct intel_context *ce) { intel_ring_reset(ce->ring, ce->ring->emit); + clear_bit(CONTEXT_VALID_BIT, &ce->flags); } static const struct intel_context_ops ring_context_ops = { @@ -886,7 +896,8 @@ static int switch_context(struct i915_request *rq) GEM_BUG_ON(HAS_EXECLISTS(engine->i915)); if (engine->wa_ctx.vma && ce != engine->kernel_context) { - if (engine->wa_ctx.vma->private != ce) { + if (engine->wa_ctx.vma->private != ce && + i915_mitigate_clear_residuals()) { ret = clear_residuals(rq); if (ret) return ret; @@ -1069,6 +1080,8 @@ static void setup_common(struct intel_engine_cs *engine) setup_irq(engine); engine->resume = xcs_resume; + engine->sanitize = xcs_sanitize; + engine->reset.prepare = reset_prepare; engine->reset.rewind = reset_rewind; engine->reset.cancel = reset_cancel; @@ -1290,7 +1303,7 @@ int intel_ring_submission_setup(struct intel_engine_cs *engine) GEM_BUG_ON(timeline->hwsp_ggtt != engine->status_page.vma); - if (IS_HASWELL(engine->i915) && engine->class == RENDER_CLASS) { + if (IS_GEN(engine->i915, 7) && engine->class == RENDER_CLASS) { err = gen7_ctx_switch_bb_init(engine); if (err) goto err_ring_unpin; diff --git a/drivers/gpu/drm/i915/gt/intel_rps.c b/drivers/gpu/drm/i915/gt/intel_rps.c index b629eeb14002..69e1bd46cc46 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps.c +++ b/drivers/gpu/drm/i915/gt/intel_rps.c @@ -400,7 +400,7 @@ static unsigned int gen5_invert_freq(struct intel_rps *rps, return val; } -static bool gen5_rps_set(struct intel_rps *rps, u8 val) +static int __gen5_rps_set(struct intel_rps *rps, u8 val) { struct intel_uncore *uncore = rps_to_uncore(rps); u16 rgvswctl; @@ -410,7 +410,7 @@ static bool gen5_rps_set(struct intel_rps *rps, u8 val) rgvswctl = intel_uncore_read16(uncore, MEMSWCTL); if (rgvswctl & MEMCTL_CMD_STS) { DRM_DEBUG("gpu busy, RCS change rejected\n"); - return false; /* still busy with another command */ + return -EBUSY; /* still busy with another command */ } /* Invert the frequency bin into an ips delay */ @@ -426,7 +426,18 @@ static bool gen5_rps_set(struct intel_rps *rps, u8 val) rgvswctl |= MEMCTL_CMD_STS; intel_uncore_write16(uncore, MEMSWCTL, rgvswctl); - return true; + return 0; +} + +static int gen5_rps_set(struct intel_rps *rps, u8 val) +{ + int err; + + spin_lock_irq(&mchdev_lock); + err = __gen5_rps_set(rps, val); + spin_unlock_irq(&mchdev_lock); + + return err; } static unsigned long intel_pxfreq(u32 vidfreq) @@ -557,7 +568,7 @@ static bool gen5_rps_enable(struct intel_rps *rps) "stuck trying to change perf mode\n"); mdelay(1); - gen5_rps_set(rps, rps->cur_freq); + __gen5_rps_set(rps, rps->cur_freq); rps->ips.last_count1 = 
intel_uncore_read(uncore, DMIEC); rps->ips.last_count1 += intel_uncore_read(uncore, DDREC); @@ -599,7 +610,7 @@ static void gen5_rps_disable(struct intel_rps *rps) intel_uncore_write(uncore, MEMINTRSTS, MEMINT_EVAL_CHG); /* Go back to the starting frequency */ - gen5_rps_set(rps, rps->idle_freq); + __gen5_rps_set(rps, rps->idle_freq); mdelay(1); rgvswctl |= MEMCTL_CMD_STS; intel_uncore_write(uncore, MEMSWCTL, rgvswctl); @@ -797,20 +808,19 @@ static int rps_set(struct intel_rps *rps, u8 val, bool update) struct drm_i915_private *i915 = rps_to_i915(rps); int err; - if (INTEL_GEN(i915) < 6) - return 0; - if (val == rps->last_freq) return 0; if (IS_VALLEYVIEW(i915) || IS_CHERRYVIEW(i915)) err = vlv_rps_set(rps, val); - else + else if (INTEL_GEN(i915) >= 6) err = gen6_rps_set(rps, val); + else + err = gen5_rps_set(rps, val); if (err) return err; - if (update) + if (update && INTEL_GEN(i915) >= 6) gen6_rps_set_thresholds(rps, val); rps->last_freq = val; @@ -852,6 +862,8 @@ void intel_rps_park(struct intel_rps *rps) { int adj; + GEM_BUG_ON(atomic_read(&rps->num_waiters)); + if (!intel_rps_clear_active(rps)) return; @@ -907,28 +919,27 @@ void intel_rps_park(struct intel_rps *rps) void intel_rps_boost(struct i915_request *rq) { - struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; - unsigned long flags; - - if (i915_request_signaled(rq) || !intel_rps_is_active(rps)) + if (i915_request_signaled(rq) || i915_request_has_waitboost(rq)) return; /* Serializes with i915_request_retire() */ - spin_lock_irqsave(&rq->lock, flags); - if (!i915_request_has_waitboost(rq) && - !dma_fence_is_signaled_locked(&rq->fence)) { - set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); + if (!test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) { + struct intel_rps *rps = &READ_ONCE(rq->engine)->gt->rps; + + if (atomic_fetch_inc(&rps->num_waiters)) + return; + + if (!intel_rps_is_active(rps)) + return; GT_TRACE(rps_to_gt(rps), "boost fence:%llx:%llx\n", rq->fence.context, rq->fence.seqno); - if (!atomic_fetch_inc(&rps->num_waiters) && - READ_ONCE(rps->cur_freq) < rps->boost_freq) + if (READ_ONCE(rps->cur_freq) < rps->boost_freq) schedule_work(&rps->work); - atomic_inc(&rps->boosts); + WRITE_ONCE(rps->boosts, rps->boosts + 1); /* debug only */ } - spin_unlock_irqrestore(&rq->lock, flags); } int intel_rps_set(struct intel_rps *rps, u8 val) @@ -1798,7 +1809,7 @@ void gen5_rps_irq_handler(struct intel_rps *rps) rps->min_freq_softlimit, rps->max_freq_softlimit); - if (new_freq != rps->cur_freq && gen5_rps_set(rps, new_freq)) + if (new_freq != rps->cur_freq && !__gen5_rps_set(rps, new_freq)) rps->cur_freq = new_freq; spin_unlock(&mchdev_lock); @@ -2109,7 +2120,7 @@ bool i915_gpu_turbo_disable(void) spin_lock_irq(&mchdev_lock); rps->max_freq_softlimit = rps->min_freq; - ret = gen5_rps_set(&i915->gt.rps, rps->min_freq); + ret = !__gen5_rps_set(&i915->gt.rps, rps->min_freq); spin_unlock_irq(&mchdev_lock); drm_dev_put(&i915->drm); diff --git a/drivers/gpu/drm/i915/gt/intel_rps_types.h b/drivers/gpu/drm/i915/gt/intel_rps_types.h index 38083f0402d9..029fe13cf303 100644 --- a/drivers/gpu/drm/i915/gt/intel_rps_types.h +++ b/drivers/gpu/drm/i915/gt/intel_rps_types.h @@ -93,7 +93,7 @@ struct intel_rps { } power; atomic_t num_waiters; - atomic_t boosts; + unsigned int boosts; /* manual wa residency calculations */ struct intel_rps_ei ei; diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.c b/drivers/gpu/drm/i915/gt/intel_timeline.c index 7ea94d201fe6..7fe05918a76e 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.c +++ 
b/drivers/gpu/drm/i915/gt/intel_timeline.c @@ -126,6 +126,10 @@ static void __rcu_cacheline_free(struct rcu_head *rcu) struct intel_timeline_cacheline *cl = container_of(rcu, typeof(*cl), rcu); + /* Must wait until after all *rq->hwsp are complete before removing */ + i915_gem_object_unpin_map(cl->hwsp->vma->obj); + __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); + i915_active_fini(&cl->active); kfree(cl); } @@ -133,11 +137,6 @@ static void __rcu_cacheline_free(struct rcu_head *rcu) static void __idle_cacheline_free(struct intel_timeline_cacheline *cl) { GEM_BUG_ON(!i915_active_is_idle(&cl->active)); - - i915_gem_object_unpin_map(cl->hwsp->vma->obj); - i915_vma_put(cl->hwsp->vma); - __idle_hwsp_free(cl->hwsp, ptr_unmask_bits(cl->vaddr, CACHELINE_BITS)); - call_rcu(&cl->rcu, __rcu_cacheline_free); } @@ -179,7 +178,6 @@ cacheline_alloc(struct intel_timeline_hwsp *hwsp, unsigned int cacheline) return ERR_CAST(vaddr); } - i915_vma_get(hwsp->vma); cl->hwsp = hwsp; cl->vaddr = page_pack_bits(vaddr, cacheline); @@ -321,6 +319,25 @@ __intel_timeline_create(struct intel_gt *gt, return timeline; } +struct intel_timeline * +intel_timeline_create_from_engine(struct intel_engine_cs *engine, + unsigned int offset) +{ + struct i915_vma *hwsp = engine->status_page.vma; + struct intel_timeline *tl; + + tl = __intel_timeline_create(engine->gt, hwsp, offset); + if (IS_ERR(tl)) + return tl; + + /* Borrow a nearby lock; we only create these timelines during init */ + mutex_lock(&hwsp->vm->mutex); + list_add_tail(&tl->engine_link, &engine->status_page.timelines); + mutex_unlock(&hwsp->vm->mutex); + + return tl; +} + void __intel_timeline_pin(struct intel_timeline *tl) { GEM_BUG_ON(!atomic_read(&tl->pin_count)); @@ -617,6 +634,86 @@ void intel_gt_fini_timelines(struct intel_gt *gt) GEM_BUG_ON(!list_empty(&timelines->hwsp_free_list)); } +void intel_gt_show_timelines(struct intel_gt *gt, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent)) +{ + struct intel_gt_timelines *timelines = >->timelines; + struct intel_timeline *tl, *tn; + LIST_HEAD(free); + + spin_lock(&timelines->lock); + list_for_each_entry_safe(tl, tn, &timelines->active_list, link) { + unsigned long count, ready, inflight; + struct i915_request *rq, *rn; + struct dma_fence *fence; + + if (!mutex_trylock(&tl->mutex)) { + drm_printf(m, "Timeline %llx: busy; skipping\n", + tl->fence_context); + continue; + } + + intel_timeline_get(tl); + GEM_BUG_ON(!atomic_read(&tl->active_count)); + atomic_inc(&tl->active_count); /* pin the list element */ + spin_unlock(&timelines->lock); + + count = 0; + ready = 0; + inflight = 0; + list_for_each_entry_safe(rq, rn, &tl->requests, link) { + if (i915_request_completed(rq)) + continue; + + count++; + if (i915_request_is_ready(rq)) + ready++; + if (i915_request_is_active(rq)) + inflight++; + } + + drm_printf(m, "Timeline %llx: { ", tl->fence_context); + drm_printf(m, "count: %lu, ready: %lu, inflight: %lu", + count, ready, inflight); + drm_printf(m, ", seqno: { current: %d, last: %d }", + *tl->hwsp_seqno, tl->seqno); + fence = i915_active_fence_get(&tl->last_request); + if (fence) { + drm_printf(m, ", engine: %s", + to_request(fence)->engine->name); + dma_fence_put(fence); + } + drm_printf(m, " }\n"); + + if (show_request) { + list_for_each_entry_safe(rq, rn, &tl->requests, link) + show_request(m, rq, "", 2); + } + + mutex_unlock(&tl->mutex); + spin_lock(&timelines->lock); + + /* Resume list iteration after 
reacquiring spinlock */ + list_safe_reset_next(tl, tn, link); + if (atomic_dec_and_test(&tl->active_count)) + list_del(&tl->link); + + /* Defer the final release to after the spinlock */ + if (refcount_dec_and_test(&tl->kref.refcount)) { + GEM_BUG_ON(atomic_read(&tl->active_count)); + list_add(&tl->link, &free); + } + } + spin_unlock(&timelines->lock); + + list_for_each_entry_safe(tl, tn, &free, link) + __intel_timeline_free(&tl->kref); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "gt/selftests/mock_timeline.c" #include "gt/selftest_timeline.c" diff --git a/drivers/gpu/drm/i915/gt/intel_timeline.h b/drivers/gpu/drm/i915/gt/intel_timeline.h index 9882cd911d8e..dcdee692a80e 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline.h @@ -31,6 +31,8 @@ #include "i915_syncmap.h" #include "intel_timeline_types.h" +struct drm_printer; + struct intel_timeline * __intel_timeline_create(struct intel_gt *gt, struct i915_vma *global_hwsp, @@ -42,14 +44,9 @@ intel_timeline_create(struct intel_gt *gt) return __intel_timeline_create(gt, NULL, 0); } -static inline struct intel_timeline * +struct intel_timeline * intel_timeline_create_from_engine(struct intel_engine_cs *engine, - unsigned int offset) -{ - return __intel_timeline_create(engine->gt, - engine->status_page.vma, - offset); -} + unsigned int offset); static inline struct intel_timeline * intel_timeline_get(struct intel_timeline *timeline) @@ -106,4 +103,18 @@ int intel_timeline_read_hwsp(struct i915_request *from, void intel_gt_init_timelines(struct intel_gt *gt); void intel_gt_fini_timelines(struct intel_gt *gt); +void intel_gt_show_timelines(struct intel_gt *gt, + struct drm_printer *m, + void (*show_request)(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent)); + +static inline bool +intel_timeline_is_last(const struct intel_timeline *tl, + const struct i915_request *rq) +{ + return list_is_last_rcu(&rq->link, &tl->requests); +} + #endif diff --git a/drivers/gpu/drm/i915/gt/intel_timeline_types.h b/drivers/gpu/drm/i915/gt/intel_timeline_types.h index 4474f487f589..e360f50706bf 100644 --- a/drivers/gpu/drm/i915/gt/intel_timeline_types.h +++ b/drivers/gpu/drm/i915/gt/intel_timeline_types.h @@ -84,6 +84,8 @@ struct intel_timeline { struct list_head link; struct intel_gt *gt; + struct list_head engine_link; + struct kref kref; struct rcu_head rcu; }; diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c index adc9a8ea410a..d99773e6776e 100644 --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c @@ -7,6 +7,7 @@ #include "i915_drv.h" #include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_gpu_commands.h" #include "intel_gt.h" #include "intel_ring.h" #include "intel_workarounds.h" @@ -194,7 +195,7 @@ static void wa_add(struct i915_wa_list *wal, i915_reg_t reg, } static void -wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) +wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) { wa_add(wal, reg, clear, set, clear); } @@ -202,21 +203,32 @@ wa_write_masked_or(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set) static void wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set) { - wa_write_masked_or(wal, reg, ~0, set); + wa_write_clr_set(wal, reg, ~0, set); } static void wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set) { - wa_write_masked_or(wal, reg, set, set); + 
wa_write_clr_set(wal, reg, set, set); } static void wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr) { - wa_write_masked_or(wal, reg, clr, 0); + wa_write_clr_set(wal, reg, clr, 0); } +/* + * WA operations on "masked register". A masked register has the upper 16 bits + * documented as "masked" in b-spec. Its purpose is to allow writing to just a + * portion of the register without a rmw: you simply write in the upper 16 bits + * the mask of bits you are going to modify. + * + * The wa_masked_* family of functions already does the necessary operations to + * calculate the mask based on the parameters passed, so user only has to + * provide the lower 16 bits of that register. + */ + static void wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val) { @@ -229,38 +241,36 @@ wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val) wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val); } -#define WA_SET_BIT_MASKED(addr, mask) \ - wa_masked_en(wal, (addr), (mask)) - -#define WA_CLR_BIT_MASKED(addr, mask) \ - wa_masked_dis(wal, (addr), (mask)) - -#define WA_SET_FIELD_MASKED(addr, mask, value) \ - wa_write_masked_or(wal, (addr), 0, _MASKED_FIELD((mask), (value))) +static void +wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg, + u32 mask, u32 val) +{ + wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask); +} static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); } static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); } static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { - WA_SET_BIT_MASKED(INSTPM, INSTPM_FORCE_ORDERING); + wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING); /* WaDisableAsyncFlipPerfMode:bdw,chv */ - WA_SET_BIT_MASKED(MI_MODE, ASYNC_FLIP_PERF_DISABLE); + wa_masked_en(wal, MI_MODE, ASYNC_FLIP_PERF_DISABLE); /* WaDisablePartialInstShootdown:bdw,chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + wa_masked_en(wal, GEN8_ROW_CHICKEN, + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* Use Force Non-Coherent whenever executing a 3D context. This is a * workaround for for a possible hang in the unlikely event a TLB @@ -268,9 +278,9 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, */ /* WaForceEnableNonCoherent:bdw,chv */ /* WaHdcDisableFetchWhenMasked:bdw,chv */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_DONOT_FETCH_MEM_WHEN_MASKED | - HDC_FORCE_NON_COHERENT); + wa_masked_en(wal, HDC_CHICKEN0, + HDC_DONOT_FETCH_MEM_WHEN_MASKED | + HDC_FORCE_NON_COHERENT); /* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0: * "The Hierarchical Z RAW Stall Optimization allows non-overlapping @@ -280,10 +290,10 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, * * This optimization is off by default for BDW and CHV; turn it on. 
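The masked-register convention described in the comment above is easiest to see with the bit layout written out. The sketch below is illustrative only, not the driver's real macros: masked_field(), masked_bit_enable() and masked_bit_disable() are stand-ins for _MASKED_FIELD(), _MASKED_BIT_ENABLE() and _MASKED_BIT_DISABLE(), but they compose the value the same way the comment describes, with bits [31:16] acting as a write-enable mask for bits [15:0].

#include <stdint.h>

/*
 * Illustrative stand-ins for the driver's _MASKED_* macros: bits [31:16]
 * of the written value select which of bits [15:0] the hardware will
 * actually update, so no read-modify-write cycle is needed.
 */
static inline uint32_t masked_field(uint16_t mask, uint16_t value)
{
	return ((uint32_t)mask << 16) | value;
}

static inline uint32_t masked_bit_enable(uint16_t bits)
{
	return masked_field(bits, bits);	/* set the selected bits */
}

static inline uint32_t masked_bit_disable(uint16_t bits)
{
	return masked_field(bits, 0);		/* clear the selected bits */
}

Note that wa_masked_en() and wa_masked_field_set() above pass only low-16-bit values (val and mask respectively) as the final read-verify argument to wa_add(), since the mask half of a masked write is not expected to read back.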
*/ - WA_CLR_BIT_MASKED(CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); + wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); /* Wa4x4STCOptimizationDisable:bdw,chv */ - WA_SET_BIT_MASKED(CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); + wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE); /* * BSpec recommends 8x4 when MSAA is used, @@ -293,7 +303,7 @@ static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine, * disable bit, which we don't touch here, but it's good * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, + wa_masked_field_set(wal, GEN7_GT_MODE, GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4); } @@ -306,24 +316,24 @@ static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine, gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bdw (pre-production) */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* WaDisableDopClockGating:bdw * * Also see the related UCGTCL1 write in bdw_init_clock_gating() * to disable EUTC clock gating. */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - DOP_CLOCK_GATING_DISABLE); + wa_masked_en(wal, GEN7_ROW_CHICKEN2, + DOP_CLOCK_GATING_DISABLE); - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); + wa_masked_en(wal, HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); - WA_SET_BIT_MASKED(HDC_CHICKEN0, - /* WaForceContextSaveRestoreNonCoherent:bdw */ - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ - (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); + wa_masked_en(wal, HDC_CHICKEN0, + /* WaForceContextSaveRestoreNonCoherent:bdw */ + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */ + (IS_BDW_GT3(i915) ? HDC_FENCE_DEST_SLM_DISABLE : 0)); } static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -332,10 +342,10 @@ static void chv_ctx_workarounds_init(struct intel_engine_cs *engine, gen8_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:chv */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); + wa_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE); /* Improve HiZ throughput on CHV. */ - WA_SET_BIT_MASKED(HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); + wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X); } static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -349,38 +359,38 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, * Must match Display Engine. See * WaCompressedResourceDisplayNewHashMode. 
*/ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN9_PBE_COMPRESSED_HASH_SELECTION); - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); + wa_masked_en(wal, COMMON_SLICE_CHICKEN2, + GEN9_PBE_COMPRESSED_HASH_SELECTION); + wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR); } /* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */ /* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - FLOW_CONTROL_ENABLE | - PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); + wa_masked_en(wal, GEN8_ROW_CHICKEN, + FLOW_CONTROL_ENABLE | + PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE); /* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */ /* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN7, - GEN9_ENABLE_YV12_BUGFIX | - GEN9_ENABLE_GPGPU_PREEMPTION); + wa_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7, + GEN9_ENABLE_YV12_BUGFIX | + GEN9_ENABLE_GPGPU_PREEMPTION); /* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */ /* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(CACHE_MODE_1, - GEN8_4x4_STC_OPTIMIZATION_DISABLE | - GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); + wa_masked_en(wal, CACHE_MODE_1, + GEN8_4x4_STC_OPTIMIZATION_DISABLE | + GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE); /* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */ - WA_CLR_BIT_MASKED(GEN9_HALF_SLICE_CHICKEN5, - GEN9_CCS_TLB_PREFETCH_ENABLE); + wa_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5, + GEN9_CCS_TLB_PREFETCH_ENABLE); /* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | - HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); + wa_masked_en(wal, HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT | + HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE); /* WaForceEnableNonCoherent and WaDisableHDCInvalidation are * both tied to WaForceContextSaveRestoreNonCoherent @@ -396,19 +406,19 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, */ /* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */ - WA_SET_BIT_MASKED(HDC_CHICKEN0, - HDC_FORCE_NON_COHERENT); + wa_masked_en(wal, HDC_CHICKEN0, + HDC_FORCE_NON_COHERENT); /* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */ if (IS_SKYLAKE(i915) || IS_KABYLAKE(i915) || IS_COFFEELAKE(i915) || IS_COMETLAKE(i915)) - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, - GEN8_SAMPLER_POWER_BYPASS_DIS); + wa_masked_en(wal, HALF_SLICE_CHICKEN3, + GEN8_SAMPLER_POWER_BYPASS_DIS); /* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); + wa_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE); /* * Supporting preemption with fine-granularity requires changes in the @@ -422,16 +432,16 @@ static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine, */ /* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */ - WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); /* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + wa_masked_field_set(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ if (IS_GEN9_LP(i915)) - WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); + wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); } static void skl_tune_iz_hashing(struct intel_engine_cs *engine, @@ 
-465,7 +475,7 @@ static void skl_tune_iz_hashing(struct intel_engine_cs *engine, return; /* Tune IZ hashing. See intel_device_info_runtime_init() */ - WA_SET_FIELD_MASKED(GEN7_GT_MODE, + wa_masked_field_set(wal, GEN7_GT_MODE, GEN9_IZ_HASHING_MASK(2) | GEN9_IZ_HASHING_MASK(1) | GEN9_IZ_HASHING_MASK(0), @@ -487,12 +497,12 @@ static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaDisableThreadStallDopClockGating:bxt */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, - STALL_DOP_GATING_DISABLE); + wa_masked_en(wal, GEN8_ROW_CHICKEN, + STALL_DOP_GATING_DISABLE); /* WaToEnableHwFixForPushConstHWBug:bxt */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + wa_masked_en(wal, COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); } static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -504,12 +514,12 @@ static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine, /* WaToEnableHwFixForPushConstHWBug:kbl */ if (IS_KBL_GT_REVID(i915, KBL_REVID_C0, REVID_FOREVER)) - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + wa_masked_en(wal, COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:kbl */ - WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -518,8 +528,8 @@ static void glk_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:glk */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + wa_masked_en(wal, COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); } static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -528,41 +538,41 @@ static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine, gen9_ctx_workarounds_init(engine, wal); /* WaToEnableHwFixForPushConstHWBug:cfl */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + wa_masked_en(wal, COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaDisableSbeCacheDispatchPortSharing:cfl */ - WA_SET_BIT_MASKED(GEN7_HALF_SLICE_CHICKEN1, - GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); + wa_masked_en(wal, GEN7_HALF_SLICE_CHICKEN1, + GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE); } static void cnl_ctx_workarounds_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) { /* WaForceContextSaveRestoreNonCoherent:cnl */ - WA_SET_BIT_MASKED(CNL_HDC_CHICKEN0, - HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); + wa_masked_en(wal, CNL_HDC_CHICKEN0, + HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT); /* WaDisableReplayBufferBankArbitrationOptimization:cnl */ - WA_SET_BIT_MASKED(COMMON_SLICE_CHICKEN2, - GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); + wa_masked_en(wal, COMMON_SLICE_CHICKEN2, + GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION); /* WaPushConstantDereferenceHoldDisable:cnl */ - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); + wa_masked_en(wal, GEN7_ROW_CHICKEN2, PUSH_CONSTANT_DEREF_DISABLE); /* FtrEnableFastAnisoL1BankingFix:cnl */ - WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); + wa_masked_en(wal, HALF_SLICE_CHICKEN3, CNL_FAST_ANISO_L1_BANKING_FIX); /* WaDisable3DMidCmdPreemption:cnl 
*/ - WA_CLR_BIT_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); + wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL); /* WaDisableGPGPUMidCmdPreemption:cnl */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + wa_masked_field_set(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); /* WaDisableEarlyEOT:cnl */ - WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); + wa_masked_en(wal, GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); } static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, @@ -580,8 +590,8 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, * Formerly known as WaPushConstantDereferenceHoldDisable */ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_B0)) - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - PUSH_CONSTANT_DEREF_DISABLE); + wa_masked_en(wal, GEN7_ROW_CHICKEN2, + PUSH_CONSTANT_DEREF_DISABLE); /* WaForceEnableNonCoherent:icl * This is not the same workaround as in early Gen9 platforms, where @@ -590,40 +600,40 @@ static void icl_ctx_workarounds_init(struct intel_engine_cs *engine, * (the register is whitelisted in hardware now, so UMDs can opt in * for coherency if they have a good reason). */ - WA_SET_BIT_MASKED(ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); + wa_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT); /* Wa_2006611047:icl (pre-prod) * Formerly known as WaDisableImprovedTdlClkGating */ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2, - GEN11_TDL_CLOCK_GATING_FIX_DISABLE); + wa_masked_en(wal, GEN7_ROW_CHICKEN2, + GEN11_TDL_CLOCK_GATING_FIX_DISABLE); /* Wa_2006665173:icl (pre-prod) */ if (IS_ICL_REVID(i915, ICL_REVID_A0, ICL_REVID_A0)) - WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, - GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); + wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, + GEN11_BLEND_EMB_FIX_DISABLE_IN_RCC); /* WaEnableFloatBlendOptimization:icl */ - wa_write_masked_or(wal, - GEN10_CACHE_MODE_SS, - 0, /* write-only, so skip validation */ - _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); + wa_write_clr_set(wal, + GEN10_CACHE_MODE_SS, + 0, /* write-only, so skip validation */ + _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE)); /* WaDisableGPGPUMidThreadPreemption:icl */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + wa_masked_field_set(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); /* allow headerless messages for preemptible GPGPU context */ - WA_SET_BIT_MASKED(GEN10_SAMPLER_MODE, - GEN11_SAMPLER_ENABLE_HEADLESS_MSG); + wa_masked_en(wal, GEN10_SAMPLER_MODE, + GEN11_SAMPLER_ENABLE_HEADLESS_MSG); /* Wa_1604278689:icl,ehl */ wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID); - wa_write_masked_or(wal, IVB_FBC_RT_BASE_UPPER, - 0, /* write-only register; skip validation */ - 0xFFFFFFFF); + wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER, + 0, /* write-only register; skip validation */ + 0xFFFFFFFF); /* Wa_1406306137:icl,ehl */ wa_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU); @@ -643,11 +653,11 @@ static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine, * Wa_14010443199:rkl * Wa_14010698770:rkl */ - WA_SET_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, - GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); + wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3, + GEN12_DISABLE_CPS_AWARE_COLOR_PIPE); /* WaDisableGPGPUMidThreadPreemption:gen12 */ - WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, + wa_masked_field_set(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL); } @@ -680,12 
+690,22 @@ static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine, gen12_ctx_workarounds_init(engine, wal); /* Wa_1409044764 */ - WA_CLR_BIT_MASKED(GEN11_COMMON_SLICE_CHICKEN3, - DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN); + wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3, + DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN); /* Wa_22010493298 */ - WA_SET_BIT_MASKED(HIZ_CHICKEN, - DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); + wa_masked_en(wal, HIZ_CHICKEN, + DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE); + + /* + * Wa_16011163337 + * + * Like in tgl_ctx_workarounds_init(), read verification is ignored due + * to Wa_1608008084. + */ + wa_add(wal, + FF_MODE2, + FF_MODE2_GS_TIMER_MASK, FF_MODE2_GS_TIMER_224, 0); } static void @@ -804,57 +824,11 @@ ilk_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) static void snb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { - /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ - wa_masked_en(wal, - _3D_CHICKEN, - _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB); - - /* WaDisable_RenderCache_OperationalFlush:snb */ - wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - wa_add(wal, - GEN6_GT_MODE, 0, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), - GEN6_WIZ_HASHING_16x4); - - wa_masked_dis(wal, CACHE_MODE_0, CM0_STC_EVICT_DISABLE_LRA_SNB); - - wa_masked_en(wal, - _3D_CHICKEN3, - /* WaStripsFansDisableFastClipPerformanceFix:snb */ - _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL | - /* - * Bspec says: - * "This bit must be set if 3DSTATE_CLIP clip mode is set - * to normal and 3DSTATE_SF number of SF output attributes - * is more than 16." - */ - _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH); } static void ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { - /* WaDisableEarlyCull:ivb */ - wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); - - /* WaDisablePSDDualDispatchEnable:ivb */ - if (IS_IVB_GT1(i915)) - wa_masked_en(wal, - GEN7_HALF_SLICE_CHICKEN1, - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); - - /* WaDisable_RenderCache_OperationalFlush:ivb */ - wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); - /* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */ wa_masked_dis(wal, GEN7_COMMON_SLICE_CHICKEN1, @@ -866,91 +840,15 @@ ivb_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) /* WaForceL3Serialization:ivb */ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); - - /* - * WaVSThreadDispatchOverride:ivb,vlv - * - * This actually overrides the dispatch - * mode for all thread types. - */ - wa_write_masked_or(wal, GEN7_FF_THREAD_MODE, - GEN7_FF_SCHED_MASK, - GEN7_FF_TS_SCHED_HW | - GEN7_FF_VS_SCHED_HW | - GEN7_FF_DS_SCHED_HW); - - if (0) { /* causes HiZ corruption on ivb:gt1 */ - /* enable HiZ Raw Stall Optimization */ - wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE); - } - - /* WaDisable4x2SubspanOptimization:ivb */ - wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. 
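The Wa_16011163337 entry above passes 0 as wa_add()'s final argument, the read-back verification mask, with a note that verification is skipped because of Wa_1608008084. A minimal model of how that mask gates verification, assuming the same condition wa_verify() applies later in this file ((cur ^ wa->set) & wa->read); struct wa_entry and wa_entry_ok() are names invented here for illustration:

#include <stdbool.h>
#include <stdint.h>

struct wa_entry {
	uint32_t clr;	/* bits to clear when applying */
	uint32_t set;	/* bits to set when applying */
	uint32_t read;	/* bits compared on read-back; 0 = never verify */
};

/* Mirrors the wa_verify() condition: mismatches only count in wa->read. */
static bool wa_entry_ok(const struct wa_entry *wa, uint32_t cur)
{
	return ((cur ^ wa->set) & wa->read) == 0;
}

With read == 0 every mismatch is masked away, so FF_MODE2 is still programmed but can never be reported as a lost workaround, matching the "read verification is ignored" note in the hunk above.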
- * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - wa_add(wal, GEN7_GT_MODE, 0, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), - GEN6_WIZ_HASHING_16x4); } static void vlv_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) { - /* WaDisableEarlyCull:vlv */ - wa_masked_en(wal, _3D_CHICKEN3, _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); - - /* WaPsdDispatchEnable:vlv */ - /* WaDisablePSDDualDispatchEnable:vlv */ - wa_masked_en(wal, - GEN7_HALF_SLICE_CHICKEN1, - GEN7_MAX_PS_THREAD_DEP | - GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); - - /* WaDisable_RenderCache_OperationalFlush:vlv */ - wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); - /* WaForceL3Serialization:vlv */ wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE); /* - * WaVSThreadDispatchOverride:ivb,vlv - * - * This actually overrides the dispatch - * mode for all thread types. - */ - wa_write_masked_or(wal, - GEN7_FF_THREAD_MODE, - GEN7_FF_SCHED_MASK, - GEN7_FF_TS_SCHED_HW | - GEN7_FF_VS_SCHED_HW | - GEN7_FF_DS_SCHED_HW); - - /* - * BSpec says this must be set, even though - * WaDisable4x2SubspanOptimization isn't listed for VLV. - */ - wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). - */ - wa_add(wal, GEN7_GT_MODE, 0, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), - GEN6_WIZ_HASHING_16x4); - - /* * WaIncreaseL3CreditsForVLVB0:vlv * This is the hardware default actually. */ @@ -970,31 +868,6 @@ hsw_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) /* WaVSRefCountFullforceMissDisable:hsw */ wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME); - - wa_masked_dis(wal, - CACHE_MODE_0_GEN7, - /* WaDisable_RenderCache_OperationalFlush:hsw */ - RC_OP_FLUSH_ENABLE | - /* enable HiZ Raw Stall Optimization */ - HIZ_RAW_STALL_OPT_DISABLE); - - /* WaDisable4x2SubspanOptimization:hsw */ - wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); - - /* - * BSpec recommends 8x4 when MSAA is used, - * however in practice 16x4 seems fastest. - * - * Note that PS/WM thread counts depend on the WIZ hashing - * disable bit, which we don't touch here, but it's good - * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). 
- */ - wa_add(wal, GEN7_GT_MODE, 0, - _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), - GEN6_WIZ_HASHING_16x4); - - /* WaSampleCChickenBitEnable:hsw */ - wa_masked_en(wal, HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); } static void @@ -1164,7 +1037,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal) drm_dbg(&i915->drm, "MCR slice/subslice = %x\n", mcr); - wa_write_masked_or(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); + wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr); } static void @@ -1189,10 +1062,10 @@ icl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS); /* WaModifyGamTlbPartitioning:icl */ - wa_write_masked_or(wal, - GEN11_GACB_PERF_CTRL, - GEN11_HASH_CTRL_MASK, - GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); + wa_write_clr_set(wal, + GEN11_GACB_PERF_CTRL, + GEN11_HASH_CTRL_MASK, + GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4); /* Wa_1405766107:icl * Formerly known as WaCL2SFHalfMaxAlloc @@ -1260,6 +1133,11 @@ tgl_gt_workarounds_init(struct drm_i915_private *i915, struct i915_wa_list *wal) wa_write_or(wal, SLICE_UNIT_LEVEL_CLKGATE, L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS); + + /* Wa_1408615072:tgl[a0] */ + if (IS_TGL_UY_GT_REVID(i915, TGL_REVID_A0, TGL_REVID_A0)) + wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, + VSUNIT_CLKGATE_DIS_TGL); } static void @@ -1358,9 +1236,9 @@ static bool wa_verify(const struct i915_wa *wa, u32 cur, const char *name, const char *from) { if ((cur ^ wa->set) & wa->read) { - DRM_ERROR("%s workaround lost on %s! (%x=%x/%x, expected %x)\n", + DRM_ERROR("%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n", name, from, i915_mmio_reg_offset(wa->reg), - cur, cur & wa->read, wa->set); + cur, cur & wa->read, wa->set & wa->read); return false; } @@ -1425,6 +1303,7 @@ bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from) return wa_list_verify(gt->uncore, >->i915->gt_wa_list, from); } +__maybe_unused static inline bool is_nonpriv_flags_valid(u32 flags) { /* Check only valid flag bits are set */ @@ -1752,10 +1631,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) wa_write_or(wal, GEN7_SARCHKMD, GEN7_DISABLE_SAMPLER_PREFETCH); - - /* Wa_1408615072:tgl */ - wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, - VSUNIT_CLKGATE_DIS_TGL); } if (IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) { @@ -1770,6 +1645,19 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) */ wa_write_or(wal, GEN7_FF_THREAD_MODE, GEN12_FF_TESSELATION_DOP_GATE_DISABLE); + + /* + * Wa_1606700617:tgl,dg1 + * Wa_22010271021:tgl,rkl,dg1 + */ + wa_masked_en(wal, + GEN9_CS_DEBUG_MODE1, + FF_DOP_CLOCK_GATE_DISABLE); + + /* Wa_1406941453:tgl,rkl,dg1 */ + wa_masked_en(wal, + GEN10_SAMPLER_MODE, + ENABLE_SMALLPL); } if (IS_DG1_REVID(i915, DG1_REVID_A0, DG1_REVID_A0) || @@ -1798,21 +1686,6 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN6_RC_SLEEP_PSMI_CONTROL, GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE | GEN8_RC_SEMA_IDLE_MSG_DISABLE); - - /* - * Wa_1606700617:tgl - * Wa_22010271021:tgl,rkl - */ - wa_masked_en(wal, - GEN9_CS_DEBUG_MODE1, - FF_DOP_CLOCK_GATE_DISABLE); - } - - if (IS_GEN(i915, 12)) { - /* Wa_1406941453:gen12 */ - wa_masked_en(wal, - GEN10_SAMPLER_MODE, - ENABLE_SMALLPL); } if (IS_GEN(i915, 11)) { @@ -1838,14 +1711,14 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) * Wa_1604223664:icl * Formerly known as 
WaL3BankAddressHashing */ - wa_write_masked_or(wal, - GEN8_GARBCNTL, - GEN11_HASH_CTRL_EXCL_MASK, - GEN11_HASH_CTRL_EXCL_BIT0); - wa_write_masked_or(wal, - GEN11_GLBLINVL, - GEN11_BANK_HASH_ADDR_EXCL_MASK, - GEN11_BANK_HASH_ADDR_EXCL_BIT0); + wa_write_clr_set(wal, + GEN8_GARBCNTL, + GEN11_HASH_CTRL_EXCL_MASK, + GEN11_HASH_CTRL_EXCL_BIT0); + wa_write_clr_set(wal, + GEN11_GLBLINVL, + GEN11_BANK_HASH_ADDR_EXCL_MASK, + GEN11_BANK_HASH_ADDR_EXCL_BIT0); /* * Wa_1405733216:icl @@ -1874,10 +1747,10 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN7_DISABLE_SAMPLER_PREFETCH); /* Wa_1409178092:icl */ - wa_write_masked_or(wal, - GEN11_SCRATCH2, - GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, - 0); + wa_write_clr_set(wal, + GEN11_SCRATCH2, + GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE, + 0); /* WaEnable32PlaneMode:icl */ wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS, @@ -1951,11 +1824,11 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) /* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */ if (IS_GEN9_LP(i915)) - wa_write_masked_or(wal, - GEN8_L3SQCREG1, - L3_PRIO_CREDITS_MASK, - L3_GENERAL_PRIO_CREDITS(62) | - L3_HIGH_PRIO_CREDITS(2)); + wa_write_clr_set(wal, + GEN8_L3SQCREG1, + L3_PRIO_CREDITS_MASK, + L3_GENERAL_PRIO_CREDITS(62) | + L3_HIGH_PRIO_CREDITS(2)); /* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */ wa_write_or(wal, @@ -1963,12 +1836,112 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GEN8_LQSC_FLUSH_COHERENT_LINES); } - if (IS_GEN(i915, 7)) + if (IS_HASWELL(i915)) { + /* WaSampleCChickenBitEnable:hsw */ + wa_masked_en(wal, + HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE); + + wa_masked_dis(wal, + CACHE_MODE_0_GEN7, + /* enable HiZ Raw Stall Optimization */ + HIZ_RAW_STALL_OPT_DISABLE); + + /* WaDisable4x2SubspanOptimization:hsw */ + wa_masked_en(wal, CACHE_MODE_1, PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + } + + if (IS_VALLEYVIEW(i915)) { + /* WaDisableEarlyCull:vlv */ + wa_masked_en(wal, + _3D_CHICKEN3, + _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); + + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. + */ + wa_write_clr_set(wal, + GEN7_FF_THREAD_MODE, + GEN7_FF_SCHED_MASK, + GEN7_FF_TS_SCHED_HW | + GEN7_FF_VS_SCHED_HW | + GEN7_FF_DS_SCHED_HW); + + /* WaPsdDispatchEnable:vlv */ + /* WaDisablePSDDualDispatchEnable:vlv */ + wa_masked_en(wal, + GEN7_HALF_SLICE_CHICKEN1, + GEN7_MAX_PS_THREAD_DEP | + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + } + + if (IS_IVYBRIDGE(i915)) { + /* WaDisableEarlyCull:ivb */ + wa_masked_en(wal, + _3D_CHICKEN3, + _3D_CHICKEN_SF_DISABLE_OBJEND_CULL); + + if (0) { /* causes HiZ corruption on ivb:gt1 */ + /* enable HiZ Raw Stall Optimization */ + wa_masked_dis(wal, + CACHE_MODE_0_GEN7, + HIZ_RAW_STALL_OPT_DISABLE); + } + + /* + * WaVSThreadDispatchOverride:ivb,vlv + * + * This actually overrides the dispatch + * mode for all thread types. 
+ */ + wa_write_clr_set(wal, + GEN7_FF_THREAD_MODE, + GEN7_FF_SCHED_MASK, + GEN7_FF_TS_SCHED_HW | + GEN7_FF_VS_SCHED_HW | + GEN7_FF_DS_SCHED_HW); + + /* WaDisablePSDDualDispatchEnable:ivb */ + if (IS_IVB_GT1(i915)) + wa_masked_en(wal, + GEN7_HALF_SLICE_CHICKEN1, + GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE); + } + + if (IS_GEN(i915, 7)) { /* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */ wa_masked_en(wal, GFX_MODE_GEN7, GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE); + /* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */ + wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE); + + /* + * BSpec says this must be set, even though + * WaDisable4x2SubspanOptimization:ivb,hsw + * WaDisable4x2SubspanOptimization isn't listed for VLV. + */ + wa_masked_en(wal, + CACHE_MODE_1, + PIXEL_SUBSPAN_COLLECT_OPT_DISABLE); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, GEN7_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, + GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + } + if (IS_GEN_RANGE(i915, 6, 7)) /* * We need to disable the AsyncFlip performance optimisations in @@ -1991,6 +1964,39 @@ rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal) GFX_MODE, GFX_TLB_INVALIDATE_EXPLICIT); + /* WaDisableHiZPlanesWhenMSAAEnabled:snb */ + wa_masked_en(wal, + _3D_CHICKEN, + _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB); + + wa_masked_en(wal, + _3D_CHICKEN3, + /* WaStripsFansDisableFastClipPerformanceFix:snb */ + _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL | + /* + * Bspec says: + * "This bit must be set if 3DSTATE_CLIP clip mode is set + * to normal and 3DSTATE_SF number of SF output attributes + * is more than 16." + */ + _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH); + + /* + * BSpec recommends 8x4 when MSAA is used, + * however in practice 16x4 seems fastest. + * + * Note that PS/WM thread counts depend on the WIZ hashing + * disable bit, which we don't touch here, but it's good + * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM). + */ + wa_add(wal, + GEN6_GT_MODE, 0, + _MASKED_FIELD(GEN6_WIZ_HASHING_MASK, GEN6_WIZ_HASHING_16x4), + GEN6_WIZ_HASHING_16x4); + + /* WaDisable_RenderCache_OperationalFlush:snb */ + wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE); + /* * From the Sandybridge PRM, volume 1 part 3, page 24: * "If this bit is set, STCunit will have LRA as replacement @@ -2067,39 +2073,6 @@ void intel_engine_apply_workarounds(struct intel_engine_cs *engine) wa_list_apply(engine->uncore, &engine->wa_list); } -static struct i915_vma * -create_scratch(struct i915_address_space *vm, int count) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - unsigned int size; - int err; - - size = round_up(count * sizeof(u32), PAGE_SIZE); - obj = i915_gem_object_create_internal(vm->i915, size); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); - - vma = i915_vma_instance(obj, vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, - i915_vma_is_ggtt(vma) ? 
PIN_GLOBAL : PIN_USER); - if (err) - goto err_obj; - - return vma; - -err_obj: - i915_gem_object_put(obj); - return ERR_PTR(err); -} - struct mcr_range { u32 start; u32 end; @@ -2202,7 +2175,8 @@ static int engine_wa_list_verify(struct intel_context *ce, if (!wal->count) return 0; - vma = create_scratch(&ce->engine->gt->ggtt->vm, wal->count); + vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm, + wal->count * sizeof(u32)); if (IS_ERR(vma)) return PTR_ERR(vma); diff --git a/drivers/gpu/drm/i915/gt/mock_engine.c b/drivers/gpu/drm/i915/gt/mock_engine.c index 2f830017c51d..4b4f03b70df7 100644 --- a/drivers/gpu/drm/i915/gt/mock_engine.c +++ b/drivers/gpu/drm/i915/gt/mock_engine.c @@ -245,17 +245,6 @@ static void mock_reset_rewind(struct intel_engine_cs *engine, bool stalled) GEM_BUG_ON(stalled); } -static void mark_eio(struct i915_request *rq) -{ - if (i915_request_completed(rq)) - return; - - GEM_BUG_ON(i915_request_signaled(rq)); - - i915_request_set_error_once(rq, -EIO); - i915_request_mark_complete(rq); -} - static void mock_reset_cancel(struct intel_engine_cs *engine) { struct mock_engine *mock = @@ -269,12 +258,12 @@ static void mock_reset_cancel(struct intel_engine_cs *engine) /* Mark all submitted requests as skipped. */ list_for_each_entry(rq, &engine->active.requests, sched.link) - mark_eio(rq); + i915_request_mark_eio(rq); intel_engine_signal_breadcrumbs(engine); /* Cancel and submit all pending requests. */ list_for_each_entry(rq, &mock->hw_queue, mock.link) { - mark_eio(rq); + i915_request_mark_eio(rq); __i915_request_submit(rq); } INIT_LIST_HEAD(&mock->hw_queue); diff --git a/drivers/gpu/drm/i915/gt/selftest_context.c b/drivers/gpu/drm/i915/gt/selftest_context.c index 1f4020e906a8..db738d400168 100644 --- a/drivers/gpu/drm/i915/gt/selftest_context.c +++ b/drivers/gpu/drm/i915/gt/selftest_context.c @@ -25,7 +25,7 @@ static int request_sync(struct i915_request *rq) /* Opencode i915_request_add() so we can keep the timeline locked. */ __i915_request_commit(rq); rq->sched.attr.priority = I915_PRIORITY_BARRIER; - __i915_request_queue(rq, NULL); + __i915_request_queue_bh(rq); timeout = i915_request_wait(rq, 0, HZ / 10); if (timeout < 0) diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c index 729c3c7b11e2..439c8984f5fa 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_cs.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_cs.c @@ -6,6 +6,7 @@ #include <linux/sort.h> +#include "intel_gpu_commands.h" #include "intel_gt_pm.h" #include "intel_rps.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c index b88aa35ad75b..223ab88f7e57 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_heartbeat.c @@ -197,6 +197,7 @@ static int cmp_u32(const void *_a, const void *_b) static int __live_heartbeat_fast(struct intel_engine_cs *engine) { + const unsigned int error_threshold = max(20000u, jiffies_to_usecs(6)); struct intel_context *ce; struct i915_request *rq; ktime_t t0, t1; @@ -254,12 +255,18 @@ static int __live_heartbeat_fast(struct intel_engine_cs *engine) times[0], times[ARRAY_SIZE(times) - 1]); - /* Min work delay is 2 * 2 (worst), +1 for scheduling, +1 for slack */ - if (times[ARRAY_SIZE(times) / 2] > jiffies_to_usecs(6)) { + /* + * Ideally, the upper bound on min work delay would be something like + * 2 * 2 (worst), +1 for scheduling, +1 for slack. 
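The relaxed error_threshold above is clearer with the arithmetic spelled out. Assuming jiffies_to_usecs(j) is roughly j * USEC_PER_SEC / HZ, the threshold only rises above the 20ms floor on low-HZ kernels; heartbeat_error_threshold() below is just an illustration of that max(), not driver code:

/* Sketch of max(20000u, jiffies_to_usecs(6)) for a given CONFIG_HZ. */
static unsigned int heartbeat_error_threshold(unsigned int hz)
{
	unsigned int six_jiffies_us = 6u * (1000000u / hz);

	return six_jiffies_us > 20000u ? six_jiffies_us : 20000u;
}

/*
 * hz == 1000 -> 20000us (6 jiffies is only 6ms, so the floor wins)
 * hz ==  250 -> 24000us
 * hz ==  100 -> 60000us
 */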
In practice, we + * are, even with system_wq_highpri, at the mercy of the CPU scheduler + * and may be stuck behind some slow work for many millisecond. Such + * as our very own display workers. + */ + if (times[ARRAY_SIZE(times) / 2] > error_threshold) { pr_err("%s: Heartbeat delay was %uus, expected less than %dus\n", engine->name, times[ARRAY_SIZE(times) / 2], - jiffies_to_usecs(6)); + error_threshold); err = -EINVAL; } diff --git a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c index b08fc5390e8a..c3d965279fc3 100644 --- a/drivers/gpu/drm/i915/gt/selftest_engine_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_engine_pm.c @@ -4,13 +4,215 @@ * Copyright © 2018 Intel Corporation */ +#include <linux/sort.h> + #include "i915_selftest.h" +#include "intel_gpu_commands.h" +#include "intel_gt_clock_utils.h" #include "selftest_engine.h" #include "selftest_engine_heartbeat.h" #include "selftests/igt_atomic.h" #include "selftests/igt_flush_test.h" #include "selftests/igt_spinner.h" +#define COUNT 5 + +static int cmp_u64(const void *A, const void *B) +{ + const u64 *a = A, *b = B; + + return *a - *b; +} + +static u64 trifilter(u64 *a) +{ + sort(a, COUNT, sizeof(*a), cmp_u64, NULL); + return (a[1] + 2 * a[2] + a[3]) >> 2; +} + +static u32 *emit_wait(u32 *cs, u32 offset, int op, u32 value) +{ + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + op; + *cs++ = value; + *cs++ = offset; + *cs++ = 0; + + return cs; +} + +static u32 *emit_store(u32 *cs, u32 offset, u32 value) +{ + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset; + *cs++ = 0; + *cs++ = value; + + return cs; +} + +static u32 *emit_srm(u32 *cs, i915_reg_t reg, u32 offset) +{ + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(reg); + *cs++ = offset; + *cs++ = 0; + + return cs; +} + +static void write_semaphore(u32 *x, u32 value) +{ + WRITE_ONCE(*x, value); + wmb(); +} + +static int __measure_timestamps(struct intel_context *ce, + u64 *dt, u64 *d_ring, u64 *d_ctx) +{ + struct intel_engine_cs *engine = ce->engine; + u32 *sema = memset32(engine->status_page.addr + 1000, 0, 5); + u32 offset = i915_ggtt_offset(engine->status_page.vma); + struct i915_request *rq; + u32 *cs; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 28); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + /* Signal & wait for start */ + cs = emit_store(cs, offset + 4008, 1); + cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_NEQ_SDD, 1); + + cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4000); + cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4004); + + /* Busy wait */ + cs = emit_wait(cs, offset + 4008, MI_SEMAPHORE_SAD_EQ_SDD, 1); + + cs = emit_srm(cs, RING_TIMESTAMP(engine->mmio_base), offset + 4016); + cs = emit_srm(cs, RING_CTX_TIMESTAMP(engine->mmio_base), offset + 4012); + + intel_ring_advance(rq, cs); + i915_request_get(rq); + i915_request_add(rq); + intel_engine_flush_submission(engine); + + /* Wait for the request to start executing, that then waits for us */ + while (READ_ONCE(sema[2]) == 0) + cpu_relax(); + + /* Run the request for a 100us, sampling timestamps before/after */ + preempt_disable(); + *dt = local_clock(); + write_semaphore(&sema[2], 0); + udelay(100); + *dt = local_clock() - *dt; + write_semaphore(&sema[2], 1); + preempt_enable(); + + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + i915_request_put(rq); + return 
-ETIME; + } + i915_request_put(rq); + + pr_debug("%s CTX_TIMESTAMP: [%x, %x], RING_TIMESTAMP: [%x, %x]\n", + engine->name, sema[1], sema[3], sema[0], sema[4]); + + *d_ctx = sema[3] - sema[1]; + *d_ring = sema[4] - sema[0]; + return 0; +} + +static int __live_engine_timestamps(struct intel_engine_cs *engine) +{ + u64 s_ring[COUNT], s_ctx[COUNT], st[COUNT], d_ring, d_ctx, dt; + struct intel_context *ce; + int i, err = 0; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + for (i = 0; i < COUNT; i++) { + err = __measure_timestamps(ce, &st[i], &s_ring[i], &s_ctx[i]); + if (err) + break; + } + intel_context_put(ce); + if (err) + return err; + + dt = trifilter(st); + d_ring = trifilter(s_ring); + d_ctx = trifilter(s_ctx); + + pr_info("%s elapsed:%lldns, CTX_TIMESTAMP:%lldns, RING_TIMESTAMP:%lldns\n", + engine->name, dt, + intel_gt_clock_interval_to_ns(engine->gt, d_ctx), + intel_gt_clock_interval_to_ns(engine->gt, d_ring)); + + d_ring = intel_gt_clock_interval_to_ns(engine->gt, d_ring); + if (3 * dt > 4 * d_ring || 4 * dt < 3 * d_ring) { + pr_err("%s Mismatch between ring timestamp and walltime!\n", + engine->name); + return -EINVAL; + } + + d_ring = trifilter(s_ring); + d_ctx = trifilter(s_ctx); + + d_ctx *= engine->gt->clock_frequency; + if (IS_ICELAKE(engine->i915)) + d_ring *= 12500000; /* Fixed 80ns for icl ctx timestamp? */ + else + d_ring *= engine->gt->clock_frequency; + + if (3 * d_ctx > 4 * d_ring || 4 * d_ctx < 3 * d_ring) { + pr_err("%s Mismatch between ring and context timestamps!\n", + engine->name); + return -EINVAL; + } + + return 0; +} + +static int live_engine_timestamps(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Check that CS_TIMESTAMP / CTX_TIMESTAMP are in sync, i.e. share + * the same CS clock. 
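Two idioms in the new selftest above are worth spelling out: trifilter() reduces five samples to a weighted median, discarding the two extremes, and the 3x/4x comparisons accept two measurements only if neither exceeds the other by more than about a third. A self-contained sketch follows; the explicit comparison in cmp_u64() avoids truncating a 64-bit difference to int, and roughly_equal() is a name invented here:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>

#define COUNT 5

static int cmp_u64(const void *A, const void *B)
{
	const uint64_t *a = A, *b = B;

	if (*a < *b)
		return -1;
	if (*a > *b)
		return 1;
	return 0;
}

/* Sort five samples, drop min and max, weight the median: (a1 + 2*a2 + a3) / 4. */
static uint64_t trifilter(uint64_t *a)
{
	qsort(a, COUNT, sizeof(*a), cmp_u64);
	return (a[1] + 2 * a[2] + a[3]) >> 2;
}

/* The selftest's tolerance check: pass iff 3/4 <= a/b <= 4/3. */
static bool roughly_equal(uint64_t a, uint64_t b)
{
	return !(3 * a > 4 * b || 4 * a < 3 * b);
}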
+ */ + + if (INTEL_GEN(gt->i915) < 8) + return 0; + + for_each_engine(engine, gt, id) { + int err; + + st_engine_heartbeat_disable(engine); + err = __live_engine_timestamps(engine); + st_engine_heartbeat_enable(engine); + if (err) + return err; + } + + return 0; +} + static int live_engine_busy_stats(void *arg) { struct intel_gt *gt = arg; @@ -177,6 +379,7 @@ static int live_engine_pm(void *arg) int live_engine_pm_selftests(struct intel_gt *gt) { static const struct i915_subtest tests[] = { + SUBTEST(live_engine_timestamps), SUBTEST(live_engine_busy_stats), SUBTEST(live_engine_pm), }; diff --git a/drivers/gpu/drm/i915/gt/selftest_execlists.c b/drivers/gpu/drm/i915/gt/selftest_execlists.c new file mode 100644 index 000000000000..264b5ebdb021 --- /dev/null +++ b/drivers/gpu/drm/i915/gt/selftest_execlists.c @@ -0,0 +1,4741 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright © 2018 Intel Corporation + */ + +#include <linux/prime_numbers.h> + +#include "gem/i915_gem_pm.h" +#include "gt/intel_engine_heartbeat.h" +#include "gt/intel_reset.h" +#include "gt/selftest_engine_heartbeat.h" + +#include "i915_selftest.h" +#include "selftests/i915_random.h" +#include "selftests/igt_flush_test.h" +#include "selftests/igt_live_test.h" +#include "selftests/igt_spinner.h" +#include "selftests/lib_sw_fence.h" + +#include "gem/selftests/igt_gem_utils.h" +#include "gem/selftests/mock_context.h" + +#define CS_GPR(engine, n) ((engine)->mmio_base + 0x600 + (n) * 4) +#define NUM_GPR 16 +#define NUM_GPR_DW (NUM_GPR * 2) /* each GPR is 2 dwords */ + +static bool is_active(struct i915_request *rq) +{ + if (i915_request_is_active(rq)) + return true; + + if (i915_request_on_hold(rq)) + return true; + + if (i915_request_has_initial_breadcrumb(rq) && i915_request_started(rq)) + return true; + + return false; +} + +static int wait_for_submit(struct intel_engine_cs *engine, + struct i915_request *rq, + unsigned long timeout) +{ + /* Ignore our own attempts to suppress excess tasklets */ + tasklet_hi_schedule(&engine->execlists.tasklet); + + timeout += jiffies; + do { + bool done = time_after(jiffies, timeout); + + if (i915_request_completed(rq)) /* that was quick! 
*/ + return 0; + + /* Wait until the HW has acknowleged the submission (or err) */ + intel_engine_flush_submission(engine); + if (!READ_ONCE(engine->execlists.pending[0]) && is_active(rq)) + return 0; + + if (done) + return -ETIME; + + cond_resched(); + } while (1); +} + +static int wait_for_reset(struct intel_engine_cs *engine, + struct i915_request *rq, + unsigned long timeout) +{ + timeout += jiffies; + + do { + cond_resched(); + intel_engine_flush_submission(engine); + + if (READ_ONCE(engine->execlists.pending[0])) + continue; + + if (i915_request_completed(rq)) + break; + + if (READ_ONCE(rq->fence.error)) + break; + } while (time_before(jiffies, timeout)); + + flush_scheduled_work(); + + if (rq->fence.error != -EIO) { + pr_err("%s: hanging request %llx:%lld not reset\n", + engine->name, + rq->fence.context, + rq->fence.seqno); + return -EINVAL; + } + + /* Give the request a jiffie to complete after flushing the worker */ + if (i915_request_wait(rq, 0, + max(0l, (long)(timeout - jiffies)) + 1) < 0) { + pr_err("%s: hanging request %llx:%lld did not complete\n", + engine->name, + rq->fence.context, + rq->fence.seqno); + return -ETIME; + } + + return 0; +} + +static int live_sanitycheck(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + rq = igt_spinner_create_request(&spin, ce, MI_NOOP); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_ctx; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin, rq)) { + GEM_TRACE("spinner failed to start\n"); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + goto out_ctx; + } + + igt_spinner_end(&spin); + if (igt_flush_test(gt->i915)) { + err = -EIO; + goto out_ctx; + } + +out_ctx: + intel_context_put(ce); + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + +static int live_unlite_restore(struct intel_gt *gt, int prio) +{ + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = -ENOMEM; + + /* + * Check that we can correctly context switch between 2 instances + * on the same engine from the same parent context. + */ + + if (igt_spinner_init(&spin, gt)) + return err; + + err = 0; + for_each_engine(engine, gt, id) { + struct intel_context *ce[2] = {}; + struct i915_request *rq[2]; + struct igt_live_test t; + int n; + + if (prio && !intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + st_engine_heartbeat_disable(engine); + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + /* + * Setup the pair of contexts such that if we + * lite-restore using the RING_TAIL from ce[1] it + * will execute garbage from ce[0]->ring. + */ + memset(tmp->ring->vaddr, + POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] 
*/ + tmp->ring->vma->size); + + ce[n] = tmp; + } + GEM_BUG_ON(!ce[1]->ring->size); + intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); + lrc_update_regs(ce[1], engine, ce[1]->ring->head); + + rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + goto err_ce; + } + + i915_request_get(rq[0]); + i915_request_add(rq[0]); + GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); + + if (!igt_wait_for_spinner(&spin, rq[0])) { + i915_request_put(rq[0]); + goto err_ce; + } + + rq[1] = i915_request_create(ce[1]); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + i915_request_put(rq[0]); + goto err_ce; + } + + if (!prio) { + /* + * Ensure we do the switch to ce[1] on completion. + * + * rq[0] is already submitted, so this should reduce + * to a no-op (a wait on a request on the same engine + * uses the submit fence, not the completion fence), + * but it will install a dependency on rq[1] for rq[0] + * that will prevent the pair being reordered by + * timeslicing. + */ + i915_request_await_dma_fence(rq[1], &rq[0]->fence); + } + + i915_request_get(rq[1]); + i915_request_add(rq[1]); + GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); + i915_request_put(rq[0]); + + if (prio) { + struct i915_sched_attr attr = { + .priority = prio, + }; + + /* Alternatively preempt the spinner with ce[1] */ + engine->schedule(rq[1], &attr); + } + + /* And switch back to ce[0] for good measure */ + rq[0] = i915_request_create(ce[0]); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + i915_request_put(rq[1]); + goto err_ce; + } + + i915_request_await_dma_fence(rq[0], &rq[1]->fence); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + +err_ce: + intel_engine_flush_submission(engine); + igt_spinner_end(&spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + + st_engine_heartbeat_enable(engine); + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + +static int live_unlite_switch(void *arg) +{ + return live_unlite_restore(arg, 0); +} + +static int live_unlite_preempt(void *arg) +{ + return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); +} + +static int live_unlite_ring(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct igt_spinner spin; + enum intel_engine_id id; + int err = 0; + + /* + * Setup a preemption event that will cause almost the entire ring + * to be unwound, potentially fooling our intel_ring_direction() + * into emitting a forward lite-restore instead of the rollback. 
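+ * (A lite-restore reuses the context image already loaded on the engine
+ * and only advances RING_TAIL; if the rollback were mis-read as a forward
+ * move, the CS would run straight through the poisoned filler below.)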
+ */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce[2] = {}; + struct i915_request *rq; + struct igt_live_test t; + int n; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + st_engine_heartbeat_disable(engine); + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + memset32(tmp->ring->vaddr, + 0xdeadbeef, /* trigger a hang if executed */ + tmp->ring->vma->size / sizeof(u32)); + + ce[n] = tmp; + } + + /* Create max prio spinner, followed by N low prio nops */ + rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + i915_request_get(rq); + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + i915_request_put(rq); + err = -ETIME; + goto err_ce; + } + + /* Fill the ring, until we will cause a wrap */ + n = 0; + while (intel_ring_direction(ce[0]->ring, + rq->wa_tail, + ce[0]->ring->tail) <= 0) { + struct i915_request *tmp; + + tmp = intel_context_create_request(ce[0]); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + i915_request_put(rq); + goto err_ce; + } + + i915_request_add(tmp); + intel_engine_flush_submission(engine); + n++; + } + intel_engine_flush_submission(engine); + pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", + engine->name, n, + ce[0]->ring->size, + ce[0]->ring->tail, + ce[0]->ring->emit, + rq->tail); + GEM_BUG_ON(intel_ring_direction(ce[0]->ring, + rq->tail, + ce[0]->ring->tail) <= 0); + i915_request_put(rq); + + /* Create a second ring to preempt the first ring after rq[0] */ + rq = intel_context_create_request(ce[1]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + err = wait_for_submit(engine, rq, HZ / 2); + i915_request_put(rq); + if (err) { + pr_err("%s: preemption request was not submitted\n", + engine->name); + err = -ETIME; + } + + pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", + engine->name, + ce[0]->ring->tail, ce[0]->ring->emit, + ce[1]->ring->tail, ce[1]->ring->emit); + +err_ce: + intel_engine_flush_submission(engine); + igt_spinner_end(&spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + st_engine_heartbeat_enable(engine); + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + +static int live_pin_rewind(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = 0; + + /* + * We have to be careful not to trust intel_ring too much, for example + * ring->head is updated upon retire which is out of sync with pinning + * the context. Thus we cannot use ring->head to set CTX_RING_HEAD, + * or else we risk writing an older, stale value. + * + * To simulate this, let's apply a bit of deliberate sabotague. 
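+ * (Below, the ring is poisoned with STACK_MAGIC and ring->emit/tail are
+ * pushed away from head before unpinning; the nop request that follows
+ * repins the context and must not resume from a stale head value, or it
+ * would execute the poison and hang.)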
+ */ + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + struct intel_ring *ring; + struct igt_live_test t; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + err = intel_context_pin(ce); + if (err) { + intel_context_put(ce); + break; + } + + /* Keep the context awake while we play games */ + err = i915_active_acquire(&ce->active); + if (err) { + intel_context_unpin(ce); + intel_context_put(ce); + break; + } + ring = ce->ring; + + /* Poison the ring, and offset the next request from HEAD */ + memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32)); + ring->emit = ring->size / 2; + ring->tail = ring->emit; + GEM_BUG_ON(ring->head); + + intel_context_unpin(ce); + + /* Submit a simple nop request */ + GEM_BUG_ON(intel_context_is_pinned(ce)); + rq = intel_context_create_request(ce); + i915_active_release(&ce->active); /* e.g. async retire */ + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + break; + } + GEM_BUG_ON(!rq->head); + i915_request_add(rq); + + /* Expect not to hang! */ + if (igt_live_test_end(&t)) { + err = -EIO; + break; + } + } + + return err; +} + +static int live_hold_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * In order to support offline error capture for fast preempt reset, + * we need to decouple the guilty request and ensure that it and its + * descendents are not executed while the capture is in progress. + */ + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + st_engine_heartbeat_disable(engine); + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out; + } + + /* We have our request executing, now remove it and reset */ + + local_bh_disable(); + if (test_and_set_bit(I915_RESET_ENGINE + id, + >->reset.flags)) { + local_bh_enable(); + intel_gt_set_wedged(gt); + err = -EBUSY; + goto out; + } + tasklet_disable(&engine->execlists.tasklet); + + engine->execlists.tasklet.func(engine->execlists.tasklet.data); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + i915_request_get(rq); + execlists_hold(engine, rq); + GEM_BUG_ON(!i915_request_on_hold(rq)); + + __intel_engine_reset_bh(engine, NULL); + GEM_BUG_ON(rq->fence.error != -EIO); + + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(I915_RESET_ENGINE + id, + >->reset.flags); + local_bh_enable(); + + /* Check that we do not resubmit the held request */ + if (!i915_request_wait(rq, 0, HZ / 5)) { + pr_err("%s: on hold request completed!\n", + engine->name); + i915_request_put(rq); + err = -EIO; + goto out; + } + GEM_BUG_ON(!i915_request_on_hold(rq)); + + /* But is resubmitted on release */ + execlists_unhold(engine, rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("%s: held request did not complete!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -ETIME; + } + i915_request_put(rq); + +out: + st_engine_heartbeat_enable(engine); + 
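+ /*
+ * The heartbeat stayed off for the whole hold/reset/unhold sequence so
+ * that no background pulse could disturb the spinner; drop the context
+ * and move on to the next engine (or bail out on error).
+ */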
intel_context_put(ce); + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + +static const char *error_repr(int err) +{ + return err ? "bad" : "good"; +} + +static int live_error_interrupt(void *arg) +{ + static const struct error_phase { + enum { GOOD = 0, BAD = -EIO } error[2]; + } phases[] = { + { { BAD, GOOD } }, + { { BAD, BAD } }, + { { BAD, GOOD } }, + { { GOOD, GOOD } }, /* sentinel */ + }; + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning + * of invalid commands in user batches that will cause a GPU hang. + * This is a faster mechanism than using hangcheck/heartbeats, but + * only detects problems the HW knows about -- it will not warn when + * we kill the HW! + * + * To verify our detection and reset, we throw some invalid commands + * at the HW and wait for the interrupt. + */ + + if (!intel_has_reset_engine(gt)) + return 0; + + for_each_engine(engine, gt, id) { + const struct error_phase *p; + int err = 0; + + st_engine_heartbeat_disable(engine); + + for (p = phases; p->error[0] != GOOD; p++) { + struct i915_request *client[ARRAY_SIZE(phases->error)]; + u32 *cs; + int i; + + memset(client, 0, sizeof(*client)); + for (i = 0; i < ARRAY_SIZE(client); i++) { + struct intel_context *ce; + struct i915_request *rq; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + if (rq->engine->emit_init_breadcrumb) { + err = rq->engine->emit_init_breadcrumb(rq); + if (err) { + i915_request_add(rq); + goto out; + } + } + + cs = intel_ring_begin(rq, 2); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto out; + } + + if (p->error[i]) { + *cs++ = 0xdeadbeef; + *cs++ = 0xdeadbeef; + } else { + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + + client[i] = i915_request_get(rq); + i915_request_add(rq); + } + + err = wait_for_submit(engine, client[0], HZ / 2); + if (err) { + pr_err("%s: first request did not start within time!\n", + engine->name); + err = -ETIME; + goto out; + } + + for (i = 0; i < ARRAY_SIZE(client); i++) { + if (i915_request_wait(client[i], 0, HZ / 5) < 0) + pr_debug("%s: %s request incomplete!\n", + engine->name, + error_repr(p->error[i])); + + if (!i915_request_started(client[i])) { + pr_err("%s: %s request not started!\n", + engine->name, + error_repr(p->error[i])); + err = -ETIME; + goto out; + } + + /* Kick the tasklet to process the error */ + intel_engine_flush_submission(engine); + if (client[i]->fence.error != p->error[i]) { + pr_err("%s: %s request (%s) with wrong error code: %d\n", + engine->name, + error_repr(p->error[i]), + i915_request_completed(client[i]) ? 
"completed" : "running", + client[i]->fence.error); + err = -EINVAL; + goto out; + } + } + +out: + for (i = 0; i < ARRAY_SIZE(client); i++) + if (client[i]) + i915_request_put(client[i]); + if (err) { + pr_err("%s: failed at phase[%zd] { %d, %d }\n", + engine->name, p - phases, + p->error[0], p->error[1]); + break; + } + } + + st_engine_heartbeat_enable(engine); + if (err) { + intel_gt_set_wedged(gt); + return err; + } + } + + return 0; +} + +static int +emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) +{ + u32 *cs; + + cs = intel_ring_begin(rq, 10); + if (IS_ERR(cs)) + return PTR_ERR(cs); + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_NEQ_SDD; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma) + 4 * idx; + *cs++ = 0; + + if (idx > 0) { + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); + *cs++ = 0; + *cs++ = 1; + } else { + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + *cs++ = MI_NOOP; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; + + intel_ring_advance(rq, cs); + return 0; +} + +static struct i915_request * +semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) +{ + struct intel_context *ce; + struct i915_request *rq; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + goto out_ce; + + err = 0; + if (rq->engine->emit_init_breadcrumb) + err = rq->engine->emit_init_breadcrumb(rq); + if (err == 0) + err = emit_semaphore_chain(rq, vma, idx); + if (err == 0) + i915_request_get(rq); + i915_request_add(rq); + if (err) + rq = ERR_PTR(err); + +out_ce: + intel_context_put(ce); + return rq; +} + +static int +release_queue(struct intel_engine_cs *engine, + struct i915_vma *vma, + int idx, int prio) +{ + struct i915_sched_attr attr = { + .priority = prio, + }; + struct i915_request *rq; + u32 *cs; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); + *cs++ = 0; + *cs++ = 1; + + intel_ring_advance(rq, cs); + + i915_request_get(rq); + i915_request_add(rq); + + local_bh_disable(); + engine->schedule(rq, &attr); + local_bh_enable(); /* kick tasklet */ + + i915_request_put(rq); + + return 0; +} + +static int +slice_semaphore_queue(struct intel_engine_cs *outer, + struct i915_vma *vma, + int count) +{ + struct intel_engine_cs *engine; + struct i915_request *head; + enum intel_engine_id id; + int err, i, n = 0; + + head = semaphore_queue(outer, vma, n++); + if (IS_ERR(head)) + return PTR_ERR(head); + + for_each_engine(engine, outer->gt, id) { + if (!intel_engine_has_preemption(engine)) + continue; + + for (i = 0; i < count; i++) { + struct i915_request *rq; + + rq = semaphore_queue(engine, vma, n++); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_put(rq); + } + } + + err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); + if (err) + goto out; + + if (i915_request_wait(head, 0, + 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) { + pr_err("%s: Failed to slice along semaphore chain of length (%d, %d)!\n", + outer->name, count, n); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(outer->gt); + err = -EIO; + } + +out: + 
i915_request_put(head); + return err; +} + +static int live_timeslice_preempt(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct i915_vma *vma; + void *vaddr; + int err = 0; + + /* + * If a request takes too long, we would like to give other users + * a fair go on the GPU. In particular, users may create batches + * that wait upon external input, where that input may even be + * supplied by another GPU job. To avoid blocking forever, we + * need to preempt the current task and replace it with another + * ready task. + */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + err = i915_vma_sync(vma); + if (err) + goto err_pin; + + for_each_engine(engine, gt, id) { + if (!intel_engine_has_preemption(engine)) + continue; + + memset(vaddr, 0, PAGE_SIZE); + + st_engine_heartbeat_disable(engine); + err = slice_semaphore_queue(engine, vma, 5); + st_engine_heartbeat_enable(engine); + if (err) + goto err_pin; + + if (igt_flush_test(gt->i915)) { + err = -EIO; + goto err_pin; + } + } + +err_pin: + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); + return err; +} + +static struct i915_request * +create_rewinder(struct intel_context *ce, + struct i915_request *wait, + void *slot, int idx) +{ + const u32 offset = + i915_ggtt_offset(ce->engine->status_page.vma) + + offset_in_page(slot); + struct i915_request *rq; + u32 *cs; + int err; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) + return rq; + + if (wait) { + err = i915_request_await_dma_fence(rq, &wait->fence); + if (err) + goto err; + } + + cs = intel_ring_begin(rq, 14); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err; + } + + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + *cs++ = MI_NOOP; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = idx; + *cs++ = offset; + *cs++ = 0; + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); + *cs++ = offset + idx * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = offset; + *cs++ = 0; + *cs++ = idx + 1; + + intel_ring_advance(rq, cs); + + rq->sched.attr.priority = I915_PRIORITY_MASK; + err = 0; +err: + i915_request_get(rq); + i915_request_add(rq); + if (err) { + i915_request_put(rq); + return ERR_PTR(err); + } + + return rq; +} + +static int live_timeslice_rewind(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * The usual presumption on timeslice expiration is that we replace + * the active context with another. However, given a chain of + * dependencies we may end up with replacing the context with itself, + * but only a few of those requests, forcing us to rewind the + * RING_TAIL of the original request. 
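+ * (Here A:rq1 spins on a semaphore with A:rq2 queued behind it in the
+ * same context; forcing a timeslice must resubmit context A with its
+ * RING_TAIL rewound to cover rq1 alone, so that B:rq1 records its
+ * timestamp Z before A:rq2 records Y -- the expected order X, Z, Y.)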
+ */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + for_each_engine(engine, gt, id) { + enum { A1, A2, B1 }; + enum { X = 1, Z, Y }; + struct i915_request *rq[3] = {}; + struct intel_context *ce; + unsigned long timeslice; + int i, err = 0; + u32 *slot; + + if (!intel_engine_has_timeslices(engine)) + continue; + + /* + * A:rq1 -- semaphore wait, timestamp X + * A:rq2 -- write timestamp Y + * + * B:rq1 [await A:rq1] -- write timestamp Z + * + * Force timeslice, release semaphore. + * + * Expect execution/evaluation order XZY + */ + + st_engine_heartbeat_disable(engine); + timeslice = xchg(&engine->props.timeslice_duration_ms, 1); + + slot = memset32(engine->status_page.addr + 1000, 0, 4); + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err; + } + + rq[A1] = create_rewinder(ce, NULL, slot, X); + if (IS_ERR(rq[A1])) { + intel_context_put(ce); + goto err; + } + + rq[A2] = create_rewinder(ce, NULL, slot, Y); + intel_context_put(ce); + if (IS_ERR(rq[A2])) + goto err; + + err = wait_for_submit(engine, rq[A2], HZ / 2); + if (err) { + pr_err("%s: failed to submit first context\n", + engine->name); + goto err; + } + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto err; + } + + rq[B1] = create_rewinder(ce, rq[A1], slot, Z); + intel_context_put(ce); + if (IS_ERR(rq[2])) + goto err; + + err = wait_for_submit(engine, rq[B1], HZ / 2); + if (err) { + pr_err("%s: failed to submit second context\n", + engine->name); + goto err; + } + + /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ + ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); + while (i915_request_is_active(rq[A2])) { /* semaphore yield! */ + /* Wait for the timeslice to kick in */ + del_timer(&engine->execlists.timer); + tasklet_hi_schedule(&engine->execlists.tasklet); + intel_engine_flush_submission(engine); + } + /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ + GEM_BUG_ON(!i915_request_is_active(rq[A1])); + GEM_BUG_ON(!i915_request_is_active(rq[B1])); + GEM_BUG_ON(i915_request_is_active(rq[A2])); + + /* Release the hounds! 
*/ + slot[0] = 1; + wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ + + for (i = 1; i <= 3; i++) { + unsigned long timeout = jiffies + HZ / 2; + + while (!READ_ONCE(slot[i]) && + time_before(jiffies, timeout)) + ; + + if (!time_before(jiffies, timeout)) { + pr_err("%s: rq[%d] timed out\n", + engine->name, i - 1); + err = -ETIME; + goto err; + } + + pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); + } + + /* XZY: XZ < XY */ + if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { + pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", + engine->name, + slot[Z] - slot[X], + slot[Y] - slot[X]); + err = -EINVAL; + } + +err: + memset32(&slot[0], -1, 4); + wmb(); + + engine->props.timeslice_duration_ms = timeslice; + st_engine_heartbeat_enable(engine); + for (i = 0; i < 3; i++) + i915_request_put(rq[i]); + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + +static struct i915_request *nop_request(struct intel_engine_cs *engine) +{ + struct i915_request *rq; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return rq; + + i915_request_get(rq); + i915_request_add(rq); + + return rq; +} + +static long slice_timeout(struct intel_engine_cs *engine) +{ + long timeout; + + /* Enough time for a timeslice to kick in, and kick out */ + timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine)); + + /* Enough time for the nop request to complete */ + timeout += HZ / 5; + + return timeout + 1; +} + +static int live_timeslice_queue(void *arg) +{ + struct intel_gt *gt = arg; + struct drm_i915_gem_object *obj; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct i915_vma *vma; + void *vaddr; + int err = 0; + + /* + * Make sure that even if ELSP[0] and ELSP[1] are filled with + * timeslicing between them disabled, we *do* enable timeslicing + * if the queue demands it. (Normally, we do not submit if + * ELSP[1] is already occupied, so must rely on timeslicing to + * eject ELSP[0] in favour of the queue.) 
+ */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) + return PTR_ERR(obj); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(vaddr)) { + err = PTR_ERR(vaddr); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + err = i915_vma_sync(vma); + if (err) + goto err_pin; + + for_each_engine(engine, gt, id) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + }; + struct i915_request *rq, *nop; + + if (!intel_engine_has_preemption(engine)) + continue; + + st_engine_heartbeat_disable(engine); + memset(vaddr, 0, PAGE_SIZE); + + /* ELSP[0]: semaphore wait */ + rq = semaphore_queue(engine, vma, 0); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_heartbeat; + } + engine->schedule(rq, &attr); + err = wait_for_submit(engine, rq, HZ / 2); + if (err) { + pr_err("%s: Timed out trying to submit semaphores\n", + engine->name); + goto err_rq; + } + + /* ELSP[1]: nop request */ + nop = nop_request(engine); + if (IS_ERR(nop)) { + err = PTR_ERR(nop); + goto err_rq; + } + err = wait_for_submit(engine, nop, HZ / 2); + i915_request_put(nop); + if (err) { + pr_err("%s: Timed out trying to submit nop\n", + engine->name); + goto err_rq; + } + + GEM_BUG_ON(i915_request_completed(rq)); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + /* Queue: semaphore signal, matching priority as semaphore */ + err = release_queue(engine, vma, 1, effective_prio(rq)); + if (err) + goto err_rq; + + /* Wait until we ack the release_queue and start timeslicing */ + do { + cond_resched(); + intel_engine_flush_submission(engine); + } while (READ_ONCE(engine->execlists.pending[0])); + + /* Timeslice every jiffy, so within 2 we should signal */ + if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + pr_err("%s: Failed to timeslice into queue\n", + engine->name); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + memset(vaddr, 0xff, PAGE_SIZE); + err = -EIO; + } +err_rq: + i915_request_put(rq); +err_heartbeat: + st_engine_heartbeat_enable(engine); + if (err) + break; + } + +err_pin: + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); + return err; +} + +static int live_timeslice_nopreempt(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + struct igt_spinner spin; + int err = 0; + + /* + * We should not timeslice into a request that is marked with + * I915_REQUEST_NOPREEMPT. 
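+ * (The spinner below runs with I915_FENCE_FLAG_NOPREEMPT set, so even a
+ * later I915_PRIORITY_BARRIER request must not be timesliced in ahead of
+ * it; if that barrier completes within slice_timeout(), the test fails.)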
+ */ + if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) + return 0; + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + struct intel_context *ce; + struct i915_request *rq; + unsigned long timeslice; + + if (!intel_engine_has_preemption(engine)) + continue; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + break; + } + + st_engine_heartbeat_disable(engine); + timeslice = xchg(&engine->props.timeslice_duration_ms, 1); + + /* Create an unpreemptible spinner */ + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_heartbeat; + } + + i915_request_get(rq); + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + i915_request_put(rq); + err = -ETIME; + goto out_spin; + } + + set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); + i915_request_put(rq); + + /* Followed by a maximum priority barrier (heartbeat) */ + + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out_spin; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_spin; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + /* + * Wait until the barrier is in ELSP, and we know timeslicing + * will have been activated. + */ + if (wait_for_submit(engine, rq, HZ / 2)) { + i915_request_put(rq); + err = -ETIME; + goto out_spin; + } + + /* + * Since the ELSP[0] request is unpreemptible, it should not + * allow the maximum priority barrier through. Wait long + * enough to see if it is timesliced in by mistake. + */ + if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) { + pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", + engine->name); + err = -EINVAL; + } + i915_request_put(rq); + +out_spin: + igt_spinner_end(&spin); +out_heartbeat: + xchg(&engine->props.timeslice_duration_ms, timeslice); + st_engine_heartbeat_enable(engine); + if (err) + break; + + if (igt_flush_test(gt->i915)) { + err = -EIO; + break; + } + } + + igt_spinner_fini(&spin); + return err; +} + +static int live_busywait_preempt(void *arg) +{ + struct intel_gt *gt = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct intel_engine_cs *engine; + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + enum intel_engine_id id; + int err = -ENOMEM; + u32 *map; + + /* + * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can + * preempt the busywaits used to synchronise between rings. 
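+ * (The low priority request sets a dword in the GGTT and then busywaits,
+ * polling with MI_SEMAPHORE_WAIT, for it to return to zero; the high
+ * priority request simply writes that zero, so it can only make progress
+ * by preempting the busywait.)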
+ */ + + ctx_hi = kernel_context(gt->i915); + if (!ctx_hi) + return -ENOMEM; + ctx_hi->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + + ctx_lo = kernel_context(gt->i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + + obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_ctx_lo; + } + + map = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto err_obj; + } + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_map; + } + + err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); + if (err) + goto err_map; + + err = i915_vma_sync(vma); + if (err) + goto err_vma; + + for_each_engine(engine, gt, id) { + struct i915_request *lo, *hi; + struct igt_live_test t; + u32 *cs; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + goto err_vma; + } + + /* + * We create two requests. The low priority request + * busywaits on a semaphore (inside the ringbuffer where + * is should be preemptible) and the high priority requests + * uses a MI_STORE_DWORD_IMM to update the semaphore value + * allowing the first request to complete. If preemption + * fails, we hang instead. + */ + + lo = igt_request_alloc(ctx_lo, engine); + if (IS_ERR(lo)) { + err = PTR_ERR(lo); + goto err_vma; + } + + cs = intel_ring_begin(lo, 8); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(lo); + goto err_vma; + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma); + *cs++ = 0; + *cs++ = 1; + + /* XXX Do we need a flush + invalidate here? 
*/ + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_GLOBAL_GTT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = i915_ggtt_offset(vma); + *cs++ = 0; + + intel_ring_advance(lo, cs); + + i915_request_get(lo); + i915_request_add(lo); + + if (wait_for(READ_ONCE(*map), 10)) { + i915_request_put(lo); + err = -ETIMEDOUT; + goto err_vma; + } + + /* Low priority request should be busywaiting now */ + if (i915_request_wait(lo, 0, 1) != -ETIME) { + i915_request_put(lo); + pr_err("%s: Busywaiting request did not!\n", + engine->name); + err = -EIO; + goto err_vma; + } + + hi = igt_request_alloc(ctx_hi, engine); + if (IS_ERR(hi)) { + err = PTR_ERR(hi); + i915_request_put(lo); + goto err_vma; + } + + cs = intel_ring_begin(hi, 4); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + i915_request_add(hi); + i915_request_put(lo); + goto err_vma; + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(vma); + *cs++ = 0; + *cs++ = 0; + + intel_ring_advance(hi, cs); + i915_request_add(hi); + + if (i915_request_wait(lo, 0, HZ / 5) < 0) { + struct drm_printer p = drm_info_printer(gt->i915->drm.dev); + + pr_err("%s: Failed to preempt semaphore busywait!\n", + engine->name); + + intel_engine_dump(engine, &p, "%s\n", engine->name); + GEM_TRACE_DUMP(); + + i915_request_put(lo); + intel_gt_set_wedged(gt); + err = -EIO; + goto err_vma; + } + GEM_BUG_ON(READ_ONCE(*map)); + i915_request_put(lo); + + if (igt_live_test_end(&t)) { + err = -EIO; + goto err_vma; + } + } + + err = 0; +err_vma: + i915_vma_unpin(vma); +err_map: + i915_gem_object_unpin_map(obj); +err_obj: + i915_gem_object_put(obj); +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); + return err; +} + +static struct i915_request * +spinner_create_request(struct igt_spinner *spin, + struct i915_gem_context *ctx, + struct intel_engine_cs *engine, + u32 arb) +{ + struct intel_context *ce; + struct i915_request *rq; + + ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + rq = igt_spinner_create_request(spin, ce, arb); + intel_context_put(ce); + return rq; +} + +static int live_preempt(void *arg) +{ + struct intel_gt *gt = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct igt_spinner spin_hi, spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENOMEM; + + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; + + if (igt_spinner_init(&spin_lo, gt)) + goto err_spin_hi; + + ctx_hi = kernel_context(gt->i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + + ctx_lo = kernel_context(gt->i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + + for_each_engine(engine, gt, id) { + struct igt_live_test t; + struct i915_request *rq; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + goto err_ctx_lo; + } + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_lo, rq)) { + GEM_TRACE("lo spinner failed to start\n"); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + goto err_ctx_lo; + } + + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + igt_spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto 
err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_hi, rq)) { + GEM_TRACE("hi spinner failed to start\n"); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + goto err_ctx_lo; + } + + igt_spinner_end(&spin_hi); + igt_spinner_end(&spin_lo); + + if (igt_live_test_end(&t)) { + err = -EIO; + goto err_ctx_lo; + } + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + igt_spinner_fini(&spin_lo); +err_spin_hi: + igt_spinner_fini(&spin_hi); + return err; +} + +static int live_late_preempt(void *arg) +{ + struct intel_gt *gt = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct igt_spinner spin_hi, spin_lo; + struct intel_engine_cs *engine; + struct i915_sched_attr attr = {}; + enum intel_engine_id id; + int err = -ENOMEM; + + if (igt_spinner_init(&spin_hi, gt)) + return -ENOMEM; + + if (igt_spinner_init(&spin_lo, gt)) + goto err_spin_hi; + + ctx_hi = kernel_context(gt->i915); + if (!ctx_hi) + goto err_spin_lo; + + ctx_lo = kernel_context(gt->i915); + if (!ctx_lo) + goto err_ctx_hi; + + /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */ + ctx_lo->sched.priority = I915_USER_PRIORITY(1); + + for_each_engine(engine, gt, id) { + struct igt_live_test t; + struct i915_request *rq; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + goto err_ctx_lo; + } + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_lo, rq)) { + pr_err("First context failed to start\n"); + goto err_wedged; + } + + rq = spinner_create_request(&spin_hi, ctx_hi, engine, + MI_NOOP); + if (IS_ERR(rq)) { + igt_spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (igt_wait_for_spinner(&spin_hi, rq)) { + pr_err("Second context overtook first?\n"); + goto err_wedged; + } + + attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); + engine->schedule(rq, &attr); + + if (!igt_wait_for_spinner(&spin_hi, rq)) { + pr_err("High priority context failed to preempt the low priority context\n"); + GEM_TRACE_DUMP(); + goto err_wedged; + } + + igt_spinner_end(&spin_hi); + igt_spinner_end(&spin_lo); + + if (igt_live_test_end(&t)) { + err = -EIO; + goto err_ctx_lo; + } + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + igt_spinner_fini(&spin_lo); +err_spin_hi: + igt_spinner_fini(&spin_hi); + return err; + +err_wedged: + igt_spinner_end(&spin_hi); + igt_spinner_end(&spin_lo); + intel_gt_set_wedged(gt); + err = -EIO; + goto err_ctx_lo; +} + +struct preempt_client { + struct igt_spinner spin; + struct i915_gem_context *ctx; +}; + +static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) +{ + c->ctx = kernel_context(gt->i915); + if (!c->ctx) + return -ENOMEM; + + if (igt_spinner_init(&c->spin, gt)) + goto err_ctx; + + return 0; + +err_ctx: + kernel_context_close(c->ctx); + return -ENOMEM; +} + +static void preempt_client_fini(struct preempt_client *c) +{ + igt_spinner_fini(&c->spin); + kernel_context_close(c->ctx); +} + +static int live_nopreempt(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct preempt_client a, b; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * Verify that we can disable preemption for an individual 
request + * that may be being observed and not want to be interrupted. + */ + + if (preempt_client_init(gt, &a)) + return -ENOMEM; + if (preempt_client_init(gt, &b)) + goto err_client_a; + b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); + + for_each_engine(engine, gt, id) { + struct i915_request *rq_a, *rq_b; + + if (!intel_engine_has_preemption(engine)) + continue; + + engine->execlists.preempt_hang.count = 0; + + rq_a = spinner_create_request(&a.spin, + a.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq_a)) { + err = PTR_ERR(rq_a); + goto err_client_b; + } + + /* Low priority client, but unpreemptable! */ + __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); + + i915_request_add(rq_a); + if (!igt_wait_for_spinner(&a.spin, rq_a)) { + pr_err("First client failed to start\n"); + goto err_wedged; + } + + rq_b = spinner_create_request(&b.spin, + b.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq_b)) { + err = PTR_ERR(rq_b); + goto err_client_b; + } + + i915_request_add(rq_b); + + /* B is much more important than A! (But A is unpreemptable.) */ + GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); + + /* Wait long enough for preemption and timeslicing */ + if (igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client started too early!\n"); + goto err_wedged; + } + + igt_spinner_end(&a.spin); + + if (!igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client failed to start\n"); + goto err_wedged; + } + + igt_spinner_end(&b.spin); + + if (engine->execlists.preempt_hang.count) { + pr_err("Preemption recorded x%d; should have been suppressed!\n", + engine->execlists.preempt_hang.count); + err = -EINVAL; + goto err_wedged; + } + + if (igt_flush_test(gt->i915)) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&b); +err_client_a: + preempt_client_fini(&a); + return err; + +err_wedged: + igt_spinner_end(&b.spin); + igt_spinner_end(&a.spin); + intel_gt_set_wedged(gt); + err = -EIO; + goto err_client_b; +} + +struct live_preempt_cancel { + struct intel_engine_cs *engine; + struct preempt_client a, b; +}; + +static int __cancel_active0(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP0 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + clear_bit(CONTEXT_BANNED, &rq->context->flags); + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + intel_context_set_banned(rq->context); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + err = wait_for_reset(arg->engine, rq, HZ / 2); + if (err) { + pr_err("Cancelled inflight0 request did not reset\n"); + goto out; + } + +out: + i915_request_put(rq); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_active1(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[2] = {}; + struct igt_live_test t; + int err; + + /* Preempt cancel of ELSP1 */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* no preemption */ + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + clear_bit(CONTEXT_BANNED, 
&rq[0]->context->flags); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + rq[1] = spinner_create_request(&arg->b.spin, + arg->b.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + intel_context_set_banned(rq[1]->context); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + igt_spinner_end(&arg->a.spin); + err = wait_for_reset(arg->engine, rq[1], HZ / 2); + if (err) + goto out; + + if (rq[0]->fence.error != 0) { + pr_err("Normal inflight0 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != -EIO) { + pr_err("Cancelled inflight1 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_queued(struct live_preempt_cancel *arg) +{ + struct i915_request *rq[3] = {}; + struct igt_live_test t; + int err; + + /* Full ELSP and one in the wings */ + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + if (igt_live_test_begin(&t, arg->engine->i915, + __func__, arg->engine->name)) + return -EIO; + + rq[0] = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[0])) + return PTR_ERR(rq[0]); + + clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); + i915_request_get(rq[0]); + i915_request_add(rq[0]); + if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { + err = -EIO; + goto out; + } + + rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); + if (IS_ERR(rq[1])) { + err = PTR_ERR(rq[1]); + goto out; + } + + clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); + i915_request_get(rq[1]); + err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); + i915_request_add(rq[1]); + if (err) + goto out; + + rq[2] = spinner_create_request(&arg->b.spin, + arg->a.ctx, arg->engine, + MI_ARB_CHECK); + if (IS_ERR(rq[2])) { + err = PTR_ERR(rq[2]); + goto out; + } + + i915_request_get(rq[2]); + err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); + i915_request_add(rq[2]); + if (err) + goto out; + + intel_context_set_banned(rq[2]->context); + err = intel_engine_pulse(arg->engine); + if (err) + goto out; + + err = wait_for_reset(arg->engine, rq[2], HZ / 2); + if (err) + goto out; + + if (rq[0]->fence.error != -EIO) { + pr_err("Cancelled inflight0 request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + + if (rq[1]->fence.error != 0) { + pr_err("Normal inflight1 request did not complete\n"); + err = -EINVAL; + goto out; + } + + if (rq[2]->fence.error != -EIO) { + pr_err("Cancelled queued request did not report -EIO\n"); + err = -EINVAL; + goto out; + } + +out: + i915_request_put(rq[2]); + i915_request_put(rq[1]); + i915_request_put(rq[0]); + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int __cancel_hostile(struct live_preempt_cancel *arg) +{ + struct i915_request *rq; + int err; + + /* Preempt cancel non-preemptible spinner in ELSP0 */ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return 0; + + if (!intel_has_reset_engine(arg->engine->gt)) + return 0; + + GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, arg->engine, + MI_NOOP); /* preemption disabled */ 
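+ /*
+ * Using MI_NOOP instead of MI_ARB_CHECK leaves no arbitration point in
+ * the spin loop, so the banned context is not expected to be preemptible
+ * and the pulse below must escalate to the preempt-timeout forced reset.
+ */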
+ if (IS_ERR(rq)) + return PTR_ERR(rq); + + clear_bit(CONTEXT_BANNED, &rq->context->flags); + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + intel_context_set_banned(rq->context); + err = intel_engine_pulse(arg->engine); /* force reset */ + if (err) + goto out; + + err = wait_for_reset(arg->engine, rq, HZ / 2); + if (err) { + pr_err("Cancelled inflight0 request did not reset\n"); + goto out; + } + +out: + i915_request_put(rq); + if (igt_flush_test(arg->engine->i915)) + err = -EIO; + return err; +} + +static void force_reset_timeout(struct intel_engine_cs *engine) +{ + engine->reset_timeout.probability = 999; + atomic_set(&engine->reset_timeout.times, -1); +} + +static void cancel_reset_timeout(struct intel_engine_cs *engine) +{ + memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout)); +} + +static int __cancel_fail(struct live_preempt_cancel *arg) +{ + struct intel_engine_cs *engine = arg->engine; + struct i915_request *rq; + int err; + + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return 0; + + if (!intel_has_reset_engine(engine->gt)) + return 0; + + GEM_TRACE("%s(%s)\n", __func__, engine->name); + rq = spinner_create_request(&arg->a.spin, + arg->a.ctx, engine, + MI_NOOP); /* preemption disabled */ + if (IS_ERR(rq)) + return PTR_ERR(rq); + + clear_bit(CONTEXT_BANNED, &rq->context->flags); + i915_request_get(rq); + i915_request_add(rq); + if (!igt_wait_for_spinner(&arg->a.spin, rq)) { + err = -EIO; + goto out; + } + + intel_context_set_banned(rq->context); + + err = intel_engine_pulse(engine); + if (err) + goto out; + + force_reset_timeout(engine); + + /* force preempt reset [failure] */ + while (!engine->execlists.pending[0]) + intel_engine_flush_submission(engine); + del_timer_sync(&engine->execlists.preempt); + intel_engine_flush_submission(engine); + + cancel_reset_timeout(engine); + + /* after failure, require heartbeats to reset device */ + intel_engine_set_heartbeat(engine, 1); + err = wait_for_reset(engine, rq, HZ / 2); + intel_engine_set_heartbeat(engine, + engine->defaults.heartbeat_interval_ms); + if (err) { + pr_err("Cancelled inflight0 request did not reset\n"); + goto out; + } + +out: + i915_request_put(rq); + if (igt_flush_test(engine->i915)) + err = -EIO; + return err; +} + +static int live_preempt_cancel(void *arg) +{ + struct intel_gt *gt = arg; + struct live_preempt_cancel data; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * To cancel an inflight context, we need to first remove it from the + * GPU. That sounds like preemption! Plus a little bit of bookkeeping. 
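+ * (Each __cancel_*() phase bans the context of a chosen in-flight or
+ * queued request and kicks the engine with intel_engine_pulse(), then
+ * checks that requests from the banned context are cancelled with -EIO
+ * while any innocent requests complete normally.)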
+ */ + + if (preempt_client_init(gt, &data.a)) + return -ENOMEM; + if (preempt_client_init(gt, &data.b)) + goto err_client_a; + + for_each_engine(data.engine, gt, id) { + if (!intel_engine_has_preemption(data.engine)) + continue; + + err = __cancel_active0(&data); + if (err) + goto err_wedged; + + err = __cancel_active1(&data); + if (err) + goto err_wedged; + + err = __cancel_queued(&data); + if (err) + goto err_wedged; + + err = __cancel_hostile(&data); + if (err) + goto err_wedged; + + err = __cancel_fail(&data); + if (err) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&data.b); +err_client_a: + preempt_client_fini(&data.a); + return err; + +err_wedged: + GEM_TRACE_DUMP(); + igt_spinner_end(&data.b.spin); + igt_spinner_end(&data.a.spin); + intel_gt_set_wedged(gt); + goto err_client_b; +} + +static int live_suppress_self_preempt(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) + }; + struct preempt_client a, b; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * Verify that if a preemption request does not cause a change in + * the current execution order, the preempt-to-idle injection is + * skipped and that we do not accidentally apply it after the CS + * completion event. + */ + + if (intel_uc_uses_guc_submission(>->uc)) + return 0; /* presume black blox */ + + if (intel_vgpu_active(gt->i915)) + return 0; /* GVT forces single port & request submission */ + + if (preempt_client_init(gt, &a)) + return -ENOMEM; + if (preempt_client_init(gt, &b)) + goto err_client_a; + + for_each_engine(engine, gt, id) { + struct i915_request *rq_a, *rq_b; + int depth; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (igt_flush_test(gt->i915)) + goto err_wedged; + + st_engine_heartbeat_disable(engine); + engine->execlists.preempt_hang.count = 0; + + rq_a = spinner_create_request(&a.spin, + a.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_a)) { + err = PTR_ERR(rq_a); + st_engine_heartbeat_enable(engine); + goto err_client_b; + } + + i915_request_add(rq_a); + if (!igt_wait_for_spinner(&a.spin, rq_a)) { + pr_err("First client failed to start\n"); + st_engine_heartbeat_enable(engine); + goto err_wedged; + } + + /* Keep postponing the timer to avoid premature slicing */ + mod_timer(&engine->execlists.timer, jiffies + HZ); + for (depth = 0; depth < 8; depth++) { + rq_b = spinner_create_request(&b.spin, + b.ctx, engine, + MI_NOOP); + if (IS_ERR(rq_b)) { + err = PTR_ERR(rq_b); + st_engine_heartbeat_enable(engine); + goto err_client_b; + } + i915_request_add(rq_b); + + GEM_BUG_ON(i915_request_completed(rq_a)); + engine->schedule(rq_a, &attr); + igt_spinner_end(&a.spin); + + if (!igt_wait_for_spinner(&b.spin, rq_b)) { + pr_err("Second client failed to start\n"); + st_engine_heartbeat_enable(engine); + goto err_wedged; + } + + swap(a, b); + rq_a = rq_b; + } + igt_spinner_end(&a.spin); + + if (engine->execlists.preempt_hang.count) { + pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", + engine->name, + engine->execlists.preempt_hang.count, + depth); + st_engine_heartbeat_enable(engine); + err = -EINVAL; + goto err_client_b; + } + + st_engine_heartbeat_enable(engine); + if (igt_flush_test(gt->i915)) + goto err_wedged; + } + + err = 0; +err_client_b: + preempt_client_fini(&b); +err_client_a: + preempt_client_fini(&a); + return err; + +err_wedged: + igt_spinner_end(&b.spin); + igt_spinner_end(&a.spin); + intel_gt_set_wedged(gt); + err 
= -EIO; + goto err_client_b; +} + +static int live_chain_preempt(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct preempt_client hi, lo; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * Build a chain AB...BA between two contexts (A, B) and request + * preemption of the last request. It should then complete before + * the previously submitted spinner in B. + */ + + if (preempt_client_init(gt, &hi)) + return -ENOMEM; + + if (preempt_client_init(gt, &lo)) + goto err_client_hi; + + for_each_engine(engine, gt, id) { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), + }; + struct igt_live_test t; + struct i915_request *rq; + int ring_size, count, i; + + if (!intel_engine_has_preemption(engine)) + continue; + + rq = spinner_create_request(&lo.spin, + lo.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + goto err_wedged; + + i915_request_get(rq); + i915_request_add(rq); + + ring_size = rq->wa_tail - rq->head; + if (ring_size < 0) + ring_size += rq->ring->size; + ring_size = rq->ring->size / ring_size; + pr_debug("%s(%s): Using maximum of %d requests\n", + __func__, engine->name, ring_size); + + igt_spinner_end(&lo.spin); + if (i915_request_wait(rq, 0, HZ / 2) < 0) { + pr_err("Timed out waiting to flush %s\n", engine->name); + i915_request_put(rq); + goto err_wedged; + } + i915_request_put(rq); + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + goto err_wedged; + } + + for_each_prime_number_from(count, 1, ring_size) { + rq = spinner_create_request(&hi.spin, + hi.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + if (!igt_wait_for_spinner(&hi.spin, rq)) + goto err_wedged; + + rq = spinner_create_request(&lo.spin, + lo.ctx, engine, + MI_ARB_CHECK); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + + for (i = 0; i < count; i++) { + rq = igt_request_alloc(lo.ctx, engine); + if (IS_ERR(rq)) + goto err_wedged; + i915_request_add(rq); + } + + rq = igt_request_alloc(hi.ctx, engine); + if (IS_ERR(rq)) + goto err_wedged; + + i915_request_get(rq); + i915_request_add(rq); + engine->schedule(rq, &attr); + + igt_spinner_end(&hi.spin); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + pr_err("Failed to preempt over chain of %d\n", + count); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + i915_request_put(rq); + goto err_wedged; + } + igt_spinner_end(&lo.spin); + i915_request_put(rq); + + rq = igt_request_alloc(lo.ctx, engine); + if (IS_ERR(rq)) + goto err_wedged; + + i915_request_get(rq); + i915_request_add(rq); + + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + pr_err("Failed to flush low priority chain of %d requests\n", + count); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + i915_request_put(rq); + goto err_wedged; + } + i915_request_put(rq); + } + + if (igt_live_test_end(&t)) { + err = -EIO; + goto err_wedged; + } + } + + err = 0; +err_client_lo: + preempt_client_fini(&lo); +err_client_hi: + preempt_client_fini(&hi); + return err; + +err_wedged: + igt_spinner_end(&hi.spin); + igt_spinner_end(&lo.spin); + intel_gt_set_wedged(gt); + err = -EIO; + goto err_client_lo; +} + +static int create_gang(struct intel_engine_cs *engine, + struct i915_request **prev) +{ + struct drm_i915_gem_object *obj; + struct intel_context *ce; + struct i915_request *rq; + struct i915_vma *vma; + u32 *cs; + int 
err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto err_ce; + } + + vma = i915_vma_instance(obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_obj; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto err_obj; + + cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_obj; + } + + /* Semaphore target: spin until zero */ + *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_EQ_SDD; + *cs++ = 0; + *cs++ = lower_32_bits(vma->node.start); + *cs++ = upper_32_bits(vma->node.start); + + if (*prev) { + u64 offset = (*prev)->batch->node.start; + + /* Terminate the spinner in the next lower priority batch. */ + *cs++ = MI_STORE_DWORD_IMM_GEN4; + *cs++ = lower_32_bits(offset); + *cs++ = upper_32_bits(offset); + *cs++ = 0; + } + + *cs++ = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_obj; + } + + rq->batch = i915_vma_get(vma); + i915_request_get(rq); + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(vma); + i915_request_add(rq); + if (err) + goto err_rq; + + i915_gem_object_put(obj); + intel_context_put(ce); + + rq->mock.link.next = &(*prev)->mock.link; + *prev = rq; + return 0; + +err_rq: + i915_vma_put(rq->batch); + i915_request_put(rq); +err_obj: + i915_gem_object_put(obj); +err_ce: + intel_context_put(ce); + return err; +} + +static int __live_preempt_ring(struct intel_engine_cs *engine, + struct igt_spinner *spin, + int queue_sz, int ring_sz) +{ + struct intel_context *ce[2] = {}; + struct i915_request *rq; + struct igt_live_test t; + int err = 0; + int n; + + if (igt_live_test_begin(&t, engine->i915, __func__, engine->name)) + return -EIO; + + for (n = 0; n < ARRAY_SIZE(ce); n++) { + struct intel_context *tmp; + + tmp = intel_context_create(engine); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + goto err_ce; + } + + tmp->ring = __intel_context_ring_size(ring_sz); + + err = intel_context_pin(tmp); + if (err) { + intel_context_put(tmp); + goto err_ce; + } + + memset32(tmp->ring->vaddr, + 0xdeadbeef, /* trigger a hang if executed */ + tmp->ring->vma->size / sizeof(u32)); + + ce[n] = tmp; + } + + rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + i915_request_get(rq); + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_add(rq); + + if (!igt_wait_for_spinner(spin, rq)) { + intel_gt_set_wedged(engine->gt); + i915_request_put(rq); + err = -ETIME; + goto err_ce; + } + + /* Fill the ring, until we will cause a wrap */ + n = 0; + while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) { + struct i915_request *tmp; + + tmp = intel_context_create_request(ce[0]); + if (IS_ERR(tmp)) { + err = PTR_ERR(tmp); + i915_request_put(rq); + goto err_ce; + } + + i915_request_add(tmp); + intel_engine_flush_submission(engine); + n++; + } + intel_engine_flush_submission(engine); + pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", + engine->name, queue_sz, n, + ce[0]->ring->size, + ce[0]->ring->tail, + 
ce[0]->ring->emit, + rq->tail); + i915_request_put(rq); + + /* Create a second request to preempt the first ring */ + rq = intel_context_create_request(ce[1]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ce; + } + + rq->sched.attr.priority = I915_PRIORITY_BARRIER; + i915_request_get(rq); + i915_request_add(rq); + + err = wait_for_submit(engine, rq, HZ / 2); + i915_request_put(rq); + if (err) { + pr_err("%s: preemption request was not submitted\n", + engine->name); + err = -ETIME; + } + + pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", + engine->name, + ce[0]->ring->tail, ce[0]->ring->emit, + ce[1]->ring->tail, ce[1]->ring->emit); + +err_ce: + intel_engine_flush_submission(engine); + igt_spinner_end(spin); + for (n = 0; n < ARRAY_SIZE(ce); n++) { + if (IS_ERR_OR_NULL(ce[n])) + break; + + intel_context_unpin(ce[n]); + intel_context_put(ce[n]); + } + if (igt_live_test_end(&t)) + err = -EIO; + return err; +} + +static int live_preempt_ring(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct igt_spinner spin; + enum intel_engine_id id; + int err = 0; + + /* + * Check that we rollback large chunks of a ring in order to do a + * preemption event. Similar to live_unlite_ring, but looking at + * ring size rather than the impact of intel_ring_direction(). + */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for_each_engine(engine, gt, id) { + int n; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (!intel_engine_can_store_dword(engine)) + continue; + + st_engine_heartbeat_disable(engine); + + for (n = 0; n <= 3; n++) { + err = __live_preempt_ring(engine, &spin, + n * SZ_4K / 4, SZ_4K); + if (err) + break; + } + + st_engine_heartbeat_enable(engine); + if (err) + break; + } + + igt_spinner_fini(&spin); + return err; +} + +static int live_preempt_gang(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* + * Build as long a chain of preempters as we can, with each + * request higher priority than the last. Once we are ready, we release + * the last batch which then percolates down the chain, each releasing + * the next oldest in turn. The intent is to simply push as hard as we + * can with the number of preemptions, trying to exceed narrow HW + * limits. At a minimum, we insist that we can sort all the user + * high priority levels into execution order. + */ + + for_each_engine(engine, gt, id) { + struct i915_request *rq = NULL; + struct igt_live_test t; + IGT_TIMEOUT(end_time); + int prio = 0; + int err = 0; + u32 *cs; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) + return -EIO; + + do { + struct i915_sched_attr attr = { + .priority = I915_USER_PRIORITY(prio++), + }; + + err = create_gang(engine, &rq); + if (err) + break; + + /* Submit each spinner at increasing priority */ + engine->schedule(rq, &attr); + } while (prio <= I915_PRIORITY_MAX && + !__igt_timeout(end_time, NULL)); + pr_debug("%s: Preempt chain of %d requests\n", + engine->name, prio); + + /* + * Such that the last spinner is the highest priority and + * should execute first. When that spinner completes, + * it will terminate the next lowest spinner until there + * are no more spinners and the gang is complete.
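+	 * Clearing the semaphore of that last, highest priority batch (the
+	 * write through rq->batch below) is what starts the cascade.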
+ */ + cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); + if (!IS_ERR(cs)) { + *cs = 0; + i915_gem_object_unpin_map(rq->batch->obj); + } else { + err = PTR_ERR(cs); + intel_gt_set_wedged(gt); + } + + while (rq) { /* wait for each rq from highest to lowest prio */ + struct i915_request *n = list_next_entry(rq, mock.link); + + if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { + struct drm_printer p = + drm_info_printer(engine->i915->drm.dev); + + pr_err("Failed to flush chain of %d requests, at %d\n", + prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + err = -ETIME; + } + + i915_vma_put(rq->batch); + i915_request_put(rq); + rq = n; + } + + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + +static struct i915_vma * +create_gpr_user(struct intel_engine_cs *engine, + struct i915_vma *result, + unsigned int offset) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + u32 *cs; + int err; + int i; + + obj = i915_gem_object_create_internal(engine->i915, 4096); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, result->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) { + i915_vma_put(vma); + return ERR_PTR(err); + } + + cs = i915_gem_object_pin_map(obj, I915_MAP_WC); + if (IS_ERR(cs)) { + i915_vma_put(vma); + return ERR_CAST(cs); + } + + /* All GPR are clear for new contexts. We use GPR(0) as a constant */ + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = CS_GPR(engine, 0); + *cs++ = 1; + + for (i = 1; i < NUM_GPR; i++) { + u64 addr; + + /* + * Perform: GPR[i]++ + * + * As we read and write into the context saved GPR[i], if + * we restart this batch buffer from an earlier point, we + * will repeat the increment and store a value > 1. 
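+	 * The semaphore poll that follows each increment is also an
+	 * arbitration point at which the batch may be preempted.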
+ */ + *cs++ = MI_MATH(4); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); + *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); + *cs++ = MI_MATH_ADD; + *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); + + addr = result->node.start + offset + i * sizeof(*cs); + *cs++ = MI_STORE_REGISTER_MEM_GEN8; + *cs++ = CS_GPR(engine, 2 * i); + *cs++ = lower_32_bits(addr); + *cs++ = upper_32_bits(addr); + + *cs++ = MI_SEMAPHORE_WAIT | + MI_SEMAPHORE_POLL | + MI_SEMAPHORE_SAD_GTE_SDD; + *cs++ = i; + *cs++ = lower_32_bits(result->node.start); + *cs++ = upper_32_bits(result->node.start); + } + + *cs++ = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(obj); + i915_gem_object_unpin_map(obj); + + return vma; +} + +static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) +{ + struct drm_i915_gem_object *obj; + struct i915_vma *vma; + int err; + + obj = i915_gem_object_create_internal(gt->i915, sz); + if (IS_ERR(obj)) + return ERR_CAST(obj); + + vma = i915_vma_instance(obj, >->ggtt->vm, NULL); + if (IS_ERR(vma)) { + i915_gem_object_put(obj); + return vma; + } + + err = i915_ggtt_pin(vma, NULL, 0, 0); + if (err) { + i915_vma_put(vma); + return ERR_PTR(err); + } + + return vma; +} + +static struct i915_request * +create_gpr_client(struct intel_engine_cs *engine, + struct i915_vma *global, + unsigned int offset) +{ + struct i915_vma *batch, *vma; + struct intel_context *ce; + struct i915_request *rq; + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return ERR_CAST(ce); + + vma = i915_vma_instance(global->obj, ce->vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto out_ce; + } + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + goto out_ce; + + batch = create_gpr_user(engine, vma, offset); + if (IS_ERR(batch)) { + err = PTR_ERR(batch); + goto out_vma; + } + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_batch; + } + + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); + if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + i915_vma_unlock(vma); + + i915_vma_lock(batch); + if (!err) + err = i915_request_await_object(rq, batch->obj, false); + if (!err) + err = i915_vma_move_to_active(batch, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + batch->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(batch); + i915_vma_unpin(batch); + + if (!err) + i915_request_get(rq); + i915_request_add(rq); + +out_batch: + i915_vma_put(batch); +out_vma: + i915_vma_unpin(vma); +out_ce: + intel_context_put(ce); + return err ? 
ERR_PTR(err) : rq; +} + +static int preempt_user(struct intel_engine_cs *engine, + struct i915_vma *global, + int id) +{ + struct i915_sched_attr attr = { + .priority = I915_PRIORITY_MAX + }; + struct i915_request *rq; + int err = 0; + u32 *cs; + + rq = intel_engine_create_kernel_request(engine); + if (IS_ERR(rq)) + return PTR_ERR(rq); + + cs = intel_ring_begin(rq, 4); + if (IS_ERR(cs)) { + i915_request_add(rq); + return PTR_ERR(cs); + } + + *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; + *cs++ = i915_ggtt_offset(global); + *cs++ = 0; + *cs++ = id; + + intel_ring_advance(rq, cs); + + i915_request_get(rq); + i915_request_add(rq); + + engine->schedule(rq, &attr); + + if (i915_request_wait(rq, 0, HZ / 2) < 0) + err = -ETIME; + i915_request_put(rq); + + return err; +} + +static int live_preempt_user(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + struct i915_vma *global; + enum intel_engine_id id; + u32 *result; + int err = 0; + + /* + * In our other tests, we look at preemption in carefully + * controlled conditions in the ringbuffer. Since most of the + * time is spent in user batches, most of our preemptions naturally + * occur there. We want to verify that when we preempt inside a batch + * we continue on from the current instruction and do not roll back + * to the start, or another earlier arbitration point. + * + * To verify this, we create a batch which is a mixture of + * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with + * a few preempting contexts thrown into the mix, we look for any + * repeated instructions (which show up as incorrect values). + */ + + global = create_global(gt, 4096); + if (IS_ERR(global)) + return PTR_ERR(global); + + result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); + if (IS_ERR(result)) { + i915_vma_unpin_and_release(&global, 0); + return PTR_ERR(result); + } + + for_each_engine(engine, gt, id) { + struct i915_request *client[3] = {}; + struct igt_live_test t; + int i; + + if (!intel_engine_has_preemption(engine)) + continue; + + if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) + continue; /* we need per-context GPR */ + + if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { + err = -EIO; + break; + } + + memset(result, 0, 4096); + + for (i = 0; i < ARRAY_SIZE(client); i++) { + struct i915_request *rq; + + rq = create_gpr_client(engine, global, + NUM_GPR * i * sizeof(u32)); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto end_test; + } + + client[i] = rq; + } + + /* Continuously preempt the set of 3 running contexts */ + for (i = 1; i <= NUM_GPR; i++) { + err = preempt_user(engine, global, i); + if (err) + goto end_test; + } + + if (READ_ONCE(result[0]) != NUM_GPR) { + pr_err("%s: Failed to release semaphore\n", + engine->name); + err = -EIO; + goto end_test; + } + + for (i = 0; i < ARRAY_SIZE(client); i++) { + int gpr; + + if (i915_request_wait(client[i], 0, HZ / 2) < 0) { + err = -ETIME; + goto end_test; + } + + for (gpr = 1; gpr < NUM_GPR; gpr++) { + if (result[NUM_GPR * i + gpr] != 1) { + pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", + engine->name, + i, gpr, result[NUM_GPR * i + gpr]); + err = -EINVAL; + goto end_test; + } + } + } + +end_test: + for (i = 0; i < ARRAY_SIZE(client); i++) { + if (!client[i]) + break; + + i915_request_put(client[i]); + } + + /* Flush the semaphores on error */ + smp_store_mb(result[0], -1); + if (igt_live_test_end(&t)) + err = -EIO; + if (err) + break; + } + + i915_vma_unpin_and_release(&global, I915_VMA_RELEASE_MAP); + return err; +} + 
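/*
 * Editor's illustration (not part of the patch): the pass criteria used by
 * live_preempt_user() above, restated as a self-contained userspace checker.
 * NUM_GPR and NUM_CLIENTS are assumed values for this sketch; the result
 * layout (semaphore in dword 0, one block of NUM_GPR dwords per client)
 * mirrors the selftest.
 */
#include <stdio.h>

#define NUM_GPR		16
#define NUM_CLIENTS	3

static int check_results(const unsigned int *result)
{
	int i, gpr;

	/* Every preempt_user() write must have landed, releasing the semaphore. */
	if (result[0] != NUM_GPR) {
		fprintf(stderr, "semaphore not fully released: %u\n", result[0]);
		return -1;
	}

	for (i = 0; i < NUM_CLIENTS; i++) {
		for (gpr = 1; gpr < NUM_GPR; gpr++) {
			/*
			 * A value greater than 1 means the GPR increment was
			 * replayed from an earlier arbitration point after a
			 * preemption, instead of resuming where it left off.
			 */
			if (result[NUM_GPR * i + gpr] != 1) {
				fprintf(stderr,
					"client %d, gpr %d: %u (expected 1)\n",
					i, gpr, result[NUM_GPR * i + gpr]);
				return -1;
			}
		}
	}

	return 0;
}

int main(void)
{
	unsigned int result[NUM_GPR * NUM_CLIENTS] = { [0] = NUM_GPR };
	int i;

	/* A well-behaved run stores exactly one '1' per checked GPR slot. */
	for (i = 1; i < NUM_GPR * NUM_CLIENTS; i++)
		if (i % NUM_GPR)
			result[i] = 1;

	return check_results(result) ? 1 : 0;
}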
+static int live_preempt_timeout(void *arg) +{ + struct intel_gt *gt = arg; + struct i915_gem_context *ctx_hi, *ctx_lo; + struct igt_spinner spin_lo; + struct intel_engine_cs *engine; + enum intel_engine_id id; + int err = -ENOMEM; + + /* + * Check that we force preemption to occur by cancelling the previous + * context if it refuses to yield the GPU. + */ + if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) + return 0; + + if (!intel_has_reset_engine(gt)) + return 0; + + if (igt_spinner_init(&spin_lo, gt)) + return -ENOMEM; + + ctx_hi = kernel_context(gt->i915); + if (!ctx_hi) + goto err_spin_lo; + ctx_hi->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); + + ctx_lo = kernel_context(gt->i915); + if (!ctx_lo) + goto err_ctx_hi; + ctx_lo->sched.priority = + I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); + + for_each_engine(engine, gt, id) { + unsigned long saved_timeout; + struct i915_request *rq; + + if (!intel_engine_has_preemption(engine)) + continue; + + rq = spinner_create_request(&spin_lo, ctx_lo, engine, + MI_NOOP); /* preemption disabled */ + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + i915_request_add(rq); + if (!igt_wait_for_spinner(&spin_lo, rq)) { + intel_gt_set_wedged(gt); + err = -EIO; + goto err_ctx_lo; + } + + rq = igt_request_alloc(ctx_hi, engine); + if (IS_ERR(rq)) { + igt_spinner_end(&spin_lo); + err = PTR_ERR(rq); + goto err_ctx_lo; + } + + /* Flush the previous CS ack before changing timeouts */ + while (READ_ONCE(engine->execlists.pending[0])) + cpu_relax(); + + saved_timeout = engine->props.preempt_timeout_ms; + engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ + + i915_request_get(rq); + i915_request_add(rq); + + intel_engine_flush_submission(engine); + engine->props.preempt_timeout_ms = saved_timeout; + + if (i915_request_wait(rq, 0, HZ / 10) < 0) { + intel_gt_set_wedged(gt); + i915_request_put(rq); + err = -ETIME; + goto err_ctx_lo; + } + + igt_spinner_end(&spin_lo); + i915_request_put(rq); + } + + err = 0; +err_ctx_lo: + kernel_context_close(ctx_lo); +err_ctx_hi: + kernel_context_close(ctx_hi); +err_spin_lo: + igt_spinner_fini(&spin_lo); + return err; +} + +static int random_range(struct rnd_state *rnd, int min, int max) +{ + return i915_prandom_u32_max_state(max - min, rnd) + min; +} + +static int random_priority(struct rnd_state *rnd) +{ + return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); +} + +struct preempt_smoke { + struct intel_gt *gt; + struct i915_gem_context **contexts; + struct intel_engine_cs *engine; + struct drm_i915_gem_object *batch; + unsigned int ncontext; + struct rnd_state prng; + unsigned long count; +}; + +static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) +{ + return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, + &smoke->prng)]; +} + +static int smoke_submit(struct preempt_smoke *smoke, + struct i915_gem_context *ctx, int prio, + struct drm_i915_gem_object *batch) +{ + struct i915_request *rq; + struct i915_vma *vma = NULL; + int err = 0; + + if (batch) { + struct i915_address_space *vm; + + vm = i915_gem_context_get_vm_rcu(ctx); + vma = i915_vma_instance(batch, vm, NULL); + i915_vm_put(vm); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + err = i915_vma_pin(vma, 0, 0, PIN_USER); + if (err) + return err; + } + + ctx->sched.priority = prio; + + rq = igt_request_alloc(ctx, smoke->engine); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto unpin; + } + + if (vma) { + i915_vma_lock(vma); + err = i915_request_await_object(rq, vma->obj, false); 
+ if (!err) + err = i915_vma_move_to_active(vma, rq, 0); + if (!err) + err = rq->engine->emit_bb_start(rq, + vma->node.start, + PAGE_SIZE, 0); + i915_vma_unlock(vma); + } + + i915_request_add(rq); + +unpin: + if (vma) + i915_vma_unpin(vma); + + return err; +} + +static int smoke_crescendo_thread(void *arg) +{ + struct preempt_smoke *smoke = arg; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + struct i915_gem_context *ctx = smoke_context(smoke); + int err; + + err = smoke_submit(smoke, + ctx, count % I915_PRIORITY_MAX, + smoke->batch); + if (err) + return err; + + count++; + } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); + + smoke->count = count; + return 0; +} + +static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) +#define BATCH BIT(0) +{ + struct task_struct *tsk[I915_NUM_ENGINES] = {}; + struct preempt_smoke arg[I915_NUM_ENGINES]; + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned long count; + int err = 0; + + for_each_engine(engine, smoke->gt, id) { + arg[id] = *smoke; + arg[id].engine = engine; + if (!(flags & BATCH)) + arg[id].batch = NULL; + arg[id].count = 0; + + tsk[id] = kthread_run(smoke_crescendo_thread, &arg, + "igt/smoke:%d", id); + if (IS_ERR(tsk[id])) { + err = PTR_ERR(tsk[id]); + break; + } + get_task_struct(tsk[id]); + } + + yield(); /* start all threads before we kthread_stop() */ + + count = 0; + for_each_engine(engine, smoke->gt, id) { + int status; + + if (IS_ERR_OR_NULL(tsk[id])) + continue; + + status = kthread_stop(tsk[id]); + if (status && !err) + err = status; + + count += arg[id].count; + + put_task_struct(tsk[id]); + } + + pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", + count, flags, smoke->gt->info.num_engines, smoke->ncontext); + return 0; +} + +static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) +{ + enum intel_engine_id id; + IGT_TIMEOUT(end_time); + unsigned long count; + + count = 0; + do { + for_each_engine(smoke->engine, smoke->gt, id) { + struct i915_gem_context *ctx = smoke_context(smoke); + int err; + + err = smoke_submit(smoke, + ctx, random_priority(&smoke->prng), + flags & BATCH ? 
smoke->batch : NULL); + if (err) + return err; + + count++; + } + } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); + + pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", + count, flags, smoke->gt->info.num_engines, smoke->ncontext); + return 0; +} + +static int live_preempt_smoke(void *arg) +{ + struct preempt_smoke smoke = { + .gt = arg, + .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), + .ncontext = 256, + }; + const unsigned int phase[] = { 0, BATCH }; + struct igt_live_test t; + int err = -ENOMEM; + u32 *cs; + int n; + + smoke.contexts = kmalloc_array(smoke.ncontext, + sizeof(*smoke.contexts), + GFP_KERNEL); + if (!smoke.contexts) + return -ENOMEM; + + smoke.batch = + i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); + if (IS_ERR(smoke.batch)) { + err = PTR_ERR(smoke.batch); + goto err_free; + } + + cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto err_batch; + } + for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) + cs[n] = MI_ARB_CHECK; + cs[n] = MI_BATCH_BUFFER_END; + i915_gem_object_flush_map(smoke.batch); + i915_gem_object_unpin_map(smoke.batch); + + if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { + err = -EIO; + goto err_batch; + } + + for (n = 0; n < smoke.ncontext; n++) { + smoke.contexts[n] = kernel_context(smoke.gt->i915); + if (!smoke.contexts[n]) + goto err_ctx; + } + + for (n = 0; n < ARRAY_SIZE(phase); n++) { + err = smoke_crescendo(&smoke, phase[n]); + if (err) + goto err_ctx; + + err = smoke_random(&smoke, phase[n]); + if (err) + goto err_ctx; + } + +err_ctx: + if (igt_live_test_end(&t)) + err = -EIO; + + for (n = 0; n < smoke.ncontext; n++) { + if (!smoke.contexts[n]) + break; + kernel_context_close(smoke.contexts[n]); + } + +err_batch: + i915_gem_object_put(smoke.batch); +err_free: + kfree(smoke.contexts); + + return err; +} + +static int nop_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling, + unsigned int nctx, + unsigned int flags) +#define CHAIN BIT(0) +{ + IGT_TIMEOUT(end_time); + struct i915_request *request[16] = {}; + struct intel_context *ve[16]; + unsigned long n, prime, nc; + struct igt_live_test t; + ktime_t times[2] = {}; + int err; + + GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); + + for (n = 0; n < nctx; n++) { + ve[n] = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ve[n])) { + err = PTR_ERR(ve[n]); + nctx = n; + goto out; + } + + err = intel_context_pin(ve[n]); + if (err) { + intel_context_put(ve[n]); + nctx = n; + goto out; + } + } + + err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); + if (err) + goto out; + + for_each_prime_number_from(prime, 1, 8192) { + times[1] = ktime_get_raw(); + + if (flags & CHAIN) { + for (nc = 0; nc < nctx; nc++) { + for (n = 0; n < prime; n++) { + struct i915_request *rq; + + rq = i915_request_create(ve[nc]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + if (request[nc]) + i915_request_put(request[nc]); + request[nc] = i915_request_get(rq); + i915_request_add(rq); + } + } + } else { + for (n = 0; n < prime; n++) { + for (nc = 0; nc < nctx; nc++) { + struct i915_request *rq; + + rq = i915_request_create(ve[nc]); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + if (request[nc]) + i915_request_put(request[nc]); + request[nc] = i915_request_get(rq); + i915_request_add(rq); + } + } + } + + for (nc = 0; nc < nctx; nc++) { + if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { 
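+				/* A nop request still pending after 100ms means the backend is stuck: dump state and wedge. */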
+ pr_err("%s(%s): wait for %llx:%lld timed out\n", + __func__, ve[0]->engine->name, + request[nc]->fence.context, + request[nc]->fence.seqno); + + GEM_TRACE("%s(%s) failed at request %llx:%lld\n", + __func__, ve[0]->engine->name, + request[nc]->fence.context, + request[nc]->fence.seqno); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + break; + } + } + + times[1] = ktime_sub(ktime_get_raw(), times[1]); + if (prime == 1) + times[0] = times[1]; + + for (nc = 0; nc < nctx; nc++) { + i915_request_put(request[nc]); + request[nc] = NULL; + } + + if (__igt_timeout(end_time, NULL)) + break; + } + + err = igt_live_test_end(&t); + if (err) + goto out; + + pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", + nctx, ve[0]->engine->name, ktime_to_ns(times[0]), + prime, div64_u64(ktime_to_ns(times[1]), prime)); + +out: + if (igt_flush_test(gt->i915)) + err = -EIO; + + for (nc = 0; nc < nctx; nc++) { + i915_request_put(request[nc]); + intel_context_unpin(ve[nc]); + intel_context_put(ve[nc]); + } + return err; +} + +static unsigned int +__select_siblings(struct intel_gt *gt, + unsigned int class, + struct intel_engine_cs **siblings, + bool (*filter)(const struct intel_engine_cs *)) +{ + unsigned int n = 0; + unsigned int inst; + + for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { + if (!gt->engine_class[class][inst]) + continue; + + if (filter && !filter(gt->engine_class[class][inst])) + continue; + + siblings[n++] = gt->engine_class[class][inst]; + } + + return n; +} + +static unsigned int +select_siblings(struct intel_gt *gt, + unsigned int class, + struct intel_engine_cs **siblings) +{ + return __select_siblings(gt, class, siblings, NULL); +} + +static int live_virtual_engine(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + struct intel_engine_cs *engine; + enum intel_engine_id id; + unsigned int class; + int err; + + if (intel_uc_uses_guc_submission(>->uc)) + return 0; + + for_each_engine(engine, gt, id) { + err = nop_virtual_engine(gt, &engine, 1, 1, 0); + if (err) { + pr_err("Failed to wrap engine %s: err=%d\n", + engine->name, err); + return err; + } + } + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, n; + + nsibling = select_siblings(gt, class, siblings); + if (nsibling < 2) + continue; + + for (n = 1; n <= nsibling + 1; n++) { + err = nop_virtual_engine(gt, siblings, nsibling, + n, 0); + if (err) + return err; + } + + err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); + if (err) + return err; + } + + return 0; +} + +static int mask_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; + struct intel_context *ve; + struct igt_live_test t; + unsigned int n; + int err; + + /* + * Check that by setting the execution mask on a request, we can + * restrict it to our desired engine within the virtual engine. 
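+	 * Each request below has its execution_mask narrowed to a single
+	 * sibling, so the backend is left with no choice of engine.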
+ */ + + ve = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_close; + } + + err = intel_context_pin(ve); + if (err) + goto out_put; + + err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); + if (err) + goto out_unpin; + + for (n = 0; n < nsibling; n++) { + request[n] = i915_request_create(ve); + if (IS_ERR(request[n])) { + err = PTR_ERR(request[n]); + nsibling = n; + goto out; + } + + /* Reverse order as it's more likely to be unnatural */ + request[n]->execution_mask = siblings[nsibling - n - 1]->mask; + + i915_request_get(request[n]); + i915_request_add(request[n]); + } + + for (n = 0; n < nsibling; n++) { + if (i915_request_wait(request[n], 0, HZ / 10) < 0) { + pr_err("%s(%s): wait for %llx:%lld timed out\n", + __func__, ve->engine->name, + request[n]->fence.context, + request[n]->fence.seqno); + + GEM_TRACE("%s(%s) failed at request %llx:%lld\n", + __func__, ve->engine->name, + request[n]->fence.context, + request[n]->fence.seqno); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + goto out; + } + + if (request[n]->engine != siblings[nsibling - n - 1]) { + pr_err("Executed on wrong sibling '%s', expected '%s'\n", + request[n]->engine->name, + siblings[nsibling - n - 1]->name); + err = -EINVAL; + goto out; + } + } + + err = igt_live_test_end(&t); +out: + if (igt_flush_test(gt->i915)) + err = -EIO; + + for (n = 0; n < nsibling; n++) + i915_request_put(request[n]); + +out_unpin: + intel_context_unpin(ve); +out_put: + intel_context_put(ve); +out_close: + return err; +} + +static int live_virtual_mask(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class; + int err; + + if (intel_uc_uses_guc_submission(>->uc)) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + unsigned int nsibling; + + nsibling = select_siblings(gt, class, siblings); + if (nsibling < 2) + continue; + + err = mask_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + +static int slicein_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + const long timeout = slice_timeout(siblings[0]); + struct intel_context *ce; + struct i915_request *rq; + struct igt_spinner spin; + unsigned int n; + int err = 0; + + /* + * Virtual requests must take part in timeslicing on the target engines. 
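+	 * Each sibling is first occupied by a spinner; the virtual request
+	 * must nevertheless be granted a slice within slice_timeout().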
+ */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + for (n = 0; n < nsibling; n++) { + ce = intel_context_create(siblings[n]); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_add(rq); + } + + ce = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, timeout) < 0) { + GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n", + __func__, rq->engine->name); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + } + i915_request_put(rq); + +out: + igt_spinner_end(&spin); + if (igt_flush_test(gt->i915)) + err = -EIO; + igt_spinner_fini(&spin); + return err; +} + +static int sliceout_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + const long timeout = slice_timeout(siblings[0]); + struct intel_context *ce; + struct i915_request *rq; + struct igt_spinner spin; + unsigned int n; + int err = 0; + + /* + * Virtual requests must allow others a fair timeslice. + */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + /* XXX We do not handle oversubscription and fairness with normal rq */ + for (n = 0; n < nsibling; n++) { + ce = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_add(rq); + } + + for (n = 0; !err && n < nsibling; n++) { + ce = intel_context_create(siblings[n]); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + rq = intel_context_create_request(ce); + intel_context_put(ce); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out; + } + + i915_request_get(rq); + i915_request_add(rq); + if (i915_request_wait(rq, 0, timeout) < 0) { + GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n", + __func__, siblings[n]->name); + GEM_TRACE_DUMP(); + intel_gt_set_wedged(gt); + err = -EIO; + } + i915_request_put(rq); + } + +out: + igt_spinner_end(&spin); + if (igt_flush_test(gt->i915)) + err = -EIO; + igt_spinner_fini(&spin); + return err; +} + +static int live_virtual_slice(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class; + int err; + + if (intel_uc_uses_guc_submission(>->uc)) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + unsigned int nsibling; + + nsibling = __select_siblings(gt, class, siblings, + intel_engine_has_timeslices); + if (nsibling < 2) + continue; + + err = slicein_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + + err = sliceout_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + +static int preserved_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + struct i915_request *last = NULL; + struct intel_context *ve; + struct i915_vma *scratch; + struct igt_live_test t; + unsigned int n; + int err = 0; + u32 *cs; + + scratch = __vm_create_scratch_for_read(&siblings[0]->gt->ggtt->vm, + PAGE_SIZE); + if (IS_ERR(scratch)) + return PTR_ERR(scratch); + + 
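+	/* Make sure the scratch is bound before it is written to via the GGTT */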
err = i915_vma_sync(scratch); + if (err) + goto out_scratch; + + ve = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_scratch; + } + + err = intel_context_pin(ve); + if (err) + goto out_put; + + err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); + if (err) + goto out_unpin; + + for (n = 0; n < NUM_GPR_DW; n++) { + struct intel_engine_cs *engine = siblings[n % nsibling]; + struct i915_request *rq; + + rq = i915_request_create(ve); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_end; + } + + i915_request_put(last); + last = i915_request_get(rq); + + cs = intel_ring_begin(rq, 8); + if (IS_ERR(cs)) { + i915_request_add(rq); + err = PTR_ERR(cs); + goto out_end; + } + + *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; + *cs++ = CS_GPR(engine, n); + *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); + *cs++ = 0; + + *cs++ = MI_LOAD_REGISTER_IMM(1); + *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); + *cs++ = n + 1; + + *cs++ = MI_NOOP; + intel_ring_advance(rq, cs); + + /* Restrict this request to run on a particular engine */ + rq->execution_mask = engine->mask; + i915_request_add(rq); + } + + if (i915_request_wait(last, 0, HZ / 5) < 0) { + err = -ETIME; + goto out_end; + } + + cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); + if (IS_ERR(cs)) { + err = PTR_ERR(cs); + goto out_end; + } + + for (n = 0; n < NUM_GPR_DW; n++) { + if (cs[n] != n) { + pr_err("Incorrect value[%d] found for GPR[%d]\n", + cs[n], n); + err = -EINVAL; + break; + } + } + + i915_gem_object_unpin_map(scratch->obj); + +out_end: + if (igt_live_test_end(&t)) + err = -EIO; + i915_request_put(last); +out_unpin: + intel_context_unpin(ve); +out_put: + intel_context_put(ve); +out_scratch: + i915_vma_unpin_and_release(&scratch, 0); + return err; +} + +static int live_virtual_preserved(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class; + + /* + * Check that the context image retains non-privileged (user) registers + * from one engine to the next. For this we check that the CS_GPR + * are preserved. + */ + + if (intel_uc_uses_guc_submission(>->uc)) + return 0; + + /* As we use CS_GPR we cannot run before they existed on all engines. */ + if (INTEL_GEN(gt->i915) < 9) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, err; + + nsibling = select_siblings(gt, class, siblings); + if (nsibling < 2) + continue; + + err = preserved_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + +static int bond_virtual_engine(struct intel_gt *gt, + unsigned int class, + struct intel_engine_cs **siblings, + unsigned int nsibling, + unsigned int flags) +#define BOND_SCHEDULE BIT(0) +{ + struct intel_engine_cs *master; + struct i915_request *rq[16]; + enum intel_engine_id id; + struct igt_spinner spin; + unsigned long n; + int err; + + /* + * A set of bonded requests is intended to be run concurrently + * across a number of engines. We use one request per-engine + * and a magic fence to schedule each of the bonded requests + * at the same time. A consequence of our current scheduler is that + * we only move requests to the HW ready queue when the request + * becomes ready, that is when all of its prerequisite fences have + * been signaled. As one of those fences is the master submit fence, + * there is a delay on all secondary fences as the HW may be + * currently busy. 
Equally, as all the requests are independent, + * they may have other fences that delay individual request + * submission to HW. Ergo, we do not guarantee that all requests are + * immediately submitted to HW at the same time, just that if the + * rules are abided by, they are ready at the same time as the + * first is submitted. Userspace can embed semaphores in its batch + * to ensure parallel execution of its phases as it requires. + * Though naturally it gets requested that perhaps the scheduler should + * take care of parallel execution, even across preemption events on + * different HW. (The proper answer is of course "lalalala".) + * + * With the submit-fence, we have identified three possible phases + * of synchronisation depending on the master fence: queued (not + * ready), executing, and signaled. The first two are quite simple + * and checked below. However, the signaled master fence handling is + * contentious. Currently we do not distinguish between a signaled + * fence and an expired fence, as once signaled it does not convey + * any information about the previous execution. It may even be freed + * and hence checking later it may not exist at all. Ergo we currently + * do not apply the bonding constraint for an already signaled fence, + * as our expectation is that it should not constrain the secondaries + * and is outside of the scope of the bonded request API (i.e. all + * userspace requests are meant to be running in parallel). As + * it imposes no constraint, and is effectively a no-op, we do not + * check below as normal execution flows are checked extensively above. + * + * XXX Is the degenerate handling of signaled submit fences the + * expected behaviour for userpace? + */ + + GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + err = 0; + rq[0] = ERR_PTR(-ENOMEM); + for_each_engine(master, gt, id) { + struct i915_sw_fence fence = {}; + struct intel_context *ce; + + if (master->class == class) + continue; + + ce = intel_context_create(master); + if (IS_ERR(ce)) { + err = PTR_ERR(ce); + goto out; + } + + memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); + + rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); + intel_context_put(ce); + if (IS_ERR(rq[0])) { + err = PTR_ERR(rq[0]); + goto out; + } + i915_request_get(rq[0]); + + if (flags & BOND_SCHEDULE) { + onstack_fence_init(&fence); + err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, + &fence, + GFP_KERNEL); + } + + i915_request_add(rq[0]); + if (err < 0) + goto out; + + if (!(flags & BOND_SCHEDULE) && + !igt_wait_for_spinner(&spin, rq[0])) { + err = -EIO; + goto out; + } + + for (n = 0; n < nsibling; n++) { + struct intel_context *ve; + + ve = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + onstack_fence_fini(&fence); + goto out; + } + + err = intel_virtual_engine_attach_bond(ve->engine, + master, + siblings[n]); + if (err) { + intel_context_put(ve); + onstack_fence_fini(&fence); + goto out; + } + + err = intel_context_pin(ve); + intel_context_put(ve); + if (err) { + onstack_fence_fini(&fence); + goto out; + } + + rq[n + 1] = i915_request_create(ve); + intel_context_unpin(ve); + if (IS_ERR(rq[n + 1])) { + err = PTR_ERR(rq[n + 1]); + onstack_fence_fini(&fence); + goto out; + } + i915_request_get(rq[n + 1]); + + err = i915_request_await_execution(rq[n + 1], + &rq[0]->fence, + ve->engine->bond_execute); + i915_request_add(rq[n + 1]); + if (err < 0) { + onstack_fence_fini(&fence); + goto out; + } + } + 
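+		/* Release the submit fence (if armed) so the master, and with it the bonded pairs, can be submitted */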
onstack_fence_fini(&fence); + intel_engine_flush_submission(master); + igt_spinner_end(&spin); + + if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { + pr_err("Master request did not execute (on %s)!\n", + rq[0]->engine->name); + err = -EIO; + goto out; + } + + for (n = 0; n < nsibling; n++) { + if (i915_request_wait(rq[n + 1], 0, + MAX_SCHEDULE_TIMEOUT) < 0) { + err = -EIO; + goto out; + } + + if (rq[n + 1]->engine != siblings[n]) { + pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", + siblings[n]->name, + rq[n + 1]->engine->name, + rq[0]->engine->name); + err = -EINVAL; + goto out; + } + } + + for (n = 0; !IS_ERR(rq[n]); n++) + i915_request_put(rq[n]); + rq[0] = ERR_PTR(-ENOMEM); + } + +out: + for (n = 0; !IS_ERR(rq[n]); n++) + i915_request_put(rq[n]); + if (igt_flush_test(gt->i915)) + err = -EIO; + + igt_spinner_fini(&spin); + return err; +} + +static int live_virtual_bond(void *arg) +{ + static const struct phase { + const char *name; + unsigned int flags; + } phases[] = { + { "", 0 }, + { "schedule", BOND_SCHEDULE }, + { }, + }; + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class; + int err; + + if (intel_uc_uses_guc_submission(>->uc)) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + const struct phase *p; + int nsibling; + + nsibling = select_siblings(gt, class, siblings); + if (nsibling < 2) + continue; + + for (p = phases; p->name; p++) { + err = bond_virtual_engine(gt, + class, siblings, nsibling, + p->flags); + if (err) { + pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", + __func__, p->name, class, nsibling, err); + return err; + } + } + } + + return 0; +} + +static int reset_virtual_engine(struct intel_gt *gt, + struct intel_engine_cs **siblings, + unsigned int nsibling) +{ + struct intel_engine_cs *engine; + struct intel_context *ve; + struct igt_spinner spin; + struct i915_request *rq; + unsigned int n; + int err = 0; + + /* + * In order to support offline error capture for fast preempt reset, + * we need to decouple the guilty request and ensure that it and its + * descendents are not executed while the capture is in progress. 
+ */ + + if (igt_spinner_init(&spin, gt)) + return -ENOMEM; + + ve = intel_execlists_create_virtual(siblings, nsibling); + if (IS_ERR(ve)) { + err = PTR_ERR(ve); + goto out_spin; + } + + for (n = 0; n < nsibling; n++) + st_engine_heartbeat_disable(siblings[n]); + + rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + goto out_heartbeat; + } + i915_request_add(rq); + + if (!igt_wait_for_spinner(&spin, rq)) { + intel_gt_set_wedged(gt); + err = -ETIME; + goto out_heartbeat; + } + + engine = rq->engine; + GEM_BUG_ON(engine == ve->engine); + + /* Take ownership of the reset and tasklet */ + local_bh_disable(); + if (test_and_set_bit(I915_RESET_ENGINE + engine->id, + >->reset.flags)) { + local_bh_enable(); + intel_gt_set_wedged(gt); + err = -EBUSY; + goto out_heartbeat; + } + tasklet_disable(&engine->execlists.tasklet); + + engine->execlists.tasklet.func(engine->execlists.tasklet.data); + GEM_BUG_ON(execlists_active(&engine->execlists) != rq); + + /* Fake a preemption event; failed of course */ + spin_lock_irq(&engine->active.lock); + __unwind_incomplete_requests(engine); + spin_unlock_irq(&engine->active.lock); + GEM_BUG_ON(rq->engine != engine); + + /* Reset the engine while keeping our active request on hold */ + execlists_hold(engine, rq); + GEM_BUG_ON(!i915_request_on_hold(rq)); + + __intel_engine_reset_bh(engine, NULL); + GEM_BUG_ON(rq->fence.error != -EIO); + + /* Release our grasp on the engine, letting CS flow again */ + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, >->reset.flags); + local_bh_enable(); + + /* Check that we do not resubmit the held request */ + i915_request_get(rq); + if (!i915_request_wait(rq, 0, HZ / 5)) { + pr_err("%s: on hold request completed!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -EIO; + goto out_rq; + } + GEM_BUG_ON(!i915_request_on_hold(rq)); + + /* But is resubmitted on release */ + execlists_unhold(engine, rq); + if (i915_request_wait(rq, 0, HZ / 5) < 0) { + pr_err("%s: held request did not complete!\n", + engine->name); + intel_gt_set_wedged(gt); + err = -ETIME; + } + +out_rq: + i915_request_put(rq); +out_heartbeat: + for (n = 0; n < nsibling; n++) + st_engine_heartbeat_enable(siblings[n]); + + intel_context_put(ve); +out_spin: + igt_spinner_fini(&spin); + return err; +} + +static int live_virtual_reset(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; + unsigned int class; + + /* + * Check that we handle a reset event within a virtual engine. + * Only the physical engine is reset, but we have to check the flow + * of the virtual requests around the reset, and make sure it is not + * forgotten. 
+ */ + + if (intel_uc_uses_guc_submission(>->uc)) + return 0; + + if (!intel_has_reset_engine(gt)) + return 0; + + for (class = 0; class <= MAX_ENGINE_CLASS; class++) { + int nsibling, err; + + nsibling = select_siblings(gt, class, siblings); + if (nsibling < 2) + continue; + + err = reset_virtual_engine(gt, siblings, nsibling); + if (err) + return err; + } + + return 0; +} + +int intel_execlists_live_selftests(struct drm_i915_private *i915) +{ + static const struct i915_subtest tests[] = { + SUBTEST(live_sanitycheck), + SUBTEST(live_unlite_switch), + SUBTEST(live_unlite_preempt), + SUBTEST(live_unlite_ring), + SUBTEST(live_pin_rewind), + SUBTEST(live_hold_reset), + SUBTEST(live_error_interrupt), + SUBTEST(live_timeslice_preempt), + SUBTEST(live_timeslice_rewind), + SUBTEST(live_timeslice_queue), + SUBTEST(live_timeslice_nopreempt), + SUBTEST(live_busywait_preempt), + SUBTEST(live_preempt), + SUBTEST(live_late_preempt), + SUBTEST(live_nopreempt), + SUBTEST(live_preempt_cancel), + SUBTEST(live_suppress_self_preempt), + SUBTEST(live_chain_preempt), + SUBTEST(live_preempt_ring), + SUBTEST(live_preempt_gang), + SUBTEST(live_preempt_timeout), + SUBTEST(live_preempt_user), + SUBTEST(live_preempt_smoke), + SUBTEST(live_virtual_engine), + SUBTEST(live_virtual_mask), + SUBTEST(live_virtual_preserved), + SUBTEST(live_virtual_slice), + SUBTEST(live_virtual_bond), + SUBTEST(live_virtual_reset), + }; + + if (!HAS_EXECLISTS(i915)) + return 0; + + if (intel_gt_is_wedged(&i915->gt)) + return 0; + + return intel_gt_live_subtests(tests, &i915->gt); +} diff --git a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c index 6180a47c1b51..5d911f724ebe 100644 --- a/drivers/gpu/drm/i915/gt/selftest_gt_pm.c +++ b/drivers/gpu/drm/i915/gt/selftest_gt_pm.c @@ -71,7 +71,7 @@ static int live_gt_clocks(void *arg) enum intel_engine_id id; int err = 0; - if (!RUNTIME_INFO(gt->i915)->cs_timestamp_frequency_hz) { /* unknown */ + if (!gt->clock_frequency) { /* unknown */ pr_info("CS_TIMESTAMP frequency unknown\n"); return 0; } @@ -112,12 +112,12 @@ static int live_gt_clocks(void *arg) measure_clocks(engine, &cycles, &dt); - time = i915_cs_timestamp_ticks_to_ns(engine->i915, cycles); - expected = i915_cs_timestamp_ns_to_ticks(engine->i915, dt); + time = intel_gt_clock_interval_to_ns(engine->gt, cycles); + expected = intel_gt_ns_to_clock_interval(engine->gt, dt); pr_info("%s: TIMESTAMP %d cycles [%lldns] in %lldns [%d cycles], using CS clock frequency of %uKHz\n", engine->name, cycles, time, dt, expected, - RUNTIME_INFO(engine->i915)->cs_timestamp_frequency_hz / 1000); + engine->gt->clock_frequency / 1000); if (9 * time < 8 * dt || 8 * time > 9 * dt) { pr_err("%s: CS ticks did not match walltime!\n", diff --git a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c index fb5ebf930ab2..460c3e9542f4 100644 --- a/drivers/gpu/drm/i915/gt/selftest_hangcheck.c +++ b/drivers/gpu/drm/i915/gt/selftest_hangcheck.c @@ -506,7 +506,8 @@ static int igt_reset_nop_engine(void *arg) } err = intel_engine_reset(engine, NULL); if (err) { - pr_err("i915_reset_engine failed\n"); + pr_err("intel_engine_reset(%s) failed, err:%d\n", + engine->name, err); break; } @@ -539,6 +540,149 @@ static int igt_reset_nop_engine(void *arg) return 0; } +static void force_reset_timeout(struct intel_engine_cs *engine) +{ + engine->reset_timeout.probability = 999; + atomic_set(&engine->reset_timeout.times, -1); +} + +static void cancel_reset_timeout(struct intel_engine_cs *engine) +{ + 
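+	/* Restore the default behaviour: no more forced reset failures */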
memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout)); +} + +static int igt_reset_fail_engine(void *arg) +{ + struct intel_gt *gt = arg; + struct intel_engine_cs *engine; + enum intel_engine_id id; + + /* Check that we can recover from engine-reset failures */ + + if (!intel_has_reset_engine(gt)) + return 0; + + for_each_engine(engine, gt, id) { + unsigned int count; + struct intel_context *ce; + IGT_TIMEOUT(end_time); + int err; + + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); + + st_engine_heartbeat_disable(engine); + set_bit(I915_RESET_ENGINE + id, &gt->reset.flags); + + force_reset_timeout(engine); + err = intel_engine_reset(engine, NULL); + cancel_reset_timeout(engine); + if (err == 0) /* timeouts only generated on gen8+ */ + goto skip; + + count = 0; + do { + struct i915_request *last = NULL; + int i; + + if (!wait_for_idle(engine)) { + pr_err("%s failed to idle before reset\n", + engine->name); + err = -EIO; + break; + } + + for (i = 0; i < count % 15; i++) { + struct i915_request *rq; + + rq = intel_context_create_request(ce); + if (IS_ERR(rq)) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + intel_engine_dump(engine, &p, + "%s(%s): failed to submit request\n", + __func__, + engine->name); + + GEM_TRACE("%s(%s): failed to submit request\n", + __func__, + engine->name); + GEM_TRACE_DUMP(); + + intel_gt_set_wedged(gt); + if (last) + i915_request_put(last); + + err = PTR_ERR(rq); + goto out; + } + + if (last) + i915_request_put(last); + last = i915_request_get(rq); + i915_request_add(rq); + } + + if (count & 1) { + err = intel_engine_reset(engine, NULL); + if (err) { + GEM_TRACE_ERR("intel_engine_reset(%s) failed, err:%d\n", + engine->name, err); + GEM_TRACE_DUMP(); + i915_request_put(last); + break; + } + } else { + force_reset_timeout(engine); + err = intel_engine_reset(engine, NULL); + cancel_reset_timeout(engine); + if (err != -ETIMEDOUT) { + pr_err("intel_engine_reset(%s) did not fail, err:%d\n", + engine->name, err); + i915_request_put(last); + break; + } + } + + err = 0; + if (last) { + if (i915_request_wait(last, 0, HZ / 2) < 0) { + struct drm_printer p = + drm_info_printer(gt->i915->drm.dev); + + intel_engine_dump(engine, &p, + "%s(%s): failed to complete request\n", + __func__, + engine->name); + + GEM_TRACE("%s(%s): failed to complete request\n", + __func__, + engine->name); + GEM_TRACE_DUMP(); + + err = -EIO; + } + i915_request_put(last); + } + count++; + } while (err == 0 && time_before(jiffies, end_time)); +out: + pr_info("%s(%s): %d resets\n", __func__, engine->name, count); +skip: + clear_bit(I915_RESET_ENGINE + id, &gt->reset.flags); + st_engine_heartbeat_enable(engine); + intel_context_put(ce); + + if (igt_flush_test(gt->i915)) + err = -EIO; + if (err) + return err; + } + + return 0; +} + static int __igt_reset_engine(struct intel_gt *gt, bool active) { struct i915_gpu_error *global = &gt->i915->gpu_error; @@ -608,7 +752,8 @@ static int __igt_reset_engine(struct intel_gt *gt, bool active) err = intel_engine_reset(engine, NULL); if (err) { - pr_err("i915_reset_engine failed\n"); + pr_err("intel_engine_reset(%s) failed, err:%d\n", + engine->name, err); break; } @@ -1576,12 +1721,17 @@ static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, engine->name, mode, p->name); tasklet_disable(t); + if (strcmp(p->name, "softirq")) + local_bh_disable(); p->critical_section_begin(); - err = intel_engine_reset(engine, NULL); + err = __intel_engine_reset_bh(engine, NULL); p->critical_section_end(); + if
(strcmp(p->name, "softirq")) + local_bh_enable(); tasklet_enable(t); + tasklet_hi_schedule(t); if (err) pr_err("i915_reset_engine(%s:%s) failed under %s\n", @@ -1687,6 +1837,7 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_reset_nop_engine), SUBTEST(igt_reset_idle_engine), SUBTEST(igt_reset_active_engine), + SUBTEST(igt_reset_fail_engine), SUBTEST(igt_reset_engines), SUBTEST(igt_reset_engines_atomic), SUBTEST(igt_reset_queue), diff --git a/drivers/gpu/drm/i915/gt/selftest_lrc.c b/drivers/gpu/drm/i915/gt/selftest_lrc.c index 95d41c01d0e0..920979a89413 100644 --- a/drivers/gpu/drm/i915/gt/selftest_lrc.c +++ b/drivers/gpu/drm/i915/gt/selftest_lrc.c @@ -1,22 +1,22 @@ +// SPDX-License-Identifier: MIT /* - * SPDX-License-Identifier: MIT - * * Copyright © 2018 Intel Corporation */ #include <linux/prime_numbers.h> -#include "gem/i915_gem_pm.h" -#include "gt/intel_engine_heartbeat.h" -#include "gt/intel_reset.h" -#include "gt/selftest_engine_heartbeat.h" - #include "i915_selftest.h" +#include "intel_engine_heartbeat.h" +#include "intel_engine_pm.h" +#include "intel_reset.h" +#include "intel_ring.h" +#include "selftest_engine_heartbeat.h" #include "selftests/i915_random.h" #include "selftests/igt_flush_test.h" #include "selftests/igt_live_test.h" #include "selftests/igt_spinner.h" #include "selftests/lib_sw_fence.h" +#include "shmem_utils.h" #include "gem/selftests/igt_gem_utils.h" #include "gem/selftests/mock_context.h" @@ -27,29 +27,7 @@ static struct i915_vma *create_scratch(struct intel_gt *gt) { - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int err; - - obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); - - vma = i915_vma_instance(obj, >->ggtt->vm, NULL); - if (IS_ERR(vma)) { - i915_gem_object_put(obj); - return vma; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); - if (err) { - i915_gem_object_put(obj); - return ERR_PTR(err); - } - - return vma; + return __vm_create_scratch_for_read(>->ggtt->vm, PAGE_SIZE); } static bool is_active(struct i915_request *rq) @@ -70,6 +48,9 @@ static int wait_for_submit(struct intel_engine_cs *engine, struct i915_request *rq, unsigned long timeout) { + /* Ignore our own attempts to suppress excess tasklets */ + tasklet_hi_schedule(&engine->execlists.tasklet); + timeout += jiffies; do { bool done = time_after(jiffies, timeout); @@ -89,4623 +70,6 @@ static int wait_for_submit(struct intel_engine_cs *engine, } while (1); } -static int wait_for_reset(struct intel_engine_cs *engine, - struct i915_request *rq, - unsigned long timeout) -{ - timeout += jiffies; - - do { - cond_resched(); - intel_engine_flush_submission(engine); - - if (READ_ONCE(engine->execlists.pending[0])) - continue; - - if (i915_request_completed(rq)) - break; - - if (READ_ONCE(rq->fence.error)) - break; - } while (time_before(jiffies, timeout)); - - flush_scheduled_work(); - - if (rq->fence.error != -EIO) { - pr_err("%s: hanging request %llx:%lld not reset\n", - engine->name, - rq->fence.context, - rq->fence.seqno); - return -EINVAL; - } - - /* Give the request a jiffie to complete after flushing the worker */ - if (i915_request_wait(rq, 0, - max(0l, (long)(timeout - jiffies)) + 1) < 0) { - pr_err("%s: hanging request %llx:%lld did not complete\n", - engine->name, - rq->fence.context, - rq->fence.seqno); - return -ETIME; - } - - return 0; -} - -static int live_sanitycheck(void *arg) -{ - struct intel_gt *gt = arg; - struct 
intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_spinner spin; - int err = 0; - - if (!HAS_LOGICAL_RING_CONTEXTS(gt->i915)) - return 0; - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - for_each_engine(engine, gt, id) { - struct intel_context *ce; - struct i915_request *rq; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - break; - } - - rq = igt_spinner_create_request(&spin, ce, MI_NOOP); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_ctx; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin, rq)) { - GEM_TRACE("spinner failed to start\n"); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - err = -EIO; - goto out_ctx; - } - - igt_spinner_end(&spin); - if (igt_flush_test(gt->i915)) { - err = -EIO; - goto out_ctx; - } - -out_ctx: - intel_context_put(ce); - if (err) - break; - } - - igt_spinner_fini(&spin); - return err; -} - -static int live_unlite_restore(struct intel_gt *gt, int prio) -{ - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_spinner spin; - int err = -ENOMEM; - - /* - * Check that we can correctly context switch between 2 instances - * on the same engine from the same parent context. - */ - - if (igt_spinner_init(&spin, gt)) - return err; - - err = 0; - for_each_engine(engine, gt, id) { - struct intel_context *ce[2] = {}; - struct i915_request *rq[2]; - struct igt_live_test t; - int n; - - if (prio && !intel_engine_has_preemption(engine)) - continue; - - if (!intel_engine_can_store_dword(engine)) - continue; - - if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { - err = -EIO; - break; - } - st_engine_heartbeat_disable(engine); - - for (n = 0; n < ARRAY_SIZE(ce); n++) { - struct intel_context *tmp; - - tmp = intel_context_create(engine); - if (IS_ERR(tmp)) { - err = PTR_ERR(tmp); - goto err_ce; - } - - err = intel_context_pin(tmp); - if (err) { - intel_context_put(tmp); - goto err_ce; - } - - /* - * Setup the pair of contexts such that if we - * lite-restore using the RING_TAIL from ce[1] it - * will execute garbage from ce[0]->ring. - */ - memset(tmp->ring->vaddr, - POISON_INUSE, /* IPEHR: 0x5a5a5a5a [hung!] */ - tmp->ring->vma->size); - - ce[n] = tmp; - } - GEM_BUG_ON(!ce[1]->ring->size); - intel_ring_reset(ce[1]->ring, ce[1]->ring->size / 2); - __execlists_update_reg_state(ce[1], engine, ce[1]->ring->head); - - rq[0] = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); - if (IS_ERR(rq[0])) { - err = PTR_ERR(rq[0]); - goto err_ce; - } - - i915_request_get(rq[0]); - i915_request_add(rq[0]); - GEM_BUG_ON(rq[0]->postfix > ce[1]->ring->emit); - - if (!igt_wait_for_spinner(&spin, rq[0])) { - i915_request_put(rq[0]); - goto err_ce; - } - - rq[1] = i915_request_create(ce[1]); - if (IS_ERR(rq[1])) { - err = PTR_ERR(rq[1]); - i915_request_put(rq[0]); - goto err_ce; - } - - if (!prio) { - /* - * Ensure we do the switch to ce[1] on completion. - * - * rq[0] is already submitted, so this should reduce - * to a no-op (a wait on a request on the same engine - * uses the submit fence, not the completion fence), - * but it will install a dependency on rq[1] for rq[0] - * that will prevent the pair being reordered by - * timeslicing. 
- */ - i915_request_await_dma_fence(rq[1], &rq[0]->fence); - } - - i915_request_get(rq[1]); - i915_request_add(rq[1]); - GEM_BUG_ON(rq[1]->postfix <= rq[0]->postfix); - i915_request_put(rq[0]); - - if (prio) { - struct i915_sched_attr attr = { - .priority = prio, - }; - - /* Alternatively preempt the spinner with ce[1] */ - engine->schedule(rq[1], &attr); - } - - /* And switch back to ce[0] for good measure */ - rq[0] = i915_request_create(ce[0]); - if (IS_ERR(rq[0])) { - err = PTR_ERR(rq[0]); - i915_request_put(rq[1]); - goto err_ce; - } - - i915_request_await_dma_fence(rq[0], &rq[1]->fence); - i915_request_get(rq[0]); - i915_request_add(rq[0]); - GEM_BUG_ON(rq[0]->postfix > rq[1]->postfix); - i915_request_put(rq[1]); - i915_request_put(rq[0]); - -err_ce: - intel_engine_flush_submission(engine); - igt_spinner_end(&spin); - for (n = 0; n < ARRAY_SIZE(ce); n++) { - if (IS_ERR_OR_NULL(ce[n])) - break; - - intel_context_unpin(ce[n]); - intel_context_put(ce[n]); - } - - st_engine_heartbeat_enable(engine); - if (igt_live_test_end(&t)) - err = -EIO; - if (err) - break; - } - - igt_spinner_fini(&spin); - return err; -} - -static int live_unlite_switch(void *arg) -{ - return live_unlite_restore(arg, 0); -} - -static int live_unlite_preempt(void *arg) -{ - return live_unlite_restore(arg, I915_USER_PRIORITY(I915_PRIORITY_MAX)); -} - -static int live_unlite_ring(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - struct igt_spinner spin; - enum intel_engine_id id; - int err = 0; - - /* - * Setup a preemption event that will cause almost the entire ring - * to be unwound, potentially fooling our intel_ring_direction() - * into emitting a forward lite-restore instead of the rollback. - */ - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - for_each_engine(engine, gt, id) { - struct intel_context *ce[2] = {}; - struct i915_request *rq; - struct igt_live_test t; - int n; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (!intel_engine_can_store_dword(engine)) - continue; - - if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { - err = -EIO; - break; - } - st_engine_heartbeat_disable(engine); - - for (n = 0; n < ARRAY_SIZE(ce); n++) { - struct intel_context *tmp; - - tmp = intel_context_create(engine); - if (IS_ERR(tmp)) { - err = PTR_ERR(tmp); - goto err_ce; - } - - err = intel_context_pin(tmp); - if (err) { - intel_context_put(tmp); - goto err_ce; - } - - memset32(tmp->ring->vaddr, - 0xdeadbeef, /* trigger a hang if executed */ - tmp->ring->vma->size / sizeof(u32)); - - ce[n] = tmp; - } - - /* Create max prio spinner, followed by N low prio nops */ - rq = igt_spinner_create_request(&spin, ce[0], MI_ARB_CHECK); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ce; - } - - i915_request_get(rq); - rq->sched.attr.priority = I915_PRIORITY_BARRIER; - i915_request_add(rq); - - if (!igt_wait_for_spinner(&spin, rq)) { - intel_gt_set_wedged(gt); - i915_request_put(rq); - err = -ETIME; - goto err_ce; - } - - /* Fill the ring, until we will cause a wrap */ - n = 0; - while (intel_ring_direction(ce[0]->ring, - rq->wa_tail, - ce[0]->ring->tail) <= 0) { - struct i915_request *tmp; - - tmp = intel_context_create_request(ce[0]); - if (IS_ERR(tmp)) { - err = PTR_ERR(tmp); - i915_request_put(rq); - goto err_ce; - } - - i915_request_add(tmp); - intel_engine_flush_submission(engine); - n++; - } - intel_engine_flush_submission(engine); - pr_debug("%s: Filled ring with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", - engine->name, n, - ce[0]->ring->size, 
- ce[0]->ring->tail, - ce[0]->ring->emit, - rq->tail); - GEM_BUG_ON(intel_ring_direction(ce[0]->ring, - rq->tail, - ce[0]->ring->tail) <= 0); - i915_request_put(rq); - - /* Create a second ring to preempt the first ring after rq[0] */ - rq = intel_context_create_request(ce[1]); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ce; - } - - rq->sched.attr.priority = I915_PRIORITY_BARRIER; - i915_request_get(rq); - i915_request_add(rq); - - err = wait_for_submit(engine, rq, HZ / 2); - i915_request_put(rq); - if (err) { - pr_err("%s: preemption request was not submitted\n", - engine->name); - err = -ETIME; - } - - pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", - engine->name, - ce[0]->ring->tail, ce[0]->ring->emit, - ce[1]->ring->tail, ce[1]->ring->emit); - -err_ce: - intel_engine_flush_submission(engine); - igt_spinner_end(&spin); - for (n = 0; n < ARRAY_SIZE(ce); n++) { - if (IS_ERR_OR_NULL(ce[n])) - break; - - intel_context_unpin(ce[n]); - intel_context_put(ce[n]); - } - st_engine_heartbeat_enable(engine); - if (igt_live_test_end(&t)) - err = -EIO; - if (err) - break; - } - - igt_spinner_fini(&spin); - return err; -} - -static int live_pin_rewind(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = 0; - - /* - * We have to be careful not to trust intel_ring too much, for example - * ring->head is updated upon retire which is out of sync with pinning - * the context. Thus we cannot use ring->head to set CTX_RING_HEAD, - * or else we risk writing an older, stale value. - * - * To simulate this, let's apply a bit of deliberate sabotage. - */ - - for_each_engine(engine, gt, id) { - struct intel_context *ce; - struct i915_request *rq; - struct intel_ring *ring; - struct igt_live_test t; - - if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { - err = -EIO; - break; - } - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - break; - } - - err = intel_context_pin(ce); - if (err) { - intel_context_put(ce); - break; - } - - /* Keep the context awake while we play games */ - err = i915_active_acquire(&ce->active); - if (err) { - intel_context_unpin(ce); - intel_context_put(ce); - break; - } - ring = ce->ring; - - /* Poison the ring, and offset the next request from HEAD */ - memset32(ring->vaddr, STACK_MAGIC, ring->size / sizeof(u32)); - ring->emit = ring->size / 2; - ring->tail = ring->emit; - GEM_BUG_ON(ring->head); - - intel_context_unpin(ce); - - /* Submit a simple nop request */ - GEM_BUG_ON(intel_context_is_pinned(ce)); - rq = intel_context_create_request(ce); - i915_active_release(&ce->active); /* e.g. async retire */ - intel_context_put(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - break; - } - GEM_BUG_ON(!rq->head); - i915_request_add(rq); - - /* Expect not to hang! */ - if (igt_live_test_end(&t)) { - err = -EIO; - break; - } - } - - return err; -} - -static int live_hold_reset(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_spinner spin; - int err = 0; - - /* - * In order to support offline error capture for fast preempt reset, - * we need to decouple the guilty request and ensure that it and its - * descendants are not executed while the capture is in progress.
- */ - - if (!intel_has_reset_engine(gt)) - return 0; - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - for_each_engine(engine, gt, id) { - struct intel_context *ce; - struct i915_request *rq; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - break; - } - - st_engine_heartbeat_disable(engine); - - rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - i915_request_add(rq); - - if (!igt_wait_for_spinner(&spin, rq)) { - intel_gt_set_wedged(gt); - err = -ETIME; - goto out; - } - - /* We have our request executing, now remove it and reset */ - - if (test_and_set_bit(I915_RESET_ENGINE + id, - >->reset.flags)) { - intel_gt_set_wedged(gt); - err = -EBUSY; - goto out; - } - tasklet_disable(&engine->execlists.tasklet); - - engine->execlists.tasklet.func(engine->execlists.tasklet.data); - GEM_BUG_ON(execlists_active(&engine->execlists) != rq); - - i915_request_get(rq); - execlists_hold(engine, rq); - GEM_BUG_ON(!i915_request_on_hold(rq)); - - intel_engine_reset(engine, NULL); - GEM_BUG_ON(rq->fence.error != -EIO); - - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(I915_RESET_ENGINE + id, - >->reset.flags); - - /* Check that we do not resubmit the held request */ - if (!i915_request_wait(rq, 0, HZ / 5)) { - pr_err("%s: on hold request completed!\n", - engine->name); - i915_request_put(rq); - err = -EIO; - goto out; - } - GEM_BUG_ON(!i915_request_on_hold(rq)); - - /* But is resubmitted on release */ - execlists_unhold(engine, rq); - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - pr_err("%s: held request did not complete!\n", - engine->name); - intel_gt_set_wedged(gt); - err = -ETIME; - } - i915_request_put(rq); - -out: - st_engine_heartbeat_enable(engine); - intel_context_put(ce); - if (err) - break; - } - - igt_spinner_fini(&spin); - return err; -} - -static const char *error_repr(int err) -{ - return err ? "bad" : "good"; -} - -static int live_error_interrupt(void *arg) -{ - static const struct error_phase { - enum { GOOD = 0, BAD = -EIO } error[2]; - } phases[] = { - { { BAD, GOOD } }, - { { BAD, BAD } }, - { { BAD, GOOD } }, - { { GOOD, GOOD } }, /* sentinel */ - }; - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* - * We hook up the CS_MASTER_ERROR_INTERRUPT to have forewarning - * of invalid commands in user batches that will cause a GPU hang. - * This is a faster mechanism than using hangcheck/heartbeats, but - * only detects problems the HW knows about -- it will not warn when - * we kill the HW! - * - * To verify our detection and reset, we throw some invalid commands - * at the HW and wait for the interrupt. 
- */ - - if (!intel_has_reset_engine(gt)) - return 0; - - for_each_engine(engine, gt, id) { - const struct error_phase *p; - int err = 0; - - st_engine_heartbeat_disable(engine); - - for (p = phases; p->error[0] != GOOD; p++) { - struct i915_request *client[ARRAY_SIZE(phases->error)]; - u32 *cs; - int i; - - memset(client, 0, sizeof(*client)); - for (i = 0; i < ARRAY_SIZE(client); i++) { - struct intel_context *ce; - struct i915_request *rq; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out; - } - - rq = intel_context_create_request(ce); - intel_context_put(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - if (rq->engine->emit_init_breadcrumb) { - err = rq->engine->emit_init_breadcrumb(rq); - if (err) { - i915_request_add(rq); - goto out; - } - } - - cs = intel_ring_begin(rq, 2); - if (IS_ERR(cs)) { - i915_request_add(rq); - err = PTR_ERR(cs); - goto out; - } - - if (p->error[i]) { - *cs++ = 0xdeadbeef; - *cs++ = 0xdeadbeef; - } else { - *cs++ = MI_NOOP; - *cs++ = MI_NOOP; - } - - client[i] = i915_request_get(rq); - i915_request_add(rq); - } - - err = wait_for_submit(engine, client[0], HZ / 2); - if (err) { - pr_err("%s: first request did not start within time!\n", - engine->name); - err = -ETIME; - goto out; - } - - for (i = 0; i < ARRAY_SIZE(client); i++) { - if (i915_request_wait(client[i], 0, HZ / 5) < 0) - pr_debug("%s: %s request incomplete!\n", - engine->name, - error_repr(p->error[i])); - - if (!i915_request_started(client[i])) { - pr_err("%s: %s request not started!\n", - engine->name, - error_repr(p->error[i])); - err = -ETIME; - goto out; - } - - /* Kick the tasklet to process the error */ - intel_engine_flush_submission(engine); - if (client[i]->fence.error != p->error[i]) { - pr_err("%s: %s request (%s) with wrong error code: %d\n", - engine->name, - error_repr(p->error[i]), - i915_request_completed(client[i]) ? 
"completed" : "running", - client[i]->fence.error); - err = -EINVAL; - goto out; - } - } - -out: - for (i = 0; i < ARRAY_SIZE(client); i++) - if (client[i]) - i915_request_put(client[i]); - if (err) { - pr_err("%s: failed at phase[%zd] { %d, %d }\n", - engine->name, p - phases, - p->error[0], p->error[1]); - break; - } - } - - st_engine_heartbeat_enable(engine); - if (err) { - intel_gt_set_wedged(gt); - return err; - } - } - - return 0; -} - -static int -emit_semaphore_chain(struct i915_request *rq, struct i915_vma *vma, int idx) -{ - u32 *cs; - - cs = intel_ring_begin(rq, 10); - if (IS_ERR(cs)) - return PTR_ERR(cs); - - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_NEQ_SDD; - *cs++ = 0; - *cs++ = i915_ggtt_offset(vma) + 4 * idx; - *cs++ = 0; - - if (idx > 0) { - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); - *cs++ = 0; - *cs++ = 1; - } else { - *cs++ = MI_NOOP; - *cs++ = MI_NOOP; - *cs++ = MI_NOOP; - *cs++ = MI_NOOP; - } - - *cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE; - - intel_ring_advance(rq, cs); - return 0; -} - -static struct i915_request * -semaphore_queue(struct intel_engine_cs *engine, struct i915_vma *vma, int idx) -{ - struct intel_context *ce; - struct i915_request *rq; - int err; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return ERR_CAST(ce); - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - goto out_ce; - - err = 0; - if (rq->engine->emit_init_breadcrumb) - err = rq->engine->emit_init_breadcrumb(rq); - if (err == 0) - err = emit_semaphore_chain(rq, vma, idx); - if (err == 0) - i915_request_get(rq); - i915_request_add(rq); - if (err) - rq = ERR_PTR(err); - -out_ce: - intel_context_put(ce); - return rq; -} - -static int -release_queue(struct intel_engine_cs *engine, - struct i915_vma *vma, - int idx, int prio) -{ - struct i915_sched_attr attr = { - .priority = prio, - }; - struct i915_request *rq; - u32 *cs; - - rq = intel_engine_create_kernel_request(engine); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) { - i915_request_add(rq); - return PTR_ERR(cs); - } - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = i915_ggtt_offset(vma) + 4 * (idx - 1); - *cs++ = 0; - *cs++ = 1; - - intel_ring_advance(rq, cs); - - i915_request_get(rq); - i915_request_add(rq); - - local_bh_disable(); - engine->schedule(rq, &attr); - local_bh_enable(); /* kick tasklet */ - - i915_request_put(rq); - - return 0; -} - -static int -slice_semaphore_queue(struct intel_engine_cs *outer, - struct i915_vma *vma, - int count) -{ - struct intel_engine_cs *engine; - struct i915_request *head; - enum intel_engine_id id; - int err, i, n = 0; - - head = semaphore_queue(outer, vma, n++); - if (IS_ERR(head)) - return PTR_ERR(head); - - for_each_engine(engine, outer->gt, id) { - for (i = 0; i < count; i++) { - struct i915_request *rq; - - rq = semaphore_queue(engine, vma, n++); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - i915_request_put(rq); - } - } - - err = release_queue(outer, vma, n, I915_PRIORITY_BARRIER); - if (err) - goto out; - - if (i915_request_wait(head, 0, - 2 * outer->gt->info.num_engines * (count + 2) * (count + 3)) < 0) { - pr_err("Failed to slice along semaphore chain of length (%d, %d)!\n", - count, n); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(outer->gt); - err = -EIO; - } - -out: - i915_request_put(head); - return err; -} - -static int live_timeslice_preempt(void 
*arg) -{ - struct intel_gt *gt = arg; - struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct i915_vma *vma; - void *vaddr; - int err = 0; - - /* - * If a request takes too long, we would like to give other users - * a fair go on the GPU. In particular, users may create batches - * that wait upon external input, where that input may even be - * supplied by another GPU job. To avoid blocking forever, we - * need to preempt the current task and replace it with another - * ready task. - */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return 0; - - obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, >->ggtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); - if (err) - goto err_map; - - err = i915_vma_sync(vma); - if (err) - goto err_pin; - - for_each_engine(engine, gt, id) { - if (!intel_engine_has_preemption(engine)) - continue; - - memset(vaddr, 0, PAGE_SIZE); - - st_engine_heartbeat_disable(engine); - err = slice_semaphore_queue(engine, vma, 5); - st_engine_heartbeat_enable(engine); - if (err) - goto err_pin; - - if (igt_flush_test(gt->i915)) { - err = -EIO; - goto err_pin; - } - } - -err_pin: - i915_vma_unpin(vma); -err_map: - i915_gem_object_unpin_map(obj); -err_obj: - i915_gem_object_put(obj); - return err; -} - -static struct i915_request * -create_rewinder(struct intel_context *ce, - struct i915_request *wait, - void *slot, int idx) -{ - const u32 offset = - i915_ggtt_offset(ce->engine->status_page.vma) + - offset_in_page(slot); - struct i915_request *rq; - u32 *cs; - int err; - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - return rq; - - if (wait) { - err = i915_request_await_dma_fence(rq, &wait->fence); - if (err) - goto err; - } - - cs = intel_ring_begin(rq, 14); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err; - } - - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - *cs++ = MI_NOOP; - - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_GTE_SDD; - *cs++ = idx; - *cs++ = offset; - *cs++ = 0; - - *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; - *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base)); - *cs++ = offset + idx * sizeof(u32); - *cs++ = 0; - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = offset; - *cs++ = 0; - *cs++ = idx + 1; - - intel_ring_advance(rq, cs); - - rq->sched.attr.priority = I915_PRIORITY_MASK; - err = 0; -err: - i915_request_get(rq); - i915_request_add(rq); - if (err) { - i915_request_put(rq); - return ERR_PTR(err); - } - - return rq; -} - -static int live_timeslice_rewind(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* - * The usual presumption on timeslice expiration is that we replace - * the active context with another. However, given a chain of - * dependencies we may end up with replacing the context with itself, - * but only a few of those requests, forcing us to rewind the - * RING_TAIL of the original request. 
- */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return 0; - - for_each_engine(engine, gt, id) { - enum { A1, A2, B1 }; - enum { X = 1, Z, Y }; - struct i915_request *rq[3] = {}; - struct intel_context *ce; - unsigned long timeslice; - int i, err = 0; - u32 *slot; - - if (!intel_engine_has_timeslices(engine)) - continue; - - /* - * A:rq1 -- semaphore wait, timestamp X - * A:rq2 -- write timestamp Y - * - * B:rq1 [await A:rq1] -- write timestamp Z - * - * Force timeslice, release semaphore. - * - * Expect execution/evaluation order XZY - */ - - st_engine_heartbeat_disable(engine); - timeslice = xchg(&engine->props.timeslice_duration_ms, 1); - - slot = memset32(engine->status_page.addr + 1000, 0, 4); - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto err; - } - - rq[A1] = create_rewinder(ce, NULL, slot, X); - if (IS_ERR(rq[A1])) { - intel_context_put(ce); - goto err; - } - - rq[A2] = create_rewinder(ce, NULL, slot, Y); - intel_context_put(ce); - if (IS_ERR(rq[A2])) - goto err; - - err = wait_for_submit(engine, rq[A2], HZ / 2); - if (err) { - pr_err("%s: failed to submit first context\n", - engine->name); - goto err; - } - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto err; - } - - rq[B1] = create_rewinder(ce, rq[A1], slot, Z); - intel_context_put(ce); - if (IS_ERR(rq[2])) - goto err; - - err = wait_for_submit(engine, rq[B1], HZ / 2); - if (err) { - pr_err("%s: failed to submit second context\n", - engine->name); - goto err; - } - - /* ELSP[] = { { A:rq1, A:rq2 }, { B:rq1 } } */ - ENGINE_TRACE(engine, "forcing tasklet for rewind\n"); - if (i915_request_is_active(rq[A2])) { /* semaphore yielded! */ - /* Wait for the timeslice to kick in */ - del_timer(&engine->execlists.timer); - tasklet_hi_schedule(&engine->execlists.tasklet); - intel_engine_flush_submission(engine); - } - /* -> ELSP[] = { { A:rq1 }, { B:rq1 } } */ - GEM_BUG_ON(!i915_request_is_active(rq[A1])); - GEM_BUG_ON(!i915_request_is_active(rq[B1])); - GEM_BUG_ON(i915_request_is_active(rq[A2])); - - /* Release the hounds! 
*/ - slot[0] = 1; - wmb(); /* "pairs" with GPU; paranoid kick of internal CPU$ */ - - for (i = 1; i <= 3; i++) { - unsigned long timeout = jiffies + HZ / 2; - - while (!READ_ONCE(slot[i]) && - time_before(jiffies, timeout)) - ; - - if (!time_before(jiffies, timeout)) { - pr_err("%s: rq[%d] timed out\n", - engine->name, i - 1); - err = -ETIME; - goto err; - } - - pr_debug("%s: slot[%d]:%x\n", engine->name, i, slot[i]); - } - - /* XZY: XZ < XY */ - if (slot[Z] - slot[X] >= slot[Y] - slot[X]) { - pr_err("%s: timeslicing did not run context B [%u] before A [%u]!\n", - engine->name, - slot[Z] - slot[X], - slot[Y] - slot[X]); - err = -EINVAL; - } - -err: - memset32(&slot[0], -1, 4); - wmb(); - - engine->props.timeslice_duration_ms = timeslice; - st_engine_heartbeat_enable(engine); - for (i = 0; i < 3; i++) - i915_request_put(rq[i]); - if (igt_flush_test(gt->i915)) - err = -EIO; - if (err) - return err; - } - - return 0; -} - -static struct i915_request *nop_request(struct intel_engine_cs *engine) -{ - struct i915_request *rq; - - rq = intel_engine_create_kernel_request(engine); - if (IS_ERR(rq)) - return rq; - - i915_request_get(rq); - i915_request_add(rq); - - return rq; -} - -static long slice_timeout(struct intel_engine_cs *engine) -{ - long timeout; - - /* Enough time for a timeslice to kick in, and kick out */ - timeout = 2 * msecs_to_jiffies_timeout(timeslice(engine)); - - /* Enough time for the nop request to complete */ - timeout += HZ / 5; - - return timeout + 1; -} - -static int live_timeslice_queue(void *arg) -{ - struct intel_gt *gt = arg; - struct drm_i915_gem_object *obj; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct i915_vma *vma; - void *vaddr; - int err = 0; - - /* - * Make sure that even if ELSP[0] and ELSP[1] are filled with - * timeslicing between them disabled, we *do* enable timeslicing - * if the queue demands it. (Normally, we do not submit if - * ELSP[1] is already occupied, so must rely on timeslicing to - * eject ELSP[0] in favour of the queue.) 
- */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return 0; - - obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return PTR_ERR(obj); - - vma = i915_vma_instance(obj, >->ggtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(vaddr)) { - err = PTR_ERR(vaddr); - goto err_obj; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); - if (err) - goto err_map; - - err = i915_vma_sync(vma); - if (err) - goto err_pin; - - for_each_engine(engine, gt, id) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), - }; - struct i915_request *rq, *nop; - - if (!intel_engine_has_preemption(engine)) - continue; - - st_engine_heartbeat_disable(engine); - memset(vaddr, 0, PAGE_SIZE); - - /* ELSP[0]: semaphore wait */ - rq = semaphore_queue(engine, vma, 0); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_heartbeat; - } - engine->schedule(rq, &attr); - err = wait_for_submit(engine, rq, HZ / 2); - if (err) { - pr_err("%s: Timed out trying to submit semaphores\n", - engine->name); - goto err_rq; - } - - /* ELSP[1]: nop request */ - nop = nop_request(engine); - if (IS_ERR(nop)) { - err = PTR_ERR(nop); - goto err_rq; - } - err = wait_for_submit(engine, nop, HZ / 2); - i915_request_put(nop); - if (err) { - pr_err("%s: Timed out trying to submit nop\n", - engine->name); - goto err_rq; - } - - GEM_BUG_ON(i915_request_completed(rq)); - GEM_BUG_ON(execlists_active(&engine->execlists) != rq); - - /* Queue: semaphore signal, matching priority as semaphore */ - err = release_queue(engine, vma, 1, effective_prio(rq)); - if (err) - goto err_rq; - - /* Wait until we ack the release_queue and start timeslicing */ - do { - cond_resched(); - intel_engine_flush_submission(engine); - } while (READ_ONCE(engine->execlists.pending[0])); - - /* Timeslice every jiffy, so within 2 we should signal */ - if (i915_request_wait(rq, 0, slice_timeout(engine)) < 0) { - struct drm_printer p = - drm_info_printer(gt->i915->drm.dev); - - pr_err("%s: Failed to timeslice into queue\n", - engine->name); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - memset(vaddr, 0xff, PAGE_SIZE); - err = -EIO; - } -err_rq: - i915_request_put(rq); -err_heartbeat: - st_engine_heartbeat_enable(engine); - if (err) - break; - } - -err_pin: - i915_vma_unpin(vma); -err_map: - i915_gem_object_unpin_map(obj); -err_obj: - i915_gem_object_put(obj); - return err; -} - -static int live_timeslice_nopreempt(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - struct igt_spinner spin; - int err = 0; - - /* - * We should not timeslice into a request that is marked with - * I915_REQUEST_NOPREEMPT. 
- */ - if (!IS_ACTIVE(CONFIG_DRM_I915_TIMESLICE_DURATION)) - return 0; - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - for_each_engine(engine, gt, id) { - struct intel_context *ce; - struct i915_request *rq; - unsigned long timeslice; - - if (!intel_engine_has_preemption(engine)) - continue; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - break; - } - - st_engine_heartbeat_disable(engine); - timeslice = xchg(&engine->props.timeslice_duration_ms, 1); - - /* Create an unpreemptible spinner */ - - rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); - intel_context_put(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_heartbeat; - } - - i915_request_get(rq); - i915_request_add(rq); - - if (!igt_wait_for_spinner(&spin, rq)) { - i915_request_put(rq); - err = -ETIME; - goto out_spin; - } - - set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags); - i915_request_put(rq); - - /* Followed by a maximum priority barrier (heartbeat) */ - - ce = intel_context_create(engine); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out_spin; - } - - rq = intel_context_create_request(ce); - intel_context_put(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_spin; - } - - rq->sched.attr.priority = I915_PRIORITY_BARRIER; - i915_request_get(rq); - i915_request_add(rq); - - /* - * Wait until the barrier is in ELSP, and we know timeslicing - * will have been activated. - */ - if (wait_for_submit(engine, rq, HZ / 2)) { - i915_request_put(rq); - err = -ETIME; - goto out_spin; - } - - /* - * Since the ELSP[0] request is unpreemptible, it should not - * allow the maximum priority barrier through. Wait long - * enough to see if it is timesliced in by mistake. - */ - if (i915_request_wait(rq, 0, slice_timeout(engine)) >= 0) { - pr_err("%s: I915_PRIORITY_BARRIER request completed, bypassing no-preempt request\n", - engine->name); - err = -EINVAL; - } - i915_request_put(rq); - -out_spin: - igt_spinner_end(&spin); -out_heartbeat: - xchg(&engine->props.timeslice_duration_ms, timeslice); - st_engine_heartbeat_enable(engine); - if (err) - break; - - if (igt_flush_test(gt->i915)) { - err = -EIO; - break; - } - } - - igt_spinner_fini(&spin); - return err; -} - -static int live_busywait_preempt(void *arg) -{ - struct intel_gt *gt = arg; - struct i915_gem_context *ctx_hi, *ctx_lo; - struct intel_engine_cs *engine; - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - enum intel_engine_id id; - int err = -ENOMEM; - u32 *map; - - /* - * Verify that even without HAS_LOGICAL_RING_PREEMPTION, we can - * preempt the busywaits used to synchronise between rings. 
- */ - - ctx_hi = kernel_context(gt->i915); - if (!ctx_hi) - return -ENOMEM; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - - ctx_lo = kernel_context(gt->i915); - if (!ctx_lo) - goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - - obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_ctx_lo; - } - - map = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(map)) { - err = PTR_ERR(map); - goto err_obj; - } - - vma = i915_vma_instance(obj, &gt->ggtt->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_map; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); - if (err) - goto err_map; - - err = i915_vma_sync(vma); - if (err) - goto err_vma; - - for_each_engine(engine, gt, id) { - struct i915_request *lo, *hi; - struct igt_live_test t; - u32 *cs; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (!intel_engine_can_store_dword(engine)) - continue; - - if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { - err = -EIO; - goto err_vma; - } - - /* - * We create two requests. The low priority request - * busywaits on a semaphore (inside the ringbuffer where - * it should be preemptible) and the high priority request - * uses a MI_STORE_DWORD_IMM to update the semaphore value - * allowing the first request to complete. If preemption - * fails, we hang instead. - */ - - lo = igt_request_alloc(ctx_lo, engine); - if (IS_ERR(lo)) { - err = PTR_ERR(lo); - goto err_vma; - } - - cs = intel_ring_begin(lo, 8); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - i915_request_add(lo); - goto err_vma; - } - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = i915_ggtt_offset(vma); - *cs++ = 0; - *cs++ = 1; - - /* XXX Do we need a flush + invalidate here?
*/ - - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_GLOBAL_GTT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_EQ_SDD; - *cs++ = 0; - *cs++ = i915_ggtt_offset(vma); - *cs++ = 0; - - intel_ring_advance(lo, cs); - - i915_request_get(lo); - i915_request_add(lo); - - if (wait_for(READ_ONCE(*map), 10)) { - i915_request_put(lo); - err = -ETIMEDOUT; - goto err_vma; - } - - /* Low priority request should be busywaiting now */ - if (i915_request_wait(lo, 0, 1) != -ETIME) { - i915_request_put(lo); - pr_err("%s: Busywaiting request did not!\n", - engine->name); - err = -EIO; - goto err_vma; - } - - hi = igt_request_alloc(ctx_hi, engine); - if (IS_ERR(hi)) { - err = PTR_ERR(hi); - i915_request_put(lo); - goto err_vma; - } - - cs = intel_ring_begin(hi, 4); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - i915_request_add(hi); - i915_request_put(lo); - goto err_vma; - } - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = i915_ggtt_offset(vma); - *cs++ = 0; - *cs++ = 0; - - intel_ring_advance(hi, cs); - i915_request_add(hi); - - if (i915_request_wait(lo, 0, HZ / 5) < 0) { - struct drm_printer p = drm_info_printer(gt->i915->drm.dev); - - pr_err("%s: Failed to preempt semaphore busywait!\n", - engine->name); - - intel_engine_dump(engine, &p, "%s\n", engine->name); - GEM_TRACE_DUMP(); - - i915_request_put(lo); - intel_gt_set_wedged(gt); - err = -EIO; - goto err_vma; - } - GEM_BUG_ON(READ_ONCE(*map)); - i915_request_put(lo); - - if (igt_live_test_end(&t)) { - err = -EIO; - goto err_vma; - } - } - - err = 0; -err_vma: - i915_vma_unpin(vma); -err_map: - i915_gem_object_unpin_map(obj); -err_obj: - i915_gem_object_put(obj); -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); - return err; -} - -static struct i915_request * -spinner_create_request(struct igt_spinner *spin, - struct i915_gem_context *ctx, - struct intel_engine_cs *engine, - u32 arb) -{ - struct intel_context *ce; - struct i915_request *rq; - - ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); - if (IS_ERR(ce)) - return ERR_CAST(ce); - - rq = igt_spinner_create_request(spin, ce, arb); - intel_context_put(ce); - return rq; -} - -static int live_preempt(void *arg) -{ - struct intel_gt *gt = arg; - struct i915_gem_context *ctx_hi, *ctx_lo; - struct igt_spinner spin_hi, spin_lo; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = -ENOMEM; - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PREEMPTION)) - pr_err("Logical preemption supported, but not exposed\n"); - - if (igt_spinner_init(&spin_hi, gt)) - return -ENOMEM; - - if (igt_spinner_init(&spin_lo, gt)) - goto err_spin_hi; - - ctx_hi = kernel_context(gt->i915); - if (!ctx_hi) - goto err_spin_lo; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - - ctx_lo = kernel_context(gt->i915); - if (!ctx_lo) - goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - - for_each_engine(engine, gt, id) { - struct igt_live_test t; - struct i915_request *rq; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { - err = -EIO; - goto err_ctx_lo; - } - - rq = spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin_lo, rq)) { - GEM_TRACE("lo spinner failed to start\n"); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); 
- err = -EIO; - goto err_ctx_lo; - } - - rq = spinner_create_request(&spin_hi, ctx_hi, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) { - igt_spinner_end(&spin_lo); - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin_hi, rq)) { - GEM_TRACE("hi spinner failed to start\n"); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - err = -EIO; - goto err_ctx_lo; - } - - igt_spinner_end(&spin_hi); - igt_spinner_end(&spin_lo); - - if (igt_live_test_end(&t)) { - err = -EIO; - goto err_ctx_lo; - } - } - - err = 0; -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); -err_spin_lo: - igt_spinner_fini(&spin_lo); -err_spin_hi: - igt_spinner_fini(&spin_hi); - return err; -} - -static int live_late_preempt(void *arg) -{ - struct intel_gt *gt = arg; - struct i915_gem_context *ctx_hi, *ctx_lo; - struct igt_spinner spin_hi, spin_lo; - struct intel_engine_cs *engine; - struct i915_sched_attr attr = {}; - enum intel_engine_id id; - int err = -ENOMEM; - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (igt_spinner_init(&spin_hi, gt)) - return -ENOMEM; - - if (igt_spinner_init(&spin_lo, gt)) - goto err_spin_hi; - - ctx_hi = kernel_context(gt->i915); - if (!ctx_hi) - goto err_spin_lo; - - ctx_lo = kernel_context(gt->i915); - if (!ctx_lo) - goto err_ctx_hi; - - /* Make sure ctx_lo stays before ctx_hi until we trigger preemption. */ - ctx_lo->sched.priority = I915_USER_PRIORITY(1); - - for_each_engine(engine, gt, id) { - struct igt_live_test t; - struct i915_request *rq; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { - err = -EIO; - goto err_ctx_lo; - } - - rq = spinner_create_request(&spin_lo, ctx_lo, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin_lo, rq)) { - pr_err("First context failed to start\n"); - goto err_wedged; - } - - rq = spinner_create_request(&spin_hi, ctx_hi, engine, - MI_NOOP); - if (IS_ERR(rq)) { - igt_spinner_end(&spin_lo); - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (igt_wait_for_spinner(&spin_hi, rq)) { - pr_err("Second context overtook first?\n"); - goto err_wedged; - } - - attr.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); - engine->schedule(rq, &attr); - - if (!igt_wait_for_spinner(&spin_hi, rq)) { - pr_err("High priority context failed to preempt the low priority context\n"); - GEM_TRACE_DUMP(); - goto err_wedged; - } - - igt_spinner_end(&spin_hi); - igt_spinner_end(&spin_lo); - - if (igt_live_test_end(&t)) { - err = -EIO; - goto err_ctx_lo; - } - } - - err = 0; -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); -err_spin_lo: - igt_spinner_fini(&spin_lo); -err_spin_hi: - igt_spinner_fini(&spin_hi); - return err; - -err_wedged: - igt_spinner_end(&spin_hi); - igt_spinner_end(&spin_lo); - intel_gt_set_wedged(gt); - err = -EIO; - goto err_ctx_lo; -} - -struct preempt_client { - struct igt_spinner spin; - struct i915_gem_context *ctx; -}; - -static int preempt_client_init(struct intel_gt *gt, struct preempt_client *c) -{ - c->ctx = kernel_context(gt->i915); - if (!c->ctx) - return -ENOMEM; - - if (igt_spinner_init(&c->spin, gt)) - goto err_ctx; - - return 0; - -err_ctx: - kernel_context_close(c->ctx); - return -ENOMEM; -} - -static void preempt_client_fini(struct preempt_client *c) -{ - igt_spinner_fini(&c->spin); - kernel_context_close(c->ctx); -} - 
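The preempt_client helpers above simply bundle a kernel context with an igt_spinner so the tests that follow can juggle competing submissions of different priority. As a rough illustration of how the pair is typically driven, here is a condensed, hypothetical sketch (not part of this patch): the function name preempt_client_smoke is made up, error flushing is trimmed, and gt/engine are assumed to come from the caller's for_each_engine() loop.

	static int preempt_client_smoke(struct intel_gt *gt,
					struct intel_engine_cs *engine)
	{
		/* Illustrative only; not part of this patch. */
		struct preempt_client lo, hi;
		struct i915_request *rq;
		int err = -ENOMEM;

		if (preempt_client_init(gt, &lo))
			return -ENOMEM;
		if (preempt_client_init(gt, &hi))
			goto err_lo;

		/* Boost one client so it should displace the other. */
		hi.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX);

		/* Low priority spinner occupies the engine first. */
		rq = spinner_create_request(&lo.spin, lo.ctx, engine, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_hi;
		}
		i915_request_add(rq);
		if (!igt_wait_for_spinner(&lo.spin, rq)) {
			err = -EIO;
			goto err_spin;
		}

		/* The high priority spinner should preempt it promptly. */
		rq = spinner_create_request(&hi.spin, hi.ctx, engine, MI_ARB_CHECK);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin;
		}
		i915_request_add(rq);
		err = igt_wait_for_spinner(&hi.spin, rq) ? 0 : -EIO;

		igt_spinner_end(&hi.spin);
	err_spin:
		igt_spinner_end(&lo.spin);
	err_hi:
		preempt_client_fini(&hi);
	err_lo:
		preempt_client_fini(&lo);
		return err;
	}

The real selftests below follow this shape but add heartbeat control, igt_flush_test() between iterations, and wedge the GT on failure so a stuck spinner cannot hang the machine.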
-static int live_nopreempt(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - struct preempt_client a, b; - enum intel_engine_id id; - int err = -ENOMEM; - - /* - * Verify that we can disable preemption for an individual request - * that may be being observed and not want to be interrupted. - */ - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (preempt_client_init(gt, &a)) - return -ENOMEM; - if (preempt_client_init(gt, &b)) - goto err_client_a; - b.ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_MAX); - - for_each_engine(engine, gt, id) { - struct i915_request *rq_a, *rq_b; - - if (!intel_engine_has_preemption(engine)) - continue; - - engine->execlists.preempt_hang.count = 0; - - rq_a = spinner_create_request(&a.spin, - a.ctx, engine, - MI_ARB_CHECK); - if (IS_ERR(rq_a)) { - err = PTR_ERR(rq_a); - goto err_client_b; - } - - /* Low priority client, but unpreemptable! */ - __set_bit(I915_FENCE_FLAG_NOPREEMPT, &rq_a->fence.flags); - - i915_request_add(rq_a); - if (!igt_wait_for_spinner(&a.spin, rq_a)) { - pr_err("First client failed to start\n"); - goto err_wedged; - } - - rq_b = spinner_create_request(&b.spin, - b.ctx, engine, - MI_ARB_CHECK); - if (IS_ERR(rq_b)) { - err = PTR_ERR(rq_b); - goto err_client_b; - } - - i915_request_add(rq_b); - - /* B is much more important than A! (But A is unpreemptable.) */ - GEM_BUG_ON(rq_prio(rq_b) <= rq_prio(rq_a)); - - /* Wait long enough for preemption and timeslicing */ - if (igt_wait_for_spinner(&b.spin, rq_b)) { - pr_err("Second client started too early!\n"); - goto err_wedged; - } - - igt_spinner_end(&a.spin); - - if (!igt_wait_for_spinner(&b.spin, rq_b)) { - pr_err("Second client failed to start\n"); - goto err_wedged; - } - - igt_spinner_end(&b.spin); - - if (engine->execlists.preempt_hang.count) { - pr_err("Preemption recorded x%d; should have been suppressed!\n", - engine->execlists.preempt_hang.count); - err = -EINVAL; - goto err_wedged; - } - - if (igt_flush_test(gt->i915)) - goto err_wedged; - } - - err = 0; -err_client_b: - preempt_client_fini(&b); -err_client_a: - preempt_client_fini(&a); - return err; - -err_wedged: - igt_spinner_end(&b.spin); - igt_spinner_end(&a.spin); - intel_gt_set_wedged(gt); - err = -EIO; - goto err_client_b; -} - -struct live_preempt_cancel { - struct intel_engine_cs *engine; - struct preempt_client a, b; -}; - -static int __cancel_active0(struct live_preempt_cancel *arg) -{ - struct i915_request *rq; - struct igt_live_test t; - int err; - - /* Preempt cancel of ELSP0 */ - GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); - if (igt_live_test_begin(&t, arg->engine->i915, - __func__, arg->engine->name)) - return -EIO; - - rq = spinner_create_request(&arg->a.spin, - arg->a.ctx, arg->engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - clear_bit(CONTEXT_BANNED, &rq->context->flags); - i915_request_get(rq); - i915_request_add(rq); - if (!igt_wait_for_spinner(&arg->a.spin, rq)) { - err = -EIO; - goto out; - } - - intel_context_set_banned(rq->context); - err = intel_engine_pulse(arg->engine); - if (err) - goto out; - - err = wait_for_reset(arg->engine, rq, HZ / 2); - if (err) { - pr_err("Cancelled inflight0 request did not reset\n"); - goto out; - } - -out: - i915_request_put(rq); - if (igt_live_test_end(&t)) - err = -EIO; - return err; -} - -static int __cancel_active1(struct live_preempt_cancel *arg) -{ - struct i915_request *rq[2] = {}; - struct igt_live_test t; - int err; - - /* Preempt cancel of ELSP1 */ - GEM_TRACE("%s(%s)\n", __func__, 
arg->engine->name); - if (igt_live_test_begin(&t, arg->engine->i915, - __func__, arg->engine->name)) - return -EIO; - - rq[0] = spinner_create_request(&arg->a.spin, - arg->a.ctx, arg->engine, - MI_NOOP); /* no preemption */ - if (IS_ERR(rq[0])) - return PTR_ERR(rq[0]); - - clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); - i915_request_get(rq[0]); - i915_request_add(rq[0]); - if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { - err = -EIO; - goto out; - } - - rq[1] = spinner_create_request(&arg->b.spin, - arg->b.ctx, arg->engine, - MI_ARB_CHECK); - if (IS_ERR(rq[1])) { - err = PTR_ERR(rq[1]); - goto out; - } - - clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); - i915_request_get(rq[1]); - err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); - i915_request_add(rq[1]); - if (err) - goto out; - - intel_context_set_banned(rq[1]->context); - err = intel_engine_pulse(arg->engine); - if (err) - goto out; - - igt_spinner_end(&arg->a.spin); - err = wait_for_reset(arg->engine, rq[1], HZ / 2); - if (err) - goto out; - - if (rq[0]->fence.error != 0) { - pr_err("Normal inflight0 request did not complete\n"); - err = -EINVAL; - goto out; - } - - if (rq[1]->fence.error != -EIO) { - pr_err("Cancelled inflight1 request did not report -EIO\n"); - err = -EINVAL; - goto out; - } - -out: - i915_request_put(rq[1]); - i915_request_put(rq[0]); - if (igt_live_test_end(&t)) - err = -EIO; - return err; -} - -static int __cancel_queued(struct live_preempt_cancel *arg) -{ - struct i915_request *rq[3] = {}; - struct igt_live_test t; - int err; - - /* Full ELSP and one in the wings */ - GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); - if (igt_live_test_begin(&t, arg->engine->i915, - __func__, arg->engine->name)) - return -EIO; - - rq[0] = spinner_create_request(&arg->a.spin, - arg->a.ctx, arg->engine, - MI_ARB_CHECK); - if (IS_ERR(rq[0])) - return PTR_ERR(rq[0]); - - clear_bit(CONTEXT_BANNED, &rq[0]->context->flags); - i915_request_get(rq[0]); - i915_request_add(rq[0]); - if (!igt_wait_for_spinner(&arg->a.spin, rq[0])) { - err = -EIO; - goto out; - } - - rq[1] = igt_request_alloc(arg->b.ctx, arg->engine); - if (IS_ERR(rq[1])) { - err = PTR_ERR(rq[1]); - goto out; - } - - clear_bit(CONTEXT_BANNED, &rq[1]->context->flags); - i915_request_get(rq[1]); - err = i915_request_await_dma_fence(rq[1], &rq[0]->fence); - i915_request_add(rq[1]); - if (err) - goto out; - - rq[2] = spinner_create_request(&arg->b.spin, - arg->a.ctx, arg->engine, - MI_ARB_CHECK); - if (IS_ERR(rq[2])) { - err = PTR_ERR(rq[2]); - goto out; - } - - i915_request_get(rq[2]); - err = i915_request_await_dma_fence(rq[2], &rq[1]->fence); - i915_request_add(rq[2]); - if (err) - goto out; - - intel_context_set_banned(rq[2]->context); - err = intel_engine_pulse(arg->engine); - if (err) - goto out; - - err = wait_for_reset(arg->engine, rq[2], HZ / 2); - if (err) - goto out; - - if (rq[0]->fence.error != -EIO) { - pr_err("Cancelled inflight0 request did not report -EIO\n"); - err = -EINVAL; - goto out; - } - - if (rq[1]->fence.error != 0) { - pr_err("Normal inflight1 request did not complete\n"); - err = -EINVAL; - goto out; - } - - if (rq[2]->fence.error != -EIO) { - pr_err("Cancelled queued request did not report -EIO\n"); - err = -EINVAL; - goto out; - } - -out: - i915_request_put(rq[2]); - i915_request_put(rq[1]); - i915_request_put(rq[0]); - if (igt_live_test_end(&t)) - err = -EIO; - return err; -} - -static int __cancel_hostile(struct live_preempt_cancel *arg) -{ - struct i915_request *rq; - int err; - - /* Preempt cancel non-preemptible spinner in 
ELSP0 */ - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) - return 0; - - if (!intel_has_reset_engine(arg->engine->gt)) - return 0; - - GEM_TRACE("%s(%s)\n", __func__, arg->engine->name); - rq = spinner_create_request(&arg->a.spin, - arg->a.ctx, arg->engine, - MI_NOOP); /* preemption disabled */ - if (IS_ERR(rq)) - return PTR_ERR(rq); - - clear_bit(CONTEXT_BANNED, &rq->context->flags); - i915_request_get(rq); - i915_request_add(rq); - if (!igt_wait_for_spinner(&arg->a.spin, rq)) { - err = -EIO; - goto out; - } - - intel_context_set_banned(rq->context); - err = intel_engine_pulse(arg->engine); /* force reset */ - if (err) - goto out; - - err = wait_for_reset(arg->engine, rq, HZ / 2); - if (err) { - pr_err("Cancelled inflight0 request did not reset\n"); - goto out; - } - -out: - i915_request_put(rq); - if (igt_flush_test(arg->engine->i915)) - err = -EIO; - return err; -} - -static int live_preempt_cancel(void *arg) -{ - struct intel_gt *gt = arg; - struct live_preempt_cancel data; - enum intel_engine_id id; - int err = -ENOMEM; - - /* - * To cancel an inflight context, we need to first remove it from the - * GPU. That sounds like preemption! Plus a little bit of bookkeeping. - */ - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (preempt_client_init(gt, &data.a)) - return -ENOMEM; - if (preempt_client_init(gt, &data.b)) - goto err_client_a; - - for_each_engine(data.engine, gt, id) { - if (!intel_engine_has_preemption(data.engine)) - continue; - - err = __cancel_active0(&data); - if (err) - goto err_wedged; - - err = __cancel_active1(&data); - if (err) - goto err_wedged; - - err = __cancel_queued(&data); - if (err) - goto err_wedged; - - err = __cancel_hostile(&data); - if (err) - goto err_wedged; - } - - err = 0; -err_client_b: - preempt_client_fini(&data.b); -err_client_a: - preempt_client_fini(&data.a); - return err; - -err_wedged: - GEM_TRACE_DUMP(); - igt_spinner_end(&data.b.spin); - igt_spinner_end(&data.a.spin); - intel_gt_set_wedged(gt); - goto err_client_b; -} - -static int live_suppress_self_preempt(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX) - }; - struct preempt_client a, b; - enum intel_engine_id id; - int err = -ENOMEM; - - /* - * Verify that if a preemption request does not cause a change in - * the current execution order, the preempt-to-idle injection is - * skipped and that we do not accidentally apply it after the CS - * completion event. 
- */ - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (intel_uc_uses_guc_submission(&gt->uc)) - return 0; /* presume black box */ - - if (intel_vgpu_active(gt->i915)) - return 0; /* GVT forces single port & request submission */ - - if (preempt_client_init(gt, &a)) - return -ENOMEM; - if (preempt_client_init(gt, &b)) - goto err_client_a; - - for_each_engine(engine, gt, id) { - struct i915_request *rq_a, *rq_b; - int depth; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (igt_flush_test(gt->i915)) - goto err_wedged; - - st_engine_heartbeat_disable(engine); - engine->execlists.preempt_hang.count = 0; - - rq_a = spinner_create_request(&a.spin, - a.ctx, engine, - MI_NOOP); - if (IS_ERR(rq_a)) { - err = PTR_ERR(rq_a); - st_engine_heartbeat_enable(engine); - goto err_client_b; - } - - i915_request_add(rq_a); - if (!igt_wait_for_spinner(&a.spin, rq_a)) { - pr_err("First client failed to start\n"); - st_engine_heartbeat_enable(engine); - goto err_wedged; - } - - /* Keep postponing the timer to avoid premature slicing */ - mod_timer(&engine->execlists.timer, jiffies + HZ); - for (depth = 0; depth < 8; depth++) { - rq_b = spinner_create_request(&b.spin, - b.ctx, engine, - MI_NOOP); - if (IS_ERR(rq_b)) { - err = PTR_ERR(rq_b); - st_engine_heartbeat_enable(engine); - goto err_client_b; - } - i915_request_add(rq_b); - - GEM_BUG_ON(i915_request_completed(rq_a)); - engine->schedule(rq_a, &attr); - igt_spinner_end(&a.spin); - - if (!igt_wait_for_spinner(&b.spin, rq_b)) { - pr_err("Second client failed to start\n"); - st_engine_heartbeat_enable(engine); - goto err_wedged; - } - - swap(a, b); - rq_a = rq_b; - } - igt_spinner_end(&a.spin); - - if (engine->execlists.preempt_hang.count) { - pr_err("Preemption on %s recorded x%d, depth %d; should have been suppressed!\n", - engine->name, - engine->execlists.preempt_hang.count, - depth); - st_engine_heartbeat_enable(engine); - err = -EINVAL; - goto err_client_b; - } - - st_engine_heartbeat_enable(engine); - if (igt_flush_test(gt->i915)) - goto err_wedged; - } - - err = 0; -err_client_b: - preempt_client_fini(&b); -err_client_a: - preempt_client_fini(&a); - return err; - -err_wedged: - igt_spinner_end(&b.spin); - igt_spinner_end(&a.spin); - intel_gt_set_wedged(gt); - err = -EIO; - goto err_client_b; -} - -static int live_chain_preempt(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - struct preempt_client hi, lo; - enum intel_engine_id id; - int err = -ENOMEM; - - /* - * Build a chain AB...BA between two contexts (A, B) and request - * preemption of the last request. It should then complete before - * the previously submitted spinner in B.
- */ - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (preempt_client_init(gt, &hi)) - return -ENOMEM; - - if (preempt_client_init(gt, &lo)) - goto err_client_hi; - - for_each_engine(engine, gt, id) { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(I915_PRIORITY_MAX), - }; - struct igt_live_test t; - struct i915_request *rq; - int ring_size, count, i; - - if (!intel_engine_has_preemption(engine)) - continue; - - rq = spinner_create_request(&lo.spin, - lo.ctx, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) - goto err_wedged; - - i915_request_get(rq); - i915_request_add(rq); - - ring_size = rq->wa_tail - rq->head; - if (ring_size < 0) - ring_size += rq->ring->size; - ring_size = rq->ring->size / ring_size; - pr_debug("%s(%s): Using maximum of %d requests\n", - __func__, engine->name, ring_size); - - igt_spinner_end(&lo.spin); - if (i915_request_wait(rq, 0, HZ / 2) < 0) { - pr_err("Timed out waiting to flush %s\n", engine->name); - i915_request_put(rq); - goto err_wedged; - } - i915_request_put(rq); - - if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { - err = -EIO; - goto err_wedged; - } - - for_each_prime_number_from(count, 1, ring_size) { - rq = spinner_create_request(&hi.spin, - hi.ctx, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) - goto err_wedged; - i915_request_add(rq); - if (!igt_wait_for_spinner(&hi.spin, rq)) - goto err_wedged; - - rq = spinner_create_request(&lo.spin, - lo.ctx, engine, - MI_ARB_CHECK); - if (IS_ERR(rq)) - goto err_wedged; - i915_request_add(rq); - - for (i = 0; i < count; i++) { - rq = igt_request_alloc(lo.ctx, engine); - if (IS_ERR(rq)) - goto err_wedged; - i915_request_add(rq); - } - - rq = igt_request_alloc(hi.ctx, engine); - if (IS_ERR(rq)) - goto err_wedged; - - i915_request_get(rq); - i915_request_add(rq); - engine->schedule(rq, &attr); - - igt_spinner_end(&hi.spin); - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - struct drm_printer p = - drm_info_printer(gt->i915->drm.dev); - - pr_err("Failed to preempt over chain of %d\n", - count); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - i915_request_put(rq); - goto err_wedged; - } - igt_spinner_end(&lo.spin); - i915_request_put(rq); - - rq = igt_request_alloc(lo.ctx, engine); - if (IS_ERR(rq)) - goto err_wedged; - - i915_request_get(rq); - i915_request_add(rq); - - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - struct drm_printer p = - drm_info_printer(gt->i915->drm.dev); - - pr_err("Failed to flush low priority chain of %d requests\n", - count); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - i915_request_put(rq); - goto err_wedged; - } - i915_request_put(rq); - } - - if (igt_live_test_end(&t)) { - err = -EIO; - goto err_wedged; - } - } - - err = 0; -err_client_lo: - preempt_client_fini(&lo); -err_client_hi: - preempt_client_fini(&hi); - return err; - -err_wedged: - igt_spinner_end(&hi.spin); - igt_spinner_end(&lo.spin); - intel_gt_set_wedged(gt); - err = -EIO; - goto err_client_lo; -} - -static int create_gang(struct intel_engine_cs *engine, - struct i915_request **prev) -{ - struct drm_i915_gem_object *obj; - struct intel_context *ce; - struct i915_request *rq; - struct i915_vma *vma; - u32 *cs; - int err; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return PTR_ERR(ce); - - obj = i915_gem_object_create_internal(engine->i915, 4096); - if (IS_ERR(obj)) { - err = PTR_ERR(obj); - goto err_ce; - } - - vma = i915_vma_instance(obj, ce->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto err_obj; - } - - err = i915_vma_pin(vma, 
0, 0, PIN_USER); - if (err) - goto err_obj; - - cs = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(cs)) - goto err_obj; - - /* Semaphore target: spin until zero */ - *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE; - - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_EQ_SDD; - *cs++ = 0; - *cs++ = lower_32_bits(vma->node.start); - *cs++ = upper_32_bits(vma->node.start); - - if (*prev) { - u64 offset = (*prev)->batch->node.start; - - /* Terminate the spinner in the next lower priority batch. */ - *cs++ = MI_STORE_DWORD_IMM_GEN4; - *cs++ = lower_32_bits(offset); - *cs++ = upper_32_bits(offset); - *cs++ = 0; - } - - *cs++ = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) - goto err_obj; - - rq->batch = i915_vma_get(vma); - i915_request_get(rq); - - i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, false); - if (!err) - err = i915_vma_move_to_active(vma, rq, 0); - if (!err) - err = rq->engine->emit_bb_start(rq, - vma->node.start, - PAGE_SIZE, 0); - i915_vma_unlock(vma); - i915_request_add(rq); - if (err) - goto err_rq; - - i915_gem_object_put(obj); - intel_context_put(ce); - - rq->mock.link.next = &(*prev)->mock.link; - *prev = rq; - return 0; - -err_rq: - i915_vma_put(rq->batch); - i915_request_put(rq); -err_obj: - i915_gem_object_put(obj); -err_ce: - intel_context_put(ce); - return err; -} - -static int __live_preempt_ring(struct intel_engine_cs *engine, - struct igt_spinner *spin, - int queue_sz, int ring_sz) -{ - struct intel_context *ce[2] = {}; - struct i915_request *rq; - struct igt_live_test t; - int err = 0; - int n; - - if (igt_live_test_begin(&t, engine->i915, __func__, engine->name)) - return -EIO; - - for (n = 0; n < ARRAY_SIZE(ce); n++) { - struct intel_context *tmp; - - tmp = intel_context_create(engine); - if (IS_ERR(tmp)) { - err = PTR_ERR(tmp); - goto err_ce; - } - - tmp->ring = __intel_context_ring_size(ring_sz); - - err = intel_context_pin(tmp); - if (err) { - intel_context_put(tmp); - goto err_ce; - } - - memset32(tmp->ring->vaddr, - 0xdeadbeef, /* trigger a hang if executed */ - tmp->ring->vma->size / sizeof(u32)); - - ce[n] = tmp; - } - - rq = igt_spinner_create_request(spin, ce[0], MI_ARB_CHECK); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ce; - } - - i915_request_get(rq); - rq->sched.attr.priority = I915_PRIORITY_BARRIER; - i915_request_add(rq); - - if (!igt_wait_for_spinner(spin, rq)) { - intel_gt_set_wedged(engine->gt); - i915_request_put(rq); - err = -ETIME; - goto err_ce; - } - - /* Fill the ring, until we will cause a wrap */ - n = 0; - while (ce[0]->ring->tail - rq->wa_tail <= queue_sz) { - struct i915_request *tmp; - - tmp = intel_context_create_request(ce[0]); - if (IS_ERR(tmp)) { - err = PTR_ERR(tmp); - i915_request_put(rq); - goto err_ce; - } - - i915_request_add(tmp); - intel_engine_flush_submission(engine); - n++; - } - intel_engine_flush_submission(engine); - pr_debug("%s: Filled %d with %d nop tails {size:%x, tail:%x, emit:%x, rq.tail:%x}\n", - engine->name, queue_sz, n, - ce[0]->ring->size, - ce[0]->ring->tail, - ce[0]->ring->emit, - rq->tail); - i915_request_put(rq); - - /* Create a second request to preempt the first ring */ - rq = intel_context_create_request(ce[1]); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ce; - } - - rq->sched.attr.priority = I915_PRIORITY_BARRIER; - i915_request_get(rq); - i915_request_add(rq); - - err = wait_for_submit(engine, rq, HZ / 2); - i915_request_put(rq); - if (err) 
{ - pr_err("%s: preemption request was not submitted\n", - engine->name); - err = -ETIME; - } - - pr_debug("%s: ring[0]:{ tail:%x, emit:%x }, ring[1]:{ tail:%x, emit:%x }\n", - engine->name, - ce[0]->ring->tail, ce[0]->ring->emit, - ce[1]->ring->tail, ce[1]->ring->emit); - -err_ce: - intel_engine_flush_submission(engine); - igt_spinner_end(spin); - for (n = 0; n < ARRAY_SIZE(ce); n++) { - if (IS_ERR_OR_NULL(ce[n])) - break; - - intel_context_unpin(ce[n]); - intel_context_put(ce[n]); - } - if (igt_live_test_end(&t)) - err = -EIO; - return err; -} - -static int live_preempt_ring(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - struct igt_spinner spin; - enum intel_engine_id id; - int err = 0; - - /* - * Check that we roll back large chunks of a ring in order to do a - * preemption event. Similar to live_unlite_ring, but looking at - * ring size rather than the impact of intel_ring_direction(). - */ - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - for_each_engine(engine, gt, id) { - int n; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (!intel_engine_can_store_dword(engine)) - continue; - - st_engine_heartbeat_disable(engine); - - for (n = 0; n <= 3; n++) { - err = __live_preempt_ring(engine, &spin, - n * SZ_4K / 4, SZ_4K); - if (err) - break; - } - - st_engine_heartbeat_enable(engine); - if (err) - break; - } - - igt_spinner_fini(&spin); - return err; -} - -static int live_preempt_gang(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - enum intel_engine_id id; - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - /* - * Build as long a chain of preempters as we can, with each - * request higher priority than the last. Once we are ready, we release - * the last batch which then percolates down the chain, each releasing - * the next oldest in turn. The intent is to simply push as hard as we - * can with the number of preemptions, trying to exceed narrow HW - * limits. At a minimum, we insist that we can sort all the user - * high priority levels into execution order. - */ - - for_each_engine(engine, gt, id) { - struct i915_request *rq = NULL; - struct igt_live_test t; - IGT_TIMEOUT(end_time); - int prio = 0; - int err = 0; - u32 *cs; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) - return -EIO; - - do { - struct i915_sched_attr attr = { - .priority = I915_USER_PRIORITY(prio++), - }; - - err = create_gang(engine, &rq); - if (err) - break; - - /* Submit each spinner at increasing priority */ - engine->schedule(rq, &attr); - } while (prio <= I915_PRIORITY_MAX && - !__igt_timeout(end_time, NULL)); - pr_debug("%s: Preempt chain of %d requests\n", - engine->name, prio); - - /* - * Such that the last spinner is the highest priority and - * should execute first. When that spinner completes, - * it will terminate the next lowest spinner until there - * are no more spinners and the gang is complete.
- */ - cs = i915_gem_object_pin_map(rq->batch->obj, I915_MAP_WC); - if (!IS_ERR(cs)) { - *cs = 0; - i915_gem_object_unpin_map(rq->batch->obj); - } else { - err = PTR_ERR(cs); - intel_gt_set_wedged(gt); - } - - while (rq) { /* wait for each rq from highest to lowest prio */ - struct i915_request *n = list_next_entry(rq, mock.link); - - if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) { - struct drm_printer p = - drm_info_printer(engine->i915->drm.dev); - - pr_err("Failed to flush chain of %d requests, at %d\n", - prio, rq_prio(rq) >> I915_USER_PRIORITY_SHIFT); - intel_engine_dump(engine, &p, - "%s\n", engine->name); - - err = -ETIME; - } - - i915_vma_put(rq->batch); - i915_request_put(rq); - rq = n; - } - - if (igt_live_test_end(&t)) - err = -EIO; - if (err) - return err; - } - - return 0; -} - -static struct i915_vma * -create_gpr_user(struct intel_engine_cs *engine, - struct i915_vma *result, - unsigned int offset) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - u32 *cs; - int err; - int i; - - obj = i915_gem_object_create_internal(engine->i915, 4096); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vma = i915_vma_instance(obj, result->vm, NULL); - if (IS_ERR(vma)) { - i915_gem_object_put(obj); - return vma; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) { - i915_vma_put(vma); - return ERR_PTR(err); - } - - cs = i915_gem_object_pin_map(obj, I915_MAP_WC); - if (IS_ERR(cs)) { - i915_vma_put(vma); - return ERR_CAST(cs); - } - - /* All GPR are clear for new contexts. We use GPR(0) as a constant */ - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = CS_GPR(engine, 0); - *cs++ = 1; - - for (i = 1; i < NUM_GPR; i++) { - u64 addr; - - /* - * Perform: GPR[i]++ - * - * As we read and write into the context saved GPR[i], if - * we restart this batch buffer from an earlier point, we - * will repeat the increment and store a value > 1. 
- */ - *cs++ = MI_MATH(4); - *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCA, MI_MATH_REG(i)); - *cs++ = MI_MATH_LOAD(MI_MATH_REG_SRCB, MI_MATH_REG(0)); - *cs++ = MI_MATH_ADD; - *cs++ = MI_MATH_STORE(MI_MATH_REG(i), MI_MATH_REG_ACCU); - - addr = result->node.start + offset + i * sizeof(*cs); - *cs++ = MI_STORE_REGISTER_MEM_GEN8; - *cs++ = CS_GPR(engine, 2 * i); - *cs++ = lower_32_bits(addr); - *cs++ = upper_32_bits(addr); - - *cs++ = MI_SEMAPHORE_WAIT | - MI_SEMAPHORE_POLL | - MI_SEMAPHORE_SAD_GTE_SDD; - *cs++ = i; - *cs++ = lower_32_bits(result->node.start); - *cs++ = upper_32_bits(result->node.start); - } - - *cs++ = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(obj); - i915_gem_object_unpin_map(obj); - - return vma; -} - -static struct i915_vma *create_global(struct intel_gt *gt, size_t sz) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int err; - - obj = i915_gem_object_create_internal(gt->i915, sz); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - vma = i915_vma_instance(obj, >->ggtt->vm, NULL); - if (IS_ERR(vma)) { - i915_gem_object_put(obj); - return vma; - } - - err = i915_ggtt_pin(vma, NULL, 0, 0); - if (err) { - i915_vma_put(vma); - return ERR_PTR(err); - } - - return vma; -} - -static struct i915_request * -create_gpr_client(struct intel_engine_cs *engine, - struct i915_vma *global, - unsigned int offset) -{ - struct i915_vma *batch, *vma; - struct intel_context *ce; - struct i915_request *rq; - int err; - - ce = intel_context_create(engine); - if (IS_ERR(ce)) - return ERR_CAST(ce); - - vma = i915_vma_instance(global->obj, ce->vm, NULL); - if (IS_ERR(vma)) { - err = PTR_ERR(vma); - goto out_ce; - } - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - goto out_ce; - - batch = create_gpr_user(engine, vma, offset); - if (IS_ERR(batch)) { - err = PTR_ERR(batch); - goto out_vma; - } - - rq = intel_context_create_request(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_batch; - } - - i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, false); - if (!err) - err = i915_vma_move_to_active(vma, rq, 0); - i915_vma_unlock(vma); - - i915_vma_lock(batch); - if (!err) - err = i915_request_await_object(rq, batch->obj, false); - if (!err) - err = i915_vma_move_to_active(batch, rq, 0); - if (!err) - err = rq->engine->emit_bb_start(rq, - batch->node.start, - PAGE_SIZE, 0); - i915_vma_unlock(batch); - i915_vma_unpin(batch); - - if (!err) - i915_request_get(rq); - i915_request_add(rq); - -out_batch: - i915_vma_put(batch); -out_vma: - i915_vma_unpin(vma); -out_ce: - intel_context_put(ce); - return err ? 
ERR_PTR(err) : rq; -} - -static int preempt_user(struct intel_engine_cs *engine, - struct i915_vma *global, - int id) -{ - struct i915_sched_attr attr = { - .priority = I915_PRIORITY_MAX - }; - struct i915_request *rq; - int err = 0; - u32 *cs; - - rq = intel_engine_create_kernel_request(engine); - if (IS_ERR(rq)) - return PTR_ERR(rq); - - cs = intel_ring_begin(rq, 4); - if (IS_ERR(cs)) { - i915_request_add(rq); - return PTR_ERR(cs); - } - - *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; - *cs++ = i915_ggtt_offset(global); - *cs++ = 0; - *cs++ = id; - - intel_ring_advance(rq, cs); - - i915_request_get(rq); - i915_request_add(rq); - - engine->schedule(rq, &attr); - - if (i915_request_wait(rq, 0, HZ / 2) < 0) - err = -ETIME; - i915_request_put(rq); - - return err; -} - -static int live_preempt_user(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *engine; - struct i915_vma *global; - enum intel_engine_id id; - u32 *result; - int err = 0; - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - /* - * In our other tests, we look at preemption in carefully - * controlled conditions in the ringbuffer. Since most of the - * time is spent in user batches, most of our preemptions naturally - * occur there. We want to verify that when we preempt inside a batch - * we continue on from the current instruction and do not roll back - * to the start, or another earlier arbitration point. - * - * To verify this, we create a batch which is a mixture of - * MI_MATH (gpr++) MI_SRM (gpr) and preemption points. Then with - * a few preempting contexts thrown into the mix, we look for any - * repeated instructions (which show up as incorrect values). - */ - - global = create_global(gt, 4096); - if (IS_ERR(global)) - return PTR_ERR(global); - - result = i915_gem_object_pin_map(global->obj, I915_MAP_WC); - if (IS_ERR(result)) { - i915_vma_unpin_and_release(&global, 0); - return PTR_ERR(result); - } - - for_each_engine(engine, gt, id) { - struct i915_request *client[3] = {}; - struct igt_live_test t; - int i; - - if (!intel_engine_has_preemption(engine)) - continue; - - if (IS_GEN(gt->i915, 8) && engine->class != RENDER_CLASS) - continue; /* we need per-context GPR */ - - if (igt_live_test_begin(&t, gt->i915, __func__, engine->name)) { - err = -EIO; - break; - } - - memset(result, 0, 4096); - - for (i = 0; i < ARRAY_SIZE(client); i++) { - struct i915_request *rq; - - rq = create_gpr_client(engine, global, - NUM_GPR * i * sizeof(u32)); - if (IS_ERR(rq)) - goto end_test; - - client[i] = rq; - } - - /* Continuously preempt the set of 3 running contexts */ - for (i = 1; i <= NUM_GPR; i++) { - err = preempt_user(engine, global, i); - if (err) - goto end_test; - } - - if (READ_ONCE(result[0]) != NUM_GPR) { - pr_err("%s: Failed to release semaphore\n", - engine->name); - err = -EIO; - goto end_test; - } - - for (i = 0; i < ARRAY_SIZE(client); i++) { - int gpr; - - if (i915_request_wait(client[i], 0, HZ / 2) < 0) { - err = -ETIME; - goto end_test; - } - - for (gpr = 1; gpr < NUM_GPR; gpr++) { - if (result[NUM_GPR * i + gpr] != 1) { - pr_err("%s: Invalid result, client %d, gpr %d, result: %d\n", - engine->name, - i, gpr, result[NUM_GPR * i + gpr]); - err = -EINVAL; - goto end_test; - } - } - } - -end_test: - for (i = 0; i < ARRAY_SIZE(client); i++) { - if (!client[i]) - break; - - i915_request_put(client[i]); - } - - /* Flush the semaphores on error */ - smp_store_mb(result[0], -1); - if (igt_live_test_end(&t)) - err = -EIO; - if (err) - break; - } - - i915_vma_unpin_and_release(&global, 
I915_VMA_RELEASE_MAP); - return err; -} - -static int live_preempt_timeout(void *arg) -{ - struct intel_gt *gt = arg; - struct i915_gem_context *ctx_hi, *ctx_lo; - struct igt_spinner spin_lo; - struct intel_engine_cs *engine; - enum intel_engine_id id; - int err = -ENOMEM; - - /* - * Check that we force preemption to occur by cancelling the previous - * context if it refuses to yield the GPU. - */ - if (!IS_ACTIVE(CONFIG_DRM_I915_PREEMPT_TIMEOUT)) - return 0; - - if (!HAS_LOGICAL_RING_PREEMPTION(gt->i915)) - return 0; - - if (!intel_has_reset_engine(gt)) - return 0; - - if (igt_spinner_init(&spin_lo, gt)) - return -ENOMEM; - - ctx_hi = kernel_context(gt->i915); - if (!ctx_hi) - goto err_spin_lo; - ctx_hi->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MAX_USER_PRIORITY); - - ctx_lo = kernel_context(gt->i915); - if (!ctx_lo) - goto err_ctx_hi; - ctx_lo->sched.priority = - I915_USER_PRIORITY(I915_CONTEXT_MIN_USER_PRIORITY); - - for_each_engine(engine, gt, id) { - unsigned long saved_timeout; - struct i915_request *rq; - - if (!intel_engine_has_preemption(engine)) - continue; - - rq = spinner_create_request(&spin_lo, ctx_lo, engine, - MI_NOOP); /* preemption disabled */ - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - i915_request_add(rq); - if (!igt_wait_for_spinner(&spin_lo, rq)) { - intel_gt_set_wedged(gt); - err = -EIO; - goto err_ctx_lo; - } - - rq = igt_request_alloc(ctx_hi, engine); - if (IS_ERR(rq)) { - igt_spinner_end(&spin_lo); - err = PTR_ERR(rq); - goto err_ctx_lo; - } - - /* Flush the previous CS ack before changing timeouts */ - while (READ_ONCE(engine->execlists.pending[0])) - cpu_relax(); - - saved_timeout = engine->props.preempt_timeout_ms; - engine->props.preempt_timeout_ms = 1; /* in ms, -> 1 jiffie */ - - i915_request_get(rq); - i915_request_add(rq); - - intel_engine_flush_submission(engine); - engine->props.preempt_timeout_ms = saved_timeout; - - if (i915_request_wait(rq, 0, HZ / 10) < 0) { - intel_gt_set_wedged(gt); - i915_request_put(rq); - err = -ETIME; - goto err_ctx_lo; - } - - igt_spinner_end(&spin_lo); - i915_request_put(rq); - } - - err = 0; -err_ctx_lo: - kernel_context_close(ctx_lo); -err_ctx_hi: - kernel_context_close(ctx_hi); -err_spin_lo: - igt_spinner_fini(&spin_lo); - return err; -} - -static int random_range(struct rnd_state *rnd, int min, int max) -{ - return i915_prandom_u32_max_state(max - min, rnd) + min; -} - -static int random_priority(struct rnd_state *rnd) -{ - return random_range(rnd, I915_PRIORITY_MIN, I915_PRIORITY_MAX); -} - -struct preempt_smoke { - struct intel_gt *gt; - struct i915_gem_context **contexts; - struct intel_engine_cs *engine; - struct drm_i915_gem_object *batch; - unsigned int ncontext; - struct rnd_state prng; - unsigned long count; -}; - -static struct i915_gem_context *smoke_context(struct preempt_smoke *smoke) -{ - return smoke->contexts[i915_prandom_u32_max_state(smoke->ncontext, - &smoke->prng)]; -} - -static int smoke_submit(struct preempt_smoke *smoke, - struct i915_gem_context *ctx, int prio, - struct drm_i915_gem_object *batch) -{ - struct i915_request *rq; - struct i915_vma *vma = NULL; - int err = 0; - - if (batch) { - struct i915_address_space *vm; - - vm = i915_gem_context_get_vm_rcu(ctx); - vma = i915_vma_instance(batch, vm, NULL); - i915_vm_put(vm); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - err = i915_vma_pin(vma, 0, 0, PIN_USER); - if (err) - return err; - } - - ctx->sched.priority = prio; - - rq = igt_request_alloc(ctx, smoke->engine); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto 
unpin; - } - - if (vma) { - i915_vma_lock(vma); - err = i915_request_await_object(rq, vma->obj, false); - if (!err) - err = i915_vma_move_to_active(vma, rq, 0); - if (!err) - err = rq->engine->emit_bb_start(rq, - vma->node.start, - PAGE_SIZE, 0); - i915_vma_unlock(vma); - } - - i915_request_add(rq); - -unpin: - if (vma) - i915_vma_unpin(vma); - - return err; -} - -static int smoke_crescendo_thread(void *arg) -{ - struct preempt_smoke *smoke = arg; - IGT_TIMEOUT(end_time); - unsigned long count; - - count = 0; - do { - struct i915_gem_context *ctx = smoke_context(smoke); - int err; - - err = smoke_submit(smoke, - ctx, count % I915_PRIORITY_MAX, - smoke->batch); - if (err) - return err; - - count++; - } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); - - smoke->count = count; - return 0; -} - -static int smoke_crescendo(struct preempt_smoke *smoke, unsigned int flags) -#define BATCH BIT(0) -{ - struct task_struct *tsk[I915_NUM_ENGINES] = {}; - struct preempt_smoke arg[I915_NUM_ENGINES]; - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned long count; - int err = 0; - - for_each_engine(engine, smoke->gt, id) { - arg[id] = *smoke; - arg[id].engine = engine; - if (!(flags & BATCH)) - arg[id].batch = NULL; - arg[id].count = 0; - - tsk[id] = kthread_run(smoke_crescendo_thread, &arg, - "igt/smoke:%d", id); - if (IS_ERR(tsk[id])) { - err = PTR_ERR(tsk[id]); - break; - } - get_task_struct(tsk[id]); - } - - yield(); /* start all threads before we kthread_stop() */ - - count = 0; - for_each_engine(engine, smoke->gt, id) { - int status; - - if (IS_ERR_OR_NULL(tsk[id])) - continue; - - status = kthread_stop(tsk[id]); - if (status && !err) - err = status; - - count += arg[id].count; - - put_task_struct(tsk[id]); - } - - pr_info("Submitted %lu crescendo:%x requests across %d engines and %d contexts\n", - count, flags, smoke->gt->info.num_engines, smoke->ncontext); - return 0; -} - -static int smoke_random(struct preempt_smoke *smoke, unsigned int flags) -{ - enum intel_engine_id id; - IGT_TIMEOUT(end_time); - unsigned long count; - - count = 0; - do { - for_each_engine(smoke->engine, smoke->gt, id) { - struct i915_gem_context *ctx = smoke_context(smoke); - int err; - - err = smoke_submit(smoke, - ctx, random_priority(&smoke->prng), - flags & BATCH ? 
smoke->batch : NULL); - if (err) - return err; - - count++; - } - } while (count < smoke->ncontext && !__igt_timeout(end_time, NULL)); - - pr_info("Submitted %lu random:%x requests across %d engines and %d contexts\n", - count, flags, smoke->gt->info.num_engines, smoke->ncontext); - return 0; -} - -static int live_preempt_smoke(void *arg) -{ - struct preempt_smoke smoke = { - .gt = arg, - .prng = I915_RND_STATE_INITIALIZER(i915_selftest.random_seed), - .ncontext = 256, - }; - const unsigned int phase[] = { 0, BATCH }; - struct igt_live_test t; - int err = -ENOMEM; - u32 *cs; - int n; - - if (!HAS_LOGICAL_RING_PREEMPTION(smoke.gt->i915)) - return 0; - - smoke.contexts = kmalloc_array(smoke.ncontext, - sizeof(*smoke.contexts), - GFP_KERNEL); - if (!smoke.contexts) - return -ENOMEM; - - smoke.batch = - i915_gem_object_create_internal(smoke.gt->i915, PAGE_SIZE); - if (IS_ERR(smoke.batch)) { - err = PTR_ERR(smoke.batch); - goto err_free; - } - - cs = i915_gem_object_pin_map(smoke.batch, I915_MAP_WB); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto err_batch; - } - for (n = 0; n < PAGE_SIZE / sizeof(*cs) - 1; n++) - cs[n] = MI_ARB_CHECK; - cs[n] = MI_BATCH_BUFFER_END; - i915_gem_object_flush_map(smoke.batch); - i915_gem_object_unpin_map(smoke.batch); - - if (igt_live_test_begin(&t, smoke.gt->i915, __func__, "all")) { - err = -EIO; - goto err_batch; - } - - for (n = 0; n < smoke.ncontext; n++) { - smoke.contexts[n] = kernel_context(smoke.gt->i915); - if (!smoke.contexts[n]) - goto err_ctx; - } - - for (n = 0; n < ARRAY_SIZE(phase); n++) { - err = smoke_crescendo(&smoke, phase[n]); - if (err) - goto err_ctx; - - err = smoke_random(&smoke, phase[n]); - if (err) - goto err_ctx; - } - -err_ctx: - if (igt_live_test_end(&t)) - err = -EIO; - - for (n = 0; n < smoke.ncontext; n++) { - if (!smoke.contexts[n]) - break; - kernel_context_close(smoke.contexts[n]); - } - -err_batch: - i915_gem_object_put(smoke.batch); -err_free: - kfree(smoke.contexts); - - return err; -} - -static int nop_virtual_engine(struct intel_gt *gt, - struct intel_engine_cs **siblings, - unsigned int nsibling, - unsigned int nctx, - unsigned int flags) -#define CHAIN BIT(0) -{ - IGT_TIMEOUT(end_time); - struct i915_request *request[16] = {}; - struct intel_context *ve[16]; - unsigned long n, prime, nc; - struct igt_live_test t; - ktime_t times[2] = {}; - int err; - - GEM_BUG_ON(!nctx || nctx > ARRAY_SIZE(ve)); - - for (n = 0; n < nctx; n++) { - ve[n] = intel_execlists_create_virtual(siblings, nsibling); - if (IS_ERR(ve[n])) { - err = PTR_ERR(ve[n]); - nctx = n; - goto out; - } - - err = intel_context_pin(ve[n]); - if (err) { - intel_context_put(ve[n]); - nctx = n; - goto out; - } - } - - err = igt_live_test_begin(&t, gt->i915, __func__, ve[0]->engine->name); - if (err) - goto out; - - for_each_prime_number_from(prime, 1, 8192) { - times[1] = ktime_get_raw(); - - if (flags & CHAIN) { - for (nc = 0; nc < nctx; nc++) { - for (n = 0; n < prime; n++) { - struct i915_request *rq; - - rq = i915_request_create(ve[nc]); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - if (request[nc]) - i915_request_put(request[nc]); - request[nc] = i915_request_get(rq); - i915_request_add(rq); - } - } - } else { - for (n = 0; n < prime; n++) { - for (nc = 0; nc < nctx; nc++) { - struct i915_request *rq; - - rq = i915_request_create(ve[nc]); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - if (request[nc]) - i915_request_put(request[nc]); - request[nc] = i915_request_get(rq); - i915_request_add(rq); - } - } - } - - for (nc = 0; nc < 
nctx; nc++) { - if (i915_request_wait(request[nc], 0, HZ / 10) < 0) { - pr_err("%s(%s): wait for %llx:%lld timed out\n", - __func__, ve[0]->engine->name, - request[nc]->fence.context, - request[nc]->fence.seqno); - - GEM_TRACE("%s(%s) failed at request %llx:%lld\n", - __func__, ve[0]->engine->name, - request[nc]->fence.context, - request[nc]->fence.seqno); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - break; - } - } - - times[1] = ktime_sub(ktime_get_raw(), times[1]); - if (prime == 1) - times[0] = times[1]; - - for (nc = 0; nc < nctx; nc++) { - i915_request_put(request[nc]); - request[nc] = NULL; - } - - if (__igt_timeout(end_time, NULL)) - break; - } - - err = igt_live_test_end(&t); - if (err) - goto out; - - pr_info("Requestx%d latencies on %s: 1 = %lluns, %lu = %lluns\n", - nctx, ve[0]->engine->name, ktime_to_ns(times[0]), - prime, div64_u64(ktime_to_ns(times[1]), prime)); - -out: - if (igt_flush_test(gt->i915)) - err = -EIO; - - for (nc = 0; nc < nctx; nc++) { - i915_request_put(request[nc]); - intel_context_unpin(ve[nc]); - intel_context_put(ve[nc]); - } - return err; -} - -static unsigned int -__select_siblings(struct intel_gt *gt, - unsigned int class, - struct intel_engine_cs **siblings, - bool (*filter)(const struct intel_engine_cs *)) -{ - unsigned int n = 0; - unsigned int inst; - - for (inst = 0; inst <= MAX_ENGINE_INSTANCE; inst++) { - if (!gt->engine_class[class][inst]) - continue; - - if (filter && !filter(gt->engine_class[class][inst])) - continue; - - siblings[n++] = gt->engine_class[class][inst]; - } - - return n; -} - -static unsigned int -select_siblings(struct intel_gt *gt, - unsigned int class, - struct intel_engine_cs **siblings) -{ - return __select_siblings(gt, class, siblings, NULL); -} - -static int live_virtual_engine(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - struct intel_engine_cs *engine; - enum intel_engine_id id; - unsigned int class; - int err; - - if (intel_uc_uses_guc_submission(>->uc)) - return 0; - - for_each_engine(engine, gt, id) { - err = nop_virtual_engine(gt, &engine, 1, 1, 0); - if (err) { - pr_err("Failed to wrap engine %s: err=%d\n", - engine->name, err); - return err; - } - } - - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { - int nsibling, n; - - nsibling = select_siblings(gt, class, siblings); - if (nsibling < 2) - continue; - - for (n = 1; n <= nsibling + 1; n++) { - err = nop_virtual_engine(gt, siblings, nsibling, - n, 0); - if (err) - return err; - } - - err = nop_virtual_engine(gt, siblings, nsibling, n, CHAIN); - if (err) - return err; - } - - return 0; -} - -static int mask_virtual_engine(struct intel_gt *gt, - struct intel_engine_cs **siblings, - unsigned int nsibling) -{ - struct i915_request *request[MAX_ENGINE_INSTANCE + 1]; - struct intel_context *ve; - struct igt_live_test t; - unsigned int n; - int err; - - /* - * Check that by setting the execution mask on a request, we can - * restrict it to our desired engine within the virtual engine. 
- */ - - ve = intel_execlists_create_virtual(siblings, nsibling); - if (IS_ERR(ve)) { - err = PTR_ERR(ve); - goto out_close; - } - - err = intel_context_pin(ve); - if (err) - goto out_put; - - err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); - if (err) - goto out_unpin; - - for (n = 0; n < nsibling; n++) { - request[n] = i915_request_create(ve); - if (IS_ERR(request[n])) { - err = PTR_ERR(request[n]); - nsibling = n; - goto out; - } - - /* Reverse order as it's more likely to be unnatural */ - request[n]->execution_mask = siblings[nsibling - n - 1]->mask; - - i915_request_get(request[n]); - i915_request_add(request[n]); - } - - for (n = 0; n < nsibling; n++) { - if (i915_request_wait(request[n], 0, HZ / 10) < 0) { - pr_err("%s(%s): wait for %llx:%lld timed out\n", - __func__, ve->engine->name, - request[n]->fence.context, - request[n]->fence.seqno); - - GEM_TRACE("%s(%s) failed at request %llx:%lld\n", - __func__, ve->engine->name, - request[n]->fence.context, - request[n]->fence.seqno); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - err = -EIO; - goto out; - } - - if (request[n]->engine != siblings[nsibling - n - 1]) { - pr_err("Executed on wrong sibling '%s', expected '%s'\n", - request[n]->engine->name, - siblings[nsibling - n - 1]->name); - err = -EINVAL; - goto out; - } - } - - err = igt_live_test_end(&t); -out: - if (igt_flush_test(gt->i915)) - err = -EIO; - - for (n = 0; n < nsibling; n++) - i915_request_put(request[n]); - -out_unpin: - intel_context_unpin(ve); -out_put: - intel_context_put(ve); -out_close: - return err; -} - -static int live_virtual_mask(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class; - int err; - - if (intel_uc_uses_guc_submission(>->uc)) - return 0; - - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { - unsigned int nsibling; - - nsibling = select_siblings(gt, class, siblings); - if (nsibling < 2) - continue; - - err = mask_virtual_engine(gt, siblings, nsibling); - if (err) - return err; - } - - return 0; -} - -static int slicein_virtual_engine(struct intel_gt *gt, - struct intel_engine_cs **siblings, - unsigned int nsibling) -{ - const long timeout = slice_timeout(siblings[0]); - struct intel_context *ce; - struct i915_request *rq; - struct igt_spinner spin; - unsigned int n; - int err = 0; - - /* - * Virtual requests must take part in timeslicing on the target engines. 
- */ - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - for (n = 0; n < nsibling; n++) { - ce = intel_context_create(siblings[n]); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out; - } - - rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); - intel_context_put(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - i915_request_add(rq); - } - - ce = intel_execlists_create_virtual(siblings, nsibling); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out; - } - - rq = intel_context_create_request(ce); - intel_context_put(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - i915_request_get(rq); - i915_request_add(rq); - if (i915_request_wait(rq, 0, timeout) < 0) { - GEM_TRACE_ERR("%s(%s) failed to slice in virtual request\n", - __func__, rq->engine->name); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - err = -EIO; - } - i915_request_put(rq); - -out: - igt_spinner_end(&spin); - if (igt_flush_test(gt->i915)) - err = -EIO; - igt_spinner_fini(&spin); - return err; -} - -static int sliceout_virtual_engine(struct intel_gt *gt, - struct intel_engine_cs **siblings, - unsigned int nsibling) -{ - const long timeout = slice_timeout(siblings[0]); - struct intel_context *ce; - struct i915_request *rq; - struct igt_spinner spin; - unsigned int n; - int err = 0; - - /* - * Virtual requests must allow others a fair timeslice. - */ - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - /* XXX We do not handle oversubscription and fairness with normal rq */ - for (n = 0; n < nsibling; n++) { - ce = intel_execlists_create_virtual(siblings, nsibling); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out; - } - - rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); - intel_context_put(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - i915_request_add(rq); - } - - for (n = 0; !err && n < nsibling; n++) { - ce = intel_context_create(siblings[n]); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out; - } - - rq = intel_context_create_request(ce); - intel_context_put(ce); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out; - } - - i915_request_get(rq); - i915_request_add(rq); - if (i915_request_wait(rq, 0, timeout) < 0) { - GEM_TRACE_ERR("%s(%s) failed to slice out virtual request\n", - __func__, siblings[n]->name); - GEM_TRACE_DUMP(); - intel_gt_set_wedged(gt); - err = -EIO; - } - i915_request_put(rq); - } - -out: - igt_spinner_end(&spin); - if (igt_flush_test(gt->i915)) - err = -EIO; - igt_spinner_fini(&spin); - return err; -} - -static int live_virtual_slice(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class; - int err; - - if (intel_uc_uses_guc_submission(>->uc)) - return 0; - - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { - unsigned int nsibling; - - nsibling = __select_siblings(gt, class, siblings, - intel_engine_has_timeslices); - if (nsibling < 2) - continue; - - err = slicein_virtual_engine(gt, siblings, nsibling); - if (err) - return err; - - err = sliceout_virtual_engine(gt, siblings, nsibling); - if (err) - return err; - } - - return 0; -} - -static int preserved_virtual_engine(struct intel_gt *gt, - struct intel_engine_cs **siblings, - unsigned int nsibling) -{ - struct i915_request *last = NULL; - struct intel_context *ve; - struct i915_vma *scratch; - struct igt_live_test t; - unsigned int n; - int err = 0; - u32 *cs; - - scratch = create_scratch(siblings[0]->gt); - if (IS_ERR(scratch)) - return PTR_ERR(scratch); - - err = i915_vma_sync(scratch); - if 
(err) - goto out_scratch; - - ve = intel_execlists_create_virtual(siblings, nsibling); - if (IS_ERR(ve)) { - err = PTR_ERR(ve); - goto out_scratch; - } - - err = intel_context_pin(ve); - if (err) - goto out_put; - - err = igt_live_test_begin(&t, gt->i915, __func__, ve->engine->name); - if (err) - goto out_unpin; - - for (n = 0; n < NUM_GPR_DW; n++) { - struct intel_engine_cs *engine = siblings[n % nsibling]; - struct i915_request *rq; - - rq = i915_request_create(ve); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_end; - } - - i915_request_put(last); - last = i915_request_get(rq); - - cs = intel_ring_begin(rq, 8); - if (IS_ERR(cs)) { - i915_request_add(rq); - err = PTR_ERR(cs); - goto out_end; - } - - *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; - *cs++ = CS_GPR(engine, n); - *cs++ = i915_ggtt_offset(scratch) + n * sizeof(u32); - *cs++ = 0; - - *cs++ = MI_LOAD_REGISTER_IMM(1); - *cs++ = CS_GPR(engine, (n + 1) % NUM_GPR_DW); - *cs++ = n + 1; - - *cs++ = MI_NOOP; - intel_ring_advance(rq, cs); - - /* Restrict this request to run on a particular engine */ - rq->execution_mask = engine->mask; - i915_request_add(rq); - } - - if (i915_request_wait(last, 0, HZ / 5) < 0) { - err = -ETIME; - goto out_end; - } - - cs = i915_gem_object_pin_map(scratch->obj, I915_MAP_WB); - if (IS_ERR(cs)) { - err = PTR_ERR(cs); - goto out_end; - } - - for (n = 0; n < NUM_GPR_DW; n++) { - if (cs[n] != n) { - pr_err("Incorrect value[%d] found for GPR[%d]\n", - cs[n], n); - err = -EINVAL; - break; - } - } - - i915_gem_object_unpin_map(scratch->obj); - -out_end: - if (igt_live_test_end(&t)) - err = -EIO; - i915_request_put(last); -out_unpin: - intel_context_unpin(ve); -out_put: - intel_context_put(ve); -out_scratch: - i915_vma_unpin_and_release(&scratch, 0); - return err; -} - -static int live_virtual_preserved(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class; - - /* - * Check that the context image retains non-privileged (user) registers - * from one engine to the next. For this we check that the CS_GPR - * are preserved. - */ - - if (intel_uc_uses_guc_submission(>->uc)) - return 0; - - /* As we use CS_GPR we cannot run before they existed on all engines. */ - if (INTEL_GEN(gt->i915) < 9) - return 0; - - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { - int nsibling, err; - - nsibling = select_siblings(gt, class, siblings); - if (nsibling < 2) - continue; - - err = preserved_virtual_engine(gt, siblings, nsibling); - if (err) - return err; - } - - return 0; -} - -static int bond_virtual_engine(struct intel_gt *gt, - unsigned int class, - struct intel_engine_cs **siblings, - unsigned int nsibling, - unsigned int flags) -#define BOND_SCHEDULE BIT(0) -{ - struct intel_engine_cs *master; - struct i915_request *rq[16]; - enum intel_engine_id id; - struct igt_spinner spin; - unsigned long n; - int err; - - /* - * A set of bonded requests is intended to be run concurrently - * across a number of engines. We use one request per-engine - * and a magic fence to schedule each of the bonded requests - * at the same time. A consequence of our current scheduler is that - * we only move requests to the HW ready queue when the request - * becomes ready, that is when all of its prerequisite fences have - * been signaled. As one of those fences is the master submit fence, - * there is a delay on all secondary fences as the HW may be - * currently busy. 
Equally, as all the requests are independent, - * they may have other fences that delay individual request - * submission to HW. Ergo, we do not guarantee that all requests are - * immediately submitted to HW at the same time, just that if the - * rules are abided by, they are ready at the same time as the - * first is submitted. Userspace can embed semaphores in its batch - * to ensure parallel execution of its phases as it requires. - * Though naturally it gets requested that perhaps the scheduler should - * take care of parallel execution, even across preemption events on - * different HW. (The proper answer is of course "lalalala".) - * - * With the submit-fence, we have identified three possible phases - * of synchronisation depending on the master fence: queued (not - * ready), executing, and signaled. The first two are quite simple - * and checked below. However, the signaled master fence handling is - * contentious. Currently we do not distinguish between a signaled - * fence and an expired fence, as once signaled it does not convey - * any information about the previous execution. It may even be freed - * and hence checking later it may not exist at all. Ergo we currently - * do not apply the bonding constraint for an already signaled fence, - * as our expectation is that it should not constrain the secondaries - * and is outside of the scope of the bonded request API (i.e. all - * userspace requests are meant to be running in parallel). As - * it imposes no constraint, and is effectively a no-op, we do not - * check below as normal execution flows are checked extensively above. - * - * XXX Is the degenerate handling of signaled submit fences the - * expected behaviour for userpace? - */ - - GEM_BUG_ON(nsibling >= ARRAY_SIZE(rq) - 1); - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - err = 0; - rq[0] = ERR_PTR(-ENOMEM); - for_each_engine(master, gt, id) { - struct i915_sw_fence fence = {}; - struct intel_context *ce; - - if (master->class == class) - continue; - - ce = intel_context_create(master); - if (IS_ERR(ce)) { - err = PTR_ERR(ce); - goto out; - } - - memset_p((void *)rq, ERR_PTR(-EINVAL), ARRAY_SIZE(rq)); - - rq[0] = igt_spinner_create_request(&spin, ce, MI_NOOP); - intel_context_put(ce); - if (IS_ERR(rq[0])) { - err = PTR_ERR(rq[0]); - goto out; - } - i915_request_get(rq[0]); - - if (flags & BOND_SCHEDULE) { - onstack_fence_init(&fence); - err = i915_sw_fence_await_sw_fence_gfp(&rq[0]->submit, - &fence, - GFP_KERNEL); - } - - i915_request_add(rq[0]); - if (err < 0) - goto out; - - if (!(flags & BOND_SCHEDULE) && - !igt_wait_for_spinner(&spin, rq[0])) { - err = -EIO; - goto out; - } - - for (n = 0; n < nsibling; n++) { - struct intel_context *ve; - - ve = intel_execlists_create_virtual(siblings, nsibling); - if (IS_ERR(ve)) { - err = PTR_ERR(ve); - onstack_fence_fini(&fence); - goto out; - } - - err = intel_virtual_engine_attach_bond(ve->engine, - master, - siblings[n]); - if (err) { - intel_context_put(ve); - onstack_fence_fini(&fence); - goto out; - } - - err = intel_context_pin(ve); - intel_context_put(ve); - if (err) { - onstack_fence_fini(&fence); - goto out; - } - - rq[n + 1] = i915_request_create(ve); - intel_context_unpin(ve); - if (IS_ERR(rq[n + 1])) { - err = PTR_ERR(rq[n + 1]); - onstack_fence_fini(&fence); - goto out; - } - i915_request_get(rq[n + 1]); - - err = i915_request_await_execution(rq[n + 1], - &rq[0]->fence, - ve->engine->bond_execute); - i915_request_add(rq[n + 1]); - if (err < 0) { - onstack_fence_fini(&fence); - goto out; - } - } - 
onstack_fence_fini(&fence); - intel_engine_flush_submission(master); - igt_spinner_end(&spin); - - if (i915_request_wait(rq[0], 0, HZ / 10) < 0) { - pr_err("Master request did not execute (on %s)!\n", - rq[0]->engine->name); - err = -EIO; - goto out; - } - - for (n = 0; n < nsibling; n++) { - if (i915_request_wait(rq[n + 1], 0, - MAX_SCHEDULE_TIMEOUT) < 0) { - err = -EIO; - goto out; - } - - if (rq[n + 1]->engine != siblings[n]) { - pr_err("Bonded request did not execute on target engine: expected %s, used %s; master was %s\n", - siblings[n]->name, - rq[n + 1]->engine->name, - rq[0]->engine->name); - err = -EINVAL; - goto out; - } - } - - for (n = 0; !IS_ERR(rq[n]); n++) - i915_request_put(rq[n]); - rq[0] = ERR_PTR(-ENOMEM); - } - -out: - for (n = 0; !IS_ERR(rq[n]); n++) - i915_request_put(rq[n]); - if (igt_flush_test(gt->i915)) - err = -EIO; - - igt_spinner_fini(&spin); - return err; -} - -static int live_virtual_bond(void *arg) -{ - static const struct phase { - const char *name; - unsigned int flags; - } phases[] = { - { "", 0 }, - { "schedule", BOND_SCHEDULE }, - { }, - }; - struct intel_gt *gt = arg; - struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class; - int err; - - if (intel_uc_uses_guc_submission(>->uc)) - return 0; - - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { - const struct phase *p; - int nsibling; - - nsibling = select_siblings(gt, class, siblings); - if (nsibling < 2) - continue; - - for (p = phases; p->name; p++) { - err = bond_virtual_engine(gt, - class, siblings, nsibling, - p->flags); - if (err) { - pr_err("%s(%s): failed class=%d, nsibling=%d, err=%d\n", - __func__, p->name, class, nsibling, err); - return err; - } - } - } - - return 0; -} - -static int reset_virtual_engine(struct intel_gt *gt, - struct intel_engine_cs **siblings, - unsigned int nsibling) -{ - struct intel_engine_cs *engine; - struct intel_context *ve; - struct igt_spinner spin; - struct i915_request *rq; - unsigned int n; - int err = 0; - - /* - * In order to support offline error capture for fast preempt reset, - * we need to decouple the guilty request and ensure that it and its - * descendents are not executed while the capture is in progress. 
- */ - - if (igt_spinner_init(&spin, gt)) - return -ENOMEM; - - ve = intel_execlists_create_virtual(siblings, nsibling); - if (IS_ERR(ve)) { - err = PTR_ERR(ve); - goto out_spin; - } - - for (n = 0; n < nsibling; n++) - st_engine_heartbeat_disable(siblings[n]); - - rq = igt_spinner_create_request(&spin, ve, MI_ARB_CHECK); - if (IS_ERR(rq)) { - err = PTR_ERR(rq); - goto out_heartbeat; - } - i915_request_add(rq); - - if (!igt_wait_for_spinner(&spin, rq)) { - intel_gt_set_wedged(gt); - err = -ETIME; - goto out_heartbeat; - } - - engine = rq->engine; - GEM_BUG_ON(engine == ve->engine); - - /* Take ownership of the reset and tasklet */ - if (test_and_set_bit(I915_RESET_ENGINE + engine->id, - &gt->reset.flags)) { - intel_gt_set_wedged(gt); - err = -EBUSY; - goto out_heartbeat; - } - tasklet_disable(&engine->execlists.tasklet); - - engine->execlists.tasklet.func(engine->execlists.tasklet.data); - GEM_BUG_ON(execlists_active(&engine->execlists) != rq); - - /* Fake a preemption event; failed of course */ - spin_lock_irq(&engine->active.lock); - __unwind_incomplete_requests(engine); - spin_unlock_irq(&engine->active.lock); - GEM_BUG_ON(rq->engine != ve->engine); - - /* Reset the engine while keeping our active request on hold */ - execlists_hold(engine, rq); - GEM_BUG_ON(!i915_request_on_hold(rq)); - - intel_engine_reset(engine, NULL); - GEM_BUG_ON(rq->fence.error != -EIO); - - /* Release our grasp on the engine, letting CS flow again */ - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags); - - /* Check that we do not resubmit the held request */ - i915_request_get(rq); - if (!i915_request_wait(rq, 0, HZ / 5)) { - pr_err("%s: on hold request completed!\n", - engine->name); - intel_gt_set_wedged(gt); - err = -EIO; - goto out_rq; - } - GEM_BUG_ON(!i915_request_on_hold(rq)); - - /* But is resubmitted on release */ - execlists_unhold(engine, rq); - if (i915_request_wait(rq, 0, HZ / 5) < 0) { - pr_err("%s: held request did not complete!\n", - engine->name); - intel_gt_set_wedged(gt); - err = -ETIME; - } - -out_rq: - i915_request_put(rq); -out_heartbeat: - for (n = 0; n < nsibling; n++) - st_engine_heartbeat_enable(siblings[n]); - - intel_context_put(ve); -out_spin: - igt_spinner_fini(&spin); - return err; -} - -static int live_virtual_reset(void *arg) -{ - struct intel_gt *gt = arg; - struct intel_engine_cs *siblings[MAX_ENGINE_INSTANCE + 1]; - unsigned int class; - - /* - * Check that we handle a reset event within a virtual engine. - * Only the physical engine is reset, but we have to check the flow - * of the virtual requests around the reset, and make sure it is not - * forgotten.
- */ - - if (intel_uc_uses_guc_submission(>->uc)) - return 0; - - if (!intel_has_reset_engine(gt)) - return 0; - - for (class = 0; class <= MAX_ENGINE_CLASS; class++) { - int nsibling, err; - - nsibling = select_siblings(gt, class, siblings); - if (nsibling < 2) - continue; - - err = reset_virtual_engine(gt, siblings, nsibling); - if (err) - return err; - } - - return 0; -} - -int intel_execlists_live_selftests(struct drm_i915_private *i915) -{ - static const struct i915_subtest tests[] = { - SUBTEST(live_sanitycheck), - SUBTEST(live_unlite_switch), - SUBTEST(live_unlite_preempt), - SUBTEST(live_unlite_ring), - SUBTEST(live_pin_rewind), - SUBTEST(live_hold_reset), - SUBTEST(live_error_interrupt), - SUBTEST(live_timeslice_preempt), - SUBTEST(live_timeslice_rewind), - SUBTEST(live_timeslice_queue), - SUBTEST(live_timeslice_nopreempt), - SUBTEST(live_busywait_preempt), - SUBTEST(live_preempt), - SUBTEST(live_late_preempt), - SUBTEST(live_nopreempt), - SUBTEST(live_preempt_cancel), - SUBTEST(live_suppress_self_preempt), - SUBTEST(live_chain_preempt), - SUBTEST(live_preempt_ring), - SUBTEST(live_preempt_gang), - SUBTEST(live_preempt_timeout), - SUBTEST(live_preempt_user), - SUBTEST(live_preempt_smoke), - SUBTEST(live_virtual_engine), - SUBTEST(live_virtual_mask), - SUBTEST(live_virtual_preserved), - SUBTEST(live_virtual_slice), - SUBTEST(live_virtual_bond), - SUBTEST(live_virtual_reset), - }; - - if (!HAS_EXECLISTS(i915)) - return 0; - - if (intel_gt_is_wedged(&i915->gt)) - return 0; - - return intel_gt_live_subtests(tests, &i915->gt); -} - static int emit_semaphore_signal(struct intel_context *ce, void *slot) { const u32 offset = @@ -4775,9 +139,10 @@ static int live_lrc_layout(void *arg) * match the layout saved by HW. */ - lrc = kmalloc(PAGE_SIZE, GFP_KERNEL); + lrc = (u32 *)__get_free_page(GFP_KERNEL); /* requires page alignment */ if (!lrc) return -ENOMEM; + GEM_BUG_ON(offset_in_page(lrc)); err = 0; for_each_engine(engine, gt, id) { @@ -4794,15 +159,12 @@ static int live_lrc_layout(void *arg) } hw += LRC_STATE_OFFSET / sizeof(*hw); - execlists_init_reg_state(memset(lrc, POISON_INUSE, PAGE_SIZE), - engine->kernel_context, - engine, - engine->kernel_context->ring, - true); + __lrc_init_regs(memset(lrc, POISON_INUSE, PAGE_SIZE), + engine->kernel_context, engine, true); dw = 0; do { - u32 lri = hw[dw]; + u32 lri = READ_ONCE(hw[dw]); if (lri == 0) { dw++; @@ -4835,9 +197,11 @@ static int live_lrc_layout(void *arg) dw++; while (lri) { - if (hw[dw] != lrc[dw]) { + u32 offset = READ_ONCE(hw[dw]); + + if (offset != lrc[dw]) { pr_err("%s: Different registers found at dword %d, expected %x, found %x\n", - engine->name, dw, hw[dw], lrc[dw]); + engine->name, dw, offset, lrc[dw]); err = -EINVAL; break; } @@ -4849,7 +213,7 @@ static int live_lrc_layout(void *arg) dw += 2; lri -= 2; } - } while ((lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); + } while (!err && (lrc[dw] & ~BIT(0)) != MI_BATCH_BUFFER_END); if (err) { pr_info("%s: HW register image:\n", engine->name); @@ -4864,7 +228,7 @@ static int live_lrc_layout(void *arg) break; } - kfree(lrc); + free_page((unsigned long)lrc); return err; } @@ -5249,6 +613,10 @@ static int __live_lrc_gpr(struct intel_engine_cs *engine, err = emit_semaphore_signal(engine->kernel_context, slot); if (err) goto err_rq; + + err = wait_for_submit(engine, rq, HZ / 2); + if (err) + goto err_rq; } else { slot[0] = 1; wmb(); @@ -6242,16 +1610,17 @@ static void garbage_reset(struct intel_engine_cs *engine, const unsigned int bit = I915_RESET_ENGINE + engine->id; unsigned long *lock = 
&engine->gt->reset.flags; - if (test_and_set_bit(bit, lock)) - return; - - tasklet_disable(&engine->execlists.tasklet); + local_bh_disable(); + if (!test_and_set_bit(bit, lock)) { + tasklet_disable(&engine->execlists.tasklet); - if (!rq->fence.error) - intel_engine_reset(engine, NULL); + if (!rq->fence.error) + __intel_engine_reset_bh(engine, NULL); - tasklet_enable(&engine->execlists.tasklet); - clear_and_wake_up_bit(bit, lock); + tasklet_enable(&engine->execlists.tasklet); + clear_and_wake_up_bit(bit, lock); + } + local_bh_enable(); } static struct i915_request *garbage(struct intel_context *ce, diff --git a/drivers/gpu/drm/i915/gt/selftest_mocs.c b/drivers/gpu/drm/i915/gt/selftest_mocs.c index b25eba50c88e..cf373c72359e 100644 --- a/drivers/gpu/drm/i915/gt/selftest_mocs.c +++ b/drivers/gpu/drm/i915/gt/selftest_mocs.c @@ -5,6 +5,7 @@ */ #include "gt/intel_engine_pm.h" +#include "gt/intel_gpu_commands.h" #include "i915_selftest.h" #include "gem/selftests/mock_context.h" @@ -56,33 +57,6 @@ static int request_add_spin(struct i915_request *rq, struct igt_spinner *spin) return err; } -static struct i915_vma *create_scratch(struct intel_gt *gt) -{ - struct drm_i915_gem_object *obj; - struct i915_vma *vma; - int err; - - obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - i915_gem_object_set_cache_coherency(obj, I915_CACHING_CACHED); - - vma = i915_vma_instance(obj, >->ggtt->vm, NULL); - if (IS_ERR(vma)) { - i915_gem_object_put(obj); - return vma; - } - - err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); - if (err) { - i915_gem_object_put(obj); - return ERR_PTR(err); - } - - return vma; -} - static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt) { struct drm_i915_mocs_table table; @@ -101,7 +75,7 @@ static int live_mocs_init(struct live_mocs *arg, struct intel_gt *gt) if (flags & (HAS_GLOBAL_MOCS | HAS_ENGINE_MOCS)) arg->mocs = table; - arg->scratch = create_scratch(gt); + arg->scratch = __vm_create_scratch_for_read(>->ggtt->vm, PAGE_SIZE); if (IS_ERR(arg->scratch)) return PTR_ERR(arg->scratch); @@ -125,7 +99,7 @@ static void live_mocs_fini(struct live_mocs *arg) static int read_regs(struct i915_request *rq, u32 addr, unsigned int count, - uint32_t *offset) + u32 *offset) { unsigned int i; u32 *cs; @@ -153,7 +127,7 @@ static int read_regs(struct i915_request *rq, static int read_mocs_table(struct i915_request *rq, const struct drm_i915_mocs_table *table, - uint32_t *offset) + u32 *offset) { u32 addr; @@ -167,7 +141,7 @@ static int read_mocs_table(struct i915_request *rq, static int read_l3cc_table(struct i915_request *rq, const struct drm_i915_mocs_table *table, - uint32_t *offset) + u32 *offset) { u32 addr = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); @@ -176,7 +150,7 @@ static int read_l3cc_table(struct i915_request *rq, static int check_mocs_table(struct intel_engine_cs *engine, const struct drm_i915_mocs_table *table, - uint32_t **vaddr) + u32 **vaddr) { unsigned int i; u32 expect; @@ -205,7 +179,7 @@ static bool mcr_range(struct drm_i915_private *i915, u32 offset) static int check_l3cc_table(struct intel_engine_cs *engine, const struct drm_i915_mocs_table *table, - uint32_t **vaddr) + u32 **vaddr) { /* Can we read the MCR range 0xb00 directly? See intel_workarounds! 
*/ u32 reg = i915_mmio_reg_offset(GEN9_LNCFCMOCS(0)); @@ -361,29 +335,34 @@ static int active_engine_reset(struct intel_context *ce, static int __live_mocs_reset(struct live_mocs *mocs, struct intel_context *ce) { + struct intel_gt *gt = ce->engine->gt; int err; - err = intel_engine_reset(ce->engine, "mocs"); - if (err) - return err; + if (intel_has_reset_engine(gt)) { + err = intel_engine_reset(ce->engine, "mocs"); + if (err) + return err; - err = check_mocs_engine(mocs, ce); - if (err) - return err; + err = check_mocs_engine(mocs, ce); + if (err) + return err; - err = active_engine_reset(ce, "mocs"); - if (err) - return err; + err = active_engine_reset(ce, "mocs"); + if (err) + return err; - err = check_mocs_engine(mocs, ce); - if (err) - return err; + err = check_mocs_engine(mocs, ce); + if (err) + return err; + } - intel_gt_reset(ce->engine->gt, ce->engine->mask, "mocs"); + if (intel_has_gpu_reset(gt)) { + intel_gt_reset(gt, ce->engine->mask, "mocs"); - err = check_mocs_engine(mocs, ce); - if (err) - return err; + err = check_mocs_engine(mocs, ce); + if (err) + return err; + } return 0; } @@ -398,9 +377,6 @@ static int live_mocs_reset(void *arg) /* Check the mocs setup is retained over per-engine and global resets */ - if (!intel_has_reset_engine(gt)) - return 0; - err = live_mocs_init(&mocs, gt); if (err) return err; diff --git a/drivers/gpu/drm/i915/gt/selftest_rc6.c b/drivers/gpu/drm/i915/gt/selftest_rc6.c index 64ef5ee5decf..61abc0556601 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rc6.c +++ b/drivers/gpu/drm/i915/gt/selftest_rc6.c @@ -6,6 +6,7 @@ #include "intel_context.h" #include "intel_engine_pm.h" +#include "intel_gpu_commands.h" #include "intel_gt_requests.h" #include "intel_ring.h" #include "selftest_rc6.h" diff --git a/drivers/gpu/drm/i915/gt/selftest_reset.c b/drivers/gpu/drm/i915/gt/selftest_reset.c index ef5aeebbeeb0..b7befcfbdcde 100644 --- a/drivers/gpu/drm/i915/gt/selftest_reset.c +++ b/drivers/gpu/drm/i915/gt/selftest_reset.c @@ -9,6 +9,7 @@ #include "i915_memcpy.h" #include "i915_selftest.h" +#include "intel_gpu_commands.h" #include "selftests/igt_reset.h" #include "selftests/igt_atomic.h" #include "selftests/igt_spinner.h" @@ -95,10 +96,10 @@ __igt_reset_stolen(struct intel_gt *gt, if (!__drm_mm_interval_first(>->i915->mm.stolen, page << PAGE_SHIFT, ((page + 1) << PAGE_SHIFT) - 1)) - memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32)); + memset_io(s, STACK_MAGIC, PAGE_SIZE); - in = s; - if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE)) + in = (void __force *)s; + if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE)) in = tmp; crc[page] = crc32_le(0, in, PAGE_SIZE); @@ -133,8 +134,8 @@ __igt_reset_stolen(struct intel_gt *gt, ggtt->error_capture.start, PAGE_SIZE); - in = s; - if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE)) + in = (void __force *)s; + if (i915_memcpy_from_wc(tmp, in, PAGE_SIZE)) in = tmp; x = crc32_le(0, in, PAGE_SIZE); @@ -326,11 +327,16 @@ static int igt_atomic_engine_reset(void *arg) for (p = igt_atomic_phases; p->name; p++) { GEM_TRACE("intel_engine_reset(%s) under %s\n", engine->name, p->name); + if (strcmp(p->name, "softirq")) + local_bh_disable(); p->critical_section_begin(); - err = intel_engine_reset(engine, NULL); + err = __intel_engine_reset_bh(engine, NULL); p->critical_section_end(); + if (strcmp(p->name, "softirq")) + local_bh_enable(); + if (err) { pr_err("intel_engine_reset(%s) failed under %s\n", engine->name, p->name); @@ -340,6 +346,7 @@ static int igt_atomic_engine_reset(void *arg) intel_engine_pm_put(engine); 
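The igt_atomic_engine_reset() hunk above switches the selftest to __intel_engine_reset_bh() and brackets it with local_bh_disable()/local_bh_enable() for every phase except "softirq", presumably because that phase already runs its critical section with bottom halves disabled. The following is a minimal editorial sketch of that pattern, not part of the patch; it assumes the i915-internal __intel_engine_reset_bh() helper introduced by this series and lives inside the driver where struct intel_engine_cs is visible:

#include <linux/bottom_half.h>

/*
 * Reset an engine from (simulated) atomic context: keep softirqs, and
 * therefore the execlists submission tasklet, from running on this CPU
 * while the reset is in flight.
 */
static int reset_engine_under_bh(struct intel_engine_cs *engine)
{
	int err;

	local_bh_disable();	/* defer softirq/tasklet processing */
	err = __intel_engine_reset_bh(engine, NULL);
	local_bh_enable();	/* resume softirqs, running any we deferred */

	return err;
}
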
tasklet_enable(&engine->execlists.tasklet); + tasklet_hi_schedule(&engine->execlists.tasklet); if (err) break; } diff --git a/drivers/gpu/drm/i915/gt/selftest_rps.c b/drivers/gpu/drm/i915/gt/selftest_rps.c index aa5675ecb5cc..967641fee42a 100644 --- a/drivers/gpu/drm/i915/gt/selftest_rps.c +++ b/drivers/gpu/drm/i915/gt/selftest_rps.c @@ -185,7 +185,10 @@ static u8 rps_set_check(struct intel_rps *rps, u8 freq) { mutex_lock(&rps->lock); GEM_BUG_ON(!intel_rps_is_active(rps)); - intel_rps_set(rps, freq); + if (wait_for(!intel_rps_set(rps, freq), 50)) { + mutex_unlock(&rps->lock); + return 0; + } GEM_BUG_ON(rps->last_freq != freq); mutex_unlock(&rps->lock); diff --git a/drivers/gpu/drm/i915/gt/selftest_timeline.c b/drivers/gpu/drm/i915/gt/selftest_timeline.c index 2edf2b15885f..6f3a3687ef0f 100644 --- a/drivers/gpu/drm/i915/gt/selftest_timeline.c +++ b/drivers/gpu/drm/i915/gt/selftest_timeline.c @@ -9,6 +9,7 @@ #include "intel_context.h" #include "intel_engine_heartbeat.h" #include "intel_engine_pm.h" +#include "intel_gpu_commands.h" #include "intel_gt.h" #include "intel_gt_requests.h" #include "intel_ring.h" @@ -1090,12 +1091,6 @@ static int live_hwsp_read(void *arg) } count++; - if (8 * watcher[1].rq->ring->emit > - 3 * watcher[1].rq->ring->size) { - i915_request_put(rq); - break; - } - /* Flush the timeline before manually wrapping again */ if (i915_request_wait(rq, I915_WAIT_INTERRUPTIBLE, @@ -1104,9 +1099,14 @@ static int live_hwsp_read(void *arg) i915_request_put(rq); goto out; } - retire_requests(tl); i915_request_put(rq); + + /* Single requests are limited to half a ring at most */ + if (8 * watcher[1].rq->ring->emit > + 3 * watcher[1].rq->ring->size) + break; + } while (!__igt_timeout(end_time, NULL)); WRITE_ONCE(*(u32 *)tl->hwsp_seqno, 0xdeadbeef); diff --git a/drivers/gpu/drm/i915/gt/selftest_workarounds.c b/drivers/gpu/drm/i915/gt/selftest_workarounds.c index 61a0532d0f3d..2070b91cb607 100644 --- a/drivers/gpu/drm/i915/gt/selftest_workarounds.c +++ b/drivers/gpu/drm/i915/gt/selftest_workarounds.c @@ -95,8 +95,9 @@ reference_lists_fini(struct intel_gt *gt, struct wa_lists *lists) } static struct drm_i915_gem_object * -read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) +read_nonprivs(struct intel_context *ce) { + struct intel_engine_cs *engine = ce->engine; const u32 base = engine->mmio_base; struct drm_i915_gem_object *result; struct i915_request *rq; @@ -130,7 +131,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) if (err) goto err_obj; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_pin; @@ -145,7 +146,7 @@ read_nonprivs(struct i915_gem_context *ctx, struct intel_engine_cs *engine) goto err_req; srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT; - if (INTEL_GEN(ctx->i915) >= 8) + if (INTEL_GEN(engine->i915) >= 8) srm++; cs = intel_ring_begin(rq, 4 * RING_MAX_NONPRIV_SLOTS); @@ -200,16 +201,16 @@ print_results(const struct intel_engine_cs *engine, const u32 *results) } } -static int check_whitelist(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static int check_whitelist(struct intel_context *ce) { + struct intel_engine_cs *engine = ce->engine; struct drm_i915_gem_object *results; struct intel_wedge_me wedge; u32 *vaddr; int err; int i; - results = read_nonprivs(ctx, engine); + results = read_nonprivs(ce); if (IS_ERR(results)) return PTR_ERR(results); @@ -293,8 +294,7 @@ static int check_whitelist_across_reset(struct 
intel_engine_cs *engine, int (*reset)(struct intel_engine_cs *), const char *name) { - struct drm_i915_private *i915 = engine->i915; - struct i915_gem_context *ctx, *tmp; + struct intel_context *ce, *tmp; struct igt_spinner spin; intel_wakeref_t wakeref; int err; @@ -302,15 +302,15 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, pr_info("Checking %d whitelisted registers on %s (RING_NONPRIV) [%s]\n", engine->whitelist.count, engine->name, name); - ctx = kernel_context(i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return PTR_ERR(ce); err = igt_spinner_init(&spin, engine->gt); if (err) goto out_ctx; - err = check_whitelist(ctx, engine); + err = check_whitelist(ce); if (err) { pr_err("Invalid whitelist *before* %s reset!\n", name); goto out_spin; @@ -330,22 +330,22 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, goto out_spin; } - err = check_whitelist(ctx, engine); + err = check_whitelist(ce); if (err) { pr_err("Whitelist not preserved in context across %s reset!\n", name); goto out_spin; } - tmp = kernel_context(i915); + tmp = intel_context_create(engine); if (IS_ERR(tmp)) { err = PTR_ERR(tmp); goto out_spin; } - kernel_context_close(ctx); - ctx = tmp; + intel_context_put(ce); + ce = tmp; - err = check_whitelist(ctx, engine); + err = check_whitelist(ce); if (err) { pr_err("Invalid whitelist *after* %s reset in fresh context!\n", name); @@ -355,7 +355,7 @@ static int check_whitelist_across_reset(struct intel_engine_cs *engine, out_spin: igt_spinner_fini(&spin); out_ctx: - kernel_context_close(ctx); + intel_context_put(ce); return err; } @@ -486,10 +486,11 @@ static int check_dirty_whitelist(struct intel_context *ce) struct intel_engine_cs *engine = ce->engine; struct i915_vma *scratch; struct i915_vma *batch; - int err = 0, i, v; + int err = 0, i, v, sz; u32 *cs, *results; - scratch = create_scratch(ce->vm, 2 * ARRAY_SIZE(values) + 1); + sz = (2 * ARRAY_SIZE(values) + 1) * sizeof(u32); + scratch = __vm_create_scratch_for_read(ce->vm, sz); if (IS_ERR(scratch)) return PTR_ERR(scratch); @@ -786,15 +787,15 @@ out: return err; } -static int read_whitelisted_registers(struct i915_gem_context *ctx, - struct intel_engine_cs *engine, +static int read_whitelisted_registers(struct intel_context *ce, struct i915_vma *results) { + struct intel_engine_cs *engine = ce->engine; struct i915_request *rq; int i, err = 0; u32 srm, *cs; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) return PTR_ERR(rq); @@ -807,7 +808,7 @@ static int read_whitelisted_registers(struct i915_gem_context *ctx, goto err_req; srm = MI_STORE_REGISTER_MEM; - if (INTEL_GEN(ctx->i915) >= 8) + if (INTEL_GEN(engine->i915) >= 8) srm++; cs = intel_ring_begin(rq, 4 * engine->whitelist.count); @@ -834,18 +835,15 @@ err_req: return request_add_sync(rq, err); } -static int scrub_whitelisted_registers(struct i915_gem_context *ctx, - struct intel_engine_cs *engine) +static int scrub_whitelisted_registers(struct intel_context *ce) { - struct i915_address_space *vm; + struct intel_engine_cs *engine = ce->engine; struct i915_request *rq; struct i915_vma *batch; int i, err = 0; u32 *cs; - vm = i915_gem_context_get_vm_rcu(ctx); - batch = create_batch(vm); - i915_vm_put(vm); + batch = create_batch(ce->vm); if (IS_ERR(batch)) return PTR_ERR(batch); @@ -873,7 +871,7 @@ static int scrub_whitelisted_registers(struct i915_gem_context *ctx, i915_gem_object_flush_map(batch->obj); 
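The selftest_workarounds.c hunks in this region drop the i915_gem_context/igt_request_alloc() plumbing in favour of bare intel_context objects. As an illustrative sketch only (not part of the patch), the resulting request flow looks roughly like this, using only helpers that already appear in the diff; the function name and the HZ timeout are the editor's choice:

static int submit_nop_request(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct i915_request *rq;
	int err = 0;

	ce = intel_context_create(engine);	/* per-engine context, no GEM context needed */
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	rq = intel_context_create_request(ce);	/* pins ce and allocates a request on it */
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out;
	}

	i915_request_get(rq);
	i915_request_add(rq);			/* submit; ownership passes to the backend */
	if (i915_request_wait(rq, 0, HZ) < 0)	/* wait up to a second for completion */
		err = -ETIME;
	i915_request_put(rq);
out:
	intel_context_put(ce);
	return err;
}
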
intel_gt_chipset_flush(engine->gt); - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); if (IS_ERR(rq)) { err = PTR_ERR(rq); goto err_unpin; @@ -1016,7 +1014,6 @@ static int live_isolated_whitelist(void *arg) { struct intel_gt *gt = arg; struct { - struct i915_gem_context *ctx; struct i915_vma *scratch[2]; } client[2] = {}; struct intel_engine_cs *engine; @@ -1032,61 +1029,57 @@ static int live_isolated_whitelist(void *arg) return 0; for (i = 0; i < ARRAY_SIZE(client); i++) { - struct i915_address_space *vm; - struct i915_gem_context *c; - - c = kernel_context(gt->i915); - if (IS_ERR(c)) { - err = PTR_ERR(c); - goto err; - } - - vm = i915_gem_context_get_vm_rcu(c); - - client[i].scratch[0] = create_scratch(vm, 1024); + client[i].scratch[0] = + __vm_create_scratch_for_read(gt->vm, 4096); if (IS_ERR(client[i].scratch[0])) { err = PTR_ERR(client[i].scratch[0]); - i915_vm_put(vm); - kernel_context_close(c); goto err; } - client[i].scratch[1] = create_scratch(vm, 1024); + client[i].scratch[1] = + __vm_create_scratch_for_read(gt->vm, 4096); if (IS_ERR(client[i].scratch[1])) { err = PTR_ERR(client[i].scratch[1]); i915_vma_unpin_and_release(&client[i].scratch[0], 0); - i915_vm_put(vm); - kernel_context_close(c); goto err; } - - client[i].ctx = c; - i915_vm_put(vm); } for_each_engine(engine, gt, id) { + struct intel_context *ce[2]; + if (!engine->kernel_context->vm) continue; if (!whitelist_writable_count(engine)) continue; + ce[0] = intel_context_create(engine); + if (IS_ERR(ce[0])) { + err = PTR_ERR(ce[0]); + break; + } + ce[1] = intel_context_create(engine); + if (IS_ERR(ce[1])) { + err = PTR_ERR(ce[1]); + intel_context_put(ce[0]); + break; + } + /* Read default values */ - err = read_whitelisted_registers(client[0].ctx, engine, - client[0].scratch[0]); + err = read_whitelisted_registers(ce[0], client[0].scratch[0]); if (err) - goto err; + goto err_ce; /* Try to overwrite registers (should only affect ctx0) */ - err = scrub_whitelisted_registers(client[0].ctx, engine); + err = scrub_whitelisted_registers(ce[0]); if (err) - goto err; + goto err_ce; /* Read values from ctx1, we expect these to be defaults */ - err = read_whitelisted_registers(client[1].ctx, engine, - client[1].scratch[0]); + err = read_whitelisted_registers(ce[1], client[1].scratch[0]); if (err) - goto err; + goto err_ce; /* Verify that both reads return the same default values */ err = check_whitelisted_registers(engine, @@ -1094,31 +1087,29 @@ static int live_isolated_whitelist(void *arg) client[1].scratch[0], result_eq); if (err) - goto err; + goto err_ce; /* Read back the updated values in ctx0 */ - err = read_whitelisted_registers(client[0].ctx, engine, - client[0].scratch[1]); + err = read_whitelisted_registers(ce[0], client[0].scratch[1]); if (err) - goto err; + goto err_ce; /* User should be granted privilege to overwhite regs */ err = check_whitelisted_registers(engine, client[0].scratch[0], client[0].scratch[1], result_neq); +err_ce: + intel_context_put(ce[1]); + intel_context_put(ce[0]); if (err) - goto err; + break; } err: for (i = 0; i < ARRAY_SIZE(client); i++) { - if (!client[i].ctx) - break; - i915_vma_unpin_and_release(&client[i].scratch[1], 0); i915_vma_unpin_and_release(&client[i].scratch[0], 0); - kernel_context_close(client[i].ctx); } if (igt_flush_test(gt->i915)) @@ -1128,18 +1119,21 @@ err: } static bool -verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, +verify_wa_lists(struct intel_gt *gt, struct wa_lists *lists, const char *str) { - struct drm_i915_private 
*i915 = ctx->i915; - struct i915_gem_engines_iter it; - struct intel_context *ce; + struct intel_engine_cs *engine; + enum intel_engine_id id; bool ok = true; - ok &= wa_list_verify(&i915->uncore, &lists->gt_wa_list, str); + ok &= wa_list_verify(gt->uncore, &lists->gt_wa_list, str); + + for_each_engine(engine, gt, id) { + struct intel_context *ce; - for_each_gem_engine(ce, i915_gem_context_engines(ctx), it) { - enum intel_engine_id id = ce->engine->id; + ce = intel_context_create(engine); + if (IS_ERR(ce)) + return false; ok &= engine_wa_list_verify(ce, &lists->engine[id].wa_list, @@ -1148,6 +1142,8 @@ verify_wa_lists(struct i915_gem_context *ctx, struct wa_lists *lists, ok &= engine_wa_list_verify(ce, &lists->engine[id].ctx_wa_list, str) == 0; + + intel_context_put(ce); } return ok; @@ -1157,7 +1153,6 @@ static int live_gpu_reset_workarounds(void *arg) { struct intel_gt *gt = arg; - struct i915_gem_context *ctx; intel_wakeref_t wakeref; struct wa_lists lists; bool ok; @@ -1165,12 +1160,6 @@ live_gpu_reset_workarounds(void *arg) if (!intel_has_gpu_reset(gt)) return 0; - ctx = kernel_context(gt->i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - - i915_gem_context_lock_engines(ctx); - pr_info("Verifying after GPU reset...\n"); igt_global_reset_lock(gt); @@ -1178,17 +1167,15 @@ live_gpu_reset_workarounds(void *arg) reference_lists_init(gt, &lists); - ok = verify_wa_lists(ctx, &lists, "before reset"); + ok = verify_wa_lists(gt, &lists, "before reset"); if (!ok) goto out; intel_gt_reset(gt, ALL_ENGINES, "live_workarounds"); - ok = verify_wa_lists(ctx, &lists, "after reset"); + ok = verify_wa_lists(gt, &lists, "after reset"); out: - i915_gem_context_unlock_engines(ctx); - kernel_context_close(ctx); reference_lists_fini(gt, &lists); intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); @@ -1200,8 +1187,8 @@ static int live_engine_reset_workarounds(void *arg) { struct intel_gt *gt = arg; - struct i915_gem_engines_iter it; - struct i915_gem_context *ctx; + struct intel_engine_cs *engine; + enum intel_engine_id id; struct intel_context *ce; struct igt_spinner spin; struct i915_request *rq; @@ -1212,30 +1199,30 @@ live_engine_reset_workarounds(void *arg) if (!intel_has_reset_engine(gt)) return 0; - ctx = kernel_context(gt->i915); - if (IS_ERR(ctx)) - return PTR_ERR(ctx); - igt_global_reset_lock(gt); wakeref = intel_runtime_pm_get(gt->uncore->rpm); reference_lists_init(gt, &lists); - for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { - struct intel_engine_cs *engine = ce->engine; + for_each_engine(engine, gt, id) { bool ok; pr_info("Verifying after %s reset...\n", engine->name); + ce = intel_context_create(engine); + if (IS_ERR(ce)) { + ret = PTR_ERR(ce); + break; + } - ok = verify_wa_lists(ctx, &lists, "before reset"); + ok = verify_wa_lists(gt, &lists, "before reset"); if (!ok) { ret = -ESRCH; goto err; } - intel_engine_reset(engine, "live_workarounds"); + intel_engine_reset(engine, "live_workarounds:idle"); - ok = verify_wa_lists(ctx, &lists, "after idle reset"); + ok = verify_wa_lists(gt, &lists, "after idle reset"); if (!ok) { ret = -ESRCH; goto err; @@ -1259,23 +1246,26 @@ live_engine_reset_workarounds(void *arg) goto err; } - intel_engine_reset(engine, "live_workarounds"); + intel_engine_reset(engine, "live_workarounds:active"); igt_spinner_end(&spin); igt_spinner_fini(&spin); - ok = verify_wa_lists(ctx, &lists, "after busy reset"); + ok = verify_wa_lists(gt, &lists, "after busy reset"); if (!ok) { ret = -ESRCH; goto err; } - } + err: - 
i915_gem_context_unlock_engines(ctx); + intel_context_put(ce); + if (ret) + break; + } + reference_lists_fini(gt, &lists); intel_runtime_pm_put(gt->uncore->rpm, wakeref); igt_global_reset_unlock(gt); - kernel_context_close(ctx); igt_flush_test(gt->i915); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c index 2a343a977987..4545e90e3bf1 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c @@ -579,20 +579,8 @@ int intel_guc_reset_engine(struct intel_guc *guc, */ int intel_guc_resume(struct intel_guc *guc) { - u32 action[] = { - INTEL_GUC_ACTION_EXIT_S_STATE, - GUC_POWER_D0, - }; - - /* - * If GuC communication is enabled but submission is not supported, - * we do not need to resume the GuC but we do need to enable the - * GuC communication on resume (above). - */ - if (!intel_guc_submission_is_used(guc) || !intel_guc_is_ready(guc)) - return 0; - - return intel_guc_send(guc, action, ARRAY_SIZE(action)); + /* XXX: to be implemented with submission interface rework */ + return 0; } /** diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.h b/drivers/gpu/drm/i915/gt/uc/intel_guc.h index e84ab67b317d..bc2ba7d0626c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.h @@ -47,13 +47,6 @@ struct intel_guc { struct i915_vma *stage_desc_pool; void *stage_desc_pool_vaddr; - struct i915_vma *workqueue; - void *workqueue_vaddr; - spinlock_t wq_lock; - - struct i915_vma *proc_desc; - void *proc_desc_vaddr; - /* Control params for fw initialization */ u32 params[GUC_CTL_MAX_DWORDS]; diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c index 5212ff844292..17526717368c 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c @@ -4,6 +4,7 @@ */ #include "gt/intel_gt.h" +#include "gt/intel_lrc.h" #include "intel_guc_ads.h" #include "intel_uc.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c index f9d0907ea1a5..2270d6b3b272 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fw.c @@ -76,7 +76,6 @@ static inline bool guc_ready(struct intel_uncore *uncore, u32 *status) static int guc_wait_ucode(struct intel_uncore *uncore) { - struct drm_device *drm = &uncore->i915->drm; u32 status; int ret; @@ -89,11 +88,11 @@ static int guc_wait_ucode(struct intel_uncore *uncore) * attempt the ucode load again if this happens.) */ ret = wait_for(guc_ready(uncore, &status), 100); - DRM_DEBUG_DRIVER("GuC status %#x\n", status); - if (ret) { - drm_err(drm, "GuC load failed: status = 0x%08X\n", status); - drm_err(drm, "GuC load failed: status: Reset = %d, " + struct drm_device *drm = &uncore->i915->drm; + + drm_dbg(drm, "GuC load failed: status = 0x%08X\n", status); + drm_dbg(drm, "GuC load failed: status: Reset = %d, " "BootROM = 0x%02X, UKernel = 0x%02X, " "MIA = 0x%02X, Auth = 0x%02X\n", REG_FIELD_GET(GS_MIA_IN_RESET, status), @@ -103,12 +102,12 @@ static int guc_wait_ucode(struct intel_uncore *uncore) REG_FIELD_GET(GS_AUTH_STATUS_MASK, status)); if ((status & GS_BOOTROM_MASK) == GS_BOOTROM_RSA_FAILED) { - drm_err(drm, "GuC firmware signature verification failed\n"); + drm_dbg(drm, "GuC firmware signature verification failed\n"); ret = -ENOEXEC; } if ((status & GS_UKERNEL_MASK) == GS_UKERNEL_EXCEPTION) { - drm_err(drm, "GuC firmware exception. 
EIP: %#x\n", + drm_dbg(drm, "GuC firmware exception. EIP: %#x\n", intel_uncore_read(uncore, SOFT_SCRATCH(13))); ret = -ENXIO; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c index fdfeb4b9b0f5..23dc0aeaa0ab 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c @@ -6,11 +6,14 @@ #include <linux/circ_buf.h> #include "gem/i915_gem_context.h" +#include "gt/gen8_engine_cs.h" +#include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_gt.h" #include "gt/intel_gt_pm.h" -#include "gt/intel_lrc_reg.h" +#include "gt/intel_lrc.h" +#include "gt/intel_mocs.h" #include "gt/intel_ring.h" #include "intel_guc_submission.h" @@ -54,6 +57,8 @@ * */ +#define GUC_REQUEST_SIZE 64 /* bytes */ + static inline struct i915_priolist *to_priolist(struct rb_node *rb) { return rb_entry(rb, struct i915_priolist, node); @@ -66,58 +71,6 @@ static struct guc_stage_desc *__get_stage_desc(struct intel_guc *guc, u32 id) return &base[id]; } -static int guc_workqueue_create(struct intel_guc *guc) -{ - return intel_guc_allocate_and_map_vma(guc, GUC_WQ_SIZE, &guc->workqueue, - &guc->workqueue_vaddr); -} - -static void guc_workqueue_destroy(struct intel_guc *guc) -{ - i915_vma_unpin_and_release(&guc->workqueue, I915_VMA_RELEASE_MAP); -} - -/* - * Initialise the process descriptor shared with the GuC firmware. - */ -static int guc_proc_desc_create(struct intel_guc *guc) -{ - const u32 size = PAGE_ALIGN(sizeof(struct guc_process_desc)); - - return intel_guc_allocate_and_map_vma(guc, size, &guc->proc_desc, - &guc->proc_desc_vaddr); -} - -static void guc_proc_desc_destroy(struct intel_guc *guc) -{ - i915_vma_unpin_and_release(&guc->proc_desc, I915_VMA_RELEASE_MAP); -} - -static void guc_proc_desc_init(struct intel_guc *guc) -{ - struct guc_process_desc *desc; - - desc = memset(guc->proc_desc_vaddr, 0, sizeof(*desc)); - - /* - * XXX: pDoorbell and WQVBaseAddress are pointers in process address - * space for ring3 clients (set them as in mmap_ioctl) or kernel - * space for kernel clients (map on demand instead? May make debug - * easier to have it mapped). 
- */ - desc->wq_base_addr = 0; - desc->db_base_addr = 0; - - desc->wq_size_bytes = GUC_WQ_SIZE; - desc->wq_status = WQ_STATUS_ACTIVE; - desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; -} - -static void guc_proc_desc_fini(struct intel_guc *guc) -{ - memset(guc->proc_desc_vaddr, 0, sizeof(struct guc_process_desc)); -} - static int guc_stage_desc_pool_create(struct intel_guc *guc) { u32 size = PAGE_ALIGN(sizeof(struct guc_stage_desc) * @@ -153,8 +106,6 @@ static void guc_stage_desc_init(struct intel_guc *guc) desc->stage_id = 0; desc->priority = GUC_CLIENT_PRIORITY_KMD_NORMAL; - desc->process_desc = intel_guc_ggtt_offset(guc, guc->proc_desc); - desc->wq_addr = intel_guc_ggtt_offset(guc, guc->workqueue); desc->wq_size = GUC_WQ_SIZE; } @@ -166,62 +117,9 @@ static void guc_stage_desc_fini(struct intel_guc *guc) memset(desc, 0, sizeof(*desc)); } -/* Construct a Work Item and append it to the GuC's Work Queue */ -static void guc_wq_item_append(struct intel_guc *guc, - u32 target_engine, u32 context_desc, - u32 ring_tail, u32 fence_id) -{ - /* wqi_len is in DWords, and does not include the one-word header */ - const size_t wqi_size = sizeof(struct guc_wq_item); - const u32 wqi_len = wqi_size / sizeof(u32) - 1; - struct guc_process_desc *desc = guc->proc_desc_vaddr; - struct guc_wq_item *wqi; - u32 wq_off; - - lockdep_assert_held(&guc->wq_lock); - - /* For now workqueue item is 4 DWs; workqueue buffer is 2 pages. So we - * should not have the case where structure wqi is across page, neither - * wrapped to the beginning. This simplifies the implementation below. - * - * XXX: if not the case, we need save data to a temp wqi and copy it to - * workqueue buffer dw by dw. - */ - BUILD_BUG_ON(wqi_size != 16); - - /* We expect the WQ to be active if we're appending items to it */ - GEM_BUG_ON(desc->wq_status != WQ_STATUS_ACTIVE); - - /* Free space is guaranteed. 
*/ - wq_off = READ_ONCE(desc->tail); - GEM_BUG_ON(CIRC_SPACE(wq_off, READ_ONCE(desc->head), - GUC_WQ_SIZE) < wqi_size); - GEM_BUG_ON(wq_off & (wqi_size - 1)); - - wqi = guc->workqueue_vaddr + wq_off; - - /* Now fill in the 4-word work queue item */ - wqi->header = WQ_TYPE_INORDER | - (wqi_len << WQ_LEN_SHIFT) | - (target_engine << WQ_TARGET_SHIFT) | - WQ_NO_WCFLUSH_WAIT; - wqi->context_desc = context_desc; - wqi->submit_element_info = ring_tail << WQ_RING_TAIL_SHIFT; - GEM_BUG_ON(ring_tail > WQ_RING_TAIL_MAX); - wqi->fence_id = fence_id; - - /* Make the update visible to GuC */ - WRITE_ONCE(desc->tail, (wq_off + wqi_size) & (GUC_WQ_SIZE - 1)); -} - static void guc_add_request(struct intel_guc *guc, struct i915_request *rq) { - struct intel_engine_cs *engine = rq->engine; - u32 ctx_desc = rq->context->lrc.ccid; - u32 ring_tail = intel_ring_set_tail(rq->ring, rq->tail) / sizeof(u64); - - guc_wq_item_append(guc, engine->guc_id, ctx_desc, - ring_tail, rq->fence.seqno); + /* Leaving stub as this function will be used in future patches */ } /* @@ -244,16 +142,12 @@ static void guc_submit(struct intel_engine_cs *engine, { struct intel_guc *guc = &engine->gt->uc.guc; - spin_lock(&guc->wq_lock); - do { struct i915_request *rq = *out++; flush_ggtt_writes(rq->ring->vma); guc_add_request(guc, rq); } while (out != end); - - spin_unlock(&guc->wq_lock); } static inline int rq_prio(const struct i915_request *rq) @@ -388,17 +282,26 @@ static void guc_reset_prepare(struct intel_engine_cs *engine) __tasklet_disable_sync_once(&execlists->tasklet); } -static void -cancel_port_requests(struct intel_engine_execlists * const execlists) +static void guc_reset_state(struct intel_context *ce, + struct intel_engine_cs *engine, + u32 head, + bool scrub) { - struct i915_request * const *port, *rq; + GEM_BUG_ON(!intel_context_is_pinned(ce)); - /* Note we are only using the inflight and not the pending queue */ + /* + * We want a simple context + ring to execute the breadcrumb update. + * We cannot rely on the context being intact across the GPU hang, + * so clear it and rebuild just what we need for the breadcrumb. + * All pending requests for this context will be zapped, and any + * future request will be after userspace has had the opportunity + * to recreate its own state. + */ + if (scrub) + lrc_init_regs(ce, engine, true); - for (port = execlists->active; (rq = *port); port++) - schedule_out(rq); - execlists->active = - memset(execlists->inflight, 0, sizeof(execlists->inflight)); + /* Rerun the request; its payload has been neutered (if guilty). */ + lrc_update_regs(ce, engine, head); } static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) @@ -409,8 +312,6 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) spin_lock_irqsave(&engine->active.lock, flags); - cancel_port_requests(execlists); - /* Push back any incomplete requests for replay after the reset. */ rq = execlists_unwind_incomplete_requests(execlists); if (!rq) @@ -420,7 +321,7 @@ static void guc_reset_rewind(struct intel_engine_cs *engine, bool stalled) stalled = false; __i915_request_reset(rq, stalled); - intel_lr_context_reset(engine, rq->context, rq->head, stalled); + guc_reset_state(rq->context, engine, rq->head, stalled); out_unlock: spin_unlock_irqrestore(&engine->active.lock, flags); @@ -451,9 +352,6 @@ static void guc_reset_cancel(struct intel_engine_cs *engine) */ spin_lock_irqsave(&engine->active.lock, flags); - /* Cancel the requests on the HW and clear the ELSP tracker. 
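[Editorial note, not part of the patch] The guc_wq_item_append() code removed above relied on the usual power-of-two ring arithmetic: CIRC_SPACE() for the free-space check and a mask with (GUC_WQ_SIZE - 1) to wrap the tail. A standalone sketch of that arithmetic, with illustrative sizes rather than the GuC's real ones:

```c
#include <stdint.h>
#include <stdio.h>

#define RING_SIZE 4096u		/* must be a power of two */
#define ITEM_SIZE 16u		/* one 4-dword work item */

/* Free space in a power-of-two ring, same formula as CIRC_SPACE():
 * 'tail' is where the producer writes, 'head' is where the consumer reads. */
static uint32_t ring_space(uint32_t head, uint32_t tail)
{
	return (head - tail - 1) & (RING_SIZE - 1);
}

int main(void)
{
	uint32_t head = 0, tail = 0;

	/* Append items until the producer would catch up with the consumer. */
	while (ring_space(head, tail) >= ITEM_SIZE) {
		/* ... fill ITEM_SIZE bytes at offset 'tail' ... */
		tail = (tail + ITEM_SIZE) & (RING_SIZE - 1);
	}

	printf("tail advanced to %u, space left %u\n",
	       tail, ring_space(head, tail));
	return 0;
}
```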
*/ - cancel_port_requests(execlists); - /* Mark all executing requests as skipped. */ list_for_each_entry(rq, &engine->active.requests, sched.link) { i915_request_set_error_once(rq, -EIO); @@ -497,12 +395,6 @@ static void guc_reset_finish(struct intel_engine_cs *engine) } /* - * Everything below here is concerned with setup & teardown, and is - * therefore not part of the somewhat time-critical batch-submission - * path of guc_submit() above. - */ - -/* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. */ @@ -522,30 +414,12 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->stage_desc_pool); - ret = guc_workqueue_create(guc); - if (ret) - goto err_pool; - - ret = guc_proc_desc_create(guc); - if (ret) - goto err_workqueue; - - spin_lock_init(&guc->wq_lock); - return 0; - -err_workqueue: - guc_workqueue_destroy(guc); -err_pool: - guc_stage_desc_pool_destroy(guc); - return ret; } void intel_guc_submission_fini(struct intel_guc *guc) { if (guc->stage_desc_pool) { - guc_proc_desc_destroy(guc); - guc_workqueue_destroy(guc); guc_stage_desc_pool_destroy(guc); } } @@ -576,33 +450,186 @@ static void guc_interrupts_release(struct intel_gt *gt) intel_uncore_rmw(uncore, GEN11_VCS_VECS_INTR_ENABLE, 0, dmask); } -static void guc_set_default_submission(struct intel_engine_cs *engine) +static int guc_context_alloc(struct intel_context *ce) +{ + return lrc_alloc(ce, ce->engine); +} + +static int guc_context_pre_pin(struct intel_context *ce, + struct i915_gem_ww_ctx *ww, + void **vaddr) +{ + return lrc_pre_pin(ce, ce->engine, ww, vaddr); +} + +static int guc_context_pin(struct intel_context *ce, void *vaddr) { + return lrc_pin(ce, ce->engine, vaddr); +} + +static const struct intel_context_ops guc_context_ops = { + .alloc = guc_context_alloc, + + .pre_pin = guc_context_pre_pin, + .pin = guc_context_pin, + .unpin = lrc_unpin, + .post_unpin = lrc_post_unpin, + + .enter = intel_context_enter_engine, + .exit = intel_context_exit_engine, + + .reset = lrc_reset, + .destroy = lrc_destroy, +}; + +static int guc_request_alloc(struct i915_request *request) +{ + int ret; + + GEM_BUG_ON(!intel_context_is_pinned(request->context)); + /* - * We inherit a bunch of functions from execlists that we'd like - * to keep using: - * - * engine->submit_request = execlists_submit_request; - * engine->cancel_requests = execlists_cancel_requests; - * engine->schedule = execlists_schedule; + * Flush enough space to reduce the likelihood of waiting after + * we start building the request - in which case we will just + * have to repeat work. + */ + request->reserved_space += GUC_REQUEST_SIZE; + + /* + * Note that after this point, we have committed to using + * this request as it is being used to both track the + * state of engine initialisation and liveness of the + * golden renderstate above. Think twice before you try + * to cancel/unwind this request now. + */ + + /* Unconditionally invalidate GPU caches and TLBs. 
*/ + ret = request->engine->emit_flush(request, EMIT_INVALIDATE); + if (ret) + return ret; + + request->reserved_space -= GUC_REQUEST_SIZE; + return 0; +} + +static inline void queue_request(struct intel_engine_cs *engine, + struct i915_request *rq, + int prio) +{ + GEM_BUG_ON(!list_empty(&rq->sched.link)); + list_add_tail(&rq->sched.link, + i915_sched_lookup_priolist(engine, prio)); + set_bit(I915_FENCE_FLAG_PQUEUE, &rq->fence.flags); +} + +static void guc_submit_request(struct i915_request *rq) +{ + struct intel_engine_cs *engine = rq->engine; + unsigned long flags; + + /* Will be called from irq-context when using foreign fences. */ + spin_lock_irqsave(&engine->active.lock, flags); + + queue_request(engine, rq, rq_prio(rq)); + + GEM_BUG_ON(RB_EMPTY_ROOT(&engine->execlists.queue.rb_root)); + GEM_BUG_ON(list_empty(&rq->sched.link)); + + tasklet_hi_schedule(&engine->execlists.tasklet); + + spin_unlock_irqrestore(&engine->active.lock, flags); +} + +static void sanitize_hwsp(struct intel_engine_cs *engine) +{ + struct intel_timeline *tl; + + list_for_each_entry(tl, &engine->status_page.timelines, engine_link) + intel_timeline_reset_seqno(tl); +} + +static void guc_sanitize(struct intel_engine_cs *engine) +{ + /* + * Poison residual state on resume, in case the suspend didn't! * - * But we need to override the actual submission backend in order - * to talk to the GuC. + * We have to assume that across suspend/resume (or other loss + * of control) that the contents of our pinned buffers has been + * lost, replaced by garbage. Since this doesn't always happen, + * let's poison such state so that we more quickly spot when + * we falsely assume it has been preserved. */ - intel_execlists_set_default_submission(engine); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + memset(engine->status_page.addr, POISON_INUSE, PAGE_SIZE); - engine->execlists.tasklet.func = guc_submission_tasklet; + /* + * The kernel_context HWSP is stored in the status_page. As above, + * that may be lost on resume/initialisation, and so we need to + * reset the value in the HWSP. 
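[Editorial note, not part of the patch] guc_request_alloc() above follows the driver's usual trick of temporarily enlarging request->reserved_space while the request is being built, so the closing emission always fits. A standalone approximation of that reservation idea, with made-up sizes and names:

```c
#include <stdio.h>

#define RING_BYTES   256
#define TRAILER_SIZE  32	/* stand-in for GUC_REQUEST_SIZE / breadcrumb */

struct builder {
	int used;
	int reserved;	/* space that ordinary emits may not touch */
};

/* Ordinary emission: must leave the reservation untouched. */
static int emit(struct builder *b, int bytes)
{
	if (b->used + bytes > RING_BYTES - b->reserved)
		return -1;	/* would eat into the reserved trailer */
	b->used += bytes;
	return 0;
}

int main(void)
{
	struct builder b = { .used = 0, .reserved = TRAILER_SIZE };
	int n = 0;

	/* Grow the reservation while building, as guc_request_alloc() does,
	 * then give it back once the mandatory emission is done. */
	b.reserved += TRAILER_SIZE;
	while (emit(&b, 16) == 0)
		n++;
	b.reserved -= TRAILER_SIZE;

	/* The trailer itself is allowed to consume its own reservation. */
	b.reserved -= TRAILER_SIZE;
	if (emit(&b, TRAILER_SIZE) == 0)
		printf("emitted %d payload chunks + trailer, used %d/%d\n",
		       n, b.used, RING_BYTES);
	return 0;
}
```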
+ */ + sanitize_hwsp(engine); - /* do not use execlists park/unpark */ - engine->park = engine->unpark = NULL; + /* And scrub the dirty cachelines for the HWSP */ + clflush_cache_range(engine->status_page.addr, PAGE_SIZE); +} + +static void setup_hwsp(struct intel_engine_cs *engine) +{ + intel_engine_set_hwsp_writemask(engine, ~0u); /* HWSTAM */ + + ENGINE_WRITE_FW(engine, + RING_HWS_PGA, + i915_ggtt_offset(engine->status_page.vma)); +} + +static void start_engine(struct intel_engine_cs *engine) +{ + ENGINE_WRITE_FW(engine, + RING_MODE_GEN7, + _MASKED_BIT_ENABLE(GEN11_GFX_DISABLE_LEGACY_MODE)); + + ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); + ENGINE_POSTING_READ(engine, RING_MI_MODE); +} + +static int guc_resume(struct intel_engine_cs *engine) +{ + assert_forcewakes_active(engine->uncore, FORCEWAKE_ALL); + + intel_mocs_init_engine(engine); + + intel_breadcrumbs_reset(engine->breadcrumbs); + + setup_hwsp(engine); + start_engine(engine); + + return 0; +} + +static void guc_set_default_submission(struct intel_engine_cs *engine) +{ + engine->submit_request = guc_submit_request; + engine->schedule = i915_schedule; + engine->execlists.tasklet.func = guc_submission_tasklet; engine->reset.prepare = guc_reset_prepare; engine->reset.rewind = guc_reset_rewind; engine->reset.cancel = guc_reset_cancel; engine->reset.finish = guc_reset_finish; - engine->flags &= ~I915_ENGINE_SUPPORTS_STATS; engine->flags |= I915_ENGINE_NEEDS_BREADCRUMB_TASKLET; + engine->flags |= I915_ENGINE_HAS_PREEMPTION; + + /* + * TODO: GuC supports timeslicing and semaphores as well, but they're + * handled by the firmware so some minor tweaks are required before + * enabling. + * + * engine->flags |= I915_ENGINE_HAS_TIMESLICES; + * engine->flags |= I915_ENGINE_HAS_SEMAPHORES; + */ + + engine->emit_bb_start = gen8_emit_bb_start; /* * For the breadcrumb irq to work we need the interrupts to stay @@ -613,35 +640,92 @@ static void guc_set_default_submission(struct intel_engine_cs *engine) GEM_BUG_ON(engine->irq_enable || engine->irq_disable); } -void intel_guc_submission_enable(struct intel_guc *guc) +static void guc_release(struct intel_engine_cs *engine) { - struct intel_gt *gt = guc_to_gt(guc); - struct intel_engine_cs *engine; - enum intel_engine_id id; + engine->sanitize = NULL; /* no longer in control, nothing to sanitize */ + + tasklet_kill(&engine->execlists.tasklet); + + intel_engine_cleanup_common(engine); + lrc_fini_wa_ctx(engine); +} + +static void guc_default_vfuncs(struct intel_engine_cs *engine) +{ + /* Default vfuncs which can be overridden by each engine. 
*/ + + engine->resume = guc_resume; + + engine->cops = &guc_context_ops; + engine->request_alloc = guc_request_alloc; + + engine->emit_flush = gen8_emit_flush_xcs; + engine->emit_init_breadcrumb = gen8_emit_init_breadcrumb; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_xcs; + if (INTEL_GEN(engine->i915) >= 12) { + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_xcs; + engine->emit_flush = gen12_emit_flush_xcs; + } + engine->set_default_submission = guc_set_default_submission; +} + +static void rcs_submission_override(struct intel_engine_cs *engine) +{ + switch (INTEL_GEN(engine->i915)) { + case 12: + engine->emit_flush = gen12_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen12_emit_fini_breadcrumb_rcs; + break; + case 11: + engine->emit_flush = gen11_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen11_emit_fini_breadcrumb_rcs; + break; + default: + engine->emit_flush = gen8_emit_flush_rcs; + engine->emit_fini_breadcrumb = gen8_emit_fini_breadcrumb_rcs; + break; + } +} + +static inline void guc_default_irqs(struct intel_engine_cs *engine) +{ + engine->irq_keep_mask = GT_RENDER_USER_INTERRUPT; +} + +int intel_guc_submission_setup(struct intel_engine_cs *engine) +{ + struct drm_i915_private *i915 = engine->i915; /* - * We're using GuC work items for submitting work through GuC. Since - * we're coalescing multiple requests from a single context into a - * single work item prior to assigning it to execlist_port, we can - * never have more work items than the total number of ports (for all - * engines). The GuC firmware is controlling the HEAD of work queue, - * and it is guaranteed that it will remove the work item from the - * queue before our request is completed. + * The setup relies on several assumptions (e.g. irqs always enabled) + * that are only valid on gen11+ */ - BUILD_BUG_ON(ARRAY_SIZE(engine->execlists.inflight) * - sizeof(struct guc_wq_item) * - I915_NUM_ENGINES > GUC_WQ_SIZE); + GEM_BUG_ON(INTEL_GEN(i915) < 11); + + tasklet_init(&engine->execlists.tasklet, + guc_submission_tasklet, (unsigned long)engine); + + guc_default_vfuncs(engine); + guc_default_irqs(engine); - guc_proc_desc_init(guc); + if (engine->class == RENDER_CLASS) + rcs_submission_override(engine); + + lrc_init_wa_ctx(engine); + + /* Finally, take ownership and responsibility for cleanup! 
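[Editorial note, not part of the patch] The new setup path installs generation-neutral defaults in guc_default_vfuncs() and then lets rcs_submission_override() swap in gen-specific emitters for the render engine. A standalone sketch of that "defaults, then override by version" shape, with hypothetical names:

```c
#include <stdio.h>

struct engine {
	int gen;
	void (*emit_flush)(void);
};

static void flush_gen8(void)  { puts("gen8 flush");  }
static void flush_gen11(void) { puts("gen11 flush"); }
static void flush_gen12(void) { puts("gen12 flush"); }

static void default_vfuncs(struct engine *e)
{
	e->emit_flush = flush_gen8;	/* baseline, like the xcs defaults */
	if (e->gen >= 12)
		e->emit_flush = flush_gen12;
}

static void rcs_override(struct engine *e)
{
	switch (e->gen) {
	case 12: e->emit_flush = flush_gen12; break;
	case 11: e->emit_flush = flush_gen11; break;
	default: e->emit_flush = flush_gen8;  break;
	}
}

int main(void)
{
	struct engine e = { .gen = 11 };

	default_vfuncs(&e);
	rcs_override(&e);	/* render engine gets its own emitters */
	e.emit_flush();
	return 0;
}
```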
*/ + engine->sanitize = guc_sanitize; + engine->release = guc_release; + + return 0; +} + +void intel_guc_submission_enable(struct intel_guc *guc) +{ guc_stage_desc_init(guc); /* Take over from manual control of ELSP (execlists) */ - guc_interrupts_capture(gt); - - for_each_engine(engine, gt, id) { - engine->set_default_submission = guc_set_default_submission; - engine->set_default_submission(engine); - } + guc_interrupts_capture(guc_to_gt(guc)); } void intel_guc_submission_disable(struct intel_guc *guc) @@ -655,7 +739,6 @@ void intel_guc_submission_disable(struct intel_guc *guc) guc_interrupts_release(gt); guc_stage_desc_fini(guc); - guc_proc_desc_fini(guc); } static bool __guc_submission_selected(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h index 4cf9d3e50263..5f7b9e6347d0 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.h @@ -19,6 +19,7 @@ void intel_guc_submission_disable(struct intel_guc *guc); void intel_guc_submission_fini(struct intel_guc *guc); int intel_guc_preempt_work_create(struct intel_guc *guc); void intel_guc_preempt_work_destroy(struct intel_guc *guc); +int intel_guc_submission_setup(struct intel_engine_cs *engine); bool intel_engine_in_guc_submission_mode(const struct intel_engine_cs *engine); static inline bool intel_guc_submission_is_supported(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc.c b/drivers/gpu/drm/i915/gt/uc/intel_uc.c index 4e6070e95fe9..6a0452815c41 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc.c @@ -175,19 +175,15 @@ static void guc_get_mmio_msg(struct intel_guc *guc) static void guc_handle_mmio_msg(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; - /* we need communication to be enabled to reply to GuC */ GEM_BUG_ON(!guc_communication_enabled(guc)); - if (!guc->mmio_msg) - return; - - spin_lock_irq(&i915->irq_lock); - intel_guc_to_host_process_recv_msg(guc, &guc->mmio_msg, 1); - spin_unlock_irq(&i915->irq_lock); - - guc->mmio_msg = 0; + spin_lock_irq(&guc->irq_lock); + if (guc->mmio_msg) { + intel_guc_to_host_process_recv_msg(guc, &guc->mmio_msg, 1); + guc->mmio_msg = 0; + } + spin_unlock_irq(&guc->irq_lock); } static void guc_reset_interrupts(struct intel_guc *guc) @@ -207,7 +203,8 @@ static void guc_disable_interrupts(struct intel_guc *guc) static int guc_enable_communication(struct intel_guc *guc) { - struct drm_i915_private *i915 = guc_to_gt(guc)->i915; + struct intel_gt *gt = guc_to_gt(guc); + struct drm_i915_private *i915 = gt->i915; int ret; GEM_BUG_ON(guc_communication_enabled(guc)); @@ -227,9 +224,9 @@ static int guc_enable_communication(struct intel_guc *guc) guc_enable_interrupts(guc); /* check for CT messages received before we enabled interrupts */ - spin_lock_irq(&i915->irq_lock); + spin_lock_irq(>->irq_lock); intel_guc_ct_event_handler(&guc->ct); - spin_unlock_irq(&i915->irq_lock); + spin_unlock_irq(>->irq_lock); drm_dbg(&i915->drm, "GuC communication enabled\n"); diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index 180c23e2e25e..602f1a0bc587 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -53,6 +53,7 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw, fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl, 9, 0, 0)) \ fw_def(ICELAKE, 0, guc_def(icl, 49, 0, 1), 
huc_def(icl, 9, 0, 0)) \ fw_def(COMETLAKE, 5, guc_def(cml, 49, 0, 1), huc_def(cml, 4, 0, 0)) \ + fw_def(COMETLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ fw_def(COFFEELAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ fw_def(GEMINILAKE, 0, guc_def(glk, 49, 0, 1), huc_def(glk, 4, 0, 0)) \ fw_def(KABYLAKE, 0, guc_def(kbl, 49, 0, 1), huc_def(kbl, 4, 0, 0)) \ diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 16b582cb97ed..3fea967ee817 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -37,6 +37,7 @@ #include <linux/slab.h> #include "i915_drv.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "gvt.h" #include "i915_pvinfo.h" diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index eaba80975f42..0d124ced5f94 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -1651,7 +1651,7 @@ static int edp_psr_imr_iir_write(struct intel_vgpu *vgpu, return 0; } -/** +/* * FixMe: * If guest fills non-priv batch buffer on ApolloLake/Broxton as Mesa i965 did: * 717e7539124d (i965: Use a WC map and memcpy for the batch instead of pwrite.) diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c index afe574d6b3b5..c9589e26af93 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.c +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -35,6 +35,7 @@ #include "i915_drv.h" #include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "gvt.h" #include "trace.h" diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.h b/drivers/gpu/drm/i915/gvt/mmio_context.h index 1421d3a70412..b6b69777af49 100644 --- a/drivers/gpu/drm/i915/gvt/mmio_context.h +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h @@ -39,6 +39,7 @@ #include <linux/types.h> #include "gt/intel_engine_types.h" +#include "gt/intel_lrc_reg.h" #include "i915_reg.h" struct i915_request; diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index aed2ef6466a2..6af5c06caee0 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -37,6 +37,8 @@ #include "gem/i915_gem_pm.h" #include "gt/intel_context.h" +#include "gt/intel_execlists_submission.h" +#include "gt/intel_lrc.h" #include "gt/intel_ring.h" #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c index 10a865f3dc09..ab4382841c6b 100644 --- a/drivers/gpu/drm/i915/i915_active.c +++ b/drivers/gpu/drm/i915/i915_active.c @@ -159,8 +159,7 @@ __active_retire(struct i915_active *ref) GEM_BUG_ON(ref->tree.rb_node != &ref->cache->node); /* Make the cached node available for reuse with any timeline */ - if (IS_ENABLED(CONFIG_64BIT)) - ref->cache->timeline = 0; /* needs cmpxchg(u64) */ + ref->cache->timeline = 0; /* needs cmpxchg(u64) */ } spin_unlock_irqrestore(&ref->tree_lock, flags); @@ -256,7 +255,6 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx) if (cached == idx) return it; -#ifdef CONFIG_64BIT /* for cmpxchg(u64) */ /* * An unclaimed cache [.timeline=0] can only be claimed once. * @@ -267,9 +265,8 @@ static struct active_node *__active_lookup(struct i915_active *ref, u64 idx) * only the winner of that race will cmpxchg return the old * value of 0). 
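[Editorial note, not part of the patch] The i915_active hunk here drops the 32-bit-only fallback and claims the cached node's timeline slot with cmpxchg64(): an unclaimed slot holds 0, and only the caller whose compare-and-swap still sees that 0 wins it. A standalone sketch of that claim-once pattern using C11 atomics:

```c
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t slot;	/* 0 means "unclaimed", as in node->timeline */

/* Returns 1 if we own the slot for idx (claimed now, or already ours). */
static int claim(uint64_t idx)
{
	uint64_t expected = 0;

	if (atomic_load(&slot) == idx)
		return 1;
	return atomic_compare_exchange_strong(&slot, &expected, idx);
}

int main(void)
{
	printf("first claim for 5: %d\n", claim(5));	/* wins: 0 -> 5 */
	printf("second claim for 7: %d\n", claim(7));	/* loses: slot already 5 */
	printf("repeat claim for 5: %d\n", claim(5));	/* already ours */
	return 0;
}
```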
*/ - if (!cached && !cmpxchg(&it->timeline, 0, idx)) + if (!cached && !cmpxchg64(&it->timeline, 0, idx)) return it; -#endif } BUILD_BUG_ON(offsetof(typeof(*it), node)); diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c index 93265951fdbb..82d0f19e86df 100644 --- a/drivers/gpu/drm/i915/i915_cmd_parser.c +++ b/drivers/gpu/drm/i915/i915_cmd_parser.c @@ -26,6 +26,7 @@ */ #include "gt/intel_engine.h" +#include "gt/intel_gpu_commands.h" #include "i915_drv.h" #include "i915_memcpy.h" @@ -1166,7 +1167,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, } } if (IS_ERR(src)) { - unsigned long x, n; + unsigned long x, n, remain; void *ptr; /* @@ -1177,14 +1178,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, * We don't care about copying too much here as we only * validate up to the end of the batch. */ + remain = length; if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ)) - length = round_up(length, + remain = round_up(remain, boot_cpu_data.x86_clflush_size); ptr = dst; x = offset_in_page(offset); - for (n = offset >> PAGE_SHIFT; length; n++) { - int len = min(length, PAGE_SIZE - x); + for (n = offset >> PAGE_SHIFT; remain; n++) { + int len = min(remain, PAGE_SIZE - x); src = kmap_atomic(i915_gem_object_get_page(src_obj, n)); if (needs_clflush) @@ -1193,13 +1195,15 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj, kunmap_atomic(src); ptr += len; - length -= len; + remain -= len; x = 0; } } i915_gem_object_unpin_pages(src_obj); + memset32(dst + length, 0, (dst_obj->base.size - length) / sizeof(u32)); + /* dst_obj is returned with vmap pinned */ return dst; } @@ -1392,11 +1396,6 @@ static unsigned long *alloc_whitelist(u32 batch_length) #define LENGTH_BIAS 2 -static bool shadow_needs_clflush(struct drm_i915_gem_object *obj) -{ - return !(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE); -} - /** * intel_engine_cmd_parser() - parse a batch buffer for privilege violations * @engine: the engine on which the batch is to execute @@ -1538,16 +1537,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine, ret = 0; /* allow execution */ } } - - if (shadow_needs_clflush(shadow->obj)) - drm_clflush_virt_range(batch_end, 8); } - if (shadow_needs_clflush(shadow->obj)) { - void *ptr = page_mask_bits(shadow->obj->mm.mapping); - - drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); - } + i915_gem_object_flush_map(shadow->obj); if (!IS_ERR_OR_NULL(jump_whitelist)) kfree(jump_whitelist); diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a61e9b095896..de8e0e44cfb6 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -45,6 +45,7 @@ #include "i915_debugfs.h" #include "i915_debugfs_params.h" #include "i915_irq.h" +#include "i915_scheduler.h" #include "i915_trace.h" #include "intel_pm.h" #include "intel_sideband.h" @@ -634,27 +635,27 @@ static int i915_frequency_info(struct seq_file *m, void *unused) seq_printf(m, "RPDECLIMIT: 0x%08x\n", rpdeclimit); seq_printf(m, "RPNSWREQ: %dMHz\n", reqf); seq_printf(m, "CAGF: %dMHz\n", cagf); - seq_printf(m, "RP CUR UP EI: %d (%dns)\n", + seq_printf(m, "RP CUR UP EI: %d (%lldns)\n", rpupei, intel_gt_pm_interval_to_ns(&dev_priv->gt, rpupei)); - seq_printf(m, "RP CUR UP: %d (%dun)\n", + seq_printf(m, "RP CUR UP: %d (%lldun)\n", rpcurup, intel_gt_pm_interval_to_ns(&dev_priv->gt, rpcurup)); - seq_printf(m, "RP PREV UP: %d (%dns)\n", + seq_printf(m, "RP PREV UP: %d (%lldns)\n", rpprevup, 
intel_gt_pm_interval_to_ns(&dev_priv->gt, rpprevup)); seq_printf(m, "Up threshold: %d%%\n", rps->power.up_threshold); - seq_printf(m, "RP CUR DOWN EI: %d (%dns)\n", + seq_printf(m, "RP CUR DOWN EI: %d (%lldns)\n", rpdownei, intel_gt_pm_interval_to_ns(&dev_priv->gt, rpdownei)); - seq_printf(m, "RP CUR DOWN: %d (%dns)\n", + seq_printf(m, "RP CUR DOWN: %d (%lldns)\n", rpcurdown, intel_gt_pm_interval_to_ns(&dev_priv->gt, rpcurdown)); - seq_printf(m, "RP PREV DOWN: %d (%dns)\n", + seq_printf(m, "RP PREV DOWN: %d (%lldns)\n", rpprevdown, intel_gt_pm_interval_to_ns(&dev_priv->gt, rpprevdown)); @@ -810,7 +811,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data) intel_gpu_freq(rps, rps->efficient_freq), intel_gpu_freq(rps, rps->boost_freq)); - seq_printf(m, "Wait boosts: %d\n", atomic_read(&rps->boosts)); + seq_printf(m, "Wait boosts: %d\n", READ_ONCE(rps->boosts)); return 0; } @@ -850,24 +851,28 @@ static int i915_runtime_pm_status(struct seq_file *m, void *unused) static int i915_engine_info(struct seq_file *m, void *unused) { - struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct drm_i915_private *i915 = node_to_i915(m->private); struct intel_engine_cs *engine; intel_wakeref_t wakeref; struct drm_printer p; - wakeref = intel_runtime_pm_get(&dev_priv->runtime_pm); + wakeref = intel_runtime_pm_get(&i915->runtime_pm); - seq_printf(m, "GT awake? %s [%d]\n", - yesno(dev_priv->gt.awake), - atomic_read(&dev_priv->gt.wakeref.count)); - seq_printf(m, "CS timestamp frequency: %u Hz\n", - RUNTIME_INFO(dev_priv)->cs_timestamp_frequency_hz); + seq_printf(m, "GT awake? %s [%d], %llums\n", + yesno(i915->gt.awake), + atomic_read(&i915->gt.wakeref.count), + ktime_to_ms(intel_gt_get_awake_time(&i915->gt))); + seq_printf(m, "CS timestamp frequency: %u Hz, %d ns\n", + i915->gt.clock_frequency, + i915->gt.clock_period_ns); p = drm_seq_file_printer(m); - for_each_uabi_engine(engine, dev_priv) + for_each_uabi_engine(engine, i915) intel_engine_dump(engine, &p, "%s\n", engine->name); - intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref); + intel_gt_show_timelines(&i915->gt, &p, i915_request_show_with_schedule); + + intel_runtime_pm_put(&i915->runtime_pm, wakeref); return 0; } @@ -945,7 +950,7 @@ i915_perf_noa_delay_set(void *data, u64 val) * This would lead to infinite waits as we're doing timestamp * difference on the CS with only 32bits. 
*/ - if (i915_cs_timestamp_ns_to_ticks(i915, val) > U32_MAX) + if (intel_gt_ns_to_clock_interval(&i915->gt, val) > U32_MAX) return -EINVAL; atomic64_set(&i915->perf.noa_programming_delay, val); diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 643a899b3b44..f5666b44ea9d 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -410,6 +410,7 @@ static int i915_driver_mmio_probe(struct drm_i915_private *dev_priv) /* Try to make sure MCHBAR is enabled before poking at it */ intel_setup_mchbar(dev_priv); + intel_device_info_runtime_init(dev_priv); ret = intel_gt_init_mmio(&dev_priv->gt); if (ret) @@ -516,8 +517,6 @@ static int i915_driver_hw_probe(struct drm_i915_private *dev_priv) if (i915_inject_probe_failure(dev_priv)) return -ENODEV; - intel_device_info_runtime_init(dev_priv); - if (HAS_PPGTT(dev_priv)) { if (intel_vgpu_active(dev_priv) && !intel_vgpu_has_full_ppgtt(dev_priv)) { diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 266dec627fa2..8376cff5ba86 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -79,9 +79,9 @@ #include "gem/i915_gem_shrinker.h" #include "gem/i915_gem_stolen.h" -#include "gt/intel_lrc.h" #include "gt/intel_engine.h" #include "gt/intel_gt_types.h" +#include "gt/intel_region_lmem.h" #include "gt/intel_workarounds.h" #include "gt/uc/intel_uc.h" @@ -103,7 +103,6 @@ #include "i915_vma.h" #include "i915_irq.h" -#include "intel_region_lmem.h" /* General customization: */ @@ -1170,9 +1169,6 @@ struct drm_i915_private { struct i915_gem_contexts { spinlock_t lock; /* locks list */ struct list_head list; - - struct llist_head free_list; - struct work_struct free_work; } contexts; /* @@ -1570,16 +1566,30 @@ enum { TGL_REVID_D0, }; -extern const struct i915_rev_steppings tgl_uy_revids[]; -extern const struct i915_rev_steppings tgl_revids[]; +#define TGL_UY_REVIDS_SIZE 4 +#define TGL_REVIDS_SIZE 2 + +extern const struct i915_rev_steppings tgl_uy_revids[TGL_UY_REVIDS_SIZE]; +extern const struct i915_rev_steppings tgl_revids[TGL_REVIDS_SIZE]; static inline const struct i915_rev_steppings * tgl_revids_get(struct drm_i915_private *dev_priv) { - if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) - return &tgl_uy_revids[INTEL_REVID(dev_priv)]; - else - return &tgl_revids[INTEL_REVID(dev_priv)]; + u8 revid = INTEL_REVID(dev_priv); + u8 size; + const struct i915_rev_steppings *tgl_revid_tbl; + + if (IS_TGL_U(dev_priv) || IS_TGL_Y(dev_priv)) { + tgl_revid_tbl = tgl_uy_revids; + size = ARRAY_SIZE(tgl_uy_revids); + } else { + tgl_revid_tbl = tgl_revids; + size = ARRAY_SIZE(tgl_revids); + } + + revid = min_t(u8, revid, size - 1); + + return &tgl_revid_tbl[revid]; } #define IS_TGL_DISP_REVID(p, since, until) \ @@ -1589,14 +1599,14 @@ tgl_revids_get(struct drm_i915_private *dev_priv) #define IS_TGL_UY_GT_REVID(p, since, until) \ ((IS_TGL_U(p) || IS_TGL_Y(p)) && \ - tgl_uy_revids[INTEL_REVID(p)].gt_stepping >= (since) && \ - tgl_uy_revids[INTEL_REVID(p)].gt_stepping <= (until)) + tgl_revids_get(p)->gt_stepping >= (since) && \ + tgl_revids_get(p)->gt_stepping <= (until)) #define IS_TGL_GT_REVID(p, since, until) \ (IS_TIGERLAKE(p) && \ !(IS_TGL_U(p) || IS_TGL_Y(p)) && \ - tgl_revids[INTEL_REVID(p)].gt_stepping >= (since) && \ - tgl_revids[INTEL_REVID(p)].gt_stepping <= (until)) + tgl_revids_get(p)->gt_stepping >= (since) && \ + tgl_revids_get(p)->gt_stepping <= (until)) #define RKL_REVID_A0 0x0 #define RKL_REVID_B0 0x1 @@ -1647,8 +1657,6 @@ tgl_revids_get(struct 
drm_i915_private *dev_priv) (INTEL_INFO(dev_priv)->has_logical_ring_contexts) #define HAS_LOGICAL_RING_ELSQ(dev_priv) \ (INTEL_INFO(dev_priv)->has_logical_ring_elsq) -#define HAS_LOGICAL_RING_PREEMPTION(dev_priv) \ - (INTEL_INFO(dev_priv)->has_logical_ring_preemption) #define HAS_MASTER_UNIT_IRQ(dev_priv) (INTEL_INFO(dev_priv)->has_master_unit_irq) @@ -1995,16 +2003,4 @@ i915_coherent_map_type(struct drm_i915_private *i915) return HAS_LLC(i915) ? I915_MAP_WB : I915_MAP_WC; } -static inline u64 i915_cs_timestamp_ns_to_ticks(struct drm_i915_private *i915, u64 val) -{ - return DIV_ROUND_UP_ULL(val * RUNTIME_INFO(i915)->cs_timestamp_frequency_hz, - 1000000000); -} - -static inline u64 i915_cs_timestamp_ticks_to_ns(struct drm_i915_private *i915, u64 val) -{ - return div_u64(val * 1000000000, - RUNTIME_INFO(i915)->cs_timestamp_frequency_hz); -} - #endif diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 58276694c848..17a4636ee542 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1207,8 +1207,6 @@ void i915_gem_driver_remove(struct drm_i915_private *dev_priv) void i915_gem_driver_release(struct drm_i915_private *dev_priv) { - i915_gem_driver_release__contexts(dev_priv); - intel_gt_driver_release(&dev_priv->gt); intel_wa_list_free(&dev_priv->gt_wa_list); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index c5ee1567f3d1..3ee2f682eff6 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -55,22 +55,17 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - struct device *kdev = &dev_priv->drm.pdev->dev; - struct i915_ggtt *ggtt = &dev_priv->ggtt; - - if (unlikely(ggtt->do_idle_maps)) { - /* XXX This does not prevent more requests being submitted! */ - if (intel_gt_retire_requests_timeout(ggtt->vm.gt, - -MAX_SCHEDULE_TIMEOUT)) { - drm_err(&dev_priv->drm, - "Failed to wait for idle; VT'd may hang.\n"); - /* Wait a bit, in hopes it avoids the hang */ - udelay(10); - } - } + struct drm_i915_private *i915 = to_i915(obj->base.dev); + struct i915_ggtt *ggtt = &i915->ggtt; + + /* XXX This does not prevent more requests being submitted! 
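[Editorial note, not part of the patch] The deleted i915_cs_timestamp_*() helpers above show the underlying arithmetic: ns to ticks is a round-up multiply-divide against the clock frequency, and ticks to ns is the inverse. The gt-based replacements keep this math, just sourcing the frequency from gt->clock_frequency. A standalone sketch of the conversions, with an illustrative frequency:

```c
#include <stdint.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ull

/* Round-up ns -> clock ticks, mirroring DIV_ROUND_UP_ULL(val * freq, 1e9). */
static uint64_t ns_to_ticks(uint64_t ns, uint64_t freq_hz)
{
	return (ns * freq_hz + NSEC_PER_SEC - 1) / NSEC_PER_SEC;
}

static uint64_t ticks_to_ns(uint64_t ticks, uint64_t freq_hz)
{
	return ticks * NSEC_PER_SEC / freq_hz;
}

int main(void)
{
	uint64_t freq = 19200000;	/* e.g. a 19.2 MHz command-streamer clock */

	printf("100us = %llu ticks\n",
	       (unsigned long long)ns_to_ticks(100000, freq));
	printf("1920 ticks = %llu ns\n",
	       (unsigned long long)ticks_to_ns(1920, freq));
	return 0;
}
```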
*/ + if (unlikely(ggtt->do_idle_maps)) + /* Wait a bit, in the hope it avoids the hang */ + usleep_range(100, 250); - dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL); + dma_unmap_sg(&i915->drm.pdev->dev, + pages->sgl, pages->nents, + PCI_DMA_BIDIRECTIONAL); } /** diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c index f96032c60a12..75c3bfc2486e 100644 --- a/drivers/gpu/drm/i915/i915_getparam.c +++ b/drivers/gpu/drm/i915/i915_getparam.c @@ -154,7 +154,7 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data, return -ENODEV; break; case I915_PARAM_CS_TIMESTAMP_FREQUENCY: - value = RUNTIME_INFO(i915)->cs_timestamp_frequency_hz; + value = i915->gt.clock_frequency; break; case I915_PARAM_MMAP_GTT_COHERENT: value = INTEL_INFO(i915)->has_coherent_ggtt; diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index d8cac4c5881f..8b163ee1b86d 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -485,7 +485,7 @@ static void error_print_context(struct drm_i915_error_state_buf *m, const char *header, const struct i915_gem_context_coredump *ctx) { - const u32 period = RUNTIME_INFO(m->i915)->cs_timestamp_period_ns; + const u32 period = m->i915->gt.clock_period_ns; err_printf(m, "%s%s[%d] prio %d, guilty %d active %d, runtime total %lluns, avg %lluns\n", header, ctx->comm, ctx->pid, ctx->sched_attr.priority, diff --git a/drivers/gpu/drm/i915/i915_mitigations.c b/drivers/gpu/drm/i915/i915_mitigations.c new file mode 100644 index 000000000000..84f12598d145 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mitigations.c @@ -0,0 +1,146 @@ +// SPDX-License-Identifier: MIT +/* + * Copyright © 2021 Intel Corporation + */ + +#include <linux/kernel.h> +#include <linux/moduleparam.h> +#include <linux/slab.h> +#include <linux/string.h> + +#include "i915_drv.h" +#include "i915_mitigations.h" + +static unsigned long mitigations __read_mostly = ~0UL; + +enum { + CLEAR_RESIDUALS = 0, +}; + +static const char * const names[] = { + [CLEAR_RESIDUALS] = "residuals", +}; + +bool i915_mitigate_clear_residuals(void) +{ + return READ_ONCE(mitigations) & BIT(CLEAR_RESIDUALS); +} + +static int mitigations_set(const char *val, const struct kernel_param *kp) +{ + unsigned long new = ~0UL; + char *str, *sep, *tok; + bool first = true; + int err = 0; + + BUILD_BUG_ON(ARRAY_SIZE(names) >= BITS_PER_TYPE(mitigations)); + + str = kstrdup(val, GFP_KERNEL); + if (!str) + return -ENOMEM; + + for (sep = str; (tok = strsep(&sep, ","));) { + bool enable = true; + int i; + + /* Be tolerant of leading/trailing whitespace */ + tok = strim(tok); + + if (first) { + first = false; + + if (!strcmp(tok, "auto")) + continue; + + new = 0; + if (!strcmp(tok, "off")) + continue; + } + + if (*tok == '!') { + enable = !enable; + tok++; + } + + if (!strncmp(tok, "no", 2)) { + enable = !enable; + tok += 2; + } + + if (*tok == '\0') + continue; + + for (i = 0; i < ARRAY_SIZE(names); i++) { + if (!strcmp(tok, names[i])) { + if (enable) + new |= BIT(i); + else + new &= ~BIT(i); + break; + } + } + if (i == ARRAY_SIZE(names)) { + pr_err("Bad \"%s.mitigations=%s\", '%s' is unknown\n", + DRIVER_NAME, val, tok); + err = -EINVAL; + break; + } + } + kfree(str); + if (err) + return err; + + WRITE_ONCE(mitigations, new); + return 0; +} + +static int mitigations_get(char *buffer, const struct kernel_param *kp) +{ + unsigned long local = READ_ONCE(mitigations); + int count, i; + bool enable; + + if (!local) + return scnprintf(buffer, 
PAGE_SIZE, "%s\n", "off"); + + if (local & BIT(BITS_PER_LONG - 1)) { + count = scnprintf(buffer, PAGE_SIZE, "%s,", "auto"); + enable = false; + } else { + enable = true; + count = 0; + } + + for (i = 0; i < ARRAY_SIZE(names); i++) { + if ((local & BIT(i)) != enable) + continue; + + count += scnprintf(buffer + count, PAGE_SIZE - count, + "%s%s,", enable ? "" : "!", names[i]); + } + + buffer[count - 1] = '\n'; + return count; +} + +static const struct kernel_param_ops ops = { + .set = mitigations_set, + .get = mitigations_get, +}; + +module_param_cb_unsafe(mitigations, &ops, NULL, 0600); +MODULE_PARM_DESC(mitigations, +"Selectively enable security mitigations for all Intel® GPUs in the system.\n" +"\n" +" auto -- enables all mitigations required for the platform [default]\n" +" off -- disables all mitigations\n" +"\n" +"Individual mitigations can be enabled by passing a comma-separated string,\n" +"e.g. mitigations=residuals to enable only clearing residuals or\n" +"mitigations=auto,noresiduals to disable only the clear residual mitigation.\n" +"Either '!' or 'no' may be used to switch from enabling the mitigation to\n" +"disabling it.\n" +"\n" +"Active mitigations for Ivybridge, Baytrail, Haswell:\n" +" residuals -- clear all thread-local registers between contexts" +); diff --git a/drivers/gpu/drm/i915/i915_mitigations.h b/drivers/gpu/drm/i915/i915_mitigations.h new file mode 100644 index 000000000000..1359d8135287 --- /dev/null +++ b/drivers/gpu/drm/i915/i915_mitigations.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: MIT */ +/* + * Copyright © 2021 Intel Corporation + */ + +#ifndef __I915_MITIGATIONS_H__ +#define __I915_MITIGATIONS_H__ + +#include <linux/types.h> + +bool i915_mitigate_clear_residuals(void); + +#endif /* __I915_MITIGATIONS_H__ */ diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index 11fe790b1969..39608381b4a4 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -639,7 +639,6 @@ static const struct intel_device_info chv_info = { GEN8_FEATURES, \ GEN(9), \ GEN9_DEFAULT_PAGE_SIZES, \ - .has_logical_ring_preemption = 1, \ .display.has_csr = 1, \ .has_gt_uc = 1, \ .display.has_hdcp = 1, \ @@ -700,7 +699,6 @@ static const struct intel_device_info skl_gt4_info = { .has_rps = true, \ .display.has_dp_mst = 1, \ .has_logical_ring_contexts = 1, \ - .has_logical_ring_preemption = 1, \ .has_gt_uc = 1, \ .dma_mask_size = 39, \ .ppgtt_type = INTEL_PPGTT_FULL, \ diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c index 649c26518d26..112ba5f2ce90 100644 --- a/drivers/gpu/drm/i915/i915_perf.c +++ b/drivers/gpu/drm/i915/i915_perf.c @@ -198,8 +198,11 @@ #include "gem/i915_gem_context.h" #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" +#include "gt/intel_execlists_submission.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" -#include "gt/intel_lrc_reg.h" +#include "gt/intel_gt_clock_utils.h" +#include "gt/intel_lrc.h" #include "gt/intel_ring.h" #include "i915_drv.h" @@ -1635,7 +1638,8 @@ static int alloc_noa_wait(struct i915_perf_stream *stream) struct drm_i915_gem_object *bo; struct i915_vma *vma; const u64 delay_ticks = 0xffffffffffffffff - - i915_cs_timestamp_ns_to_ticks(i915, atomic64_read(&stream->perf->noa_programming_delay)); + intel_gt_ns_to_clock_interval(stream->perf->i915->ggtt.vm.gt, + atomic64_read(&stream->perf->noa_programming_delay)); const u32 base = stream->engine->mmio_base; #define CS_GPR(x) GEN8_RING_CS_GPR(base, x) u32 *batch, *ts0, *cs, *jump; @@ 
-3516,7 +3520,8 @@ err: static u64 oa_exponent_to_ns(struct i915_perf *perf, int exponent) { - return i915_cs_timestamp_ticks_to_ns(perf->i915, 2ULL << exponent); + return intel_gt_clock_interval_to_ns(perf->i915->ggtt.vm.gt, + 2ULL << exponent); } /** @@ -4370,11 +4375,11 @@ void i915_perf_init(struct drm_i915_private *i915) if (perf->ops.enable_metric_set) { mutex_init(&perf->lock); - oa_sample_rate_hard_limit = - RUNTIME_INFO(i915)->cs_timestamp_frequency_hz / 2; + /* Choose a representative limit */ + oa_sample_rate_hard_limit = i915->gt.clock_frequency / 2; mutex_init(&perf->metrics_lock); - idr_init(&perf->metrics_idr); + idr_init_base(&perf->metrics_idr, 1); /* We set up some ratelimit state to potentially throttle any * _NOTES about spurious, invalid OA reports which we don't diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c index d76685ce0399..2b88c0baa1bf 100644 --- a/drivers/gpu/drm/i915/i915_pmu.c +++ b/drivers/gpu/drm/i915/i915_pmu.c @@ -26,8 +26,6 @@ BIT(I915_SAMPLE_WAIT) | \ BIT(I915_SAMPLE_SEMA)) -#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS) - static cpumask_t i915_pmu_cpumask; static unsigned int i915_pmu_target_cpu = -1; @@ -56,17 +54,42 @@ static bool is_engine_config(u64 config) return config < __I915_PMU_OTHER(0); } -static unsigned int config_enabled_bit(u64 config) +static unsigned int other_bit(const u64 config) +{ + unsigned int val; + + switch (config) { + case I915_PMU_ACTUAL_FREQUENCY: + val = __I915_PMU_ACTUAL_FREQUENCY_ENABLED; + break; + case I915_PMU_REQUESTED_FREQUENCY: + val = __I915_PMU_REQUESTED_FREQUENCY_ENABLED; + break; + case I915_PMU_RC6_RESIDENCY: + val = __I915_PMU_RC6_RESIDENCY_ENABLED; + break; + default: + /* + * Events that do not require sampling, or tracking state + * transitions between enabled and disabled can be ignored. + */ + return -1; + } + + return I915_ENGINE_SAMPLE_COUNT + val; +} + +static unsigned int config_bit(const u64 config) { if (is_engine_config(config)) return engine_config_sample(config); else - return ENGINE_SAMPLE_BITS + (config - __I915_PMU_OTHER(0)); + return other_bit(config); } -static u64 config_enabled_mask(u64 config) +static u64 config_mask(u64 config) { - return BIT_ULL(config_enabled_bit(config)); + return BIT_ULL(config_bit(config)); } static bool is_engine_event(struct perf_event *event) @@ -74,15 +97,15 @@ static bool is_engine_event(struct perf_event *event) return is_engine_config(event->attr.config); } -static unsigned int event_enabled_bit(struct perf_event *event) +static unsigned int event_bit(struct perf_event *event) { - return config_enabled_bit(event->attr.config); + return config_bit(event->attr.config); } static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) { struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); - u64 enable; + u32 enable; /* * Only some counters need the sampling timer. @@ -95,8 +118,8 @@ static bool pmu_needs_timer(struct i915_pmu *pmu, bool gpu_active) * Mask out all the ones which do not need the timer, or in * other words keep all the ones that could need the timer. 
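[Editorial note, not part of the patch] The PMU rework above packs event state into a single enable bitmask: engine sample bits occupy the low slots, each tracked "other" event (actual/requested frequency, RC6) gets one bit above them, and events that need no tracking map to -1. A standalone sketch of that bit layout, with an illustrative engine-sample count:

```c
#include <stdint.h>
#include <stdio.h>

#define ENGINE_SAMPLE_COUNT 3		/* busy, wait, sema -- illustrative */

enum {					/* non-engine events that need tracking */
	TRACKED_ACTUAL_FREQ = 0,
	TRACKED_REQUESTED_FREQ,
	TRACKED_RC6,
	TRACKED_COUNT,
};

/* Tracked non-engine events sit just above the engine sample bits. */
static int other_bit(int tracked)
{
	return tracked < 0 ? -1 : ENGINE_SAMPLE_COUNT + tracked;
}

static uint32_t config_mask(int bit)
{
	return bit < 0 ? 0 : 1u << bit;
}

int main(void)
{
	uint32_t enable = 0;
	uint32_t freq_events;

	enable |= config_mask(1);			/* one engine sample bit */
	enable |= config_mask(other_bit(TRACKED_RC6));	/* RC6 residency */

	/* Same test shape as frequency_sampling_enabled()/pmu_needs_timer(). */
	freq_events = config_mask(other_bit(TRACKED_ACTUAL_FREQ)) |
		      config_mask(other_bit(TRACKED_REQUESTED_FREQ));

	printf("enable = %#x, frequency timer needed: %s\n",
	       enable, (enable & freq_events) ? "yes" : "no");
	return 0;
}
```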
*/ - enable &= config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | - config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY) | + enable &= config_mask(I915_PMU_ACTUAL_FREQUENCY) | + config_mask(I915_PMU_REQUESTED_FREQUENCY) | ENGINE_SAMPLE_MASK; /* @@ -137,11 +160,9 @@ static u64 __get_rc6(struct intel_gt *gt) return val; } -#if IS_ENABLED(CONFIG_PM) - -static inline s64 ktime_since(const ktime_t kt) +static inline s64 ktime_since_raw(const ktime_t kt) { - return ktime_to_ns(ktime_sub(ktime_get(), kt)); + return ktime_to_ns(ktime_sub(ktime_get_raw(), kt)); } static u64 get_rc6(struct intel_gt *gt) @@ -170,7 +191,7 @@ static u64 get_rc6(struct intel_gt *gt) * on top of the last known real value, as the approximated RC6 * counter value. */ - val = ktime_since(pmu->sleep_last); + val = ktime_since_raw(pmu->sleep_last); val += pmu->sample[__I915_SAMPLE_RC6].cur; } @@ -184,26 +205,26 @@ static u64 get_rc6(struct intel_gt *gt) return val; } -static void park_rc6(struct drm_i915_private *i915) +static void init_rc6(struct i915_pmu *pmu) { - struct i915_pmu *pmu = &i915->pmu; + struct drm_i915_private *i915 = container_of(pmu, typeof(*i915), pmu); + intel_wakeref_t wakeref; - if (pmu->enable & config_enabled_mask(I915_PMU_RC6_RESIDENCY)) + with_intel_runtime_pm(i915->gt.uncore->rpm, wakeref) { pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); - - pmu->sleep_last = ktime_get(); + pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = + pmu->sample[__I915_SAMPLE_RC6].cur; + pmu->sleep_last = ktime_get_raw(); + } } -#else - -static u64 get_rc6(struct intel_gt *gt) +static void park_rc6(struct drm_i915_private *i915) { - return __get_rc6(gt); -} - -static void park_rc6(struct drm_i915_private *i915) {} + struct i915_pmu *pmu = &i915->pmu; -#endif + pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); + pmu->sleep_last = ktime_get_raw(); +} static void __i915_pmu_maybe_start_timer(struct i915_pmu *pmu) { @@ -343,8 +364,8 @@ add_sample_mult(struct i915_pmu_sample *sample, u32 val, u32 mul) static bool frequency_sampling_enabled(struct i915_pmu *pmu) { return pmu->enable & - (config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY) | - config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)); + (config_mask(I915_PMU_ACTUAL_FREQUENCY) | + config_mask(I915_PMU_REQUESTED_FREQUENCY)); } static void @@ -362,7 +383,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) if (!intel_gt_pm_get_if_awake(gt)) return; - if (pmu->enable & config_enabled_mask(I915_PMU_ACTUAL_FREQUENCY)) { + if (pmu->enable & config_mask(I915_PMU_ACTUAL_FREQUENCY)) { u32 val; /* @@ -384,7 +405,7 @@ frequency_sample(struct intel_gt *gt, unsigned int period_ns) intel_gpu_freq(rps, val), period_ns / 1000); } - if (pmu->enable & config_enabled_mask(I915_PMU_REQUESTED_FREQUENCY)) { + if (pmu->enable & config_mask(I915_PMU_REQUESTED_FREQUENCY)) { add_sample_mult(&pmu->sample[__I915_SAMPLE_FREQ_REQ], intel_gpu_freq(rps, rps->cur_freq), period_ns / 1000); @@ -471,6 +492,8 @@ config_status(struct drm_i915_private *i915, u64 config) if (!HAS_RC6(i915)) return -ENODEV; break; + case I915_PMU_SOFTWARE_GT_AWAKE_TIME: + break; default: return -ENOENT; } @@ -578,6 +601,9 @@ static u64 __i915_pmu_event_read(struct perf_event *event) case I915_PMU_RC6_RESIDENCY: val = get_rc6(&i915->gt); break; + case I915_PMU_SOFTWARE_GT_AWAKE_TIME: + val = ktime_to_ns(intel_gt_get_awake_time(&i915->gt)); + break; } } @@ -610,12 +636,14 @@ static void i915_pmu_enable(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), 
pmu.base); - unsigned int bit = event_enabled_bit(event); struct i915_pmu *pmu = &i915->pmu; - intel_wakeref_t wakeref; unsigned long flags; + unsigned int bit; + + bit = event_bit(event); + if (bit == -1) + goto update; - wakeref = intel_runtime_pm_get(&i915->runtime_pm); spin_lock_irqsave(&pmu->lock, flags); /* @@ -626,13 +654,6 @@ static void i915_pmu_enable(struct perf_event *event) GEM_BUG_ON(bit >= ARRAY_SIZE(pmu->enable_count)); GEM_BUG_ON(pmu->enable_count[bit] == ~0); - if (pmu->enable_count[bit] == 0 && - config_enabled_mask(I915_PMU_RC6_RESIDENCY) & BIT_ULL(bit)) { - pmu->sample[__I915_SAMPLE_RC6_LAST_REPORTED].cur = 0; - pmu->sample[__I915_SAMPLE_RC6].cur = __get_rc6(&i915->gt); - pmu->sleep_last = ktime_get(); - } - pmu->enable |= BIT_ULL(bit); pmu->enable_count[bit]++; @@ -667,24 +688,26 @@ static void i915_pmu_enable(struct perf_event *event) spin_unlock_irqrestore(&pmu->lock, flags); +update: /* * Store the current counter value so we can report the correct delta * for all listeners. Even when the event was already enabled and has * an existing non-zero value. */ local64_set(&event->hw.prev_count, __i915_pmu_event_read(event)); - - intel_runtime_pm_put(&i915->runtime_pm, wakeref); } static void i915_pmu_disable(struct perf_event *event) { struct drm_i915_private *i915 = container_of(event->pmu, typeof(*i915), pmu.base); - unsigned int bit = event_enabled_bit(event); + unsigned int bit = event_bit(event); struct i915_pmu *pmu = &i915->pmu; unsigned long flags; + if (bit == -1) + return; + spin_lock_irqsave(&pmu->lock, flags); if (is_engine_event(event)) { @@ -881,6 +904,7 @@ create_event_attributes(struct i915_pmu *pmu) __event(I915_PMU_REQUESTED_FREQUENCY, "requested-frequency", "M"), __event(I915_PMU_INTERRUPTS, "interrupts", NULL), __event(I915_PMU_RC6_RESIDENCY, "rc6-residency", "ns"), + __event(I915_PMU_SOFTWARE_GT_AWAKE_TIME, "software-gt-awake-time", "ns"), }; static const struct { enum drm_i915_pmu_engine_sample sample; @@ -1130,6 +1154,7 @@ void i915_pmu_register(struct drm_i915_private *i915) hrtimer_init(&pmu->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); pmu->timer.function = i915_sample; pmu->cpuhp.cpu = -1; + init_rc6(pmu); if (!is_igp(i915)) { pmu->name = kasprintf(GFP_KERNEL, diff --git a/drivers/gpu/drm/i915/i915_pmu.h b/drivers/gpu/drm/i915/i915_pmu.h index 8405d6da5b9a..60f9595f902c 100644 --- a/drivers/gpu/drm/i915/i915_pmu.h +++ b/drivers/gpu/drm/i915/i915_pmu.h @@ -14,6 +14,21 @@ struct drm_i915_private; +/** + * Non-engine events that we need to track enabled-disabled transition and + * current state. + */ +enum i915_pmu_tracked_events { + __I915_PMU_ACTUAL_FREQUENCY_ENABLED = 0, + __I915_PMU_REQUESTED_FREQUENCY_ENABLED, + __I915_PMU_RC6_RESIDENCY_ENABLED, + __I915_PMU_TRACKED_EVENT_COUNT, /* count marker */ +}; + +/** + * Slots used from the sampling timer (non-engine events) with some extras for + * convenience. + */ enum { __I915_SAMPLE_FREQ_ACT = 0, __I915_SAMPLE_FREQ_REQ, @@ -28,8 +43,7 @@ enum { * It is also used to know to needed number of event reference counters. */ #define I915_PMU_MASK_BITS \ - ((1 << I915_PMU_SAMPLE_BITS) + \ - (I915_PMU_LAST + 1 - __I915_PMU_OTHER(0))) + (I915_ENGINE_SAMPLE_COUNT + __I915_PMU_TRACKED_EVENT_COUNT) #define I915_ENGINE_SAMPLE_COUNT (I915_SAMPLE_SEMA + 1) @@ -66,18 +80,17 @@ struct i915_pmu { */ struct hrtimer timer; /** - * @enable: Bitmask of all currently enabled events. + * @enable: Bitmask of specific enabled events. + * + * For some events we need to track their state and do some internal + * house keeping. 
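The enable/disable paths above now bail out early for events that have no tracked bit (config_bit() returning -1), so only sampled counters touch the reference counts, and I915_PMU_MASK_BITS is sized as engine samplers plus tracked events. A minimal sketch of that bookkeeping, with hypothetical counts standing in for the driver's constants:

#include <linux/bits.h>
#include <linux/types.h>

#define MASK_BITS	(3 + 3)	/* hypothetical: engine samplers + tracked events */

struct pmu_sketch {
	u32 enable;				/* one bit per counter needing internal bookkeeping */
	unsigned int enable_count[MASK_BITS];	/* listeners per counter */
};

static void event_enable(struct pmu_sketch *pmu, unsigned int bit)
{
	if (bit == -1)
		return;		/* untracked event: nothing to account for */

	pmu->enable |= BIT(bit);
	pmu->enable_count[bit]++;
}

static void event_disable(struct pmu_sketch *pmu, unsigned int bit)
{
	if (bit == -1)
		return;

	if (!--pmu->enable_count[bit])
		pmu->enable &= ~BIT(bit);	/* last listener gone */
}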
* - * Bits are derived from uAPI event numbers in a way that low 16 bits - * correspond to engine event _sample_ _type_ (I915_SAMPLE_QUEUED is - * bit 0), and higher bits correspond to other events (for instance - * I915_PMU_ACTUAL_FREQUENCY is bit 16 etc). + * Each engine event sampler type and event listed in enum + * i915_pmu_tracked_events gets a bit in this field. * - * In other words, low 16 bits are not per engine but per engine - * sampler type, while the upper bits are directly mapped to other - * event types. + * Low bits are engine samplers and other events continue from there. */ - u64 enable; + u32 enable; /** * @timer_last: diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 5385b081a376..0b1a46a0d866 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -33,6 +33,7 @@ #include "gem/i915_gem_context.h" #include "gt/intel_breadcrumbs.h" #include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" #include "gt/intel_ring.h" #include "gt/intel_rps.h" @@ -306,10 +307,8 @@ bool i915_request_retire(struct i915_request *rq) spin_unlock_irq(&rq->lock); } - if (i915_request_has_waitboost(rq)) { - GEM_BUG_ON(!atomic_read(&rq->engine->gt->rps.num_waiters)); + if (test_and_set_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags)) atomic_dec(&rq->engine->gt->rps.num_waiters); - } /* * We only loosely track inflight requests across preemption, @@ -321,7 +320,8 @@ bool i915_request_retire(struct i915_request *rq) * after removing the breadcrumb and signaling it, so that we do not * inadvertently attach the breadcrumb to a completed request. */ - remove_from_engine(rq); + if (!list_empty(&rq->sched.link)) + remove_from_engine(rq); GEM_BUG_ON(!llist_empty(&rq->execute_cb)); __list_del_entry(&rq->link); /* poison neither prev/next (RCU walks) */ @@ -488,6 +488,8 @@ void __i915_request_skip(struct i915_request *rq) if (rq->infix == rq->postfix) return; + RQ_TRACE(rq, "error: %d\n", rq->fence.error); + /* * As this request likely depends on state from the lost * context, clear out all the user operations leaving the @@ -513,6 +515,17 @@ void i915_request_set_error_once(struct i915_request *rq, int error) } while (!try_cmpxchg(&rq->fence.error, &old, error)); } +void i915_request_mark_eio(struct i915_request *rq) +{ + if (__i915_request_is_complete(rq)) + return; + + GEM_BUG_ON(i915_request_signaled(rq)); + + i915_request_set_error_once(rq, -EIO); + i915_request_mark_complete(rq); +} + bool __i915_request_submit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; @@ -542,10 +555,6 @@ bool __i915_request_submit(struct i915_request *request) if (i915_request_completed(request)) goto xfer; - if (unlikely(intel_context_is_closed(request->context) && - !intel_engine_has_heartbeat(engine))) - intel_context_set_banned(request->context); - if (unlikely(intel_context_is_banned(request->context))) i915_request_set_error_once(request, -EIO); @@ -1582,6 +1591,12 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) return __i915_request_add_to_timeline(rq); } +void __i915_request_queue_bh(struct i915_request *rq) +{ + i915_sw_fence_commit(&rq->semaphore); + i915_sw_fence_commit(&rq->submit); +} + void __i915_request_queue(struct i915_request *rq, const struct i915_sched_attr *attr) { @@ -1598,8 +1613,10 @@ void __i915_request_queue(struct i915_request *rq, */ if (attr && rq->engine->schedule) rq->engine->schedule(rq, attr); - i915_sw_fence_commit(&rq->semaphore); - 
i915_sw_fence_commit(&rq->submit); + + local_bh_disable(); + __i915_request_queue_bh(rq); + local_bh_enable(); /* kick tasklets */ } void i915_request_add(struct i915_request *rq) @@ -1823,7 +1840,7 @@ long i915_request_wait(struct i915_request *rq, * for unhappy HW. */ if (i915_request_is_ready(rq)) - intel_engine_flush_submission(rq->engine); + __intel_engine_flush_submission(rq->engine, false); for (;;) { set_current_state(state); @@ -1855,6 +1872,106 @@ out: return timeout; } +static int print_sched_attr(const struct i915_sched_attr *attr, + char *buf, int x, int len) +{ + if (attr->priority == I915_PRIORITY_INVALID) + return x; + + x += snprintf(buf + x, len - x, + " prio=%d", attr->priority); + + return x; +} + +static char queue_status(const struct i915_request *rq) +{ + if (i915_request_is_active(rq)) + return 'E'; + + if (i915_request_is_ready(rq)) + return intel_engine_is_virtual(rq->engine) ? 'V' : 'R'; + + return 'U'; +} + +static const char *run_status(const struct i915_request *rq) +{ + if (i915_request_completed(rq)) + return "!"; + + if (i915_request_started(rq)) + return "*"; + + if (!i915_sw_fence_signaled(&rq->semaphore)) + return "&"; + + return ""; +} + +static const char *fence_status(const struct i915_request *rq) +{ + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &rq->fence.flags)) + return "+"; + + if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &rq->fence.flags)) + return "-"; + + return ""; +} + +void i915_request_show(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent) +{ + const char *name = rq->fence.ops->get_timeline_name((struct dma_fence *)&rq->fence); + char buf[80] = ""; + int x = 0; + + /* + * The prefix is used to show the queue status, for which we use + * the following flags: + * + * U [Unready] + * - initial status upon being submitted by the user + * + * - the request is not ready for execution as it is waiting + * for external fences + * + * R [Ready] + * - all fences the request was waiting on have been signaled, + * and the request is now ready for execution and will be + * in a backend queue + * + * - a ready request may still need to wait on semaphores + * [internal fences] + * + * V [Ready/virtual] + * - same as ready, but queued over multiple backends + * + * E [Executing] + * - the request has been transferred from the backend queue and + * submitted for execution on HW + * + * - a completed request may still be regarded as executing, its + * status may not be updated until it is retired and removed + * from the lists + */ + + x = print_sched_attr(&rq->sched.attr, buf, x, sizeof(buf)); + + drm_printf(m, "%s%.*s%c %llx:%lld%s%s %s @ %dms: %s\n", + prefix, indent, " ", + queue_status(rq), + rq->fence.context, rq->fence.seqno, + run_status(rq), + fence_status(rq), + buf, + jiffies_to_msecs(jiffies - rq->emitted_jiffies), + name); +} + #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) #include "selftests/mock_request.c" #include "selftests/i915_request.c" diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 620b6fab2c5c..1bfe214a47e9 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -43,6 +43,7 @@ struct drm_file; struct drm_i915_gem_object; +struct drm_printer; struct i915_request; struct i915_capture_list { @@ -308,12 +309,14 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp); struct i915_request * __must_check i915_request_create(struct intel_context *ce); -void i915_request_set_error_once(struct i915_request *rq, int 
error); void __i915_request_skip(struct i915_request *rq); +void i915_request_set_error_once(struct i915_request *rq, int error); +void i915_request_mark_eio(struct i915_request *rq); struct i915_request *__i915_request_commit(struct i915_request *request); void __i915_request_queue(struct i915_request *rq, const struct i915_sched_attr *attr); +void __i915_request_queue_bh(struct i915_request *rq); bool i915_request_retire(struct i915_request *rq); void i915_request_retire_upto(struct i915_request *rq); @@ -371,6 +374,11 @@ long i915_request_wait(struct i915_request *rq, #define I915_WAIT_PRIORITY BIT(1) /* small priority bump for the request */ #define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */ +void i915_request_show(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent); + static inline bool i915_request_signaled(const struct i915_request *rq) { /* The request may live longer than its HWSP, so check flags first! */ @@ -434,7 +442,7 @@ static inline u32 hwsp_seqno(const struct i915_request *rq) static inline bool __i915_request_has_started(const struct i915_request *rq) { - return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno - 1); + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno - 1); } /** @@ -465,11 +473,19 @@ static inline bool __i915_request_has_started(const struct i915_request *rq) */ static inline bool i915_request_started(const struct i915_request *rq) { + bool result; + if (i915_request_signaled(rq)) return true; - /* Remember: started but may have since been preempted! */ - return __i915_request_has_started(rq); + result = true; + rcu_read_lock(); /* the HWSP may be freed at runtime */ + if (likely(!i915_request_signaled(rq))) + /* Remember: started but may have since been preempted! 
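The request-status helpers in this hunk (i915_request_started()/i915_request_completed()) now sample the HWSP seqno only inside an RCU read-side section, because the status page can be freed once a signaled request is retired. The general shape of that pattern is sketched below; read_hw_seqno() and request_signaled() are hypothetical stand-ins for the driver's internals, not its actual API:

#include <linux/rcupdate.h>
#include <linux/types.h>

struct rq_sketch {
	u32 seqno;
	/* ... */
};

static bool request_signaled(const struct rq_sketch *rq)
{
	return false;	/* placeholder: real code checks the fence flags */
}

static u32 read_hw_seqno(const struct rq_sketch *rq)
{
	return 0;	/* placeholder: real code dereferences the HWSP */
}

static bool seqno_passed(u32 a, u32 b)
{
	return (s32)(a - b) >= 0;
}

static bool request_completed(const struct rq_sketch *rq)
{
	bool result = true;

	rcu_read_lock();		/* keep the HWSP backing page alive while we peek */
	if (!request_signaled(rq))	/* a signaled request may already have lost it */
		result = seqno_passed(read_hw_seqno(rq), rq->seqno);
	rcu_read_unlock();

	return result;
}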
*/ + result = __i915_request_has_started(rq); + rcu_read_unlock(); + + return result; } /** @@ -482,10 +498,16 @@ static inline bool i915_request_started(const struct i915_request *rq) */ static inline bool i915_request_is_running(const struct i915_request *rq) { + bool result; + if (!i915_request_is_active(rq)) return false; - return __i915_request_has_started(rq); + rcu_read_lock(); + result = __i915_request_has_started(rq) && i915_request_is_active(rq); + rcu_read_unlock(); + + return result; } /** @@ -509,12 +531,25 @@ static inline bool i915_request_is_ready(const struct i915_request *rq) return !list_empty(&rq->sched.link); } +static inline bool __i915_request_is_complete(const struct i915_request *rq) +{ + return i915_seqno_passed(__hwsp_seqno(rq), rq->fence.seqno); +} + static inline bool i915_request_completed(const struct i915_request *rq) { + bool result; + if (i915_request_signaled(rq)) return true; - return i915_seqno_passed(hwsp_seqno(rq), rq->fence.seqno); + result = true; + rcu_read_lock(); /* the HWSP may be freed at runtime */ + if (likely(!i915_request_signaled(rq))) + result = __i915_request_is_complete(rq); + rcu_read_unlock(); + + return result; } static inline void i915_request_mark_complete(struct i915_request *rq) diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c index cbb880b10c65..318e359bf5c3 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.c +++ b/drivers/gpu/drm/i915/i915_scheduler.c @@ -458,14 +458,10 @@ int i915_sched_node_add_dependency(struct i915_sched_node *node, if (!dep) return -ENOMEM; - local_bh_disable(); - if (!__i915_sched_node_add_dependency(node, signal, dep, flags | I915_DEPENDENCY_ALLOC)) i915_dependency_free(dep); - local_bh_enable(); /* kick submission tasklet */ - return 0; } @@ -504,6 +500,34 @@ void i915_sched_node_fini(struct i915_sched_node *node) spin_unlock_irq(&schedule_lock); } +void i915_request_show_with_schedule(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent) +{ + struct i915_dependency *dep; + + i915_request_show(m, rq, prefix, indent); + if (i915_request_completed(rq)) + return; + + rcu_read_lock(); + for_each_signaler(dep, rq) { + const struct i915_request *signaler = + node_to_request(dep->signaler); + + /* Dependencies along the same timeline are expected. 
*/ + if (signaler->timeline == rq->timeline) + continue; + + if (i915_request_completed(signaler)) + continue; + + i915_request_show(m, signaler, prefix, indent + 2); + } + rcu_read_unlock(); +} + static void i915_global_scheduler_shrink(void) { kmem_cache_shrink(global.slab_dependencies); diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h index 6f0bf00fc569..4501e5ac2637 100644 --- a/drivers/gpu/drm/i915/i915_scheduler.h +++ b/drivers/gpu/drm/i915/i915_scheduler.h @@ -13,6 +13,8 @@ #include "i915_scheduler_types.h" +struct drm_printer; + #define priolist_for_each_request(it, plist, idx) \ for (idx = 0; idx < ARRAY_SIZE((plist)->requests); idx++) \ list_for_each_entry(it, &(plist)->requests[idx], sched.link) @@ -54,4 +56,9 @@ static inline void i915_priolist_free(struct i915_priolist *p) __i915_priolist_free(p); } +void i915_request_show_with_schedule(struct drm_printer *m, + const struct i915_request *rq, + const char *prefix, + int indent); + #endif /* _I915_SCHEDULER_H_ */ diff --git a/drivers/gpu/drm/i915/i915_scheduler_types.h b/drivers/gpu/drm/i915/i915_scheduler_types.h index f72e6c397b08..343ed44d5ed4 100644 --- a/drivers/gpu/drm/i915/i915_scheduler_types.h +++ b/drivers/gpu/drm/i915/i915_scheduler_types.h @@ -81,4 +81,14 @@ struct i915_dependency { #define I915_DEPENDENCY_WEAK BIT(2) }; +#define for_each_waiter(p__, rq__) \ + list_for_each_entry_lockless(p__, \ + &(rq__)->sched.waiters_list, \ + wait_link) + +#define for_each_signaler(p__, rq__) \ + list_for_each_entry_rcu(p__, \ + &(rq__)->sched.signalers_list, \ + signal_link) + #endif /* _I915_SCHEDULER_TYPES_H_ */ diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index 038d4c6884c5..2744558f3050 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -18,10 +18,15 @@ #define I915_SW_FENCE_BUG_ON(expr) BUILD_BUG_ON_INVALID(expr) #endif -#define I915_SW_FENCE_FLAG_ALLOC BIT(3) /* after WQ_FLAG_* for safety */ - static DEFINE_SPINLOCK(i915_sw_fence_lock); +#define WQ_FLAG_BITS \ + BITS_PER_TYPE(typeof_member(struct wait_queue_entry, flags)) + +/* after WQ_FLAG_* for safety */ +#define I915_SW_FENCE_FLAG_FENCE BIT(WQ_FLAG_BITS - 1) +#define I915_SW_FENCE_FLAG_ALLOC BIT(WQ_FLAG_BITS - 2) + enum { DEBUG_FENCE_IDLE = 0, DEBUG_FENCE_NOTIFY, @@ -154,10 +159,10 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, spin_lock_irqsave_nested(&x->lock, flags, 1 + !!continuation); if (continuation) { list_for_each_entry_safe(pos, next, &x->head, entry) { - if (pos->func == autoremove_wake_function) - pos->func(pos, TASK_NORMAL, 0, continuation); - else + if (pos->flags & I915_SW_FENCE_FLAG_FENCE) list_move_tail(&pos->entry, continuation); + else + pos->func(pos, TASK_NORMAL, 0, continuation); } } else { LIST_HEAD(extra); @@ -166,9 +171,9 @@ static void __i915_sw_fence_wake_up_all(struct i915_sw_fence *fence, list_for_each_entry_safe(pos, next, &x->head, entry) { int wake_flags; - wake_flags = fence->error; - if (pos->func == autoremove_wake_function) - wake_flags = 0; + wake_flags = 0; + if (pos->flags & I915_SW_FENCE_FLAG_FENCE) + wake_flags = fence->error; pos->func(pos, TASK_NORMAL, wake_flags, &extra); } @@ -332,8 +337,8 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, struct i915_sw_fence *signaler, wait_queue_entry_t *wq, gfp_t gfp) { + unsigned int pending; unsigned long flags; - int pending; debug_fence_assert(fence); might_sleep_if(gfpflags_allow_blocking(gfp)); @@ -349,7 
+354,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, if (unlikely(i915_sw_fence_check_if_after(fence, signaler))) return -EINVAL; - pending = 0; + pending = I915_SW_FENCE_FLAG_FENCE; if (!wq) { wq = kmalloc(sizeof(*wq), gfp); if (!wq) { diff --git a/drivers/gpu/drm/i915/i915_utils.c b/drivers/gpu/drm/i915/i915_utils.c index 4c305d838016..f9e780dee9de 100644 --- a/drivers/gpu/drm/i915/i915_utils.c +++ b/drivers/gpu/drm/i915/i915_utils.c @@ -87,7 +87,7 @@ bool i915_error_injected(void) void cancel_timer(struct timer_list *t) { - if (!READ_ONCE(t->expires)) + if (!timer_active(t)) return; del_timer(t); diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 54773371e6bd..abd4dcd9f79c 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -438,9 +438,14 @@ static inline void __add_taint_for_CI(unsigned int taint) void cancel_timer(struct timer_list *t); void set_timer_ms(struct timer_list *t, unsigned long timeout); +static inline bool timer_active(const struct timer_list *t) +{ + return READ_ONCE(t->expires); +} + static inline bool timer_expired(const struct timer_list *t) { - return READ_ONCE(t->expires) && !timer_pending(t); + return timer_active(t) && !timer_pending(t); } /* diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index ef767f04c37c..f2d5ae59081e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -117,150 +117,6 @@ void intel_device_info_print_runtime(const struct intel_runtime_info *info, struct drm_printer *p) { drm_printf(p, "rawclk rate: %u kHz\n", info->rawclk_freq); - drm_printf(p, "CS timestamp frequency: %u Hz\n", - info->cs_timestamp_frequency_hz); -} - -static u32 read_reference_ts_freq(struct drm_i915_private *dev_priv) -{ - u32 ts_override = intel_uncore_read(&dev_priv->uncore, - GEN9_TIMESTAMP_OVERRIDE); - u32 base_freq, frac_freq; - - base_freq = ((ts_override & GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK) >> - GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_SHIFT) + 1; - base_freq *= 1000000; - - frac_freq = ((ts_override & - GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK) >> - GEN9_TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_SHIFT); - frac_freq = 1000000 / (frac_freq + 1); - - return base_freq + frac_freq; -} - -static u32 gen10_get_crystal_clock_freq(struct drm_i915_private *dev_priv, - u32 rpm_config_reg) -{ - u32 f19_2_mhz = 19200000; - u32 f24_mhz = 24000000; - u32 crystal_clock = (rpm_config_reg & - GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> - GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; - - switch (crystal_clock) { - case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: - return f19_2_mhz; - case GEN9_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: - return f24_mhz; - default: - MISSING_CASE(crystal_clock); - return 0; - } -} - -static u32 gen11_get_crystal_clock_freq(struct drm_i915_private *dev_priv, - u32 rpm_config_reg) -{ - u32 f19_2_mhz = 19200000; - u32 f24_mhz = 24000000; - u32 f25_mhz = 25000000; - u32 f38_4_mhz = 38400000; - u32 crystal_clock = (rpm_config_reg & - GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_MASK) >> - GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_SHIFT; - - switch (crystal_clock) { - case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_24_MHZ: - return f24_mhz; - case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_19_2_MHZ: - return f19_2_mhz; - case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_38_4_MHZ: - return f38_4_mhz; - case GEN11_RPM_CONFIG0_CRYSTAL_CLOCK_FREQ_25_MHZ: - return f25_mhz; 
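The per-device CS timestamp bookkeeping being deleted here is superseded by the GT clock helpers used elsewhere in this series (intel_gt_clock_interval_to_ns() and the gt clock_frequency seen in the i915_perf and selftest hunks). The conversion itself boils down to scaling a tick count by the clock rate; a sketch, assuming a clock_frequency expressed in Hz:

#include <linux/math64.h>
#include <linux/time64.h>
#include <linux/types.h>

static u64 interval_to_ns_sketch(u32 clock_frequency, u64 ticks)
{
	/* ns = ticks * 1e9 / Hz; widen the multiply to avoid intermediate overflow */
	return mul_u64_u32_div(ticks, NSEC_PER_SEC, clock_frequency);
}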
- default: - MISSING_CASE(crystal_clock); - return 0; - } -} - -static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) -{ - struct intel_uncore *uncore = &dev_priv->uncore; - u32 f12_5_mhz = 12500000; - u32 f19_2_mhz = 19200000; - u32 f24_mhz = 24000000; - - if (INTEL_GEN(dev_priv) <= 4) { - /* PRMs say: - * - * "The value in this register increments once every 16 - * hclks." (through the “Clocking Configuration” - * (“CLKCFG”) MCHBAR register) - */ - return RUNTIME_INFO(dev_priv)->rawclk_freq * 1000 / 16; - } else if (INTEL_GEN(dev_priv) <= 8) { - /* PRMs say: - * - * "The PCU TSC counts 10ns increments; this timestamp - * reflects bits 38:3 of the TSC (i.e. 80ns granularity, - * rolling over every 1.5 hours). - */ - return f12_5_mhz; - } else if (INTEL_GEN(dev_priv) <= 9) { - u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); - u32 freq = 0; - - if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { - freq = read_reference_ts_freq(dev_priv); - } else { - freq = IS_GEN9_LP(dev_priv) ? f19_2_mhz : f24_mhz; - - /* Now figure out how the command stream's timestamp - * register increments from this frequency (it might - * increment only every few clock cycle). - */ - freq >>= 3 - ((ctc_reg & CTC_SHIFT_PARAMETER_MASK) >> - CTC_SHIFT_PARAMETER_SHIFT); - } - - return freq; - } else if (INTEL_GEN(dev_priv) <= 12) { - u32 ctc_reg = intel_uncore_read(uncore, CTC_MODE); - u32 freq = 0; - - /* First figure out the reference frequency. There are 2 ways - * we can compute the frequency, either through the - * TIMESTAMP_OVERRIDE register or through RPM_CONFIG. CTC_MODE - * tells us which one we should use. - */ - if ((ctc_reg & CTC_SOURCE_PARAMETER_MASK) == CTC_SOURCE_DIVIDE_LOGIC) { - freq = read_reference_ts_freq(dev_priv); - } else { - u32 rpm_config_reg = intel_uncore_read(uncore, RPM_CONFIG0); - - if (INTEL_GEN(dev_priv) <= 10) - freq = gen10_get_crystal_clock_freq(dev_priv, - rpm_config_reg); - else - freq = gen11_get_crystal_clock_freq(dev_priv, - rpm_config_reg); - - /* Now figure out how the command stream's timestamp - * register increments from this frequency (it might - * increment only every few clock cycle). 
- */ - freq >>= 3 - ((rpm_config_reg & - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); - } - - return freq; - } - - MISSING_CASE("Unknown gen, unable to read command streamer timestamp frequency\n"); - return 0; } #undef INTEL_VGA_DEVICE @@ -505,19 +361,6 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) runtime->rawclk_freq = intel_read_rawclk(dev_priv); drm_dbg(&dev_priv->drm, "rawclk rate: %d kHz\n", runtime->rawclk_freq); - /* Initialize command stream timestamp frequency */ - runtime->cs_timestamp_frequency_hz = - read_timestamp_frequency(dev_priv); - if (runtime->cs_timestamp_frequency_hz) { - runtime->cs_timestamp_period_ns = - i915_cs_timestamp_ticks_to_ns(dev_priv, 1); - drm_dbg(&dev_priv->drm, - "CS timestamp wraparound in %lldms\n", - div_u64(mul_u32_u32(runtime->cs_timestamp_period_ns, - S32_MAX), - USEC_PER_SEC)); - } - if (!HAS_DISPLAY(dev_priv)) { dev_priv->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index d92fa041c700..cf2d528c6e9b 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -123,7 +123,6 @@ enum intel_ppgtt_type { func(has_llc); \ func(has_logical_ring_contexts); \ func(has_logical_ring_elsq); \ - func(has_logical_ring_preemption); \ func(has_master_unit_irq); \ func(has_pooled_eu); \ func(has_rc6); \ @@ -224,9 +223,6 @@ struct intel_runtime_info { u8 num_scalers[I915_MAX_PIPES]; u32 rawclk_freq; - - u32 cs_timestamp_frequency_hz; - u32 cs_timestamp_period_ns; }; struct intel_driver_caps { diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c index b326993a1026..1bfcdd89b241 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.c +++ b/drivers/gpu/drm/i915/intel_memory_region.c @@ -10,7 +10,7 @@ #define REGION_MAP(type, inst) \ BIT((type) + INTEL_MEMORY_TYPE_SHIFT) | BIT(inst) -const u32 intel_region_map[] = { +static const u32 intel_region_map[] = { [INTEL_REGION_SMEM] = REGION_MAP(INTEL_MEMORY_SYSTEM, 0), [INTEL_REGION_LMEM] = REGION_MAP(INTEL_MEMORY_LOCAL, 0), [INTEL_REGION_STOLEN] = REGION_MAP(INTEL_MEMORY_STOLEN, 0), diff --git a/drivers/gpu/drm/i915/intel_memory_region.h b/drivers/gpu/drm/i915/intel_memory_region.h index 232490d89a83..6590d55df6cb 100644 --- a/drivers/gpu/drm/i915/intel_memory_region.h +++ b/drivers/gpu/drm/i915/intel_memory_region.h @@ -51,11 +51,6 @@ enum intel_region_id { for (id = 0; id < ARRAY_SIZE((i915)->mm.regions); id++) \ for_each_if((mr) = (i915)->mm.regions[id]) -/** - * Memory regions encoded as type | instance - */ -extern const u32 intel_region_map[]; - struct intel_memory_region_ops { unsigned int flags; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index f88473d396f4..3512bb8433cf 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -442,28 +442,22 @@ static int igt_evict_contexts(void *arg) /* Overfill the GGTT with context objects and so try to evict one. 
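The eviction selftest in this hunk now builds its requests from a bare intel_context instead of a full GEM context plus mock file. The behaviour it exercises — keep allocating until the GGTT is exhausted and treat -EBUSY as the expected outcome — looks roughly like the sketch below, where alloc_one_slot() and the budget are hypothetical stand-ins for creating a context-backed request:

#include <linux/errno.h>

static int budget = 8;	/* hypothetical number of GGTT slots */

static int alloc_one_slot(void)
{
	return budget-- > 0 ? 0 : -EBUSY;	/* placeholder for request creation */
}

static int fill_until_busy(void)
{
	int err;

	do {
		err = alloc_one_slot();		/* consumes a slice of GGTT */
	} while (!err);

	return err == -EBUSY ? 0 : err;		/* only -EBUSY counts as a pass */
}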
*/ for_each_engine(engine, gt, id) { struct i915_sw_fence fence; - struct file *file; - - file = mock_file(i915); - if (IS_ERR(file)) { - err = PTR_ERR(file); - break; - } count = 0; onstack_fence_init(&fence); do { + struct intel_context *ce; struct i915_request *rq; - struct i915_gem_context *ctx; - ctx = live_context(i915, file); - if (IS_ERR(ctx)) + ce = intel_context_create(engine); + if (IS_ERR(ce)) break; /* We will need some GGTT space for the rq's context */ igt_evict_ctl.fail_if_busy = true; - rq = igt_request_alloc(ctx, engine); + rq = intel_context_create_request(ce); igt_evict_ctl.fail_if_busy = false; + intel_context_put(ce); if (IS_ERR(rq)) { /* When full, fail_if_busy will trigger EBUSY */ @@ -490,8 +484,6 @@ static int igt_evict_contexts(void *arg) onstack_fence_fini(&fence); pr_info("Submitted %lu contexts/requests on %s\n", count, engine->name); - - fput(file); if (err) break; } diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index c53a222e3dec..70e07e9b78c2 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -28,6 +28,7 @@ #include "gem/i915_gem_context.h" #include "gem/selftests/mock_context.h" #include "gt/intel_context.h" +#include "gt/intel_gpu_commands.h" #include "i915_random.h" #include "i915_selftest.h" diff --git a/drivers/gpu/drm/i915/selftests/i915_perf.c b/drivers/gpu/drm/i915/selftests/i915_perf.c index debbac660519..e9d86dab8677 100644 --- a/drivers/gpu/drm/i915/selftests/i915_perf.c +++ b/drivers/gpu/drm/i915/selftests/i915_perf.c @@ -262,7 +262,7 @@ static int live_noa_delay(void *arg) delay = intel_read_status_page(stream->engine, 0x102); delay -= intel_read_status_page(stream->engine, 0x100); - delay = i915_cs_timestamp_ticks_to_ns(i915, delay); + delay = intel_gt_clock_interval_to_ns(stream->engine->gt, delay); pr_info("GPU delay: %uns, expected %lluns\n", delay, expected); diff --git a/drivers/gpu/drm/i915/selftests/i915_request.c b/drivers/gpu/drm/i915/selftests/i915_request.c index e424a6d1a68c..d2a678a2497e 100644 --- a/drivers/gpu/drm/i915/selftests/i915_request.c +++ b/drivers/gpu/drm/i915/selftests/i915_request.c @@ -33,6 +33,7 @@ #include "gt/intel_engine_pm.h" #include "gt/intel_engine_user.h" #include "gt/intel_gt.h" +#include "gt/intel_gt_clock_utils.h" #include "gt/intel_gt_requests.h" #include "gt/selftest_engine_heartbeat.h" @@ -1560,7 +1561,7 @@ static u32 trifilter(u32 *a) static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles) { - u64 ns = i915_cs_timestamp_ticks_to_ns(engine->i915, cycles); + u64 ns = intel_gt_clock_interval_to_ns(engine->gt, cycles); return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS); } @@ -1932,9 +1933,7 @@ static int measure_inter_request(struct intel_context *ce) intel_ring_advance(rq, cs); i915_request_add(rq); } - local_bh_disable(); i915_sw_fence_commit(submit); - local_bh_enable(); intel_engine_flush_submission(ce->engine); heap_fence_put(submit); @@ -2220,11 +2219,9 @@ static int measure_completion(struct intel_context *ce) intel_ring_advance(rq, cs); dma_fence_add_callback(&rq->fence, &cb.base, signal_cb); - - local_bh_disable(); i915_request_add(rq); - local_bh_enable(); + intel_engine_flush_submission(ce->engine); if (wait_for(READ_ONCE(sema[i]) == -1, 50)) { err = -EIO; goto err; diff --git a/drivers/gpu/drm/i915/selftests/igt_spinner.c b/drivers/gpu/drm/i915/selftests/igt_spinner.c index ec0ecb4e4ca6..83f6e5f31fb3 100644 --- a/drivers/gpu/drm/i915/selftests/igt_spinner.c +++ 
b/drivers/gpu/drm/i915/selftests/igt_spinner.c @@ -3,6 +3,7 @@ * * Copyright © 2018 Intel Corporation */ +#include "gt/intel_gpu_commands.h" #include "gt/intel_gt.h" #include "gem/selftests/igt_gem_utils.h" @@ -219,6 +220,9 @@ void igt_spinner_fini(struct igt_spinner *spin) bool igt_wait_for_spinner(struct igt_spinner *spin, struct i915_request *rq) { + if (i915_request_is_ready(rq)) + intel_engine_flush_submission(rq->engine); + return !(wait_for_us(i915_seqno_passed(hws_seqno(spin, rq), rq->fence.seqno), 100) && diff --git a/drivers/gpu/drm/i915/selftests/intel_memory_region.c b/drivers/gpu/drm/i915/selftests/intel_memory_region.c index 0aeba8e3af28..ce7adfa3bca0 100644 --- a/drivers/gpu/drm/i915/selftests/intel_memory_region.c +++ b/drivers/gpu/drm/i915/selftests/intel_memory_region.c @@ -129,6 +129,21 @@ static void igt_object_release(struct drm_i915_gem_object *obj) i915_gem_object_put(obj); } +static bool is_contiguous(struct drm_i915_gem_object *obj) +{ + struct scatterlist *sg; + dma_addr_t addr = -1; + + for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + if (addr != -1 && sg_dma_address(sg) != addr) + return false; + + addr = sg_dma_address(sg) + sg_dma_len(sg); + } + + return true; +} + static int igt_mock_contiguous(void *arg) { struct intel_memory_region *mem = arg; @@ -150,8 +165,8 @@ static int igt_mock_contiguous(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - if (obj->mm.pages->nents != 1) { - pr_err("%s min object spans multiple sg entries\n", __func__); + if (!is_contiguous(obj)) { + pr_err("%s min object spans disjoint sg entries\n", __func__); err = -EINVAL; goto err_close_objects; } @@ -163,8 +178,8 @@ static int igt_mock_contiguous(void *arg) if (IS_ERR(obj)) return PTR_ERR(obj); - if (obj->mm.pages->nents != 1) { - pr_err("%s max object spans multiple sg entries\n", __func__); + if (!is_contiguous(obj)) { + pr_err("%s max object spans disjoint sg entries\n", __func__); err = -EINVAL; goto err_close_objects; } @@ -189,8 +204,8 @@ static int igt_mock_contiguous(void *arg) goto err_close_objects; } - if (obj->mm.pages->nents != 1) { - pr_err("%s object spans multiple sg entries\n", __func__); + if (!is_contiguous(obj)) { + pr_err("%s object spans disjoint sg entries\n", __func__); err = -EINVAL; goto err_close_objects; } @@ -337,6 +352,68 @@ out_put: return err; } +#ifndef SZ_8G +#define SZ_8G BIT_ULL(33) +#endif + +static int igt_mock_max_segment(void *arg) +{ + const unsigned int max_segment = i915_sg_segment_size(); + struct intel_memory_region *mem = arg; + struct drm_i915_private *i915 = mem->i915; + struct drm_i915_gem_object *obj; + struct i915_buddy_block *block; + struct scatterlist *sg; + LIST_HEAD(objects); + u64 size; + int err = 0; + + /* + * While we may create very large contiguous blocks, we may need + * to break those down for consumption elsewhere. In particular, + * dma-mapping with scatterlist elements have an implicit limit of + * UINT_MAX on each element. 
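The igt_mock_max_segment test added in this hunk checks that even a very large contiguous block is emitted as scatterlist entries no bigger than i915_sg_segment_size(), since each sg element is effectively capped near UINT_MAX for DMA mapping. Splitting a span into capped chunks is just repeated min(); a standalone sketch:

#include <linux/minmax.h>
#include <linux/types.h>

static unsigned int count_segments(u64 span, unsigned int max_segment)
{
	unsigned int nents = 0;

	while (span) {
		u64 len = min_t(u64, span, max_segment);	/* cap each sg entry */

		span -= len;
		nents++;
	}

	return nents;
}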
+ */ + + size = SZ_8G; + mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0); + if (IS_ERR(mem)) + return PTR_ERR(mem); + + obj = igt_object_create(mem, &objects, size, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_put; + } + + size = 0; + list_for_each_entry(block, &obj->mm.blocks, link) { + if (i915_buddy_block_size(&mem->mm, block) > size) + size = i915_buddy_block_size(&mem->mm, block); + } + if (size < max_segment) { + pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n", + __func__, max_segment, size); + err = -EINVAL; + goto out_close; + } + + for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) { + if (sg->length > max_segment) { + pr_err("%s: Created an oversized scatterlist entry, %u > %u\n", + __func__, sg->length, max_segment); + err = -EINVAL; + goto out_close; + } + } + +out_close: + close_objects(mem, &objects); +out_put: + intel_memory_region_put(mem); + return err; +} + static int igt_gpu_write_dw(struct intel_context *ce, struct i915_vma *vma, u32 dword, @@ -775,14 +852,22 @@ static int _perf_memcpy(struct intel_memory_region *src_mr, } sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); + if (t[0] <= 0) { + /* ignore the impossible to protect our sanity */ + pr_debug("Skipping %s src(%s, %s) -> dst(%s, %s) %14s %4lluKiB copy, unstable measurement [%lld, %lld]\n", + __func__, + src_mr->name, repr_type(src_type), + dst_mr->name, repr_type(dst_type), + tests[i].name, size >> 10, + t[0], t[4]); + continue; + } + pr_info("%s src(%s, %s) -> dst(%s, %s) %14s %4llu KiB copy: %5lld MiB/s\n", __func__, - src_mr->name, - repr_type(src_type), - dst_mr->name, - repr_type(dst_type), - tests[i].name, - size >> 10, + src_mr->name, repr_type(src_type), + dst_mr->name, repr_type(dst_type), + tests[i].name, size >> 10, div64_u64(mul_u32_u32(4 * size, 1000 * 1000 * 1000), t[1] + 2 * t[2] + t[3]) >> 20); @@ -848,6 +933,7 @@ int intel_memory_region_mock_selftests(void) SUBTEST(igt_mock_fill), SUBTEST(igt_mock_contiguous), SUBTEST(igt_mock_splintered_region), + SUBTEST(igt_mock_max_segment), }; struct intel_memory_region *mem; struct drm_i915_private *i915; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index e946bd2087d8..0188f877cab2 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -64,8 +64,6 @@ static void mock_device_release(struct drm_device *dev) mock_device_flush(i915); intel_gt_driver_remove(&i915->gt); - i915_gem_driver_release__contexts(i915); - i915_gem_drain_workqueue(i915); i915_gem_drain_freed_objects(i915); diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c index dc68e538d5a9..34b7c6f16479 100644 --- a/drivers/gpu/drm/radeon/r600_cs.c +++ b/drivers/gpu/drm/radeon/r600_cs.c @@ -220,7 +220,7 @@ int r600_fmt_get_nblocksx(u32 format, u32 w) if (bw == 0) return 0; - return (w + bw - 1) / bw; + return DIV_ROUND_UP(w, bw); } int r600_fmt_get_nblocksy(u32 format, u32 h) @@ -234,7 +234,7 @@ int r600_fmt_get_nblocksy(u32 format, u32 h) if (bh == 0) return 0; - return (h + bh - 1) / bh; + return DIV_ROUND_UP(h, bh); } struct array_mode_checker { diff --git a/drivers/gpu/drm/radeon/radeon_uvd.c b/drivers/gpu/drm/radeon/radeon_uvd.c index 39c1c339be7b..dfa9fdbe98da 100644 --- a/drivers/gpu/drm/radeon/radeon_uvd.c +++ b/drivers/gpu/drm/radeon/radeon_uvd.c @@ -781,7 +781,7 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, uint64_t offs = 
radeon_bo_size(rdev->uvd.vcpu_bo) - RADEON_GPU_PAGE_SIZE; - uint32_t *msg = rdev->uvd.cpu_addr + offs; + uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs); uint64_t addr = rdev->uvd.gpu_addr + offs; int r, i; @@ -791,19 +791,19 @@ int radeon_uvd_get_create_msg(struct radeon_device *rdev, int ring, return r; /* stitch together an UVD create msg */ - msg[0] = cpu_to_le32(0x00000de4); - msg[1] = cpu_to_le32(0x00000000); - msg[2] = cpu_to_le32(handle); - msg[3] = cpu_to_le32(0x00000000); - msg[4] = cpu_to_le32(0x00000000); - msg[5] = cpu_to_le32(0x00000000); - msg[6] = cpu_to_le32(0x00000000); - msg[7] = cpu_to_le32(0x00000780); - msg[8] = cpu_to_le32(0x00000440); - msg[9] = cpu_to_le32(0x00000000); - msg[10] = cpu_to_le32(0x01b37000); + writel(cpu_to_le32(0x00000de4), &msg[0]); + writel(0x0, (void __iomem *)&msg[1]); + writel(cpu_to_le32(handle), &msg[2]); + writel(0x0, &msg[3]); + writel(0x0, &msg[4]); + writel(0x0, &msg[5]); + writel(0x0, &msg[6]); + writel(cpu_to_le32(0x00000780), &msg[7]); + writel(cpu_to_le32(0x00000440), &msg[8]); + writel(0x0, &msg[9]); + writel(cpu_to_le32(0x01b37000), &msg[10]); for (i = 11; i < 1024; ++i) - msg[i] = cpu_to_le32(0x0); + writel(0x0, &msg[i]); r = radeon_uvd_send_msg(rdev, ring, addr, fence); radeon_bo_unreserve(rdev->uvd.vcpu_bo); @@ -817,7 +817,7 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, uint64_t offs = radeon_bo_size(rdev->uvd.vcpu_bo) - RADEON_GPU_PAGE_SIZE; - uint32_t *msg = rdev->uvd.cpu_addr + offs; + uint32_t __iomem *msg = (void __iomem *)(rdev->uvd.cpu_addr + offs); uint64_t addr = rdev->uvd.gpu_addr + offs; int r, i; @@ -827,12 +827,12 @@ int radeon_uvd_get_destroy_msg(struct radeon_device *rdev, int ring, return r; /* stitch together an UVD destroy msg */ - msg[0] = cpu_to_le32(0x00000de4); - msg[1] = cpu_to_le32(0x00000002); - msg[2] = cpu_to_le32(handle); - msg[3] = cpu_to_le32(0x00000000); + writel(cpu_to_le32(0x00000de4), &msg[0]); + writel(cpu_to_le32(0x00000002), &msg[1]); + writel(cpu_to_le32(handle), &msg[2]); + writel(0x0, &msg[3]); for (i = 4; i < 1024; ++i) - msg[i] = cpu_to_le32(0x0); + writel(0x0, &msg[i]); r = radeon_uvd_send_msg(rdev, ring, addr, fence); radeon_bo_unreserve(rdev->uvd.vcpu_bo); |
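The radeon_uvd change above stops storing the UVD message words through a plain CPU pointer and goes through writel() on an __iomem mapping instead, which is the safer accessor when the vcpu buffer is backed by VRAM and also keeps sparse happy about the address space. The shape of that pattern, with a hypothetical message buffer and dword count:

#include <linux/io.h>
#include <linux/types.h>

static void fill_uvd_msg_sketch(void __iomem *msg, unsigned int ndw, u32 header)
{
	unsigned int i;

	writel(header, msg);				/* first dword: message header */
	for (i = 1; i < ndw; i++)
		writel(0, (u32 __iomem *)msg + i);	/* zero the remainder of the page */
}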