diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-15 10:46:16 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-15 10:46:16 -0700 |
commit | 93b694d096cc10994c817730d4d50288f9ae3d66 (patch) | |
tree | 5bd967686d0003f7dbbe1da49f5399cb4a92f074 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | |
parent | 726eb70e0d34dc4bc4dada71f52bba8ed638431e (diff) | |
parent | 640eee067d9aae0bb98d8706001976ff1affaf00 (diff) |
Merge tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"Not a major amount of change, the i915 trees got split into display
and gt trees to better facilitate higher level review, and there's a
major refactoring of i915 GEM locking to use more core kernel concepts
(like ww-mutexes). msm gets per-process pagetables, older AMD SI cards
get DC support, nouveau got a bump in displayport support with common
code extraction from i915.
Outside of drm this contains a couple of patches for hexint
moduleparams which you've acked, and a virtio common code tree that
you should also get via it's regular path.
New driver:
- Cadence MHDP8546 DisplayPort bridge driver
core:
- cross-driver scatterlist cleanups
- devm_drm conversions
- remove drm_dev_init
- devm_drm_dev_alloc conversion
ttm:
- lots of refactoring and cleanups
bridges:
- chained bridge support in more drivers
panel:
- misc new panels
scheduler:
- cleanup priority levels
displayport:
- refactor i915 code into helpers for nouveau
i915:
- split into display and GT trees
- WW locking refactoring in GEM
- execbuf2 extension mechanism
- syncobj timeline support
- GEN 12 HOBL display powersaving
- Rocket Lake display additions
- Disable FBC on Tigerlake
- Tigerlake Type-C + DP improvements
- Hotplug interrupt refactoring
amdgpu:
- Sienna Cichlid updates
- Navy Flounder updates
- DCE6 (SI) support for DC
- Plane rotation enabled
- TMZ state info ioctl
- PCIe DPC recovery support
- DC interrupt handling refactor
- OLED panel fixes
amdkfd:
- add SMI events for thermal throttling
- SMI interface events ioctl update
- process eviction counters
radeon:
- move to dma_ for allocations
- expose sclk via sysfs
msm:
- DSI support for sm8150/sm8250
- per-process GPU pagetable support
- Displayport support
mediatek:
- move HDMI phy driver to PHY
- convert mtk-dpi to bridge API
- disable mt2701 tmds
tegra:
- bridge support
exynos:
- misc cleanups
vc4:
- dual display cleanups
ast:
- cleanups
gma500:
- conversion to GPIOd API
hisilicon:
- misc reworks
ingenic:
- clock handling and format improvements
mcde:
- DSI support
mgag200:
- desktop g200 support
mxsfb:
- i.MX7 + i.MX8M
- alpha plane support
panfrost:
- devfreq support
- amlogic SoC support
ps8640:
- EDID from eDP retrieval
tidss:
- AM65xx YUV workaround
virtio:
- virtio-gpu exported resources
rcar-du:
- R8A7742, R8A774E1 and R8A77961 support
- YUV planar format fixes
- non-visible plane handling
- VSP device reference count fix
- Kconfig fix to avoid displaying disabled options in .config"
* tag 'drm-next-2020-10-15' of git://anongit.freedesktop.org/drm/drm: (1494 commits)
drm/ingenic: Fix bad revert
drm/amdgpu: Fix invalid number of character '{' in amdgpu_acpi_init
drm/amdgpu: Remove warning for virtual_display
drm/amdgpu: kfd_initialized can be static
drm/amd/pm: setup APU dpm clock table in SMU HW initialization
drm/amdgpu: prevent spurious warning
drm/amdgpu/swsmu: fix ARC build errors
drm/amd/display: Fix OPTC_DATA_FORMAT programming
drm/amd/display: Don't allow pstate if no support in blank
drm/panfrost: increase readl_relaxed_poll_timeout values
MAINTAINERS: Update entry for st7703 driver after the rename
Revert "gpu/drm: ingenic: Add option to mmap GEM buffers cached"
drm/amd/display: HDMI remote sink need mode validation for Linux
drm/amd/display: Change to correct unit on audio rate
drm/amd/display: Avoid set zero in the requested clk
drm/amdgpu: align frag_end to covered address space
drm/amdgpu: fix NULL pointer dereference for Renoir
drm/vmwgfx: fix regression in thp code due to ttm init refactor.
drm/amdgpu/swsmu: add interrupt work handler for smu11 parts
drm/amdgpu/swsmu: add interrupt work function
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 188 |
1 files changed, 179 insertions, 9 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 1abfe63c80fe..43b18863a8b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -36,9 +36,7 @@ #include "v9_structs.h" #include "soc15.h" #include "soc15d.h" -#include "mmhub_v1_0.h" -#include "gfxhub_v1_0.h" - +#include "gfx_v9_0.h" enum hqd_dequeue_request_type { NO_ACTION = 0, @@ -552,7 +550,7 @@ int kgd_gfx_v9_hqd_destroy(struct kgd_dev *kgd, void *mqd, uint32_t temp; struct v9_mqd *m = get_mqd(mqd); - if (adev->in_gpu_reset) + if (amdgpu_in_reset(adev)) return -EIO; acquire_queue(kgd, pipe_id, queue_id); @@ -690,7 +688,7 @@ uint32_t kgd_gfx_v9_address_watch_get_offset(struct kgd_dev *kgd, return 0; } -static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, +void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -701,9 +699,182 @@ static void kgd_gfx_v9_set_vm_context_page_table_base(struct kgd_dev *kgd, return; } - mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); + adev->mmhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); + + adev->gfxhub.funcs->setup_vm_pt_regs(adev, vmid, page_table_base); +} + +static void lock_spi_csq_mutexes(struct amdgpu_device *adev) +{ + mutex_lock(&adev->srbm_mutex); + mutex_lock(&adev->grbm_idx_mutex); + +} + +static void unlock_spi_csq_mutexes(struct amdgpu_device *adev) +{ + mutex_unlock(&adev->grbm_idx_mutex); + mutex_unlock(&adev->srbm_mutex); +} + +/** + * @get_wave_count: Read device registers to get number of waves in flight for + * a particular queue. The method also returns the VMID associated with the + * queue. + * + * @adev: Handle of device whose registers are to be read + * @queue_idx: Index of queue in the queue-map bit-field + * @wave_cnt: Output parameter updated with number of waves in flight + * @vmid: Output parameter updated with VMID of queue whose wave count + * is being collected + */ +static void get_wave_count(struct amdgpu_device *adev, int queue_idx, + int *wave_cnt, int *vmid) +{ + int pipe_idx; + int queue_slot; + unsigned int reg_val; + + /* + * Program GRBM with appropriate MEID, PIPEID, QUEUEID and VMID + * parameters to read out waves in flight. Get VMID if there are + * non-zero waves in flight. + */ + *vmid = 0xFF; + *wave_cnt = 0; + pipe_idx = queue_idx / adev->gfx.mec.num_queue_per_pipe; + queue_slot = queue_idx % adev->gfx.mec.num_queue_per_pipe; + soc15_grbm_select(adev, 1, pipe_idx, queue_slot, 0); + reg_val = RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CSQ_WF_ACTIVE_COUNT_0) + + queue_slot); + *wave_cnt = reg_val & SPI_CSQ_WF_ACTIVE_COUNT_0__COUNT_MASK; + if (*wave_cnt != 0) + *vmid = (RREG32_SOC15(GC, 0, mmCP_HQD_VMID) & + CP_HQD_VMID__VMID_MASK) >> CP_HQD_VMID__VMID__SHIFT; +} + +/** + * @kgd_gfx_v9_get_cu_occupancy: Reads relevant registers associated with each + * shader engine and aggregates the number of waves that are in flight for the + * process whose pasid is provided as a parameter. The process could have ZERO + * or more queues running and submitting waves to compute units. + * + * @kgd: Handle of device from which to get number of waves in flight + * @pasid: Identifies the process for which this query call is invoked + * @wave_cnt: Output parameter updated with number of waves in flight that + * belong to process with given pasid + * @max_waves_per_cu: Output parameter updated with maximum number of waves + * possible per Compute Unit + * + * @note: It's possible that the device has too many queues (oversubscription) + * in which case a VMID could be remapped to a different PASID. This could lead + * to an iaccurate wave count. Following is a high-level sequence: + * Time T1: vmid = getVmid(); vmid is associated with Pasid P1 + * Time T2: passId = getPasId(vmid); vmid is associated with Pasid P2 + * In the sequence above wave count obtained from time T1 will be incorrectly + * lost or added to total wave count. + * + * The registers that provide the waves in flight are: + * + * SPI_CSQ_WF_ACTIVE_STATUS - bit-map of queues per pipe. The bit is ON if a + * queue is slotted, OFF if there is no queue. A process could have ZERO or + * more queues slotted and submitting waves to be run on compute units. Even + * when there is a queue it is possible there could be zero wave fronts, this + * can happen when queue is waiting on top-of-pipe events - e.g. waitRegMem + * command + * + * For each bit that is ON from above: + * + * Read (SPI_CSQ_WF_ACTIVE_COUNT_0 + queue_idx) register. It provides the + * number of waves that are in flight for the queue at specified index. The + * index ranges from 0 to 7. + * + * If non-zero waves are in flight, read CP_HQD_VMID register to obtain VMID + * of the wave(s). + * + * Determine if VMID from above step maps to pasid provided as parameter. If + * it matches agrregate the wave count. That the VMID will not match pasid is + * a normal condition i.e. a device is expected to support multiple queues + * from multiple proceses. + * + * Reading registers referenced above involves programming GRBM appropriately + */ +static void kgd_gfx_v9_get_cu_occupancy(struct kgd_dev *kgd, int pasid, + int *pasid_wave_cnt, int *max_waves_per_cu) +{ + int qidx; + int vmid; + int se_idx; + int sh_idx; + int se_cnt; + int sh_cnt; + int wave_cnt; + int queue_map; + int pasid_tmp; + int max_queue_cnt; + int vmid_wave_cnt = 0; + struct amdgpu_device *adev; + DECLARE_BITMAP(cp_queue_bitmap, KGD_MAX_QUEUES); + + adev = get_amdgpu_device(kgd); + lock_spi_csq_mutexes(adev); + soc15_grbm_select(adev, 1, 0, 0, 0); + + /* + * Iterate through the shader engines and arrays of the device + * to get number of waves in flight + */ + bitmap_complement(cp_queue_bitmap, adev->gfx.mec.queue_bitmap, + KGD_MAX_QUEUES); + max_queue_cnt = adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + sh_cnt = adev->gfx.config.max_sh_per_se; + se_cnt = adev->gfx.config.max_shader_engines; + for (se_idx = 0; se_idx < se_cnt; se_idx++) { + for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) { + + gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff); + queue_map = RREG32(SOC15_REG_OFFSET(GC, 0, + mmSPI_CSQ_WF_ACTIVE_STATUS)); + + /* + * Assumption: queue map encodes following schema: four + * pipes per each micro-engine, with each pipe mapping + * eight queues. This schema is true for GFX9 devices + * and must be verified for newer device families + */ + for (qidx = 0; qidx < max_queue_cnt; qidx++) { + + /* Skip qeueus that are not associated with + * compute functions + */ + if (!test_bit(qidx, cp_queue_bitmap)) + continue; + + if (!(queue_map & (1 << qidx))) + continue; + + /* Get number of waves in flight and aggregate them */ + get_wave_count(adev, qidx, &wave_cnt, &vmid); + if (wave_cnt != 0) { + pasid_tmp = + RREG32(SOC15_REG_OFFSET(OSSSYS, 0, + mmIH_VMID_0_LUT) + vmid); + if (pasid_tmp == pasid) + vmid_wave_cnt += wave_cnt; + } + } + } + } + + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + soc15_grbm_select(adev, 0, 0, 0, 0); + unlock_spi_csq_mutexes(adev); - gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); + /* Update the output parameters and return */ + *pasid_wave_cnt = vmid_wave_cnt; + *max_waves_per_cu = adev->gfx.cu_info.simd_per_cu * + adev->gfx.cu_info.max_waves_per_simd; } const struct kfd2kgd_calls gfx_v9_kfd2kgd = { @@ -726,6 +897,5 @@ const struct kfd2kgd_calls gfx_v9_kfd2kgd = { .get_atc_vmid_pasid_mapping_info = kgd_gfx_v9_get_atc_vmid_pasid_mapping_info, .set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base, - .get_hive_id = amdgpu_amdkfd_get_hive_id, - .get_unique_id = amdgpu_amdkfd_get_unique_id, + .get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy, }; |