diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-21 14:44:44 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-21 14:44:44 -0800 |
commit | d99676af540c2dc829999928fb81c58c80a1dce4 (patch) | |
tree | a78602eb6fa5d46d867c00ee187179ced6c18766 /drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | |
parent | 10e2ec8edece2566b40f69bae035a555ece71ab4 (diff) | |
parent | f730f39eb981af249d57336b47cfe3925632a7fd (diff) |
Merge tag 'drm-next-2021-02-19' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
"A pretty normal tree, lots of refactoring across the board, ttm, i915,
nouveau, and bunch of features in various drivers.
docs:
- lots of updated docs
core:
- require crtc to have unique primary plane
- fourcc macro fix
- PCI bar quirk for bar resizing
- don't sent hotplug on error
- move vm code to legacy
- nuke hose only used on old oboslete alpha
dma-buf:
- kernel doc updates
- improved lock tracking
dp/hdmi:
- DP-HDMI2.1 protocol converter support
ttm:
- bo size handling cleanup
- release a pinned bo warning
- cleanup lru handler
- avoid using pages with drm_prime_sg_to_page_addr_arrays
cma-helper:
- prime/mmap fixes
bridge:
- add DP support
gma500:
- remove gma3600 support
i915:
- try eDP fast/narrow link again with fallback
- Intel eDP backlight control
- replace display register read/write macros
- refactor intel_display.c
- display power improvements
- HPD code cleanup
- Rocketlake display fixes
- Power/backlight/RPM fixes
- DG1 display fix
- IVB/BYT clear residuals security fix again
- make i915 mitigations options via parameter
- HSW GT1 GPU hangs fixes
- DG1 workaround hang fixes
- TGL DMAR hang avoidance
- Lots of GT fixes
- follow on fixes for residuals clear
- gen7 per-engine-reset support
- HDCP2.2 + HDCP1.4 GEN12 DP MST support
- TGL clear color support
- backlight refactoring
- VRR/Adaptive sync enabling on DP/EDP for TGL+
- async flips for all ilk+
amdgpu:
- rework IH ring handling (Vega/Navi)
- rework HDP handling (Vega/Navi)
- swSMU updates for renoir/vangogh
- Sienna Cichild overdrive support
- FP16 on DCE8-11 support
- GPU reset on navy flounder/vangogh
- SMU profile fixes for APU
- SR-IOV fixes
- Vangogh SMU fixes
- fan speed control fixes
amdkfd:
- config handling fix
- buffer free fix
- recursive lock warnings fix
nouveau:
- Turing MMU fault recovery fixes
- mDP connectors reporting fix
- audio locking fixes
- rework engines/instances code to support new scheme
tegra:
- VIC newer firmware support
- display/gr2d fixes for older tegra
- pm reference leak fix
mediatek:
- SOC MT8183 support
- decouple sub driver + share mtk mutex driver
radeon:
- PCI resource fix for some platforms
ingenic:
- pm support
- 8-bit delta RGB panels
vmwgfx:
- managed driver helpers
vc4:
- BCM2711 DSI1 support
- converted to atomic helpers
- enable 10/12 bpc outputs
- gem prime mmap helpers
- CEC fix
omap:
- use degamma table
- CTM support
- rework DSI support
imx:
- stack usage fixes
- drm managed support
- imx-tve clock provider leak fix
-
rcar-du:
- default mode fixes
- conversion to managed API
hisilicon:
- use simple encoder
vkms:
- writeback connector support
d3:
- BT2020 support"
* tag 'drm-next-2021-02-19' of git://anongit.freedesktop.org/drm/drm: (1459 commits)
drm/amdgpu: Set reference clock to 100Mhz on Renoir (v2)
drm/radeon: OLAND boards don't have VCE
drm/amdkfd: Fix recursive lock warnings
drm/amd/display: Add FPU wrappers to dcn21_validate_bandwidth()
drm/amd/display: Fix potential integer overflow
drm/amdgpu/display: remove hdcp_srm sysfs on device removal
drm/amdgpu: fix CGTS_TCC_DISABLE register offset on gfx10.3
drm/i915/gt: Correct surface base address for renderclear
drm/i915: Disallow plane x+w>stride on ilk+ with X-tiling
drm/nouveau/top/ga100: initial support
drm/nouveau/top: add ioctrl/nvjpg
drm/nouveau/privring: rename from ibus
drm/nouveau/nvkm: remove nvkm_subdev.index
drm/nouveau/nvkm: determine subdev id/order from layout
drm/nouveau/vic: switch to instanced constructor
drm/nouveau/sw: switch to instanced constructor
drm/nouveau/sec2: switch to instanced constructor
drm/nouveau/sec: switch to instanced constructor
drm/nouveau/pm: switch to instanced constructor
drm/nouveau/nvenc: switch to instanced constructor
...
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 72 |
1 files changed, 67 insertions, 5 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 37639214cbbb..84d2eaa38101 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -29,6 +29,7 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" +#include "amdgpu_ring.h" #include "vi.h" #include "vi_structs.h" #include "vid.h" @@ -1923,8 +1924,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, + ((ring->me - 1) * adev->gfx.mec.num_pipe_per_mec) + ring->pipe; - hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, - ring->queue) ? + hw_prio = amdgpu_gfx_is_high_priority_compute_queue(adev, ring) ? AMDGPU_GFX_PIPE_PRIO_HIGH : AMDGPU_RING_PRIO_DEFAULT; /* type-2 packets are deprecated on MEC, use type-3 instead */ r = amdgpu_ring_init(adev, ring, 1024, @@ -4442,8 +4442,7 @@ static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring, struct vi_mqd *m struct amdgpu_device *adev = ring->adev; if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) { - if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring->pipe, - ring->queue)) { + if (amdgpu_gfx_is_high_priority_compute_queue(adev, ring)) { mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH; mqd->cp_hqd_queue_priority = AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM; @@ -6847,6 +6846,66 @@ static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0x0000000A); /* poll interval */ } + +/* mmSPI_WCL_PIPE_PERCENT_CS[0-7]_DEFAULT values are same */ +#define mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT 0x0000007f +static void gfx_v8_0_emit_wave_limit_cs(struct amdgpu_ring *ring, + uint32_t pipe, bool enable) +{ + uint32_t val; + uint32_t wcl_cs_reg; + + val = enable ? 0x1 : mmSPI_WCL_PIPE_PERCENT_CS_DEFAULT; + + switch (pipe) { + case 0: + wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS0; + break; + case 1: + wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS1; + break; + case 2: + wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS2; + break; + case 3: + wcl_cs_reg = mmSPI_WCL_PIPE_PERCENT_CS3; + break; + default: + DRM_DEBUG("invalid pipe %d\n", pipe); + return; + } + + amdgpu_ring_emit_wreg(ring, wcl_cs_reg, val); + +} + +#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT 0x07ffffff +static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t val; + int i; + + /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit + * number of gfx waves. Setting 5 bit will make sure gfx only gets + * around 25% of gpu resources. + */ + val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT; + amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val); + + /* Restrict waves for normal/low priority compute queues as well + * to get best QoS for high priority compute jobs. + * + * amdgpu controls only 1st ME(0-3 CS pipes). + */ + for (i = 0; i < adev->gfx.mec.num_pipe_per_mec; i++) { + if (i != ring->pipe) + gfx_v8_0_emit_wave_limit_cs(ring, i, enable); + + } + +} + static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -6930,7 +6989,9 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { 7 + /* gfx_v8_0_ring_emit_pipeline_sync */ VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */ - 7, /* gfx_v8_0_emit_mem_sync_compute */ + 7 + /* gfx_v8_0_emit_mem_sync_compute */ + 5 + /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */ + 15, /* for updating 3 mmSPI_WCL_PIPE_PERCENT_CS registers */ .emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */ .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_compute, @@ -6944,6 +7005,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_wreg = gfx_v8_0_ring_emit_wreg, .emit_mem_sync = gfx_v8_0_emit_mem_sync_compute, + .emit_wave_limit = gfx_v8_0_emit_wave_limit, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { |