diff options
author | Dave Airlie <airlied@redhat.com> | 2018-09-21 09:52:34 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2018-09-21 09:52:53 +1000 |
commit | 36c9c3c91128e2b892c9be0dd9ee9bd82cbe82ad (patch) | |
tree | 687db2e37b7fdcb4bd756a078812d049da18c804 /include | |
parent | 0320ac5188eab5c6e8b92b110d1eae967ac272d2 (diff) | |
parent | 846311ae68f3c78365ebf3dff505c99e7da861cf (diff) |
Merge branch 'drm-next-4.20' of git://people.freedesktop.org/~agd5f/linux into drm-next
This is a new pull for drm-next on top of last weeks with the following
changes:
- Fixed 64 bit divide
- Fixed vram type on vega20
- Misc vega20 fixes
- Misc DC fixes
- Fix GDS/GWS/OA domain handling
Previous changes from last week:
amdgpu/kfd:
- Picasso (new APU) support
- Raven2 (new APU) support
- Vega20 enablement
- ACP powergating improvements
- Add ABGR/XBGR display support
- VCN JPEG engine support
- Initial xGMI support
- Use load balancing for engine scheduling
- Lots of new documentation
- Rework and clean up i2c and aux handling in DC
- Add DP YCbCr 4:2:0 support in DC
- Add DMCU firmware loading for Raven (used for ABM and PSR)
- New debugfs features in DC
- LVDS support in DC
- Implement wave kill for gfx/compute (light weight reset for shaders)
- Use AGP aperture to avoid gart mappings when possible
- GPUVM performance improvements
- Bulk moves for more efficient GPUVM LRU handling
- Merge amdgpu and amdkfd into one module
- Enable gfxoff and stutter mode on Raven
- Misc cleanups
Scheduler:
- Load balancing support
- Bug fixes
ttm:
- Bulk move functionality
- Bug fixes
radeon:
- Misc cleanups
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexdeucher@gmail.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20180920150438.12693-1-alexander.deucher@amd.com
Diffstat (limited to 'include')
-rw-r--r-- | include/drm/gpu_scheduler.h | 42 | ||||
-rw-r--r-- | include/drm/ttm/ttm_bo_api.h | 16 | ||||
-rw-r--r-- | include/drm/ttm/ttm_bo_driver.h | 28 | ||||
-rw-r--r-- | include/uapi/drm/amdgpu_drm.h | 2 |
4 files changed, 75 insertions, 13 deletions
diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 21c648b0b2a1..daec50f887b3 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -50,7 +50,10 @@ enum drm_sched_priority { * * @list: used to append this struct to the list of entities in the * runqueue. - * @rq: runqueue to which this entity belongs. + * @rq: runqueue on which this entity is currently scheduled. + * @rq_list: a list of run queues on which jobs from this entity can + * be scheduled + * @num_rq_list: number of run queues in the rq_list * @rq_lock: lock to modify the runqueue to which this entity belongs. * @job_queue: the list of jobs of this entity. * @fence_seq: a linearly increasing seqno incremented with each @@ -67,6 +70,7 @@ enum drm_sched_priority { * @fini_status: contains the exit status in case the process was signalled. * @last_scheduled: points to the finished fence of the last scheduled job. * @last_user: last group leader pushing a job into the entity. + * @stopped: Marks the enity as removed from rq and destined for termination. * * Entities will emit jobs in order to their corresponding hardware * ring, and the scheduler will alternate between entities based on @@ -75,6 +79,8 @@ enum drm_sched_priority { struct drm_sched_entity { struct list_head list; struct drm_sched_rq *rq; + struct drm_sched_rq **rq_list; + unsigned int num_rq_list; spinlock_t rq_lock; struct spsc_queue job_queue; @@ -87,6 +93,7 @@ struct drm_sched_entity { atomic_t *guilty; struct dma_fence *last_scheduled; struct task_struct *last_user; + bool stopped; }; /** @@ -257,6 +264,7 @@ struct drm_sched_backend_ops { * @job_list_lock: lock to protect the ring_mirror_list. * @hang_limit: once the hangs by a job crosses this limit then it is marked * guilty and it will be considered for scheduling further. + * @num_jobs: the number of jobs in queue in the scheduler * * One scheduler is implemented for each hardware ring. */ @@ -274,6 +282,7 @@ struct drm_gpu_scheduler { struct list_head ring_mirror_list; spinlock_t job_list_lock; int hang_limit; + atomic_t num_jobs; }; int drm_sched_init(struct drm_gpu_scheduler *sched, @@ -281,6 +290,21 @@ int drm_sched_init(struct drm_gpu_scheduler *sched, uint32_t hw_submission, unsigned hang_limit, long timeout, const char *name); void drm_sched_fini(struct drm_gpu_scheduler *sched); +int drm_sched_job_init(struct drm_sched_job *job, + struct drm_sched_entity *entity, + void *owner); +void drm_sched_wakeup(struct drm_gpu_scheduler *sched); +void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, + struct drm_sched_job *job); +void drm_sched_job_recovery(struct drm_gpu_scheduler *sched); +bool drm_sched_dependency_optimized(struct dma_fence* fence, + struct drm_sched_entity *entity); +void drm_sched_job_kickout(struct drm_sched_job *s_job); + +void drm_sched_rq_add_entity(struct drm_sched_rq *rq, + struct drm_sched_entity *entity); +void drm_sched_rq_remove_entity(struct drm_sched_rq *rq, + struct drm_sched_entity *entity); int drm_sched_entity_init(struct drm_sched_entity *entity, struct drm_sched_rq **rq_list, @@ -289,23 +313,17 @@ int drm_sched_entity_init(struct drm_sched_entity *entity, long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout); void drm_sched_entity_fini(struct drm_sched_entity *entity); void drm_sched_entity_destroy(struct drm_sched_entity *entity); +void drm_sched_entity_select_rq(struct drm_sched_entity *entity); +struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity); void drm_sched_entity_push_job(struct drm_sched_job *sched_job, struct drm_sched_entity *entity); -void drm_sched_entity_set_rq(struct drm_sched_entity *entity, - struct drm_sched_rq *rq); +void drm_sched_entity_set_priority(struct drm_sched_entity *entity, + enum drm_sched_priority priority); +bool drm_sched_entity_is_ready(struct drm_sched_entity *entity); struct drm_sched_fence *drm_sched_fence_create( struct drm_sched_entity *s_entity, void *owner); void drm_sched_fence_scheduled(struct drm_sched_fence *fence); void drm_sched_fence_finished(struct drm_sched_fence *fence); -int drm_sched_job_init(struct drm_sched_job *job, - struct drm_sched_entity *entity, - void *owner); -void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, - struct drm_sched_job *job); -void drm_sched_job_recovery(struct drm_gpu_scheduler *sched); -bool drm_sched_dependency_optimized(struct dma_fence* fence, - struct drm_sched_entity *entity); -void drm_sched_job_kickout(struct drm_sched_job *s_job); #endif diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index a01ba2032f0e..8c19470785e2 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -51,6 +51,8 @@ struct ttm_placement; struct ttm_place; +struct ttm_lru_bulk_move; + /** * struct ttm_bus_placement * @@ -405,12 +407,24 @@ void ttm_bo_del_from_lru(struct ttm_buffer_object *bo); * ttm_bo_move_to_lru_tail * * @bo: The buffer object. + * @bulk: optional bulk move structure to remember BO positions * * Move this BO to the tail of all lru lists used to lookup and reserve an * object. This function must be called with struct ttm_bo_global::lru_lock * held, and is used to make a BO less likely to be considered for eviction. */ -void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo); +void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo, + struct ttm_lru_bulk_move *bulk); + +/** + * ttm_bo_bulk_move_lru_tail + * + * @bulk: bulk move structure + * + * Bulk move BOs to the LRU tail, only valid to use when driver makes sure that + * BO order never changes. Should be called with ttm_bo_global::lru_lock held. + */ +void ttm_bo_bulk_move_lru_tail(struct ttm_lru_bulk_move *bulk); /** * ttm_bo_lock_delayed_workqueue diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h index 3234cc322e70..e4fee8e02559 100644 --- a/include/drm/ttm/ttm_bo_driver.h +++ b/include/drm/ttm/ttm_bo_driver.h @@ -491,6 +491,34 @@ struct ttm_bo_device { }; /** + * struct ttm_lru_bulk_move_pos + * + * @first: first BO in the bulk move range + * @last: last BO in the bulk move range + * + * Positions for a lru bulk move. + */ +struct ttm_lru_bulk_move_pos { + struct ttm_buffer_object *first; + struct ttm_buffer_object *last; +}; + +/** + * struct ttm_lru_bulk_move + * + * @tt: first/last lru entry for BOs in the TT domain + * @vram: first/last lru entry for BOs in the VRAM domain + * @swap: first/last lru entry for BOs on the swap list + * + * Helper structure for bulk moves on the LRU list. + */ +struct ttm_lru_bulk_move { + struct ttm_lru_bulk_move_pos tt[TTM_MAX_BO_PRIORITY]; + struct ttm_lru_bulk_move_pos vram[TTM_MAX_BO_PRIORITY]; + struct ttm_lru_bulk_move_pos swap[TTM_MAX_BO_PRIORITY]; +}; + +/** * ttm_flag_masked * * @old: Pointer to the result and original value. diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h index 1ceec56de015..370e9a5536ef 100644 --- a/include/uapi/drm/amdgpu_drm.h +++ b/include/uapi/drm/amdgpu_drm.h @@ -665,6 +665,8 @@ struct drm_amdgpu_cs_chunk_data { #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_GPM_MEM 0x10 /* Subquery id: Query GFX RLC SRLS firmware version */ #define AMDGPU_INFO_FW_GFX_RLC_RESTORE_LIST_SRM_MEM 0x11 + /* Subquery id: Query DMCU firmware version */ + #define AMDGPU_INFO_FW_DMCU 0x12 /* number of bytes moved for TTM migration */ #define AMDGPU_INFO_NUM_BYTES_MOVED 0x0f /* the used VRAM size */ |