From 0ce15d6f7d3fb1162fd7de2829dbdf6d63a6a02a Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 30 Jan 2019 16:07:29 +0100 Subject: drm/amdgpu: allocate VM PDs/PTs on demand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's start to allocate VM PDs/PTs on demand instead of pre-allocating them during mapping. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 81ff8177f092..116605c038d2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -303,9 +303,6 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm); int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*callback)(void *p, struct amdgpu_bo *bo), void *param); -int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - uint64_t saddr, uint64_t size); int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync); int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm); -- cgit v1.2.3-70-g09d2 From adc7bfe50bd946b51a8a93ac15085cd218a2a9af Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 1 Feb 2019 17:11:29 +0100 Subject: drm/amdgpu: drop the huge page flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed any more since we now free PDs/PTs on demand. 
Signed-off-by: Christian König Reviewed-by: Felix Kuehling Acked-by: Huang Rui Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 14 +------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 1 - 2 files changed, 1 insertion(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 92334efa19a8..045da0e5691c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1371,10 +1371,6 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, uint64_t pde, pt, flags; unsigned level; - /* Don't update huge pages here */ - if (entry->huge) - return; - for (level = 0, pbo = bo->parent; pbo; ++level) pbo = pbo->parent; @@ -1638,13 +1634,6 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, continue; } - /* If it isn't already handled it can't be a huge page */ - if (cursor.entry->huge) { - /* Add the entry to the relocated list to update it. 
*/ - cursor.entry->huge = false; - amdgpu_vm_bo_relocated(&cursor.entry->base); - } - shift = amdgpu_vm_level_shift(adev, cursor.level); parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); if (adev->asic_type < CHIP_VEGA10) { @@ -1703,9 +1692,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, } while (frag_start < entry_end); if (amdgpu_vm_pt_descendant(adev, &cursor)) { - /* Mark all child entries as huge */ + /* Free all child entries */ while (cursor.pfn < frag_start) { - cursor.entry->huge = true; amdgpu_vm_free_pts(adev, params->vm, &cursor); amdgpu_vm_pt_next(adev, &cursor); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 116605c038d2..3c6537ef659c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -140,7 +140,6 @@ struct amdgpu_vm_bo_base { struct amdgpu_vm_pt { struct amdgpu_vm_bo_base base; - bool huge; /* array of page tables, one for each directory entry */ struct amdgpu_vm_pt *entries; -- cgit v1.2.3-70-g09d2 From 04ed8459f3348f95c119569338e39294a8e02349 Mon Sep 17 00:00:00 2001 From: Christian König Date: Wed, 7 Nov 2018 13:55:01 +0100 Subject: drm/amdgpu: remove chash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the chash implementation for now since it isn't used any more. 
Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/Kconfig | 2 - drivers/gpu/drm/Makefile | 1 - drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 105 ----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 14 - drivers/gpu/drm/amd/include/linux/chash.h | 366 ----------------- drivers/gpu/drm/amd/lib/Kconfig | 28 -- drivers/gpu/drm/amd/lib/Makefile | 32 -- drivers/gpu/drm/amd/lib/chash.c | 638 ------------------------------ 8 files changed, 1186 deletions(-) delete mode 100644 drivers/gpu/drm/amd/include/linux/chash.h delete mode 100644 drivers/gpu/drm/amd/lib/Kconfig delete mode 100644 drivers/gpu/drm/amd/lib/Makefile delete mode 100644 drivers/gpu/drm/amd/lib/chash.c (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig index bd943a71756c..4806e7f3ccc5 100644 --- a/drivers/gpu/drm/Kconfig +++ b/drivers/gpu/drm/Kconfig @@ -225,8 +225,6 @@ config DRM_AMDGPU source "drivers/gpu/drm/amd/amdgpu/Kconfig" -source "drivers/gpu/drm/amd/lib/Kconfig" - source "drivers/gpu/drm/nouveau/Kconfig" source "drivers/gpu/drm/i915/Kconfig" diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile index 1ac55c65eac0..31b85930a09b 100644 --- a/drivers/gpu/drm/Makefile +++ b/drivers/gpu/drm/Makefile @@ -56,7 +56,6 @@ obj-$(CONFIG_DRM_TTM) += ttm/ obj-$(CONFIG_DRM_SCHED) += scheduler/ obj-$(CONFIG_DRM_TDFX) += tdfx/ obj-$(CONFIG_DRM_R128) += r128/ -obj-y += amd/lib/ obj-$(CONFIG_HSA_AMD) += amd/amdkfd/ obj-$(CONFIG_DRM_RADEON)+= radeon/ obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 52db19e88ab5..a45ca5d2cfe9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2977,22 +2977,6 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, adev->vm_manager.fragment_size); } -static struct amdgpu_retryfault_hashtable 
*init_fault_hash(void) -{ - struct amdgpu_retryfault_hashtable *fault_hash; - - fault_hash = kmalloc(sizeof(*fault_hash), GFP_KERNEL); - if (!fault_hash) - return fault_hash; - - INIT_CHASH_TABLE(fault_hash->hash, - AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); - spin_lock_init(&fault_hash->lock); - fault_hash->count = 0; - - return fault_hash; -} - /** * amdgpu_vm_init - initialize a vm instance * @@ -3084,12 +3068,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->pasid = pasid; } - vm->fault_hash = init_fault_hash(); - if (!vm->fault_hash) { - r = -ENOMEM; - goto error_free_root; - } - INIT_KFIFO(vm->faults); return 0; @@ -3243,15 +3221,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) struct amdgpu_bo_va_mapping *mapping, *tmp; bool prt_fini_needed = !!adev->gmc.gmc_funcs->set_prt; struct amdgpu_bo *root; - u64 fault; int i, r; amdgpu_amdkfd_gpuvm_destroy_cb(adev, vm); - /* Clear pending page faults from IH when the VM is destroyed */ - while (kfifo_get(&vm->faults, &fault)) - amdgpu_vm_clear_fault(vm->fault_hash, fault); - if (vm->pasid) { unsigned long flags; @@ -3260,9 +3233,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } - kfree(vm->fault_hash); - vm->fault_hash = NULL; - drm_sched_entity_destroy(&vm->entity); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { @@ -3430,78 +3400,3 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) } } } - -/** - * amdgpu_vm_add_fault - Add a page fault record to fault hash table - * - * @fault_hash: fault hash table - * @key: 64-bit encoding of PASID and address - * - * This should be called when a retry page fault interrupt is - * received. If this is a new page fault, it will be added to a hash - * table. The return value indicates whether this is a new fault, or - * a fault that was already known and is already being handled. - * - * If there are too many pending page faults, this will fail. 
Retry - * interrupts should be ignored in this case until there is enough - * free space. - * - * Returns 0 if the fault was added, 1 if the fault was already known, - * -ENOSPC if there are too many pending faults. - */ -int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) -{ - unsigned long flags; - int r = -ENOSPC; - - if (WARN_ON_ONCE(!fault_hash)) - /* Should be allocated in amdgpu_vm_init - */ - return r; - - spin_lock_irqsave(&fault_hash->lock, flags); - - /* Only let the hash table fill up to 50% for best performance */ - if (fault_hash->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1))) - goto unlock_out; - - r = chash_table_copy_in(&fault_hash->hash, key, NULL); - if (!r) - fault_hash->count++; - - /* chash_table_copy_in should never fail unless we're losing count */ - WARN_ON_ONCE(r < 0); - -unlock_out: - spin_unlock_irqrestore(&fault_hash->lock, flags); - return r; -} - -/** - * amdgpu_vm_clear_fault - Remove a page fault record - * - * @fault_hash: fault hash table - * @key: 64-bit encoding of PASID and address - * - * This should be called when a page fault has been handled. Any - * future interrupt with this key will be processed as a new - * page fault. 
- */ -void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) -{ - unsigned long flags; - int r; - - if (!fault_hash) - return; - - spin_lock_irqsave(&fault_hash->lock, flags); - - r = chash_table_remove(&fault_hash->hash, key, NULL); - if (!WARN_ON_ONCE(r < 0)) { - fault_hash->count--; - WARN_ON_ONCE(fault_hash->count < 0); - } - - spin_unlock_irqrestore(&fault_hash->lock, flags); -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 3c6537ef659c..a1a62e3cb6e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -30,7 +30,6 @@ #include #include #include -#include #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -178,13 +177,6 @@ struct amdgpu_task_info { pid_t tgid; }; -#define AMDGPU_PAGEFAULT_HASH_BITS 8 -struct amdgpu_retryfault_hashtable { - DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); - spinlock_t lock; - int count; -}; - struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; @@ -244,7 +236,6 @@ struct amdgpu_vm { struct ttm_lru_bulk_move lru_bulk_move; /* mark whether can do the bulk move */ bool bulk_moveable; - struct amdgpu_retryfault_hashtable *fault_hash; }; struct amdgpu_vm_manager { @@ -354,11 +345,6 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, struct amdgpu_vm *vm); - -int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key); - -void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key); - void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo); #endif diff --git a/drivers/gpu/drm/amd/include/linux/chash.h b/drivers/gpu/drm/amd/include/linux/chash.h deleted file mode 100644 index 6dc159924ed1..000000000000 --- a/drivers/gpu/drm/amd/include/linux/chash.h +++ /dev/null @@ -1,366 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#ifndef _LINUX_CHASH_H -#define _LINUX_CHASH_H - -#include -#include -#include -#include - -#if BITS_PER_LONG == 32 -# define _CHASH_LONG_SHIFT 5 -#elif BITS_PER_LONG == 64 -# define _CHASH_LONG_SHIFT 6 -#else -# error "Unexpected BITS_PER_LONG" -#endif - -struct __chash_table { - u8 bits; - u8 key_size; - unsigned int value_size; - u32 size_mask; - unsigned long *occup_bitmap, *valid_bitmap; - union { - u32 *keys32; - u64 *keys64; - }; - u8 *values; - -#ifdef CONFIG_CHASH_STATS - u64 hits, hits_steps, hits_time_ns; - u64 miss, miss_steps, miss_time_ns; - u64 relocs, reloc_dist; -#endif -}; - -#define __CHASH_BITMAP_SIZE(bits) \ - (((1 << (bits)) + BITS_PER_LONG - 1) / BITS_PER_LONG) -#define __CHASH_ARRAY_SIZE(bits, size) \ - ((((size) << (bits)) + sizeof(long) - 1) / sizeof(long)) - -#define __CHASH_DATA_SIZE(bits, key_size, value_size) \ - (__CHASH_BITMAP_SIZE(bits) * 2 + \ - __CHASH_ARRAY_SIZE(bits, key_size) + \ - __CHASH_ARRAY_SIZE(bits, value_size)) - -#define STRUCT_CHASH_TABLE(bits, key_size, value_size) \ - struct { \ - struct __chash_table table; \ - unsigned long data \ - [__CHASH_DATA_SIZE(bits, key_size, value_size)];\ - } - -/** - * struct chash_table - Dynamically allocated closed hash table - * - * Use this struct for dynamically allocated hash tables (using - * chash_table_alloc and chash_table_free), where the size is - * determined at runtime. - */ -struct chash_table { - struct __chash_table table; - unsigned long *data; -}; - -/** - * DECLARE_CHASH_TABLE - macro to declare a closed hash table - * @table: name of the declared hash table - * @bts: Table size will be 2^bits entries - * @key_sz: Size of hash keys in bytes, 4 or 8 - * @val_sz: Size of data values in bytes, can be 0 - * - * This declares the hash table variable with a static size. - * - * The closed hash table stores key-value pairs with low memory and - * lookup overhead. In operation it performs no dynamic memory - * management. 
The data being stored does not require any - * list_heads. The hash table performs best with small @val_sz and as - * long as some space (about 50%) is left free in the table. But the - * table can still work reasonably efficiently even when filled up to - * about 90%. If bigger data items need to be stored and looked up, - * store the pointer to it as value in the hash table. - * - * @val_sz may be 0. This can be useful when all the stored - * information is contained in the key itself and the fact that it is - * in the hash table (or not). - */ -#define DECLARE_CHASH_TABLE(table, bts, key_sz, val_sz) \ - STRUCT_CHASH_TABLE(bts, key_sz, val_sz) table - -#ifdef CONFIG_CHASH_STATS -#define __CHASH_STATS_INIT(prefix), \ - prefix.hits = 0, \ - prefix.hits_steps = 0, \ - prefix.hits_time_ns = 0, \ - prefix.miss = 0, \ - prefix.miss_steps = 0, \ - prefix.miss_time_ns = 0, \ - prefix.relocs = 0, \ - prefix.reloc_dist = 0 -#else -#define __CHASH_STATS_INIT(prefix) -#endif - -#define __CHASH_TABLE_INIT(prefix, data, bts, key_sz, val_sz) \ - prefix.bits = (bts), \ - prefix.key_size = (key_sz), \ - prefix.value_size = (val_sz), \ - prefix.size_mask = ((1 << bts) - 1), \ - prefix.occup_bitmap = &data[0], \ - prefix.valid_bitmap = &data \ - [__CHASH_BITMAP_SIZE(bts)], \ - prefix.keys64 = (u64 *)&data \ - [__CHASH_BITMAP_SIZE(bts) * 2], \ - prefix.values = (u8 *)&data \ - [__CHASH_BITMAP_SIZE(bts) * 2 + \ - __CHASH_ARRAY_SIZE(bts, key_sz)] \ - __CHASH_STATS_INIT(prefix) - -/** - * DEFINE_CHASH_TABLE - macro to define and initialize a closed hash table - * @tbl: name of the declared hash table - * @bts: Table size will be 2^bits entries - * @key_sz: Size of hash keys in bytes, 4 or 8 - * @val_sz: Size of data values in bytes, can be 0 - * - * Note: the macro can be used for global and local hash table variables. 
- */ -#define DEFINE_CHASH_TABLE(tbl, bts, key_sz, val_sz) \ - DECLARE_CHASH_TABLE(tbl, bts, key_sz, val_sz) = { \ - .table = { \ - __CHASH_TABLE_INIT(, (tbl).data, bts, key_sz, val_sz) \ - }, \ - .data = {0} \ - } - -/** - * INIT_CHASH_TABLE - Initialize a hash table declared by DECLARE_CHASH_TABLE - * @tbl: name of the declared hash table - * @bts: Table size will be 2^bits entries - * @key_sz: Size of hash keys in bytes, 4 or 8 - * @val_sz: Size of data values in bytes, can be 0 - */ -#define INIT_CHASH_TABLE(tbl, bts, key_sz, val_sz) \ - __CHASH_TABLE_INIT(((tbl).table), (tbl).data, bts, key_sz, val_sz) - -int chash_table_alloc(struct chash_table *table, u8 bits, u8 key_size, - unsigned int value_size, gfp_t gfp_mask); -void chash_table_free(struct chash_table *table); - -/** - * chash_table_dump_stats - Dump statistics of a closed hash table - * @tbl: Pointer to the table structure - * - * Dumps some performance statistics of the table gathered in operation - * in the kernel log using pr_debug. If CONFIG_DYNAMIC_DEBUG is enabled, - * user must turn on messages for chash.c (file chash.c +p). 
- */ -#ifdef CONFIG_CHASH_STATS -#define chash_table_dump_stats(tbl) __chash_table_dump_stats(&(*tbl).table) - -void __chash_table_dump_stats(struct __chash_table *table); -#else -#define chash_table_dump_stats(tbl) -#endif - -/** - * chash_table_reset_stats - Reset statistics of a closed hash table - * @tbl: Pointer to the table structure - */ -#ifdef CONFIG_CHASH_STATS -#define chash_table_reset_stats(tbl) __chash_table_reset_stats(&(*tbl).table) - -static inline void __chash_table_reset_stats(struct __chash_table *table) -{ - (void)table __CHASH_STATS_INIT((*table)); -} -#else -#define chash_table_reset_stats(tbl) -#endif - -/** - * chash_table_copy_in - Copy a new value into the hash table - * @tbl: Pointer to the table structure - * @key: Key of the entry to add or update - * @value: Pointer to value to copy, may be NULL - * - * If @key already has an entry, its value is replaced. Otherwise a - * new entry is added. If @value is NULL, the value is left unchanged - * or uninitialized. Returns 1 if an entry already existed, 0 if a new - * entry was added or %-ENOMEM if there was no free space in the - * table. - */ -#define chash_table_copy_in(tbl, key, value) \ - __chash_table_copy_in(&(*tbl).table, key, value) - -int __chash_table_copy_in(struct __chash_table *table, u64 key, - const void *value); - -/** - * chash_table_copy_out - Copy a value out of the hash table - * @tbl: Pointer to the table structure - * @key: Key of the entry to find - * @value: Pointer to value to copy, may be NULL - * - * If @value is not NULL and the table has a non-0 value_size, the - * value at @key is copied to @value. Returns the slot index of the - * entry or %-EINVAL if @key was not found. 
- */ -#define chash_table_copy_out(tbl, key, value) \ - __chash_table_copy_out(&(*tbl).table, key, value, false) - -int __chash_table_copy_out(struct __chash_table *table, u64 key, - void *value, bool remove); - -/** - * chash_table_remove - Remove an entry from the hash table - * @tbl: Pointer to the table structure - * @key: Key of the entry to find - * @value: Pointer to value to copy, may be NULL - * - * If @value is not NULL and the table has a non-0 value_size, the - * value at @key is copied to @value. The entry is removed from the - * table. Returns the slot index of the removed entry or %-EINVAL if - * @key was not found. - */ -#define chash_table_remove(tbl, key, value) \ - __chash_table_copy_out(&(*tbl).table, key, value, true) - -/* - * Low level iterator API used internally by the above functions. - */ -struct chash_iter { - struct __chash_table *table; - unsigned long mask; - int slot; -}; - -/** - * CHASH_ITER_INIT - Initialize a hash table iterator - * @tbl: Pointer to hash table to iterate over - * @s: Initial slot number - */ -#define CHASH_ITER_INIT(table, s) { \ - table, \ - 1UL << ((s) & (BITS_PER_LONG - 1)), \ - s \ - } -/** - * CHASH_ITER_SET - Set hash table iterator to new slot - * @iter: Iterator - * @s: Slot number - */ -#define CHASH_ITER_SET(iter, s) \ - (iter).mask = 1UL << ((s) & (BITS_PER_LONG - 1)), \ - (iter).slot = (s) -/** - * CHASH_ITER_INC - Increment hash table iterator - * @table: Hash table to iterate over - * - * Wraps around at the end. 
- */ -#define CHASH_ITER_INC(iter) do { \ - (iter).mask = (iter).mask << 1 | \ - (iter).mask >> (BITS_PER_LONG - 1); \ - (iter).slot = ((iter).slot + 1) & (iter).table->size_mask; \ - } while (0) - -static inline bool chash_iter_is_valid(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return !!(iter.table->valid_bitmap[iter.slot >> _CHASH_LONG_SHIFT] & - iter.mask); -} -static inline bool chash_iter_is_empty(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return !(iter.table->occup_bitmap[iter.slot >> _CHASH_LONG_SHIFT] & - iter.mask); -} - -static inline void chash_iter_set_valid(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - iter.table->valid_bitmap[iter.slot >> _CHASH_LONG_SHIFT] |= iter.mask; - iter.table->occup_bitmap[iter.slot >> _CHASH_LONG_SHIFT] |= iter.mask; -} -static inline void chash_iter_set_invalid(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - iter.table->valid_bitmap[iter.slot >> _CHASH_LONG_SHIFT] &= ~iter.mask; -} -static inline void chash_iter_set_empty(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - iter.table->occup_bitmap[iter.slot >> _CHASH_LONG_SHIFT] &= ~iter.mask; -} - -static inline u32 chash_iter_key32(const struct chash_iter iter) -{ - BUG_ON(iter.table->key_size != 4); - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return iter.table->keys32[iter.slot]; -} -static inline u64 chash_iter_key64(const struct chash_iter iter) -{ - BUG_ON(iter.table->key_size != 8); - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return iter.table->keys64[iter.slot]; -} -static inline u64 chash_iter_key(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return (iter.table->key_size == 4) ? 
- iter.table->keys32[iter.slot] : iter.table->keys64[iter.slot]; -} - -static inline u32 chash_iter_hash32(const struct chash_iter iter) -{ - BUG_ON(iter.table->key_size != 4); - return hash_32(chash_iter_key32(iter), iter.table->bits); -} - -static inline u32 chash_iter_hash64(const struct chash_iter iter) -{ - BUG_ON(iter.table->key_size != 8); - return hash_64(chash_iter_key64(iter), iter.table->bits); -} - -static inline u32 chash_iter_hash(const struct chash_iter iter) -{ - return (iter.table->key_size == 4) ? - hash_32(chash_iter_key32(iter), iter.table->bits) : - hash_64(chash_iter_key64(iter), iter.table->bits); -} - -static inline void *chash_iter_value(const struct chash_iter iter) -{ - BUG_ON((unsigned)iter.slot >= (1 << iter.table->bits)); - return iter.table->values + - ((unsigned long)iter.slot * iter.table->value_size); -} - -#endif /* _LINUX_CHASH_H */ diff --git a/drivers/gpu/drm/amd/lib/Kconfig b/drivers/gpu/drm/amd/lib/Kconfig deleted file mode 100644 index 776ef3434c10..000000000000 --- a/drivers/gpu/drm/amd/lib/Kconfig +++ /dev/null @@ -1,28 +0,0 @@ -menu "AMD Library routines" - -# -# Closed hash table -# -config CHASH - tristate - default DRM_AMDGPU - help - Statically sized closed hash table implementation with low - memory and CPU overhead. - -config CHASH_STATS - bool "Closed hash table performance statistics" - depends on CHASH - default n - help - Enable collection of performance statistics for closed hash tables. - -config CHASH_SELFTEST - bool "Closed hash table self test" - depends on CHASH - default n - help - Runs a selftest during module load. Several module parameters - are available to modify the behaviour of the test. - -endmenu diff --git a/drivers/gpu/drm/amd/lib/Makefile b/drivers/gpu/drm/amd/lib/Makefile deleted file mode 100644 index 690243001e1a..000000000000 --- a/drivers/gpu/drm/amd/lib/Makefile +++ /dev/null @@ -1,32 +0,0 @@ -# -# Copyright 2017 Advanced Micro Devices, Inc. 
-# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR -# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -# OTHER DEALINGS IN THE SOFTWARE. -# -# -# Makefile for AMD library routines, which are used by AMD driver -# components. -# -# This is for common library routines that can be shared between AMD -# driver components or later moved to kernel/lib for sharing with -# other drivers. - -ccflags-y := -I$(src)/../include - -obj-$(CONFIG_CHASH) += chash.o diff --git a/drivers/gpu/drm/amd/lib/chash.c b/drivers/gpu/drm/amd/lib/chash.c deleted file mode 100644 index b8e45f356a1c..000000000000 --- a/drivers/gpu/drm/amd/lib/chash.c +++ /dev/null @@ -1,638 +0,0 @@ -/* - * Copyright 2017 Advanced Micro Devices, Inc. 
- * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. 
- * - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -/** - * chash_table_alloc - Allocate closed hash table - * @table: Pointer to the table structure - * @bits: Table size will be 2^bits entries - * @key_size: Size of hash keys in bytes, 4 or 8 - * @value_size: Size of data values in bytes, can be 0 - */ -int chash_table_alloc(struct chash_table *table, u8 bits, u8 key_size, - unsigned int value_size, gfp_t gfp_mask) -{ - if (bits > 31) - return -EINVAL; - - if (key_size != 4 && key_size != 8) - return -EINVAL; - - table->data = kcalloc(__CHASH_DATA_SIZE(bits, key_size, value_size), - sizeof(long), gfp_mask); - if (!table->data) - return -ENOMEM; - - __CHASH_TABLE_INIT(table->table, table->data, - bits, key_size, value_size); - - return 0; -} -EXPORT_SYMBOL(chash_table_alloc); - -/** - * chash_table_free - Free closed hash table - * @table: Pointer to the table structure - */ -void chash_table_free(struct chash_table *table) -{ - kfree(table->data); -} -EXPORT_SYMBOL(chash_table_free); - -#ifdef CONFIG_CHASH_STATS - -#define DIV_FRAC(nom, denom, quot, frac, frac_digits) do { \ - u64 __nom = (nom); \ - u64 __denom = (denom); \ - u64 __quot, __frac; \ - u32 __rem; \ - \ - while (__denom >> 32) { \ - __nom >>= 1; \ - __denom >>= 1; \ - } \ - __quot = __nom; \ - __rem = do_div(__quot, __denom); \ - __frac = __rem * (frac_digits) + (__denom >> 1); \ - do_div(__frac, __denom); \ - (quot) = __quot; \ - (frac) = __frac; \ - } while (0) - -void __chash_table_dump_stats(struct __chash_table *table) -{ - struct chash_iter iter = CHASH_ITER_INIT(table, 0); - u32 filled = 0, empty = 0, tombstones = 0; - u64 quot1, quot2; - u32 frac1, frac2; - - do { - if (chash_iter_is_valid(iter)) - filled++; - else if (chash_iter_is_empty(iter)) - empty++; - else - tombstones++; - CHASH_ITER_INC(iter); - } while (iter.slot); - - pr_debug("chash: key size %u, value size %u\n", - table->key_size, table->value_size); - pr_debug(" Slots 
total/filled/empty/tombstones: %u / %u / %u / %u\n", - 1 << table->bits, filled, empty, tombstones); - if (table->hits > 0) { - DIV_FRAC(table->hits_steps, table->hits, quot1, frac1, 1000); - DIV_FRAC(table->hits * 1000, table->hits_time_ns, - quot2, frac2, 1000); - } else { - quot1 = quot2 = 0; - frac1 = frac2 = 0; - } - pr_debug(" Hits (avg.cost, rate): %llu (%llu.%03u, %llu.%03u M/s)\n", - table->hits, quot1, frac1, quot2, frac2); - if (table->miss > 0) { - DIV_FRAC(table->miss_steps, table->miss, quot1, frac1, 1000); - DIV_FRAC(table->miss * 1000, table->miss_time_ns, - quot2, frac2, 1000); - } else { - quot1 = quot2 = 0; - frac1 = frac2 = 0; - } - pr_debug(" Misses (avg.cost, rate): %llu (%llu.%03u, %llu.%03u M/s)\n", - table->miss, quot1, frac1, quot2, frac2); - if (table->hits + table->miss > 0) { - DIV_FRAC(table->hits_steps + table->miss_steps, - table->hits + table->miss, quot1, frac1, 1000); - DIV_FRAC((table->hits + table->miss) * 1000, - (table->hits_time_ns + table->miss_time_ns), - quot2, frac2, 1000); - } else { - quot1 = quot2 = 0; - frac1 = frac2 = 0; - } - pr_debug(" Total (avg.cost, rate): %llu (%llu.%03u, %llu.%03u M/s)\n", - table->hits + table->miss, quot1, frac1, quot2, frac2); - if (table->relocs > 0) { - DIV_FRAC(table->hits + table->miss, table->relocs, - quot1, frac1, 1000); - DIV_FRAC(table->reloc_dist, table->relocs, quot2, frac2, 1000); - pr_debug(" Relocations (freq, avg.dist): %llu (1:%llu.%03u, %llu.%03u)\n", - table->relocs, quot1, frac1, quot2, frac2); - } else { - pr_debug(" No relocations\n"); - } -} -EXPORT_SYMBOL(__chash_table_dump_stats); - -#undef DIV_FRAC -#endif - -#define CHASH_INC(table, a) ((a) = ((a) + 1) & (table)->size_mask) -#define CHASH_ADD(table, a, b) (((a) + (b)) & (table)->size_mask) -#define CHASH_SUB(table, a, b) (((a) - (b)) & (table)->size_mask) -#define CHASH_IN_RANGE(table, slot, first, last) \ - (CHASH_SUB(table, slot, first) <= CHASH_SUB(table, last, first)) - -/*#define CHASH_DEBUG Uncomment this to 
enable verbose debug output*/ -#ifdef CHASH_DEBUG -static void chash_table_dump(struct __chash_table *table) -{ - struct chash_iter iter = CHASH_ITER_INIT(table, 0); - - do { - if ((iter.slot & 3) == 0) - pr_debug("%04x: ", iter.slot); - - if (chash_iter_is_valid(iter)) - pr_debug("[%016llx] ", chash_iter_key(iter)); - else if (chash_iter_is_empty(iter)) - pr_debug("[ ] "); - else - pr_debug("[ ] "); - - if ((iter.slot & 3) == 3) - pr_debug("\n"); - - CHASH_ITER_INC(iter); - } while (iter.slot); - - if ((iter.slot & 3) != 0) - pr_debug("\n"); -} - -static int chash_table_check(struct __chash_table *table) -{ - u32 hash; - struct chash_iter iter = CHASH_ITER_INIT(table, 0); - struct chash_iter cur = CHASH_ITER_INIT(table, 0); - - do { - if (!chash_iter_is_valid(iter)) { - CHASH_ITER_INC(iter); - continue; - } - - hash = chash_iter_hash(iter); - CHASH_ITER_SET(cur, hash); - while (cur.slot != iter.slot) { - if (chash_iter_is_empty(cur)) { - pr_err("Path to element at %x with hash %x broken at slot %x\n", - iter.slot, hash, cur.slot); - chash_table_dump(table); - return -EINVAL; - } - CHASH_ITER_INC(cur); - } - - CHASH_ITER_INC(iter); - } while (iter.slot); - - return 0; -} -#endif - -static void chash_iter_relocate(struct chash_iter dst, struct chash_iter src) -{ - BUG_ON(src.table == dst.table && src.slot == dst.slot); - BUG_ON(src.table->key_size != dst.table->key_size); - BUG_ON(src.table->value_size != dst.table->value_size); - - if (dst.table->key_size == 4) - dst.table->keys32[dst.slot] = src.table->keys32[src.slot]; - else - dst.table->keys64[dst.slot] = src.table->keys64[src.slot]; - - if (dst.table->value_size) - memcpy(chash_iter_value(dst), chash_iter_value(src), - dst.table->value_size); - - chash_iter_set_valid(dst); - chash_iter_set_invalid(src); - -#ifdef CONFIG_CHASH_STATS - if (src.table == dst.table) { - dst.table->relocs++; - dst.table->reloc_dist += - CHASH_SUB(dst.table, src.slot, dst.slot); - } -#endif -} - -/** - * __chash_table_find - Helper 
for looking up a hash table entry - * @iter: Pointer to hash table iterator - * @key: Key of the entry to find - * @for_removal: set to true if the element will be removed soon - * - * Searches for an entry in the hash table with a given key. iter must - * be initialized by the caller to point to the home position of the - * hypothetical entry, i.e. it must be initialized with the hash table - * and the key's hash as the initial slot for the search. - * - * This function also does some local clean-up to speed up future - * look-ups by relocating entries to better slots and removing - * tombstones that are no longer needed. - * - * If @for_removal is true, the function avoids relocating the entry - * that is being returned. - * - * Returns 0 if the search is successful. In this case iter is updated - * to point to the found entry. Otherwise %-EINVAL is returned and the - * iter is updated to point to the first available slot for the given - * key. If the table is full, the slot is set to -1. - */ -static int chash_table_find(struct chash_iter *iter, u64 key, - bool for_removal) -{ -#ifdef CONFIG_CHASH_STATS - u64 ts1 = local_clock(); -#endif - u32 hash = iter->slot; - struct chash_iter first_redundant = CHASH_ITER_INIT(iter->table, -1); - int first_avail = (for_removal ? -2 : -1); - - while (!chash_iter_is_valid(*iter) || chash_iter_key(*iter) != key) { - if (chash_iter_is_empty(*iter)) { - /* Found an empty slot, which ends the - * search. Clean up any preceding tombstones - * that are no longer needed because they lead - * to no-where - */ - if ((int)first_redundant.slot < 0) - goto not_found; - while (first_redundant.slot != iter->slot) { - if (!chash_iter_is_valid(first_redundant)) - chash_iter_set_empty(first_redundant); - CHASH_ITER_INC(first_redundant); - } -#ifdef CHASH_DEBUG - chash_table_check(iter->table); -#endif - goto not_found; - } else if (!chash_iter_is_valid(*iter)) { - /* Found a tombstone. 
Remember it as candidate - * for relocating the entry we're looking for - * or for adding a new entry with the given key - */ - if (first_avail == -1) - first_avail = iter->slot; - /* Or mark it as the start of a series of - * potentially redundant tombstones - */ - else if (first_redundant.slot == -1) - CHASH_ITER_SET(first_redundant, iter->slot); - } else if (first_redundant.slot >= 0) { - /* Found a valid, occupied slot with a - * preceding series of tombstones. Relocate it - * to a better position that no longer depends - * on those tombstones - */ - u32 cur_hash = chash_iter_hash(*iter); - - if (!CHASH_IN_RANGE(iter->table, cur_hash, - first_redundant.slot + 1, - iter->slot)) { - /* This entry has a hash at or before - * the first tombstone we found. We - * can relocate it to that tombstone - * and advance to the next tombstone - */ - chash_iter_relocate(first_redundant, *iter); - do { - CHASH_ITER_INC(first_redundant); - } while (chash_iter_is_valid(first_redundant)); - } else if (cur_hash != iter->slot) { - /* Relocate entry to its home position - * or as close as possible so it no - * longer depends on any preceding - * tombstones - */ - struct chash_iter new_iter = - CHASH_ITER_INIT(iter->table, cur_hash); - - while (new_iter.slot != iter->slot && - chash_iter_is_valid(new_iter)) - CHASH_ITER_INC(new_iter); - - if (new_iter.slot != iter->slot) - chash_iter_relocate(new_iter, *iter); - } - } - - CHASH_ITER_INC(*iter); - if (iter->slot == hash) { - iter->slot = -1; - goto not_found; - } - } - -#ifdef CONFIG_CHASH_STATS - iter->table->hits++; - iter->table->hits_steps += CHASH_SUB(iter->table, iter->slot, hash) + 1; -#endif - - if (first_avail >= 0) { - CHASH_ITER_SET(first_redundant, first_avail); - chash_iter_relocate(first_redundant, *iter); - iter->slot = first_redundant.slot; - iter->mask = first_redundant.mask; - } - -#ifdef CONFIG_CHASH_STATS - iter->table->hits_time_ns += local_clock() - ts1; -#endif - - return 0; - -not_found: -#ifdef 
CONFIG_CHASH_STATS - iter->table->miss++; - iter->table->miss_steps += (iter->slot < 0) ? - (1 << iter->table->bits) : - CHASH_SUB(iter->table, iter->slot, hash) + 1; -#endif - - if (first_avail >= 0) - CHASH_ITER_SET(*iter, first_avail); - -#ifdef CONFIG_CHASH_STATS - iter->table->miss_time_ns += local_clock() - ts1; -#endif - - return -EINVAL; -} - -int __chash_table_copy_in(struct __chash_table *table, u64 key, - const void *value) -{ - u32 hash = (table->key_size == 4) ? - hash_32(key, table->bits) : hash_64(key, table->bits); - struct chash_iter iter = CHASH_ITER_INIT(table, hash); - int r = chash_table_find(&iter, key, false); - - /* Found an existing entry */ - if (!r) { - if (value && table->value_size) - memcpy(chash_iter_value(iter), value, - table->value_size); - return 1; - } - - /* Is there a place to add a new entry? */ - if (iter.slot < 0) { - pr_err("Hash table overflow\n"); - return -ENOMEM; - } - - chash_iter_set_valid(iter); - - if (table->key_size == 4) - table->keys32[iter.slot] = key; - else - table->keys64[iter.slot] = key; - if (value && table->value_size) - memcpy(chash_iter_value(iter), value, table->value_size); - - return 0; -} -EXPORT_SYMBOL(__chash_table_copy_in); - -int __chash_table_copy_out(struct __chash_table *table, u64 key, - void *value, bool remove) -{ - u32 hash = (table->key_size == 4) ? 
- hash_32(key, table->bits) : hash_64(key, table->bits); - struct chash_iter iter = CHASH_ITER_INIT(table, hash); - int r = chash_table_find(&iter, key, remove); - - if (r < 0) - return r; - - if (value && table->value_size) - memcpy(value, chash_iter_value(iter), table->value_size); - - if (remove) - chash_iter_set_invalid(iter); - - return iter.slot; -} -EXPORT_SYMBOL(__chash_table_copy_out); - -#ifdef CONFIG_CHASH_SELFTEST -/** - * chash_self_test - Run a self-test of the hash table implementation - * @bits: Table size will be 2^bits entries - * @key_size: Size of hash keys in bytes, 4 or 8 - * @min_fill: Minimum fill level during the test - * @max_fill: Maximum fill level during the test - * @iterations: Number of test iterations - * - * The test adds and removes entries from a hash table, cycling the - * fill level between min_fill and max_fill entries. Also tests lookup - * and value retrieval. - */ -static int __init chash_self_test(u8 bits, u8 key_size, - int min_fill, int max_fill, - u64 iterations) -{ - struct chash_table table; - int ret; - u64 add_count, rmv_count; - u64 value; - - if (key_size == 4 && iterations > 0xffffffff) - return -EINVAL; - if (min_fill >= max_fill) - return -EINVAL; - - ret = chash_table_alloc(&table, bits, key_size, sizeof(u64), - GFP_KERNEL); - if (ret) { - pr_err("chash_table_alloc failed: %d\n", ret); - return ret; - } - - for (add_count = 0, rmv_count = 0; add_count < iterations; - add_count++) { - /* When we hit the max_fill level, remove entries down - * to min_fill - */ - if (add_count - rmv_count == max_fill) { - u64 find_count = rmv_count; - - /* First try to find all entries that we're - * about to remove, confirm their value, test - * writing them back a second time. 
- */ - for (; add_count - find_count > min_fill; - find_count++) { - ret = chash_table_copy_out(&table, find_count, - &value); - if (ret < 0) { - pr_err("chash_table_copy_out failed: %d\n", - ret); - goto out; - } - if (value != ~find_count) { - pr_err("Wrong value retrieved for key 0x%llx, expected 0x%llx got 0x%llx\n", - find_count, ~find_count, value); -#ifdef CHASH_DEBUG - chash_table_dump(&table.table); -#endif - ret = -EFAULT; - goto out; - } - ret = chash_table_copy_in(&table, find_count, - &value); - if (ret != 1) { - pr_err("copy_in second time returned %d, expected 1\n", - ret); - ret = -EFAULT; - goto out; - } - } - /* Remove them until we hit min_fill level */ - for (; add_count - rmv_count > min_fill; rmv_count++) { - ret = chash_table_remove(&table, rmv_count, - NULL); - if (ret < 0) { - pr_err("chash_table_remove failed: %d\n", - ret); - goto out; - } - } - } - - /* Add a new value */ - value = ~add_count; - ret = chash_table_copy_in(&table, add_count, &value); - if (ret != 0) { - pr_err("copy_in first time returned %d, expected 0\n", - ret); - ret = -EFAULT; - goto out; - } - } - - chash_table_dump_stats(&table); - chash_table_reset_stats(&table); - -out: - chash_table_free(&table); - return ret; -} - -static unsigned int chash_test_bits = 10; -MODULE_PARM_DESC(test_bits, - "Selftest number of hash bits ([4..20], default=10)"); -module_param_named(test_bits, chash_test_bits, uint, 0444); - -static unsigned int chash_test_keysize = 8; -MODULE_PARM_DESC(test_keysize, "Selftest keysize (4 or 8, default=8)"); -module_param_named(test_keysize, chash_test_keysize, uint, 0444); - -static unsigned int chash_test_minfill; -MODULE_PARM_DESC(test_minfill, "Selftest minimum #entries (default=50%)"); -module_param_named(test_minfill, chash_test_minfill, uint, 0444); - -static unsigned int chash_test_maxfill; -MODULE_PARM_DESC(test_maxfill, "Selftest maximum #entries (default=80%)"); -module_param_named(test_maxfill, chash_test_maxfill, uint, 0444); - -static 
unsigned long chash_test_iters; -MODULE_PARM_DESC(test_iters, "Selftest iterations (default=1000 x #entries)"); -module_param_named(test_iters, chash_test_iters, ulong, 0444); - -static int __init chash_init(void) -{ - int ret; - u64 ts1_ns; - - /* Skip self test on user errors */ - if (chash_test_bits < 4 || chash_test_bits > 20) { - pr_err("chash: test_bits out of range [4..20].\n"); - return 0; - } - if (chash_test_keysize != 4 && chash_test_keysize != 8) { - pr_err("chash: test_keysize invalid. Must be 4 or 8.\n"); - return 0; - } - - if (!chash_test_minfill) - chash_test_minfill = (1 << chash_test_bits) / 2; - if (!chash_test_maxfill) - chash_test_maxfill = (1 << chash_test_bits) * 4 / 5; - if (!chash_test_iters) - chash_test_iters = (1 << chash_test_bits) * 1000; - - if (chash_test_minfill >= (1 << chash_test_bits)) { - pr_err("chash: test_minfill too big. Must be < table size.\n"); - return 0; - } - if (chash_test_maxfill >= (1 << chash_test_bits)) { - pr_err("chash: test_maxfill too big. 
Must be < table size.\n"); - return 0; - } - if (chash_test_minfill >= chash_test_maxfill) { - pr_err("chash: test_minfill must be < test_maxfill.\n"); - return 0; - } - if (chash_test_keysize == 4 && chash_test_iters > 0xffffffff) { - pr_err("chash: test_iters must be < 4G for 4 byte keys.\n"); - return 0; - } - - ts1_ns = local_clock(); - ret = chash_self_test(chash_test_bits, chash_test_keysize, - chash_test_minfill, chash_test_maxfill, - chash_test_iters); - if (!ret) { - u64 ts_delta_us = local_clock() - ts1_ns; - u64 iters_per_second = (u64)chash_test_iters * 1000000; - - do_div(ts_delta_us, 1000); - do_div(iters_per_second, ts_delta_us); - pr_info("chash: self test took %llu us, %llu iterations/s\n", - ts_delta_us, iters_per_second); - } else { - pr_err("chash: self test failed: %d\n", ret); - } - - return ret; -} - -module_init(chash_init); - -#endif /* CONFIG_CHASH_SELFTEST */ - -MODULE_DESCRIPTION("Closed hash table"); -MODULE_LICENSE("GPL and additional rights"); -- cgit v1.2.3-70-g09d2 From 56753e73fb2ed232278eb69445cb72c6f54391c2 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 10 Jan 2019 16:48:23 +0100 Subject: drm/amdgpu: wait for VM to become idle during flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure that not only the entities are flushed, but that we also wait for the HW to finish all processing. 
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 6 +++--- drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 7 ++++--- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 12 ++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 ++ 5 files changed, 22 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index b7289f709644..a28a3d722ba2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -558,13 +558,12 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) idr_init(&mgr->ctx_handles); } -void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr) +long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout) { unsigned num_entities = amdgput_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id, i; - long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY; idp = &mgr->ctx_handles; @@ -574,10 +573,11 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr) struct drm_sched_entity *entity; entity = &ctx->entities[0][i].entity; - max_wait = drm_sched_entity_flush(entity, max_wait); + timeout = drm_sched_entity_flush(entity, timeout); } } mutex_unlock(&mgr->lock); + return timeout; } void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h index 8e561daa64cb..5f1b54c9bcdb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -84,7 +84,7 @@ int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); -void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr); +long amdgpu_ctx_mgr_entity_flush(struct 
amdgpu_ctx_mgr *mgr, long timeout); void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 6c87785db26c..ee678925e610 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -1176,13 +1176,14 @@ static int amdgpu_flush(struct file *f, fl_owner_t id) { struct drm_file *file_priv = f->private_data; struct amdgpu_fpriv *fpriv = file_priv->driver_priv; + long timeout = MAX_WAIT_SCHED_ENTITY_Q_EMPTY; - amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr); + timeout = amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr, timeout); + timeout = amdgpu_vm_wait_idle(&fpriv->vm, timeout); - return 0; + return timeout >= 0 ? 0 : timeout; } - static const struct file_operations amdgpu_driver_kms_fops = { .owner = THIS_MODULE, .open = drm_open, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a45ca5d2cfe9..8603c85985b5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2977,6 +2977,18 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, adev->vm_manager.fragment_size); } +/** + * amdgpu_vm_wait_idle - wait for the VM to become idle + * + * @vm: VM object to wait for + * @timeout: timeout to wait for VM to become idle + */ +long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout) +{ + return reservation_object_wait_timeout_rcu(vm->root.base.bo->tbo.resv, + true, true, timeout); +} + /** * amdgpu_vm_init - initialize a vm instance * diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index a1a62e3cb6e4..f5c25c0ae367 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -281,6 +281,8 @@ struct amdgpu_vm_manager { void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); + +long 
amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid); int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid); -- cgit v1.2.3-70-g09d2 From 2c2508029f0266c0bb17fcc51beba84073b6703d Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 18 Mar 2019 15:27:20 +0100 Subject: drm/amdgpu: remove some unused VM defines MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not needed any more. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index f5c25c0ae367..8348804c46cd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -165,11 +165,6 @@ struct amdgpu_vm_pte_funcs { uint32_t incr, uint64_t flags); }; -#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr)) -#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48) -#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL) - - struct amdgpu_task_info { char process_name[TASK_COMM_LEN]; char task_name[TASK_COMM_LEN]; -- cgit v1.2.3-70-g09d2 From d1e29462a06ac3d11645b7d939b00bcf51b10cc3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 18 Mar 2019 12:27:35 +0100 Subject: drm/amdgpu: move and rename amdgpu_pte_update_params MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the update parameter structure into the VM header and rename it. 
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 75 +++++++--------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 45 ++++++++++++++++++++ 2 files changed, 60 insertions(+), 60 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 5ae0f8892e19..41db2af931b4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -66,51 +66,6 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last, #undef START #undef LAST -/** - * struct amdgpu_pte_update_params - Local structure - * - * Encapsulate some VM table update parameters to reduce - * the number of function parameters - * - */ -struct amdgpu_pte_update_params { - - /** - * @adev: amdgpu device we do this update for - */ - struct amdgpu_device *adev; - - /** - * @vm: optional amdgpu_vm we do this update for - */ - struct amdgpu_vm *vm; - - /** - * @pages_addr: - * - * DMA addresses to use for mapping - */ - dma_addr_t *pages_addr; - - /** - * @src: address where to copy page table entries from - */ - uint64_t src; - - /** - * @ib: indirect buffer to fill with commands - */ - struct amdgpu_ib *ib; - - /** - * @func: Function which actually does the update - */ - void (*func)(struct amdgpu_pte_update_params *params, - struct amdgpu_bo *bo, uint64_t pe, - uint64_t addr, unsigned count, uint32_t incr, - uint64_t flags); -}; - /** * struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback */ @@ -1218,7 +1173,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, /** * amdgpu_vm_do_set_ptes - helper to call the right asic function * - * @params: see amdgpu_pte_update_params definition + * @params: see amdgpu_vm_update_params definition * @bo: PD/PT to update * @pe: addr of the 
page entry * @addr: dst addr to write into pe @@ -1229,7 +1184,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, * Traces the parameters and calls the right asic functions * to setup the page table using the DMA. */ -static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_do_set_ptes(struct amdgpu_vm_update_params *params, struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, @@ -1251,7 +1206,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, /** * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART * - * @params: see amdgpu_pte_update_params definition + * @params: see amdgpu_vm_update_params definition * @bo: PD/PT to update * @pe: addr of the page entry * @addr: dst addr to write into pe @@ -1261,7 +1216,7 @@ static void amdgpu_vm_do_set_ptes(struct amdgpu_pte_update_params *params, * * Traces the parameters and calls the DMA function to copy the PTEs. */ -static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_do_copy_ptes(struct amdgpu_vm_update_params *params, struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, @@ -1305,7 +1260,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) /** * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU * - * @params: see amdgpu_pte_update_params definition + * @params: see amdgpu_vm_update_params definition * @bo: PD/PT to update * @pe: kmap addr of the page entry * @addr: dst addr to write into pe @@ -1315,7 +1270,7 @@ static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) * * Write count number of PT/PD entries directly. 
*/ -static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_cpu_set_ptes(struct amdgpu_vm_update_params *params, struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, @@ -1343,7 +1298,7 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params, * * Calls the update function for both the given BO as well as its shadow. */ -static void amdgpu_vm_update_func(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_update_func(struct amdgpu_vm_update_params *params, struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, @@ -1364,7 +1319,7 @@ static void amdgpu_vm_update_func(struct amdgpu_pte_update_params *params, * * Makes sure the requested entry in parent is up to date. */ -static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_update_pde(struct amdgpu_vm_update_params *params, struct amdgpu_vm *vm, struct amdgpu_vm_pt *parent, struct amdgpu_vm_pt *entry) @@ -1415,7 +1370,7 @@ static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, int amdgpu_vm_update_directories(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - struct amdgpu_pte_update_params params; + struct amdgpu_vm_update_params params; struct amdgpu_job *job; unsigned ndw = 0; int r = 0; @@ -1506,7 +1461,7 @@ error: * * Make sure to set the right flags for the PTEs at the desired level. 
*/ -static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, struct amdgpu_bo *bo, unsigned level, uint64_t pe, uint64_t addr, unsigned count, uint32_t incr, @@ -1531,7 +1486,7 @@ static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params, /** * amdgpu_vm_fragment - get fragment for PTEs * - * @params: see amdgpu_pte_update_params definition + * @params: see amdgpu_vm_update_params definition * @start: first PTE to handle * @end: last PTE to handle * @flags: hw mapping flags @@ -1540,7 +1495,7 @@ static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params, * * Returns the first possible fragment for the start and end address. */ -static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, +static void amdgpu_vm_fragment(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t flags, unsigned int *frag, uint64_t *frag_end) { @@ -1592,7 +1547,7 @@ static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, /** * amdgpu_vm_update_ptes - make sure that page tables are valid * - * @params: see amdgpu_pte_update_params definition + * @params: see amdgpu_vm_update_params definition * @start: start of GPU address range * @end: end of GPU address range * @dst: destination address to map to, the next dst inside the function @@ -1603,7 +1558,7 @@ static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, * Returns: * 0 for success, -EINVAL for failure. 
*/ -static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, +static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params, uint64_t start, uint64_t end, uint64_t dst, uint64_t flags) { @@ -1746,7 +1701,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, void *owner = AMDGPU_FENCE_OWNER_VM; unsigned nptes, ncmds, ndw; struct amdgpu_job *job; - struct amdgpu_pte_update_params params; + struct amdgpu_vm_update_params params; struct dma_fence *f = NULL; int r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 8348804c46cd..6df4d9e382ac 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -172,6 +172,51 @@ struct amdgpu_task_info { pid_t tgid; }; +/** + * struct amdgpu_vm_update_params + * + * Encapsulate some VM table update parameters to reduce + * the number of function parameters + * + */ +struct amdgpu_vm_update_params { + + /** + * @adev: amdgpu device we do this update for + */ + struct amdgpu_device *adev; + + /** + * @vm: optional amdgpu_vm we do this update for + */ + struct amdgpu_vm *vm; + + /** + * @pages_addr: + * + * DMA addresses to use for mapping + */ + dma_addr_t *pages_addr; + + /** + * @src: address where to copy page table entries from + */ + uint64_t src; + + /** + * @ib: indirect buffer to fill with commands + */ + struct amdgpu_ib *ib; + + /** + * @func: Function which actually does the update + */ + void (*func)(struct amdgpu_vm_update_params *params, + struct amdgpu_bo *bo, uint64_t pe, + uint64_t addr, unsigned count, uint32_t incr, + uint64_t flags); +}; + struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; -- cgit v1.2.3-70-g09d2 From 6dd09027a22ac78f6ab1881aa7e2e21ad61c02b8 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 18 Mar 2019 13:16:03 +0100 Subject: drm/amdgpu: new VM update backends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Separate out all functions for SDMA and CPU based page table updates into separate backends. This way we can keep most of the complexity of those from the core VM code. Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/Makefile | 3 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 7 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 30 +++- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 116 +++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 248 ++++++++++++++++++++++++++++ 5 files changed, 401 insertions(+), 3 deletions(-) create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c create mode 100644 drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index 6039944abb71..7d539ba6400d 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -53,7 +53,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ - amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o + amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \ + amdgpu_vm_sdma.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 75ac3c97e143..0e47e604158e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1242,7 +1242,7 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_vm_update_params *params, * Returns: * The pointer for the page table entry. 
*/ -static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) +uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) { uint64_t result; @@ -3000,6 +3000,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->use_cpu_for_update ? "CPU" : "SDMA"); WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)), "CPU update of VM recommended only for large BAR system\n"); + + if (vm->use_cpu_for_update) + vm->update_funcs = &amdgpu_vm_cpu_funcs; + else + vm->update_funcs = &amdgpu_vm_sdma_funcs; vm->last_update = NULL; amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 6df4d9e382ac..a99b4caba13c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -203,11 +203,21 @@ struct amdgpu_vm_update_params { */ uint64_t src; + /** + * @job: job to used for hw submission + */ + struct amdgpu_job *job; + /** * @ib: indirect buffer to fill with commands */ struct amdgpu_ib *ib; + /** + * @num_dw_left: number of dw left for the IB + */ + unsigned int num_dw_left; + /** * @func: Function which actually does the update */ @@ -217,6 +227,17 @@ struct amdgpu_vm_update_params { uint64_t flags); }; +struct amdgpu_vm_update_funcs { + + int (*prepare)(struct amdgpu_vm_update_params *p, void * owner, + struct dma_fence *exclusive); + int (*update)(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, uint64_t flags); + int (*commit)(struct amdgpu_vm_update_params *p, + struct dma_fence **fence); +}; + struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; @@ -252,7 +273,10 @@ struct amdgpu_vm { struct amdgpu_vmid *reserved_vmid[AMDGPU_MAX_VMHUBS]; /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */ - bool use_cpu_for_update; + bool 
use_cpu_for_update; + + /* Functions to use for VM table updates */ + const struct amdgpu_vm_update_funcs *update_funcs; /* Flag to indicate ATS support from PTE for GFX9 */ bool pte_support_ats; @@ -319,6 +343,9 @@ struct amdgpu_vm_manager { #define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) +extern const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs; +extern const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs; + void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); @@ -348,6 +375,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, bool clear); void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, struct amdgpu_bo *bo, bool evicted); +uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr); struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, struct amdgpu_bo *bo); struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c new file mode 100644 index 000000000000..9d53982021de --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -0,0 +1,116 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "amdgpu_vm.h" +#include "amdgpu_object.h" +#include "amdgpu_trace.h" + +/** + * amdgpu_vm_cpu_prepare - prepare page table update with the CPU + * + * @p: see amdgpu_vm_update_params definition + * @owner: owner we need to sync to + * @exclusive: exclusive move fence we need to sync to + * + * Returns: + * Negativ errno, 0 for success. + */ +static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p, void *owner, + struct dma_fence *exclusive) +{ + int r; + + /* Wait for PT BOs to be idle. PTs share the same resv. 
object + * as the root PD BO + */ + r = amdgpu_bo_sync_wait(p->vm->root.base.bo, owner, true); + if (unlikely(r)) + return r; + + /* Wait for any BO move to be completed */ + if (exclusive) { + r = dma_fence_wait(exclusive, true); + if (unlikely(r)) + return r; + } + + return 0; +} + +/** + * amdgpu_vm_cpu_update - helper to update page tables via CPU + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: kmap addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Write count number of PT/PD entries directly. + */ +static int amdgpu_vm_cpu_update(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + uint64_t addr, unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i; + uint64_t value; + + pe += (unsigned long)amdgpu_bo_kptr(bo); + + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + + for (i = 0; i < count; i++) { + value = p->pages_addr ? + amdgpu_vm_map_gart(p->pages_addr, addr) : + addr; + amdgpu_gmc_set_pte_pde(p->adev, (void *)(uintptr_t)pe, + i, value, flags); + addr += incr; + } + return 0; +} + +/** + * amdgpu_vm_cpu_commit - commit page table update to the HW + * + * @p: see amdgpu_vm_update_params definition + * @fence: unused + * + * Make sure that the hardware sees the page table updates. 
+ */ +static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, + struct dma_fence **fence) +{ + /* Flush HDP */ + mb(); + amdgpu_asic_flush_hdp(p->adev, NULL); + return 0; +} + +const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs = { + .prepare = amdgpu_vm_cpu_prepare, + .update = amdgpu_vm_cpu_update, + .commit = amdgpu_vm_cpu_commit +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c new file mode 100644 index 000000000000..e4bacdb44c68 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -0,0 +1,248 @@ +/* + * Copyright 2019 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +#include "amdgpu_vm.h" +#include "amdgpu_job.h" +#include "amdgpu_object.h" +#include "amdgpu_trace.h" + +#define AMDGPU_VM_SDMA_MIN_NUM_DW 256u +#define AMDGPU_VM_SDMA_MAX_NUM_DW (16u * 1024u) + +/** + * amdgpu_vm_sdma_prepare - prepare SDMA command submission + * + * @p: see amdgpu_vm_update_params definition + * @owner: owner we need to sync to + * @exclusive: exclusive move fence we need to sync to + * + * Returns: + * Negativ errno, 0 for success. + */ +static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, + void *owner, struct dma_fence *exclusive) +{ + struct amdgpu_bo *root = p->vm->root.base.bo; + unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW; + int r; + + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job); + if (r) + return r; + + r = amdgpu_sync_fence(p->adev, &p->job->sync, exclusive, false); + if (r) + return r; + + r = amdgpu_sync_resv(p->adev, &p->job->sync, root->tbo.resv, + owner, false); + if (r) + return r; + + p->num_dw_left = ndw; + p->ib = &p->job->ibs[0]; + return 0; +} + +/** + * amdgpu_vm_sdma_commit - commit SDMA command submission + * + * @p: see amdgpu_vm_update_params definition + * @fence: resulting fence + * + * Returns: + * Negativ errno, 0 for success. 
+ */ +static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, + struct dma_fence **fence) +{ + struct amdgpu_bo *root = p->vm->root.base.bo; + struct amdgpu_ring *ring; + struct dma_fence *f; + int r; + + ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched); + + WARN_ON(p->ib->length_dw == 0); + amdgpu_ring_pad_ib(ring, p->ib); + WARN_ON(p->ib->length_dw > p->num_dw_left); + r = amdgpu_job_submit(p->job, &p->vm->entity, + AMDGPU_FENCE_OWNER_VM, &f); + if (r) + goto error; + + amdgpu_bo_fence(root, f, true); + if (fence) + swap(*fence, f); + dma_fence_put(f); + return 0; + +error: + amdgpu_job_free(p->job); + return r; +} + + +/** + * amdgpu_vm_sdma_copy_ptes - copy the PTEs from mapping + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: addr of the page entry + * @count: number of page entries to copy + * + * Traces the parameters and calls the DMA function to copy the PTEs. + */ +static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + unsigned count) +{ + uint64_t src = p->ib->gpu_addr; + + src += p->num_dw_left * 4; + + pe += amdgpu_bo_gpu_offset(bo); + trace_amdgpu_vm_copy_ptes(pe, src, count); + + amdgpu_vm_copy_pte(p->adev, p->ib, pe, src, count); +} + +/** + * amdgpu_vm_sdma_set_ptes - helper to call the right asic function + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Traces the parameters and calls the right asic functions + * to setup the page table using the DMA. 
+ */ +static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint64_t flags) +{ + pe += amdgpu_bo_gpu_offset(bo); + trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); + if (count < 3) { + amdgpu_vm_write_pte(p->adev, p->ib, pe, addr | flags, + count, incr); + } else { + amdgpu_vm_set_pte_pde(p->adev, p->ib, pe, addr, + count, incr, flags); + } +} + +/** + * amdgpu_vm_sdma_update - execute VM update + * + * @p: see amdgpu_vm_update_params definition + * @bo: PD/PT to update + * @pe: addr of the page entry + * @addr: dst addr to write into pe + * @count: number of page entries to update + * @incr: increase next addr by incr bytes + * @flags: hw access flags + * + * Reserve space in the IB, setup mapping buffer on demand and write commands to + * the IB. + */ +static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, + struct amdgpu_bo *bo, uint64_t pe, + uint64_t addr, unsigned count, uint32_t incr, + uint64_t flags) +{ + unsigned int i, ndw, nptes; + uint64_t *pte; + int r; + + do { + ndw = p->num_dw_left; + ndw -= p->ib->length_dw; + + if (ndw < 32) { + r = amdgpu_vm_sdma_commit(p, NULL); + if (r) + return r; + + /* estimate how many dw we need */ + ndw = 32; + if (p->pages_addr) + ndw += count * 2; + ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW); + ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW); + + r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, &p->job); + if (r) + return r; + + p->num_dw_left = ndw; + p->ib = &p->job->ibs[0]; + } + + if (!p->pages_addr) { + /* set page commands needed */ + if (bo->shadow) + amdgpu_vm_sdma_set_ptes(p, bo->shadow, pe, addr, + count, incr, flags); + amdgpu_vm_sdma_set_ptes(p, bo, pe, addr, count, + incr, flags); + return 0; + } + + /* copy commands needed */ + ndw -= p->adev->vm_manager.vm_pte_funcs->copy_pte_num_dw * + (bo->shadow ? 
2 : 1); + + /* for padding */ + ndw -= 7; + + nptes = min(count, ndw / 2); + + /* Put the PTEs at the end of the IB. */ + p->num_dw_left -= nptes * 2; + pte = (uint64_t *)&(p->ib->ptr[p->num_dw_left]); + for (i = 0; i < nptes; ++i, addr += incr) { + pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr); + pte[i] |= flags; + } + + if (bo->shadow) + amdgpu_vm_sdma_copy_ptes(p, bo->shadow, pe, nptes); + amdgpu_vm_sdma_copy_ptes(p, bo, pe, nptes); + + pe += nptes * 8; + count -= nptes; + } while (count); + + return 0; +} + +const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = { + .prepare = amdgpu_vm_sdma_prepare, + .update = amdgpu_vm_sdma_update, + .commit = amdgpu_vm_sdma_commit +}; -- cgit v1.2.3-70-g09d2 From c3546695830e7d919f8aadba2c64807c40dad02b Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 18 Mar 2019 20:19:36 +0100 Subject: drm/amdgpu: use the new VM backend for PTEs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit And remove the existing code when it is unused. 
Signed-off-by: Christian König Reviewed-by: Chunming Zhou Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 229 +-------------------------------- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 13 -- 2 files changed, 6 insertions(+), 236 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index f66fcaf9016d..daa32f078b6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -1170,66 +1170,6 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, return NULL; } -/** - * amdgpu_vm_do_set_ptes - helper to call the right asic function - * - * @params: see amdgpu_vm_update_params definition - * @bo: PD/PT to update - * @pe: addr of the page entry - * @addr: dst addr to write into pe - * @count: number of page entries to update - * @incr: increase next addr by incr bytes - * @flags: hw access flags - * - * Traces the parameters and calls the right asic functions - * to setup the page table using the DMA. 
- */ -static void amdgpu_vm_do_set_ptes(struct amdgpu_vm_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); - - if (count < 3) { - amdgpu_vm_write_pte(params->adev, params->ib, pe, - addr | flags, count, incr); - - } else { - amdgpu_vm_set_pte_pde(params->adev, params->ib, pe, addr, - count, incr, flags); - } -} - -/** - * amdgpu_vm_do_copy_ptes - copy the PTEs from the GART - * - * @params: see amdgpu_vm_update_params definition - * @bo: PD/PT to update - * @pe: addr of the page entry - * @addr: dst addr to write into pe - * @count: number of page entries to update - * @incr: increase next addr by incr bytes - * @flags: hw access flags - * - * Traces the parameters and calls the DMA function to copy the PTEs. - */ -static void amdgpu_vm_do_copy_ptes(struct amdgpu_vm_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - uint64_t src = (params->src + (addr >> 12) * 8); - - pe += amdgpu_bo_gpu_offset(bo); - trace_amdgpu_vm_copy_ptes(pe, src, count); - - amdgpu_vm_copy_pte(params->adev, params->ib, pe, src, count); -} - /** * amdgpu_vm_map_gart - Resolve gart mapping of addr * @@ -1257,58 +1197,6 @@ uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr) return result; } -/** - * amdgpu_vm_cpu_set_ptes - helper to update page tables via CPU - * - * @params: see amdgpu_vm_update_params definition - * @bo: PD/PT to update - * @pe: kmap addr of the page entry - * @addr: dst addr to write into pe - * @count: number of page entries to update - * @incr: increase next addr by incr bytes - * @flags: hw access flags - * - * Write count number of PT/PD entries directly. 
- */ -static void amdgpu_vm_cpu_set_ptes(struct amdgpu_vm_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - unsigned int i; - uint64_t value; - - pe += (unsigned long)amdgpu_bo_kptr(bo); - - trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); - - for (i = 0; i < count; i++) { - value = params->pages_addr ? - amdgpu_vm_map_gart(params->pages_addr, addr) : - addr; - amdgpu_gmc_set_pte_pde(params->adev, (void *)(uintptr_t)pe, - i, value, flags); - addr += incr; - } -} - -/** - * amdgpu_vm_update_func - helper to call update function - * - * Calls the update function for both the given BO as well as its shadow. - */ -static void amdgpu_vm_update_func(struct amdgpu_vm_update_params *params, - struct amdgpu_bo *bo, - uint64_t pe, uint64_t addr, - unsigned count, uint32_t incr, - uint64_t flags) -{ - if (bo->shadow) - params->func(params, bo->shadow, pe, addr, count, incr, flags); - params->func(params, bo, pe, addr, count, incr, flags); -} - /* * amdgpu_vm_update_pde - update a single level in the hierarchy * @@ -1434,7 +1322,8 @@ static void amdgpu_vm_update_flags(struct amdgpu_vm_update_params *params, flags |= AMDGPU_PTE_EXECUTABLE; } - amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags); + params->vm->update_funcs->update(params, bo, pe, addr, count, incr, + flags); } /** @@ -1651,12 +1540,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, uint64_t flags, uint64_t addr, struct dma_fence **fence) { - struct amdgpu_ring *ring; - void *owner = AMDGPU_FENCE_OWNER_VM; - unsigned nptes, ncmds, ndw; - struct amdgpu_job *job; struct amdgpu_vm_update_params params; - struct dma_fence *f = NULL; + void *owner = AMDGPU_FENCE_OWNER_VM; int r; memset(&params, 0, sizeof(params)); @@ -1668,116 +1553,15 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (!(flags & AMDGPU_PTE_VALID)) owner = AMDGPU_FENCE_OWNER_KFD; - if (vm->use_cpu_for_update) { - 
/* Wait for PT BOs to be idle. PTs share the same resv. object - * as the root PD BO - */ - r = amdgpu_bo_sync_wait(vm->root.base.bo, owner, true); - if (unlikely(r)) - return r; - - /* Wait for any BO move to be completed */ - if (exclusive) { - r = dma_fence_wait(exclusive, true); - if (unlikely(r)) - return r; - } - - params.func = amdgpu_vm_cpu_set_ptes; - return amdgpu_vm_update_ptes(&params, start, last + 1, - addr, flags); - } - - ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); - - nptes = last - start + 1; - - /* - * reserve space for two commands every (1 << BLOCK_SIZE) - * entries or 2k dwords (whatever is smaller) - */ - ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1); - - /* The second command is for the shadow pagetables. */ - if (vm->root.base.bo->shadow) - ncmds *= 2; - - /* padding, etc. */ - ndw = 64; - - if (pages_addr) { - /* copy commands needed */ - ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw; - - /* and also PTEs */ - ndw += nptes * 2; - - params.func = amdgpu_vm_do_copy_ptes; - - } else { - /* set page commands needed */ - ndw += ncmds * 10; - - /* extra commands for begin/end fragments */ - ncmds = 2 * adev->vm_manager.fragment_size; - if (vm->root.base.bo->shadow) - ncmds *= 2; - - ndw += 10 * ncmds; - - params.func = amdgpu_vm_do_set_ptes; - } - - r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job); + r = vm->update_funcs->prepare(&params, owner, exclusive); if (r) return r; - params.ib = &job->ibs[0]; - - if (pages_addr) { - uint64_t *pte; - unsigned i; - - /* Put the PTEs at the end of the IB. 
*/ - i = ndw - nptes * 2; - pte= (uint64_t *)&(job->ibs->ptr[i]); - params.src = job->ibs->gpu_addr + i * 4; - - for (i = 0; i < nptes; ++i) { - pte[i] = amdgpu_vm_map_gart(pages_addr, addr + i * - AMDGPU_GPU_PAGE_SIZE); - pte[i] |= flags; - } - addr = 0; - } - - r = amdgpu_sync_fence(adev, &job->sync, exclusive, false); - if (r) - goto error_free; - - r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv, - owner, false); - if (r) - goto error_free; - r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags); if (r) - goto error_free; - - amdgpu_ring_pad_ib(ring, params.ib); - WARN_ON(params.ib->length_dw > ndw); - r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f); - if (r) - goto error_free; - - amdgpu_bo_fence(vm->root.base.bo, f, true); - dma_fence_put(*fence); - *fence = f; - return 0; + return r; -error_free: - amdgpu_job_free(job); - return r; + return vm->update_funcs->commit(&params, fence); } /** @@ -1860,7 +1644,6 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev, if (pages_addr) { uint64_t count; - max_entries = min(max_entries, 16ull * 1024ull); for (count = 1; count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE; ++count) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index a99b4caba13c..520122be798b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -198,11 +198,6 @@ struct amdgpu_vm_update_params { */ dma_addr_t *pages_addr; - /** - * @src: address where to copy page table entries from - */ - uint64_t src; - /** * @job: job to used for hw submission */ @@ -217,14 +212,6 @@ struct amdgpu_vm_update_params { * @num_dw_left: number of dw left for the IB */ unsigned int num_dw_left; - - /** - * @func: Function which actually does the update - */ - void (*func)(struct amdgpu_vm_update_params *params, - struct amdgpu_bo *bo, uint64_t pe, - uint64_t addr, unsigned count, uint32_t incr, - uint64_t flags); }; struct 
amdgpu_vm_update_funcs { -- cgit v1.2.3-70-g09d2 From df399b064118bf9a5b9a3faaa67feb1cbb34e9d4 Mon Sep 17 00:00:00 2001 From: shaoyunl Date: Wed, 20 Mar 2019 16:14:56 -0400 Subject: drm/amdgpu: XGMI pstate switch initial support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver vote low to high pstate switch whenever there is an outstanding XGMI mapping request. Driver vote high to low pstate when all the outstanding XGMI mapping is terminated. Signed-off-by: shaoyunl Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_object.h | 2 ++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 21 +++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 4 ++++ drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 16 +++++++++++++++- drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 10 ++++++++++ 6 files changed, 56 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 865e067b1b5b..7cee269ec3e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2018,6 +2018,10 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) r = amdgpu_device_enable_mgpu_fan_boost(); if (r) DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); + + /*set to low pstate by default */ + amdgpu_xgmi_set_pstate(adev, 0); + } static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 220a6a7b1bc1..c430e8259038 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -72,6 +72,8 @@ struct amdgpu_bo_va { /* If the mappings are cleared or filled */ bool cleared; + + bool is_xgmi; }; struct amdgpu_bo { diff 
--git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b7f937ceebee..a5c6a1e5fe74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -34,6 +34,7 @@ #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" #include "amdgpu_gmc.h" +#include "amdgpu_xgmi.h" /** * DOC: GPUVM @@ -2045,6 +2046,15 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev, INIT_LIST_HEAD(&bo_va->valids); INIT_LIST_HEAD(&bo_va->invalids); + if (bo && amdgpu_xgmi_same_hive(adev, amdgpu_ttm_adev(bo->tbo.bdev))) { + bo_va->is_xgmi = true; + mutex_lock(&adev->vm_manager.lock_pstate); + /* Power up XGMI if it can be potentially used */ + if (++adev->vm_manager.xgmi_map_counter == 1) + amdgpu_xgmi_set_pstate(adev, 1); + mutex_unlock(&adev->vm_manager.lock_pstate); + } + return bo_va; } @@ -2463,6 +2473,14 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, } dma_fence_put(bo_va->last_pt_update); + + if (bo && bo_va->is_xgmi) { + mutex_lock(&adev->vm_manager.lock_pstate); + if (--adev->vm_manager.xgmi_map_counter == 0) + amdgpu_xgmi_set_pstate(adev, 0); + mutex_unlock(&adev->vm_manager.lock_pstate); + } + kfree(bo_va); } @@ -2970,6 +2988,9 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) idr_init(&adev->vm_manager.pasid_idr); spin_lock_init(&adev->vm_manager.pasid_lock); + + adev->vm_manager.xgmi_map_counter = 0; + mutex_init(&adev->vm_manager.lock_pstate); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 520122be798b..f586b38f3076 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -324,6 +324,10 @@ struct amdgpu_vm_manager { */ struct idr pasid_idr; spinlock_t pasid_lock; + + /* counter of mapped memory through xgmi */ + uint32_t xgmi_map_counter; + struct mutex lock_pstate; }; #define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), 
(count))) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c index fcc4b05c745c..336834797af3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -200,12 +200,26 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lo if (lock) mutex_lock(&tmp->hive_lock); - + tmp->pstate = -1; mutex_unlock(&xgmi_mutex); return tmp; } +int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate) +{ + int ret = 0; + struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev, 0); + + if (!hive) + return 0; + + if (hive->pstate == pstate) + return 0; + /* Todo : sent the message to SMU for pstate change */ + return ret; +} + int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev) { int ret = -EINVAL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h index 24a3b0362f98..3e9c91e9a4bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -33,11 +33,21 @@ struct amdgpu_hive_info { struct kobject *kobj; struct device_attribute dev_attr; struct amdgpu_device *adev; + int pstate; /*0 -- low , 1 -- high , -1 unknown*/ }; struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock); int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); int amdgpu_xgmi_add_device(struct amdgpu_device *adev); void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); +int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate); + +static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev, + struct amdgpu_device *bo_adev) +{ + return (adev != bo_adev && + adev->gmc.xgmi.hive_id && + adev->gmc.xgmi.hive_id == bo_adev->gmc.xgmi.hive_id); +} #endif -- cgit v1.2.3-70-g09d2 From ecf96b52bf98a22f14bd33c7deee0aad8eb6b569 Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 21 Mar 
2019 16:34:18 +0100 Subject: drm/amdgpu: move VM table mapping into the backend as well MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clean that up further and also fix another case where the BO wasn't kmapped for CPU based updates. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 31 +++++------------------------ drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c | 11 ++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 20 +++++++++++++++++++ 4 files changed, 37 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index a5c6a1e5fe74..3ada094852c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -659,17 +659,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (bo->tbo.type != ttm_bo_type_kernel) { amdgpu_vm_bo_moved(bo_base); } else { - if (vm->use_cpu_for_update) - r = amdgpu_bo_kmap(bo, NULL); - else - r = amdgpu_ttm_alloc_gart(&bo->tbo); - if (r) - break; - if (bo->shadow) { - r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo); - if (r) - break; - } + vm->update_funcs->map_table(bo); amdgpu_vm_bo_relocated(bo_base); } } @@ -751,22 +741,17 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (r) return r; - r = amdgpu_ttm_alloc_gart(&bo->tbo); - if (r) - return r; - if (bo->shadow) { r = ttm_bo_validate(&bo->shadow->tbo, &bo->shadow->placement, &ctx); if (r) return r; - - r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo); - if (r) - return r; - } + r = vm->update_funcs->map_table(bo); + if (r) + return r; + memset(&params, 0, sizeof(params)); params.adev = adev; params.vm = vm; @@ -877,12 +862,6 @@ static int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, if (r) return r; - if 
(vm->use_cpu_for_update) { - r = amdgpu_bo_kmap(pt, NULL); - if (r) - goto error_free_pt; - } - /* Keep a reference to the root directory to avoid * freeing them up in the wrong order. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index f586b38f3076..9dbdf00cd74b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -215,7 +215,7 @@ struct amdgpu_vm_update_params { }; struct amdgpu_vm_update_funcs { - + int (*map_table)(struct amdgpu_bo *bo); int (*prepare)(struct amdgpu_vm_update_params *p, void * owner, struct dma_fence *exclusive); int (*update)(struct amdgpu_vm_update_params *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c index 9d53982021de..5222d165abfc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c @@ -24,6 +24,16 @@ #include "amdgpu_object.h" #include "amdgpu_trace.h" +/** + * amdgpu_vm_cpu_map_table - make sure new PDs/PTs are kmapped + * + * @table: newly allocated or validated PD/PT + */ +static int amdgpu_vm_cpu_map_table(struct amdgpu_bo *table) +{ + return amdgpu_bo_kmap(table, NULL); +} + /** * amdgpu_vm_cpu_prepare - prepare page table update with the CPU * @@ -110,6 +120,7 @@ static int amdgpu_vm_cpu_commit(struct amdgpu_vm_update_params *p, } const struct amdgpu_vm_update_funcs amdgpu_vm_cpu_funcs = { + .map_table = amdgpu_vm_cpu_map_table, .prepare = amdgpu_vm_cpu_prepare, .update = amdgpu_vm_cpu_update, .commit = amdgpu_vm_cpu_commit diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index e4bacdb44c68..4bccd69fe30d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -28,6 +28,25 @@ #define AMDGPU_VM_SDMA_MIN_NUM_DW 256u #define AMDGPU_VM_SDMA_MAX_NUM_DW (16u * 1024u) +/** + * amdgpu_vm_sdma_map_table - make sure new PDs/PTs are 
GTT mapped + * + * @table: newly allocated or validated PD/PT + */ +static int amdgpu_vm_sdma_map_table(struct amdgpu_bo *table) +{ + int r; + + r = amdgpu_ttm_alloc_gart(&table->tbo); + if (r) + return r; + + if (table->shadow) + r = amdgpu_ttm_alloc_gart(&table->shadow->tbo); + + return r; +} + /** * amdgpu_vm_sdma_prepare - prepare SDMA command submission * @@ -242,6 +261,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, } const struct amdgpu_vm_update_funcs amdgpu_vm_sdma_funcs = { + .map_table = amdgpu_vm_sdma_map_table, .prepare = amdgpu_vm_sdma_prepare, .update = amdgpu_vm_sdma_update, .commit = amdgpu_vm_sdma_commit -- cgit v1.2.3-70-g09d2 From 110aef572afccc8d18c8d1ad6cb027795ec01cda Mon Sep 17 00:00:00 2001 From: Christian König Date: Thu, 21 Mar 2019 16:43:39 +0100 Subject: drm/amdgpu: drop the ib from the VM update parameters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is redundant with the job pointer. Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 ----- drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 24 +++++++++++++----------- 2 files changed, 13 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 9dbdf00cd74b..beac15bca526 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -203,11 +203,6 @@ struct amdgpu_vm_update_params { */ struct amdgpu_job *job; - /** - * @ib: indirect buffer to fill with commands - */ - struct amdgpu_ib *ib; - /** * @num_dw_left: number of dw left for the IB */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c index 4bccd69fe30d..ddd181f5ed37 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c @@ -78,7 +78,6 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p, return r; p->num_dw_left = ndw; - p->ib = &p->job->ibs[0]; return 0; } @@ -95,15 +94,16 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p, struct dma_fence **fence) { struct amdgpu_bo *root = p->vm->root.base.bo; + struct amdgpu_ib *ib = p->job->ibs; struct amdgpu_ring *ring; struct dma_fence *f; int r; ring = container_of(p->vm->entity.rq->sched, struct amdgpu_ring, sched); - WARN_ON(p->ib->length_dw == 0); - amdgpu_ring_pad_ib(ring, p->ib); - WARN_ON(p->ib->length_dw > p->num_dw_left); + WARN_ON(ib->length_dw == 0); + amdgpu_ring_pad_ib(ring, ib); + WARN_ON(ib->length_dw > p->num_dw_left); r = amdgpu_job_submit(p->job, &p->vm->entity, AMDGPU_FENCE_OWNER_VM, &f); if (r) @@ -135,14 +135,15 @@ static void amdgpu_vm_sdma_copy_ptes(struct amdgpu_vm_update_params *p, struct amdgpu_bo *bo, uint64_t pe, unsigned count) { - uint64_t src = p->ib->gpu_addr; + struct amdgpu_ib *ib = p->job->ibs; + uint64_t src = ib->gpu_addr; src += p->num_dw_left * 4; pe += amdgpu_bo_gpu_offset(bo); trace_amdgpu_vm_copy_ptes(pe, src, count); - amdgpu_vm_copy_pte(p->adev, p->ib, pe, src, count); + amdgpu_vm_copy_pte(p->adev, ib, pe, src, count); } /** @@ -164,13 +165,15 @@ static void amdgpu_vm_sdma_set_ptes(struct amdgpu_vm_update_params *p, uint64_t addr, unsigned count, uint32_t incr, uint64_t flags) { + struct amdgpu_ib *ib = p->job->ibs; + pe += amdgpu_bo_gpu_offset(bo); trace_amdgpu_vm_set_ptes(pe, addr, count, incr, flags); if (count < 3) { - amdgpu_vm_write_pte(p->adev, p->ib, pe, addr | flags, + amdgpu_vm_write_pte(p->adev, ib, pe, addr | flags, count, incr); } else { - amdgpu_vm_set_pte_pde(p->adev, p->ib, pe, addr, + amdgpu_vm_set_pte_pde(p->adev, ib, pe, addr, count, incr, flags); } } @@ -200,7 +203,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, do { ndw = p->num_dw_left; - ndw -= p->ib->length_dw; + ndw 
-= p->job->ibs->length_dw; if (ndw < 32) { r = amdgpu_vm_sdma_commit(p, NULL); @@ -219,7 +222,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, return r; p->num_dw_left = ndw; - p->ib = &p->job->ibs[0]; } if (!p->pages_addr) { @@ -243,7 +245,7 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p, /* Put the PTEs at the end of the IB. */ p->num_dw_left -= nptes * 2; - pte = (uint64_t *)&(p->ib->ptr[p->num_dw_left]); + pte = (uint64_t *)&(p->job->ibs->ptr[p->num_dw_left]); for (i = 0; i < nptes; ++i, addr += incr) { pte[i] = amdgpu_vm_map_gart(p->pages_addr, addr); pte[i] |= flags; -- cgit v1.2.3-70-g09d2