From 38a8791aa7bfaa497e12e56b904a6b45986162d8 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Thu, 17 Aug 2017 16:37:49 -0400
Subject: drm/amdgpu: Fix huge page updates with CPU
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Correctly detect system memory mappings when using CPU and don't use
huge pages for them.

Avoid incorrectly translating a physical page table GPU address when
splitting a huge page while mapping system memory.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6b1343e5541d..ba475af99332 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1277,7 +1277,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	/* In the case of a mixed PT the PDE must point to it*/
 	if (p->adev->asic_type < CHIP_VEGA10 ||
 	    nptes != AMDGPU_VM_PTE_COUNT(p->adev) ||
-	    p->func == amdgpu_vm_do_copy_ptes ||
+	    p->src ||
 	    !(flags & AMDGPU_PTE_VALID)) {
 
 		dst = amdgpu_bo_gpu_offset(entry->bo);
@@ -1294,9 +1294,23 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	entry->addr = (dst | flags);
 
 	if (use_cpu_update) {
+		/* In case a huge page is replaced with a system
+		 * memory mapping, p->pages_addr != NULL and
+		 * amdgpu_vm_cpu_set_ptes would try to translate dst
+		 * through amdgpu_vm_map_gart. But dst is already a
+		 * GPU address (of the page table). Disable
+		 * amdgpu_vm_map_gart temporarily.
+		 */
+		dma_addr_t *tmp;
+
+		tmp = p->pages_addr;
+		p->pages_addr = NULL;
+
 		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
 		pde = pd_addr + (entry - parent->entries) * 8;
 		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
+
+		p->pages_addr = tmp;
 	} else {
 		if (parent->bo->shadow) {
 			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
-- 
cgit v1.2.3-70-g09d2


From cf273a59ca3068caced2adaf2deeb44d2013c8a9 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 18 Aug 2017 15:50:17 +0200
Subject: drm/amdgpu: fix and cleanup shadow handling
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Set the shadow flag on the shadow and not the parent, always bind shadow BOs
during allocation instead of manually, use the reservation_object wrappers
to grab the lock.

This fixes a couple of issues with binding the shadow BOs as well as correctly
evicting them when memory becomes tight.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  6 ----
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 46 +++++++++++++++---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     |  8 ------
 3 files changed, 23 insertions(+), 37 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index f7ffb029f6d5..e630d918fefc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2622,12 +2622,6 @@ static int amdgpu_recover_vram_from_shadow(struct amdgpu_device *adev,
 			goto err;
 		}
 
-		r = amdgpu_ttm_bind(&bo->shadow->tbo, &bo->shadow->tbo.mem);
-		if (r) {
-			DRM_ERROR("%p bind failed\n", bo->shadow);
-			goto err;
-		}
-
 		r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
 						 NULL, fence, true);
 		if (r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index e7e899190bef..9e495da0bb03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -91,7 +91,10 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 
 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 		places[c].fpfn = 0;
-		places[c].lpfn = 0;
+		if (flags & AMDGPU_GEM_CREATE_SHADOW)
+			places[c].lpfn = adev->mc.gart_size >> PAGE_SHIFT;
+		else
+			places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_TT;
 		if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 			places[c].flags |= TTM_PL_FLAG_WC |
@@ -446,17 +449,16 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
 	if (bo->shadow)
 		return 0;
 
-	bo->flags |= AMDGPU_GEM_CREATE_SHADOW;
-	memset(&placements, 0,
-	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
-
-	amdgpu_ttm_placement_init(adev, &placement,
-				  placements, AMDGPU_GEM_DOMAIN_GTT,
-				  AMDGPU_GEM_CREATE_CPU_GTT_USWC);
+	memset(&placements, 0, sizeof(placements));
+	amdgpu_ttm_placement_init(adev, &placement, placements,
+				  AMDGPU_GEM_DOMAIN_GTT,
+				  AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+				  AMDGPU_GEM_CREATE_SHADOW);
 
 	r = amdgpu_bo_create_restricted(adev, size, byte_align, true,
 					AMDGPU_GEM_DOMAIN_GTT,
-					AMDGPU_GEM_CREATE_CPU_GTT_USWC,
+					AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+					AMDGPU_GEM_CREATE_SHADOW,
 					NULL, &placement,
 					bo->tbo.resv,
 					0,
@@ -484,30 +486,28 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 {
 	struct ttm_placement placement = {0};
 	struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
+	uint64_t parent_flags = flags & ~AMDGPU_GEM_CREATE_SHADOW;
 	int r;
 
-	memset(&placements, 0,
-	       (AMDGPU_GEM_DOMAIN_MAX + 1) * sizeof(struct ttm_place));
+	memset(&placements, 0, sizeof(placements));
+	amdgpu_ttm_placement_init(adev, &placement, placements,
+				  domain, parent_flags);
 
-	amdgpu_ttm_placement_init(adev, &placement,
-				  placements, domain, flags);
-
-	r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel,
-					domain, flags, sg, &placement,
-					resv, init_value, bo_ptr);
+	r = amdgpu_bo_create_restricted(adev, size, byte_align, kernel, domain,
+					parent_flags, sg, &placement, resv,
+					init_value, bo_ptr);
 	if (r)
 		return r;
 
-	if (amdgpu_need_backup(adev) && (flags & AMDGPU_GEM_CREATE_SHADOW)) {
-		if (!resv) {
-			r = ww_mutex_lock(&(*bo_ptr)->tbo.resv->lock, NULL);
-			WARN_ON(r != 0);
-		}
+	if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
+		if (!resv)
+			WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
+							NULL));
 
 		r = amdgpu_bo_create_shadow(adev, size, byte_align, (*bo_ptr));
 
 		if (!resv)
-			ww_mutex_unlock(&(*bo_ptr)->tbo.resv->lock);
+			reservation_object_unlock((*bo_ptr)->tbo.resv);
 
 		if (r)
 			amdgpu_bo_unref(bo_ptr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ba475af99332..96ec4e2b56e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -165,14 +165,6 @@ static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
 	unsigned i;
 	int r;
 
-	if (parent->bo->shadow) {
-		struct amdgpu_bo *shadow = parent->bo->shadow;
-
-		r = amdgpu_ttm_bind(&shadow->tbo, &shadow->tbo.mem);
-		if (r)
-			return r;
-	}
-
 	if (use_cpu_for_update) {
 		r = amdgpu_bo_kmap(parent->bo, NULL);
 		if (r)
-- 
cgit v1.2.3-70-g09d2


From febb84a60304e7257d42b90cc191ef84279e5152 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 22 Aug 2017 12:50:46 +0200
Subject: drm/amdgpu: remove the GART copy hack
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This isn't used since we don't map evicted BOs to GART any more.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 22 +++++-----------------
 1 file changed, 5 insertions(+), 17 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 96ec4e2b56e9..3bd430e180b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1616,7 +1616,6 @@ error_free:
  *
  * @adev: amdgpu_device pointer
  * @exclusive: fence we need to sync to
- * @gtt_flags: flags as they are used for GTT
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @mapping: mapped range and flags to use for the update
@@ -1630,7 +1629,6 @@ error_free:
  */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				      struct dma_fence *exclusive,
-				      uint64_t gtt_flags,
 				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
 				      struct amdgpu_bo_va_mapping *mapping,
@@ -1685,11 +1683,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		}
 
 		if (pages_addr) {
-			if (flags == gtt_flags)
-				src = adev->gart.table_addr +
-					(addr >> AMDGPU_GPU_PAGE_SHIFT) * 8;
-			else
-				max_entries = min(max_entries, 16ull * 1024ull);
+			max_entries = min(max_entries, 16ull * 1024ull);
 			addr = 0;
 		} else if (flags & AMDGPU_PTE_VALID) {
 			addr += adev->vm_manager.vram_base_offset;
@@ -1734,10 +1728,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	struct amdgpu_vm *vm = bo_va->base.vm;
 	struct amdgpu_bo_va_mapping *mapping;
 	dma_addr_t *pages_addr = NULL;
-	uint64_t gtt_flags, flags;
 	struct ttm_mem_reg *mem;
 	struct drm_mm_node *nodes;
 	struct dma_fence *exclusive;
+	uint64_t flags;
 	int r;
 
 	if (clear || !bo_va->base.bo) {
@@ -1757,15 +1751,10 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		exclusive = reservation_object_get_excl(bo->tbo.resv);
 	}
 
-	if (bo) {
+	if (bo)
 		flags = amdgpu_ttm_tt_pte_flags(adev, bo->tbo.ttm, mem);
-		gtt_flags = (amdgpu_ttm_is_bound(bo->tbo.ttm) &&
-			adev == amdgpu_ttm_adev(bo->tbo.bdev)) ?
-			flags : 0;
-	} else {
+	else
 		flags = 0x0;
-		gtt_flags = ~0x0;
-	}
 
 	spin_lock(&vm->status_lock);
 	if (!list_empty(&bo_va->base.vm_status))
@@ -1773,8 +1762,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_split_mapping(adev, exclusive,
-					       gtt_flags, pages_addr, vm,
+		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
 					       mapping, flags, nodes,
 					       &bo_va->last_pt_update);
 		if (r)
-- 
cgit v1.2.3-70-g09d2


From 87f64a76b38acaa081b2bc46c3169884b9ccd6f2 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 23 Aug 2017 14:05:48 +0200
Subject: drm/amdgpu: fix amdgpu_vm_bo_map trace point
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

That somehow got lost.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 3bd430e180b5..db63ff5c80f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2111,6 +2111,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
 	if (flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
 	return 0;
 }
@@ -2176,6 +2177,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 
 	if (flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
 	return 0;
 }
-- 
cgit v1.2.3-70-g09d2


From 34d7be5dc28041b95254da517fd0f0f740544ece Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 24 Aug 2017 12:32:55 +0200
Subject: drm/amdgpu: fix and cleanup VM ready check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Stop checking the mapped BO itself, cause that one is
certainly not a page table.

Additional to that move the code into amdgpu_vm.c

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 33 ++-------------------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 32 ++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  1 +
 3 files changed, 35 insertions(+), 31 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7171968f261e..9b1b6bdd4841 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -127,35 +127,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
 	return 0;
 }
 
-static int amdgpu_gem_vm_check(void *param, struct amdgpu_bo *bo)
-{
-	/* if anything is swapped out don't swap it in here,
-	   just abort and wait for the next CS */
-	if (!amdgpu_bo_gpu_accessible(bo))
-		return -ERESTARTSYS;
-
-	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-		return -ERESTARTSYS;
-
-	return 0;
-}
-
-static bool amdgpu_gem_vm_ready(struct amdgpu_device *adev,
-				struct amdgpu_vm *vm,
-				struct list_head *list)
-{
-	struct ttm_validate_buffer *entry;
-
-	list_for_each_entry(entry, list, head) {
-		struct amdgpu_bo *bo =
-			container_of(entry->bo, struct amdgpu_bo, tbo);
-		if (amdgpu_gem_vm_check(NULL, bo))
-			return false;
-	}
-
-	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_gem_vm_check, NULL);
-}
-
 void amdgpu_gem_object_close(struct drm_gem_object *obj,
 			     struct drm_file *file_priv)
 {
@@ -189,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	if (bo_va && --bo_va->ref_count == 0) {
 		amdgpu_vm_bo_rmv(adev, bo_va);
 
-		if (amdgpu_gem_vm_ready(adev, vm, &list)) {
+		if (amdgpu_vm_ready(adev, vm)) {
 			struct dma_fence *fence = NULL;
 
 			r = amdgpu_vm_clear_freed(adev, vm, &fence);
@@ -513,7 +484,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 {
 	int r = -ERESTARTSYS;
 
-	if (!amdgpu_gem_vm_ready(adev, vm, list))
+	if (!amdgpu_vm_ready(adev, vm))
 		goto error;
 
 	r = amdgpu_vm_update_directories(adev, vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index db63ff5c80f2..67c37b22f8ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -231,6 +231,38 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 					adev->mman.bdev.glob);
 }
 
+/**
+ * amdgpu_vm_check - helper for amdgpu_vm_ready
+ */
+static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
+{
+	/* if anything is swapped out don't swap it in here,
+	   just abort and wait for the next CS */
+	if (!amdgpu_bo_gpu_accessible(bo))
+		return -ERESTARTSYS;
+
+	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
+		return -ERESTARTSYS;
+
+	return 0;
+}
+
+/**
+ * amdgpu_vm_ready - check VM is ready for updates
+ *
+ * @adev: amdgpu device
+ * @vm: VM to check
+ *
+ * Check if all VM PDs/PTs are ready for updates
+ */
+bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+{
+	if (amdgpu_vm_check(NULL, vm->root.bo))
+		return false;
+
+	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
+}
+
 /**
  * amdgpu_vm_alloc_levels - allocate the PD/PT levels
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ba6691b58ee7..9347d28c3c1e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -225,6 +225,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
+bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
-- 
cgit v1.2.3-70-g09d2


From 3d7d4d3a1b9f67c0caecf2b2aa8d7c347f074a33 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 23 Aug 2017 16:13:33 +0200
Subject: drm/amdgpu: rework moved handling in the VM v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of using the vm_state use a separate flag to note
that the BO was moved.

v2: reorder patches to avoid temporary lockless access

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 13 ++++++++++---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +++
 2 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 67c37b22f8ef..1b36c62997b3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1788,10 +1788,16 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	else
 		flags = 0x0;
 
-	spin_lock(&vm->status_lock);
-	if (!list_empty(&bo_va->base.vm_status))
+	if (!clear && bo_va->base.moved) {
+		bo_va->base.moved = false;
 		list_splice_init(&bo_va->valids, &bo_va->invalids);
-	spin_unlock(&vm->status_lock);
+
+	} else {
+		spin_lock(&vm->status_lock);
+		if (!list_empty(&bo_va->base.vm_status))
+			list_splice_init(&bo_va->valids, &bo_va->invalids);
+		spin_unlock(&vm->status_lock);
+	}
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
 		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
@@ -2419,6 +2425,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	struct amdgpu_vm_bo_base *bo_base;
 
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
+		bo_base->moved = true;
 		spin_lock(&bo_base->vm->status_lock);
 		if (list_empty(&bo_base->vm_status))
 			list_add(&bo_base->vm_status,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 9347d28c3c1e..1b478e62a948 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -105,6 +105,9 @@ struct amdgpu_vm_bo_base {
 
 	/* protected by spinlock */
 	struct list_head		vm_status;
+
+	/* protected by the BO being reserved */
+	bool				moved;
 };
 
 struct amdgpu_vm_pt {
-- 
cgit v1.2.3-70-g09d2


From cb7b6ec2f8b8759b6b5beb4d17ea6984867a3296 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Tue, 15 Aug 2017 17:08:12 +0200
Subject: drm/amdgpu: add bo_va cleared flag again v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We changed this to use an extra list a while back, but for the next
series I need a separate flag again.

v2: reorder to avoid unlocked list access

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 35 +++++++++++-------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  3 ---
 3 files changed, 16 insertions(+), 25 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index a288fa6d72c8..e613ba42f167 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -55,6 +55,9 @@ struct amdgpu_bo_va {
 	/* mappings for this bo_va */
 	struct list_head		invalids;
 	struct list_head		valids;
+
+	/* If the mappings are cleared or filled */
+	bool				cleared;
 };
 
 struct amdgpu_bo {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1b36c62997b3..1334bbb82634 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1792,11 +1792,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		bo_va->base.moved = false;
 		list_splice_init(&bo_va->valids, &bo_va->invalids);
 
-	} else {
-		spin_lock(&vm->status_lock);
-		if (!list_empty(&bo_va->base.vm_status))
-			list_splice_init(&bo_va->valids, &bo_va->invalids);
-		spin_unlock(&vm->status_lock);
+	} else if (bo_va->cleared != clear) {
+		list_splice_init(&bo_va->valids, &bo_va->invalids);
 	}
 
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
@@ -1807,25 +1804,22 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			return r;
 	}
 
-	if (trace_amdgpu_vm_bo_mapping_enabled()) {
-		list_for_each_entry(mapping, &bo_va->valids, list)
-			trace_amdgpu_vm_bo_mapping(mapping);
-
-		list_for_each_entry(mapping, &bo_va->invalids, list)
-			trace_amdgpu_vm_bo_mapping(mapping);
+	if (vm->use_cpu_for_update) {
+		/* Flush HDP */
+		mb();
+		amdgpu_gart_flush_gpu_tlb(adev, 0);
 	}
 
 	spin_lock(&vm->status_lock);
-	list_splice_init(&bo_va->invalids, &bo_va->valids);
 	list_del_init(&bo_va->base.vm_status);
-	if (clear)
-		list_add(&bo_va->base.vm_status, &vm->cleared);
 	spin_unlock(&vm->status_lock);
 
-	if (vm->use_cpu_for_update) {
-		/* Flush HDP */
-		mb();
-		amdgpu_gart_flush_gpu_tlb(adev, 0);
+	list_splice_init(&bo_va->invalids, &bo_va->valids);
+	bo_va->cleared = clear;
+
+	if (trace_amdgpu_vm_bo_mapping_enabled()) {
+		list_for_each_entry(mapping, &bo_va->valids, list)
+			trace_amdgpu_vm_bo_mapping(mapping);
 	}
 
 	return 0;
@@ -2427,9 +2421,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
 		bo_base->moved = true;
 		spin_lock(&bo_base->vm->status_lock);
-		if (list_empty(&bo_base->vm_status))
-			list_add(&bo_base->vm_status,
-				 &bo_base->vm->moved);
+		list_move(&bo_base->vm_status, &bo_base->vm->moved);
 		spin_unlock(&bo_base->vm->status_lock);
 	}
 }
@@ -2516,7 +2508,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->moved);
-	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->freed);
 
 	/* create scheduler entity for page table updates */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 1b478e62a948..ff093d4b5e11 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -129,9 +129,6 @@ struct amdgpu_vm {
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
 
-	/* BOs cleared in the PT because of a move */
-	struct list_head	cleared;
-
 	/* BO mappings freed, but not yet updated in the PT */
 	struct list_head	freed;
 
-- 
cgit v1.2.3-70-g09d2


From 3f3333f8a0e90ac26f84ed7b0aa344efce695c08 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 3 Aug 2017 14:02:13 +0200
Subject: drm/amdgpu: track evicted page tables v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Instead of validating all page tables when one was evicted,
track which one needs a validation.

v2: simplify amdgpu_vm_ready as well

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     |   7 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c    |   8 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 227 +++++++++++++----------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  16 +-
 5 files changed, 119 insertions(+), 141 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index d6ddd5562c16..8bf178a912f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -636,9 +636,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 
 	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
 				     p->bytes_moved_vis);
-	fpriv->vm.last_eviction_counter =
-		atomic64_read(&p->adev->num_evictions);
-
 	if (p->bo_list) {
 		struct amdgpu_bo *gds = p->bo_list->gds_obj;
 		struct amdgpu_bo *gws = p->bo_list->gws_obj;
@@ -835,7 +832,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 			if (!bo)
 				continue;
 
-			amdgpu_vm_bo_invalidate(adev, bo);
+			amdgpu_vm_bo_invalidate(adev, bo, false);
 		}
 	}
 
@@ -860,7 +857,7 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
 	}
 
 	if (p->job->vm) {
-		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.bo);
+		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo);
 
 		r = amdgpu_bo_vm_update_pte(p);
 		if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index ba012933e6aa..d02880640ee7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -160,7 +160,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	if (bo_va && --bo_va->ref_count == 0) {
 		amdgpu_vm_bo_rmv(adev, bo_va);
 
-		if (amdgpu_vm_ready(adev, vm)) {
+		if (amdgpu_vm_ready(vm)) {
 			struct dma_fence *fence = NULL;
 
 			r = amdgpu_vm_clear_freed(adev, vm, &fence);
@@ -481,10 +481,10 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 				    struct list_head *list,
 				    uint32_t operation)
 {
-	int r = -ERESTARTSYS;
+	int r;
 
-	if (!amdgpu_vm_ready(adev, vm))
-		goto error;
+	if (!amdgpu_vm_ready(vm))
+		return;
 
 	r = amdgpu_vm_update_directories(adev, vm);
 	if (r)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 9e495da0bb03..52d0109c0d9c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -929,7 +929,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 		return;
 
 	abo = container_of(bo, struct amdgpu_bo, tbo);
-	amdgpu_vm_bo_invalidate(adev, abo);
+	amdgpu_vm_bo_invalidate(adev, abo, evict);
 
 	amdgpu_bo_kunmap(abo);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1334bbb82634..6ff3c1bf035e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -140,7 +140,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry)
 {
-	entry->robj = vm->root.bo;
+	entry->robj = vm->root.base.bo;
 	entry->priority = 0;
 	entry->tv.bo = &entry->robj->tbo;
 	entry->tv.shared = true;
@@ -148,61 +148,6 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 	list_add(&entry->tv.head, validated);
 }
 
-/**
- * amdgpu_vm_validate_layer - validate a single page table level
- *
- * @parent: parent page table level
- * @validate: callback to do the validation
- * @param: parameter for the validation callback
- *
- * Validate the page table BOs on command submission if neccessary.
- */
-static int amdgpu_vm_validate_level(struct amdgpu_vm_pt *parent,
-				    int (*validate)(void *, struct amdgpu_bo *),
-				    void *param, bool use_cpu_for_update,
-				    struct ttm_bo_global *glob)
-{
-	unsigned i;
-	int r;
-
-	if (use_cpu_for_update) {
-		r = amdgpu_bo_kmap(parent->bo, NULL);
-		if (r)
-			return r;
-	}
-
-	if (!parent->entries)
-		return 0;
-
-	for (i = 0; i <= parent->last_entry_used; ++i) {
-		struct amdgpu_vm_pt *entry = &parent->entries[i];
-
-		if (!entry->bo)
-			continue;
-
-		r = validate(param, entry->bo);
-		if (r)
-			return r;
-
-		spin_lock(&glob->lru_lock);
-		ttm_bo_move_to_lru_tail(&entry->bo->tbo);
-		if (entry->bo->shadow)
-			ttm_bo_move_to_lru_tail(&entry->bo->shadow->tbo);
-		spin_unlock(&glob->lru_lock);
-
-		/*
-		 * Recurse into the sub directory. This is harmless because we
-		 * have only a maximum of 5 layers.
-		 */
-		r = amdgpu_vm_validate_level(entry, validate, param,
-					     use_cpu_for_update, glob);
-		if (r)
-			return r;
-	}
-
-	return r;
-}
-
 /**
  * amdgpu_vm_validate_pt_bos - validate the page table BOs
  *
@@ -217,32 +162,43 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*validate)(void *p, struct amdgpu_bo *bo),
 			      void *param)
 {
-	uint64_t num_evictions;
+	struct ttm_bo_global *glob = adev->mman.bdev.glob;
+	int r;
 
-	/* We only need to validate the page tables
-	 * if they aren't already valid.
-	 */
-	num_evictions = atomic64_read(&adev->num_evictions);
-	if (num_evictions == vm->last_eviction_counter)
-		return 0;
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->evicted)) {
+		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_bo *bo;
 
-	return amdgpu_vm_validate_level(&vm->root, validate, param,
-					vm->use_cpu_for_update,
-					adev->mman.bdev.glob);
-}
+		bo_base = list_first_entry(&vm->evicted,
+					   struct amdgpu_vm_bo_base,
+					   vm_status);
+		spin_unlock(&vm->status_lock);
 
-/**
- * amdgpu_vm_check - helper for amdgpu_vm_ready
- */
-static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
-{
-	/* if anything is swapped out don't swap it in here,
-	   just abort and wait for the next CS */
-	if (!amdgpu_bo_gpu_accessible(bo))
-		return -ERESTARTSYS;
+		bo = bo_base->bo;
+		BUG_ON(!bo);
+		if (bo->parent) {
+			r = validate(param, bo);
+			if (r)
+				return r;
 
-	if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-		return -ERESTARTSYS;
+			spin_lock(&glob->lru_lock);
+			ttm_bo_move_to_lru_tail(&bo->tbo);
+			if (bo->shadow)
+				ttm_bo_move_to_lru_tail(&bo->shadow->tbo);
+			spin_unlock(&glob->lru_lock);
+		}
+
+		if (vm->use_cpu_for_update) {
+			r = amdgpu_bo_kmap(bo, NULL);
+			if (r)
+				return r;
+		}
+
+		spin_lock(&vm->status_lock);
+		list_del_init(&bo_base->vm_status);
+	}
+	spin_unlock(&vm->status_lock);
 
 	return 0;
 }
@@ -250,17 +206,19 @@ static int amdgpu_vm_check(void *param, struct amdgpu_bo *bo)
 /**
  * amdgpu_vm_ready - check VM is ready for updates
  *
- * @adev: amdgpu device
  * @vm: VM to check
  *
  * Check if all VM PDs/PTs are ready for updates
  */
-bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+bool amdgpu_vm_ready(struct amdgpu_vm *vm)
 {
-	if (amdgpu_vm_check(NULL, vm->root.bo))
-		return false;
+	bool ready;
+
+	spin_lock(&vm->status_lock);
+	ready = list_empty(&vm->evicted);
+	spin_unlock(&vm->status_lock);
 
-	return !amdgpu_vm_validate_pt_bos(adev, vm, amdgpu_vm_check, NULL);
+	return ready;
 }
 
 /**
@@ -326,11 +284,11 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 
 	/* walk over the address space and allocate the page tables */
 	for (pt_idx = from; pt_idx <= to; ++pt_idx) {
-		struct reservation_object *resv = vm->root.bo->tbo.resv;
+		struct reservation_object *resv = vm->root.base.bo->tbo.resv;
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 		struct amdgpu_bo *pt;
 
-		if (!entry->bo) {
+		if (!entry->base.bo) {
 			r = amdgpu_bo_create(adev,
 					     amdgpu_vm_bo_size(adev, level),
 					     AMDGPU_GPU_PAGE_SIZE, true,
@@ -351,9 +309,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			/* Keep a reference to the root directory to avoid
 			* freeing them up in the wrong order.
 			*/
-			pt->parent = amdgpu_bo_ref(vm->root.bo);
+			pt->parent = amdgpu_bo_ref(vm->root.base.bo);
 
-			entry->bo = pt;
+			entry->base.vm = vm;
+			entry->base.bo = pt;
+			list_add_tail(&entry->base.bo_list, &pt->va);
+			INIT_LIST_HEAD(&entry->base.vm_status);
 			entry->addr = 0;
 		}
 
@@ -1020,7 +981,7 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	int r;
 
 	amdgpu_sync_create(&sync);
-	amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.resv, owner);
+	amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner);
 	r = amdgpu_sync_wait(&sync, true);
 	amdgpu_sync_free(&sync);
 
@@ -1059,10 +1020,10 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
-	shadow = parent->bo->shadow;
+	shadow = parent->base.bo->shadow;
 
 	if (vm->use_cpu_for_update) {
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
+		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
 		r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
 		if (unlikely(r))
 			return r;
@@ -1078,7 +1039,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		/* assume the worst case */
 		ndw += parent->last_entry_used * 6;
 
-		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
 
 		if (shadow) {
 			shadow_addr = amdgpu_bo_gpu_offset(shadow);
@@ -1098,7 +1059,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_bo *bo = parent->entries[pt_idx].bo;
+		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
 		uint64_t pde, pt;
 
 		if (bo == NULL)
@@ -1141,7 +1102,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	}
 
 	if (count) {
-		if (vm->root.bo->shadow)
+		if (vm->root.base.bo->shadow)
 			params.func(&params, last_shadow, last_pt,
 				    count, incr, AMDGPU_PTE_VALID);
 
@@ -1154,7 +1115,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			amdgpu_job_free(job);
 		} else {
 			amdgpu_ring_pad_ib(ring, params.ib);
-			amdgpu_sync_resv(adev, &job->sync, parent->bo->tbo.resv,
+			amdgpu_sync_resv(adev, &job->sync,
+					 parent->base.bo->tbo.resv,
 					 AMDGPU_FENCE_OWNER_VM);
 			if (shadow)
 				amdgpu_sync_resv(adev, &job->sync,
@@ -1167,7 +1129,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			if (r)
 				goto error_free;
 
-			amdgpu_bo_fence(parent->bo, fence, true);
+			amdgpu_bo_fence(parent->base.bo, fence, true);
 			dma_fence_put(vm->last_dir_update);
 			vm->last_dir_update = dma_fence_get(fence);
 			dma_fence_put(fence);
@@ -1180,7 +1142,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 
-		if (!entry->bo)
+		if (!entry->base.bo)
 			continue;
 
 		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
@@ -1213,7 +1175,7 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
 		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
 
-		if (!entry->bo)
+		if (!entry->base.bo)
 			continue;
 
 		entry->addr = ~0ULL;
@@ -1268,7 +1230,7 @@ void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr,
 	*entry = &p->vm->root;
 	while ((*entry)->entries) {
 		idx = addr >> (p->adev->vm_manager.block_size * level--);
-		idx %= amdgpu_bo_size((*entry)->bo) / 8;
+		idx %= amdgpu_bo_size((*entry)->base.bo) / 8;
 		*parent = *entry;
 		*entry = &(*entry)->entries[idx];
 	}
@@ -1304,7 +1266,7 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 	    p->src ||
 	    !(flags & AMDGPU_PTE_VALID)) {
 
-		dst = amdgpu_bo_gpu_offset(entry->bo);
+		dst = amdgpu_bo_gpu_offset(entry->base.bo);
 		dst = amdgpu_gart_get_vm_pde(p->adev, dst);
 		flags = AMDGPU_PTE_VALID;
 	} else {
@@ -1330,18 +1292,18 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
 		tmp = p->pages_addr;
 		p->pages_addr = NULL;
 
-		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->bo);
+		pd_addr = (unsigned long)amdgpu_bo_kptr(parent->base.bo);
 		pde = pd_addr + (entry - parent->entries) * 8;
 		amdgpu_vm_cpu_set_ptes(p, pde, dst, 1, 0, flags);
 
 		p->pages_addr = tmp;
 	} else {
-		if (parent->bo->shadow) {
-			pd_addr = amdgpu_bo_gpu_offset(parent->bo->shadow);
+		if (parent->base.bo->shadow) {
+			pd_addr = amdgpu_bo_gpu_offset(parent->base.bo->shadow);
 			pde = pd_addr + (entry - parent->entries) * 8;
 			amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
 		}
-		pd_addr = amdgpu_bo_gpu_offset(parent->bo);
+		pd_addr = amdgpu_bo_gpu_offset(parent->base.bo);
 		pde = pd_addr + (entry - parent->entries) * 8;
 		amdgpu_vm_do_set_ptes(p, pde, dst, 1, 0, flags);
 	}
@@ -1392,7 +1354,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		if (entry->addr & AMDGPU_PDE_PTE)
 			continue;
 
-		pt = entry->bo;
+		pt = entry->base.bo;
 		if (use_cpu_update) {
 			pe_start = (unsigned long)amdgpu_bo_kptr(pt);
 		} else {
@@ -1612,12 +1574,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	r = amdgpu_sync_resv(adev, &job->sync, vm->root.bo->tbo.resv,
+	r = amdgpu_sync_resv(adev, &job->sync, vm->root.base.bo->tbo.resv,
 			     owner);
 	if (r)
 		goto error_free;
 
-	r = reservation_object_reserve_shared(vm->root.bo->tbo.resv);
+	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv);
 	if (r)
 		goto error_free;
 
@@ -1632,7 +1594,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	amdgpu_bo_fence(vm->root.bo, f, true);
+	amdgpu_bo_fence(vm->root.base.bo, f, true);
 	dma_fence_put(*fence);
 	*fence = f;
 	return 0;
@@ -1927,7 +1889,7 @@ static void amdgpu_vm_free_mapping(struct amdgpu_device *adev,
  */
 static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
-	struct reservation_object *resv = vm->root.bo->tbo.resv;
+	struct reservation_object *resv = vm->root.base.bo->tbo.resv;
 	struct dma_fence *excl, **shared;
 	unsigned i, shared_count;
 	int r;
@@ -2414,12 +2376,25 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
  * Mark @bo as invalid.
  */
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-			     struct amdgpu_bo *bo)
+			     struct amdgpu_bo *bo, bool evicted)
 {
 	struct amdgpu_vm_bo_base *bo_base;
 
 	list_for_each_entry(bo_base, &bo->va, bo_list) {
+		struct amdgpu_vm *vm = bo_base->vm;
+
 		bo_base->moved = true;
+		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+			spin_lock(&bo_base->vm->status_lock);
+			list_move(&bo_base->vm_status, &vm->evicted);
+			spin_unlock(&bo_base->vm->status_lock);
+			continue;
+		}
+
+		/* Don't add page tables to the moved state */
+		if (bo->tbo.type == ttm_bo_type_kernel)
+			continue;
+
 		spin_lock(&bo_base->vm->status_lock);
 		list_move(&bo_base->vm_status, &bo_base->vm->moved);
 		spin_unlock(&bo_base->vm->status_lock);
@@ -2507,6 +2482,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
+	INIT_LIST_HEAD(&vm->evicted);
 	INIT_LIST_HEAD(&vm->moved);
 	INIT_LIST_HEAD(&vm->freed);
 
@@ -2551,30 +2527,31 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	r = amdgpu_bo_create(adev, amdgpu_vm_bo_size(adev, 0), align, true,
 			     AMDGPU_GEM_DOMAIN_VRAM,
 			     flags,
-			     NULL, NULL, init_pde_value, &vm->root.bo);
+			     NULL, NULL, init_pde_value, &vm->root.base.bo);
 	if (r)
 		goto error_free_sched_entity;
 
-	r = amdgpu_bo_reserve(vm->root.bo, false);
-	if (r)
-		goto error_free_root;
-
-	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);
+	vm->root.base.vm = vm;
+	list_add_tail(&vm->root.base.bo_list, &vm->root.base.bo->va);
+	INIT_LIST_HEAD(&vm->root.base.vm_status);
 
 	if (vm->use_cpu_for_update) {
-		r = amdgpu_bo_kmap(vm->root.bo, NULL);
+		r = amdgpu_bo_reserve(vm->root.base.bo, false);
 		if (r)
 			goto error_free_root;
-	}
 
-	amdgpu_bo_unreserve(vm->root.bo);
+		r = amdgpu_bo_kmap(vm->root.base.bo, NULL);
+		if (r)
+			goto error_free_root;
+		amdgpu_bo_unreserve(vm->root.base.bo);
+	}
 
 	return 0;
 
 error_free_root:
-	amdgpu_bo_unref(&vm->root.bo->shadow);
-	amdgpu_bo_unref(&vm->root.bo);
-	vm->root.bo = NULL;
+	amdgpu_bo_unref(&vm->root.base.bo->shadow);
+	amdgpu_bo_unref(&vm->root.base.bo);
+	vm->root.base.bo = NULL;
 
 error_free_sched_entity:
 	amd_sched_entity_fini(&ring->sched, &vm->entity);
@@ -2593,9 +2570,11 @@ static void amdgpu_vm_free_levels(struct amdgpu_vm_pt *level)
 {
 	unsigned i;
 
-	if (level->bo) {
-		amdgpu_bo_unref(&level->bo->shadow);
-		amdgpu_bo_unref(&level->bo);
+	if (level->base.bo) {
+		list_del(&level->base.bo_list);
+		list_del(&level->base.vm_status);
+		amdgpu_bo_unref(&level->base.bo->shadow);
+		amdgpu_bo_unref(&level->base.bo);
 	}
 
 	if (level->entries)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index ff093d4b5e11..4e465e817fe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -111,12 +111,12 @@ struct amdgpu_vm_bo_base {
 };
 
 struct amdgpu_vm_pt {
-	struct amdgpu_bo	*bo;
-	uint64_t		addr;
+	struct amdgpu_vm_bo_base	base;
+	uint64_t			addr;
 
 	/* array of page tables, one for each directory entry */
-	struct amdgpu_vm_pt	*entries;
-	unsigned		last_entry_used;
+	struct amdgpu_vm_pt		*entries;
+	unsigned			last_entry_used;
 };
 
 struct amdgpu_vm {
@@ -126,6 +126,9 @@ struct amdgpu_vm {
 	/* protecting invalidated */
 	spinlock_t		status_lock;
 
+	/* BOs who needs a validation */
+	struct list_head	evicted;
+
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
 
@@ -135,7 +138,6 @@ struct amdgpu_vm {
 	/* contains the page directory */
 	struct amdgpu_vm_pt     root;
 	struct dma_fence	*last_dir_update;
-	uint64_t		last_eviction_counter;
 
 	/* protecting freed */
 	spinlock_t		freed_lock;
@@ -225,7 +227,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
-bool amdgpu_vm_ready(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+bool amdgpu_vm_ready(struct amdgpu_vm *vm);
 int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			      int (*callback)(void *p, struct amdgpu_bo *bo),
 			      void *param);
@@ -250,7 +252,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
 void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
-			     struct amdgpu_bo *bo);
+			     struct amdgpu_bo *bo, bool evicted);
 struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
 				       struct amdgpu_bo *bo);
 struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
-- 
cgit v1.2.3-70-g09d2


From ea09729c930223edf492d0ca647c27e7eb0ccb12 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 9 Aug 2017 14:15:46 +0200
Subject: drm/amdgpu: rework page directory filling v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Keep track off relocated PDs/PTs instead of walking and checking all PDs.

v2: fix root PD handling

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com> (v1)
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 89 +++++++++++++++++++++++-----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 ++
 2 files changed, 63 insertions(+), 29 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6ff3c1bf035e..faa08d5728da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -196,7 +196,7 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		}
 
 		spin_lock(&vm->status_lock);
-		list_del_init(&bo_base->vm_status);
+		list_move(&bo_base->vm_status, &vm->relocated);
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -314,8 +314,10 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			entry->base.vm = vm;
 			entry->base.bo = pt;
 			list_add_tail(&entry->base.bo_list, &pt->va);
-			INIT_LIST_HEAD(&entry->base.vm_status);
-			entry->addr = 0;
+			spin_lock(&vm->status_lock);
+			list_add(&entry->base.vm_status, &vm->relocated);
+			spin_unlock(&vm->status_lock);
+			entry->addr = ~0ULL;
 		}
 
 		if (level < adev->vm_manager.num_level) {
@@ -1000,18 +1002,17 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  */
 static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm,
-				  struct amdgpu_vm_pt *parent,
-				  unsigned level)
+				  struct amdgpu_vm_pt *parent)
 {
 	struct amdgpu_bo *shadow;
 	struct amdgpu_ring *ring = NULL;
 	uint64_t pd_addr, shadow_addr = 0;
-	uint32_t incr = amdgpu_vm_bo_size(adev, level + 1);
 	uint64_t last_pde = ~0, last_pt = ~0, last_shadow = ~0;
 	unsigned count = 0, pt_idx, ndw = 0;
 	struct amdgpu_job *job;
 	struct amdgpu_pte_update_params params;
 	struct dma_fence *fence = NULL;
+	uint32_t incr;
 
 	int r;
 
@@ -1059,12 +1060,17 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 
 	/* walk over the address space and update the directory */
 	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_bo *bo = parent->entries[pt_idx].base.bo;
+		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
+		struct amdgpu_bo *bo = entry->base.bo;
 		uint64_t pde, pt;
 
 		if (bo == NULL)
 			continue;
 
+		spin_lock(&vm->status_lock);
+		list_del_init(&entry->base.vm_status);
+		spin_unlock(&vm->status_lock);
+
 		pt = amdgpu_bo_gpu_offset(bo);
 		pt = amdgpu_gart_get_vm_pde(adev, pt);
 		/* Don't update huge pages here */
@@ -1075,6 +1081,7 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 		parent->entries[pt_idx].addr = pt | AMDGPU_PTE_VALID;
 
 		pde = pd_addr + pt_idx * 8;
+		incr = amdgpu_bo_size(bo);
 		if (((last_pde + 8 * count) != pde) ||
 		    ((last_pt + incr * count) != pt) ||
 		    (count == AMDGPU_VM_MAX_UPDATE_SIZE)) {
@@ -1135,20 +1142,6 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 			dma_fence_put(fence);
 		}
 	}
-	/*
-	 * Recurse into the subdirectories. This recursion is harmless because
-	 * we only have a maximum of 5 layers.
-	 */
-	for (pt_idx = 0; pt_idx <= parent->last_entry_used; ++pt_idx) {
-		struct amdgpu_vm_pt *entry = &parent->entries[pt_idx];
-
-		if (!entry->base.bo)
-			continue;
-
-		r = amdgpu_vm_update_level(adev, vm, entry, level + 1);
-		if (r)
-			return r;
-	}
 
 	return 0;
 
@@ -1164,7 +1157,8 @@ error_free:
  *
  * Mark all PD level as invalid after an error.
  */
-static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
+static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
+				       struct amdgpu_vm_pt *parent)
 {
 	unsigned pt_idx;
 
@@ -1179,7 +1173,10 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm_pt *parent)
 			continue;
 
 		entry->addr = ~0ULL;
-		amdgpu_vm_invalidate_level(entry);
+		spin_lock(&vm->status_lock);
+		list_move(&entry->base.vm_status, &vm->relocated);
+		spin_unlock(&vm->status_lock);
+		amdgpu_vm_invalidate_level(vm, entry);
 	}
 }
 
@@ -1197,9 +1194,38 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 {
 	int r;
 
-	r = amdgpu_vm_update_level(adev, vm, &vm->root, 0);
-	if (r)
-		amdgpu_vm_invalidate_level(&vm->root);
+	spin_lock(&vm->status_lock);
+	while (!list_empty(&vm->relocated)) {
+		struct amdgpu_vm_bo_base *bo_base;
+		struct amdgpu_bo *bo;
+
+		bo_base = list_first_entry(&vm->relocated,
+					   struct amdgpu_vm_bo_base,
+					   vm_status);
+		spin_unlock(&vm->status_lock);
+
+		bo = bo_base->bo->parent;
+		if (bo) {
+			struct amdgpu_vm_bo_base *parent;
+			struct amdgpu_vm_pt *pt;
+
+			parent = list_first_entry(&bo->va,
+						  struct amdgpu_vm_bo_base,
+						  bo_list);
+			pt = container_of(parent, struct amdgpu_vm_pt, base);
+
+			r = amdgpu_vm_update_level(adev, vm, pt);
+			if (r) {
+				amdgpu_vm_invalidate_level(vm, &vm->root);
+				return r;
+			}
+			spin_lock(&vm->status_lock);
+		} else {
+			spin_lock(&vm->status_lock);
+			list_del_init(&bo_base->vm_status);
+		}
+	}
+	spin_unlock(&vm->status_lock);
 
 	if (vm->use_cpu_for_update) {
 		/* Flush HDP */
@@ -1601,7 +1627,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 error_free:
 	amdgpu_job_free(job);
-	amdgpu_vm_invalidate_level(&vm->root);
+	amdgpu_vm_invalidate_level(vm, &vm->root);
 	return r;
 }
 
@@ -2391,9 +2417,13 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 			continue;
 		}
 
-		/* Don't add page tables to the moved state */
-		if (bo->tbo.type == ttm_bo_type_kernel)
+		if (bo->tbo.type == ttm_bo_type_kernel) {
+			spin_lock(&bo_base->vm->status_lock);
+			if (list_empty(&bo_base->vm_status))
+				list_add(&bo_base->vm_status, &vm->relocated);
+			spin_unlock(&bo_base->vm->status_lock);
 			continue;
+		}
 
 		spin_lock(&bo_base->vm->status_lock);
 		list_move(&bo_base->vm_status, &bo_base->vm->moved);
@@ -2483,6 +2513,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->reserved_vmid[i] = NULL;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->evicted);
+	INIT_LIST_HEAD(&vm->relocated);
 	INIT_LIST_HEAD(&vm->moved);
 	INIT_LIST_HEAD(&vm->freed);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 4e465e817fe8..c3753afe9853 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -129,6 +129,9 @@ struct amdgpu_vm {
 	/* BOs who needs a validation */
 	struct list_head	evicted;
 
+	/* PT BOs which relocated and their parent need an update */
+	struct list_head	relocated;
+
 	/* BOs moved, but not yet updated in the PT */
 	struct list_head	moved;
 
-- 
cgit v1.2.3-70-g09d2


From 570144c6522f5f332635d20dfa278cfcc764229c Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 30 Aug 2017 15:38:45 +0200
Subject: drm/amdgpu: cleanup the VM code a bit more
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The src isn't used any more after GART hack removal.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++++---------------
 1 file changed, 5 insertions(+), 15 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index faa08d5728da..1582feba9289 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1476,7 +1476,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
  *
  * @adev: amdgpu_device pointer
  * @exclusive: fence we need to sync to
- * @src: address where to copy page table entries from
  * @pages_addr: DMA addresses to use for mapping
  * @vm: requested vm
  * @start: start of mapped range
@@ -1490,7 +1489,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       struct dma_fence *exclusive,
-				       uint64_t src,
 				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
 				       uint64_t start, uint64_t last,
@@ -1508,7 +1506,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
-	params.src = src;
 
 	/* sync to everything on unmapping */
 	if (!(flags & AMDGPU_PTE_VALID))
@@ -1548,13 +1545,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	/* one PDE write for each huge page */
 	ndw += ((nptes >> adev->vm_manager.block_size) + 1) * 6;
 
-	if (src) {
-		/* only copy commands needed */
-		ndw += ncmds * 7;
-
-		params.func = amdgpu_vm_do_copy_ptes;
-
-	} else if (pages_addr) {
+	if (pages_addr) {
 		/* copy commands needed */
 		ndw += ncmds * 7;
 
@@ -1579,7 +1570,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	params.ib = &job->ibs[0];
 
-	if (!src && pages_addr) {
+	if (pages_addr) {
 		uint64_t *pte;
 		unsigned i;
 
@@ -1656,7 +1647,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				      struct drm_mm_node *nodes,
 				      struct dma_fence **fence)
 {
-	uint64_t pfn, src = 0, start = mapping->start;
+	uint64_t pfn, start = mapping->start;
 	int r;
 
 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -1711,8 +1702,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		addr += pfn << PAGE_SHIFT;
 
 		last = min((uint64_t)mapping->last, start + max_entries - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
-						src, pages_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, exclusive, pages_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
@@ -1973,7 +1963,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 		if (vm->pte_support_ats)
 			init_pte_value = AMDGPU_PTE_SYSTEM;
 
-		r = amdgpu_vm_bo_update_mapping(adev, NULL, 0, NULL, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm,
 						mapping->start, mapping->last,
 						init_pte_value, 0, &f);
 		amdgpu_vm_free_mapping(adev, vm, mapping, f);
-- 
cgit v1.2.3-70-g09d2


From 0f2fc435d837213202bec3b8e26fbb67a4d6df24 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Thu, 31 Aug 2017 10:46:20 +0200
Subject: drm/amdgpu: fix new PD update code for Vega10 v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We need to refer to the parent instead of the root BO for multi
level page tables on Vega10. Also don't set the PDE_PTE bit.

v2: Don't set the PDE_PTE bit either.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-and-Tested-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 1582feba9289..d3c48557555c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -309,7 +309,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			/* Keep a reference to the root directory to avoid
 			* freeing them up in the wrong order.
 			*/
-			pt->parent = amdgpu_bo_ref(vm->root.base.bo);
+			pt->parent = amdgpu_bo_ref(parent->base.bo);
 
 			entry->base.vm = vm;
 			entry->base.bo = pt;
@@ -317,7 +317,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 			spin_lock(&vm->status_lock);
 			list_add(&entry->base.vm_status, &vm->relocated);
 			spin_unlock(&vm->status_lock);
-			entry->addr = ~0ULL;
+			entry->addr = 0;
 		}
 
 		if (level < adev->vm_manager.num_level) {
-- 
cgit v1.2.3-70-g09d2


From 73fb16e7ebee12953de32a7a2552e0cf2bf74ebf Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 16 Aug 2017 11:13:48 +0200
Subject: drm/amdgpu: add support for per VM BOs v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Per VM BOs are handled like VM PDs and PTs. They are always valid and don't
need to be specified in the BO lists.

v2: validate PDs/PTs first

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 79 ++++++++++++++++++++++++----------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  5 ++-
 3 files changed, 60 insertions(+), 26 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 8bf178a912f2..b57adc0723cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -822,7 +822,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
 	}
 
-	r = amdgpu_vm_clear_moved(adev, vm, &p->job->sync);
+	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
 
 	if (amdgpu_vm_debug && p->bo_list) {
 		/* Invalidate all BOs to test for userspace bugs */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index d3c48557555c..26eb7dce5fe5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -189,14 +189,18 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			spin_unlock(&glob->lru_lock);
 		}
 
-		if (vm->use_cpu_for_update) {
+		if (bo->tbo.type == ttm_bo_type_kernel &&
+		    vm->use_cpu_for_update) {
 			r = amdgpu_bo_kmap(bo, NULL);
 			if (r)
 				return r;
 		}
 
 		spin_lock(&vm->status_lock);
-		list_move(&bo_base->vm_status, &vm->relocated);
+		if (bo->tbo.type != ttm_bo_type_kernel)
+			list_move(&bo_base->vm_status, &vm->moved);
+		else
+			list_move(&bo_base->vm_status, &vm->relocated);
 	}
 	spin_unlock(&vm->status_lock);
 
@@ -1985,20 +1989,23 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_vm_clear_moved - clear moved BOs in the PT
+ * amdgpu_vm_handle_moved - handle moved BOs in the PT
  *
  * @adev: amdgpu_device pointer
  * @vm: requested vm
+ * @sync: sync object to add fences to
  *
- * Make sure all moved BOs are cleared in the PT.
+ * Make sure all BOs which are moved are updated in the PTs.
  * Returns 0 for success.
  *
- * PTs have to be reserved and mutex must be locked!
+ * PTs have to be reserved!
  */
-int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			    struct amdgpu_sync *sync)
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+			   struct amdgpu_vm *vm,
+			   struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo_va *bo_va = NULL;
+	bool clear;
 	int r = 0;
 
 	spin_lock(&vm->status_lock);
@@ -2007,7 +2014,10 @@ int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			struct amdgpu_bo_va, base.vm_status);
 		spin_unlock(&vm->status_lock);
 
-		r = amdgpu_vm_bo_update(adev, bo_va, true);
+		/* Per VM BOs never need to bo cleared in the page tables */
+		clear = bo_va->base.bo->tbo.resv != vm->root.base.bo->tbo.resv;
+
+		r = amdgpu_vm_bo_update(adev, bo_va, clear);
 		if (r)
 			return r;
 
@@ -2059,6 +2069,37 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	return bo_va;
 }
 
+
+/**
+ * amdgpu_vm_bo_insert_mapping - insert a new mapping
+ *
+ * @adev: amdgpu_device pointer
+ * @bo_va: bo_va to store the address
+ * @mapping: the mapping to insert
+ *
+ * Insert a new mapping into all structures.
+ */
+static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
+				    struct amdgpu_bo_va *bo_va,
+				    struct amdgpu_bo_va_mapping *mapping)
+{
+	struct amdgpu_vm *vm = bo_va->base.vm;
+	struct amdgpu_bo *bo = bo_va->base.bo;
+
+	list_add(&mapping->list, &bo_va->invalids);
+	amdgpu_vm_it_insert(mapping, &vm->va);
+
+	if (mapping->flags & AMDGPU_PTE_PRT)
+		amdgpu_vm_prt_get(adev);
+
+	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+		spin_lock(&vm->status_lock);
+		list_move(&bo_va->base.vm_status, &vm->moved);
+		spin_unlock(&vm->status_lock);
+	}
+	trace_amdgpu_vm_bo_map(bo_va, mapping);
+}
+
 /**
  * amdgpu_vm_bo_map - map bo inside a vm
  *
@@ -2110,18 +2151,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	if (!mapping)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&mapping->list);
 	mapping->start = saddr;
 	mapping->last = eaddr;
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->invalids);
-	amdgpu_vm_it_insert(mapping, &vm->va);
-
-	if (flags & AMDGPU_PTE_PRT)
-		amdgpu_vm_prt_get(adev);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
+	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
 	return 0;
 }
@@ -2148,7 +2183,6 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_bo *bo = bo_va->base.bo;
-	struct amdgpu_vm *vm = bo_va->base.vm;
 	uint64_t eaddr;
 	int r;
 
@@ -2182,12 +2216,7 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->invalids);
-	amdgpu_vm_it_insert(mapping, &vm->va);
-
-	if (flags & AMDGPU_PTE_PRT)
-		amdgpu_vm_prt_get(adev);
-	trace_amdgpu_vm_bo_map(bo_va, mapping);
+	amdgpu_vm_bo_insert_map(adev, bo_va, mapping);
 
 	return 0;
 }
@@ -2402,7 +2431,11 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 		bo_base->moved = true;
 		if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
 			spin_lock(&bo_base->vm->status_lock);
-			list_move(&bo_base->vm_status, &vm->evicted);
+			if (bo->tbo.type == ttm_bo_type_kernel)
+				list_move(&bo_base->vm_status, &vm->evicted);
+			else
+				list_move_tail(&bo_base->vm_status,
+					       &vm->evicted);
 			spin_unlock(&bo_base->vm->status_lock);
 			continue;
 		}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index c3753afe9853..90b7741d024b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -249,8 +249,9 @@ int amdgpu_vm_update_directories(struct amdgpu_device *adev,
 int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
-int amdgpu_vm_clear_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-			  struct amdgpu_sync *sync);
+int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
+			   struct amdgpu_vm *vm,
+			   struct amdgpu_sync *sync);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
-- 
cgit v1.2.3-70-g09d2


From 6849d47cabc36e8f2697043f8c81e7719876dfd3 Mon Sep 17 00:00:00 2001
From: Roger He <Hongbo.He@amd.com>
Date: Wed, 30 Aug 2017 13:01:19 +0800
Subject: drm/amdgpu: handle all fragment sizes v4
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This can improve performance for some cases.

v2 (chk): handle all sizes, simplify the patch quite a bit
v3 (chk): adjust dw estimation as well
v4 (chk): use single loop, make end mask 64bit

Signed-off-by: Roger He <Hongbo.He@amd.com>
Signed-off-by: Christian König <christian.koenig@amd.com>
Tested-by: Roger He <Hongbo.He@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 55 ++++++++++++++++------------------
 1 file changed, 26 insertions(+), 29 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 26eb7dce5fe5..b83e0fa1f269 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1420,8 +1420,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 				uint64_t start, uint64_t end,
 				uint64_t dst, uint64_t flags)
 {
-	int r;
-
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
 	 * field in the PTE. When this field is set to a non-zero value, page
@@ -1440,39 +1438,38 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 	 * Userspace can support this by aligning virtual base address and
 	 * allocation size to the fragment size.
 	 */
-	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-	uint64_t frag_align = 1 << pages_per_frag;
-
-	uint64_t frag_start = ALIGN(start, frag_align);
-	uint64_t frag_end = end & ~(frag_align - 1);
+	unsigned max_frag = params->adev->vm_manager.fragment_size;
+	int r;
 
 	/* system pages are non continuously */
-	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end))
+	if (params->src || !(flags & AMDGPU_PTE_VALID))
 		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
-	/* handle the 4K area at the beginning */
-	if (start != frag_start) {
-		r = amdgpu_vm_update_ptes(params, start, frag_start,
-					  dst, flags);
+	while (start != end) {
+		uint64_t frag_flags, frag_end;
+		unsigned frag;
+
+		/* This intentionally wraps around if no bit is set */
+		frag = min((unsigned)ffs(start) - 1,
+			   (unsigned)fls64(end - start) - 1);
+		if (frag >= max_frag) {
+			frag_flags = AMDGPU_PTE_FRAG(max_frag);
+			frag_end = end & ~((1ULL << max_frag) - 1);
+		} else {
+			frag_flags = AMDGPU_PTE_FRAG(frag);
+			frag_end = start + (1 << frag);
+		}
+
+		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+					  flags | frag_flags);
 		if (r)
 			return r;
-		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
-	}
-
-	/* handle the area in the middle */
-	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-				  flags | frag_flags);
-	if (r)
-		return r;
 
-	/* handle the 4K area at the end */
-	if (frag_end != end) {
-		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
 	}
-	return r;
+
+	return 0;
 }
 
 /**
@@ -1562,8 +1559,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		/* set page commands needed */
 		ndw += ncmds * 10;
 
-		/* two extra commands for begin/end of fragment */
-		ndw += 2 * 10;
+		/* extra commands for begin/end fragments */
+		ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
-- 
cgit v1.2.3-70-g09d2


From 481c2e94897eb74abcfb4a3cdb87f5f89499b93f Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 1 Sep 2017 14:46:19 +0200
Subject: drm/amdgpu: fix moved list handling in the VM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Only move BOs to the moved/relocated list when they aren't already on a list.

This prevents accidential removal from the evicted list.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b83e0fa1f269..c9223a5184de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1178,7 +1178,8 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_vm *vm,
 
 		entry->addr = ~0ULL;
 		spin_lock(&vm->status_lock);
-		list_move(&entry->base.vm_status, &vm->relocated);
+		if (list_empty(&entry->base.vm_status))
+			list_add(&entry->base.vm_status, &vm->relocated);
 		spin_unlock(&vm->status_lock);
 		amdgpu_vm_invalidate_level(vm, entry);
 	}
@@ -2091,7 +2092,8 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
 
 	if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv) {
 		spin_lock(&vm->status_lock);
-		list_move(&bo_va->base.vm_status, &vm->moved);
+		if (list_empty(&bo_va->base.vm_status))
+			list_add(&bo_va->base.vm_status, &vm->moved);
 		spin_unlock(&vm->status_lock);
 	}
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
@@ -2446,7 +2448,8 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 		}
 
 		spin_lock(&bo_base->vm->status_lock);
-		list_move(&bo_base->vm_status, &bo_base->vm->moved);
+		if (list_empty(&bo_base->vm_status))
+			list_add(&bo_base->vm_status, &vm->moved);
 		spin_unlock(&bo_base->vm->status_lock);
 	}
 }
-- 
cgit v1.2.3-70-g09d2


From 862095237c392887819e98018d6dc187d8c78726 Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Date: Thu, 7 Sep 2017 13:23:21 +0200
Subject: drm/amdgpu: Account for shadow PTs in mapping update IB size.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When amdgpu_vm_frag_ptes calls amdgpu_vm_update_ptes and the pt
has a shadow PT we mirror all the write to the shadow PT too, which
results in twice the commands.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index c9223a5184de..545531db66db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1536,10 +1536,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	nptes = last - start + 1;
 
 	/*
-	 * reserve space for one command every (1 << BLOCK_SIZE)
+	 * reserve space for two commands every (1 << BLOCK_SIZE)
 	 *  entries or 2k dwords (whatever is smaller)
+         *
+         * The second command is for the shadow pagetables.
 	 */
-	ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1;
+	ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
 
 	/* padding, etc. */
 	ndw = 64;
-- 
cgit v1.2.3-70-g09d2


From aebc5e6f50f770ec9392c3ca804f18b30797dfa7 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Wed, 6 Sep 2017 16:55:16 +0200
Subject: drm/amdgpu: rework amdgpu_cs_find_mapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the VM instead of the BO list to find the BO for a virtual address.

This fixes UVD/VCE in physical mode with VM local BOs.

Signed-off-by: Christian König <christian.koenig@amd.com>
Acked-by: Leo Liu <leo.liu@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c     | 42 +++++++-----------------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c     | 17 ++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h     |  2 ++
 4 files changed, 30 insertions(+), 32 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c30110a3024a..5f19227b35e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1479,46 +1479,24 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 			   uint64_t addr, struct amdgpu_bo **bo,
 			   struct amdgpu_bo_va_mapping **map)
 {
+	struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+	struct amdgpu_vm *vm = &fpriv->vm;
 	struct amdgpu_bo_va_mapping *mapping;
-	unsigned i;
 	int r;
 
-	if (!parser->bo_list)
-		return 0;
-
 	addr /= AMDGPU_GPU_PAGE_SIZE;
 
-	for (i = 0; i < parser->bo_list->num_entries; i++) {
-		struct amdgpu_bo_list_entry *lobj;
-
-		lobj = &parser->bo_list->array[i];
-		if (!lobj->bo_va)
-			continue;
-
-		list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
-			if (mapping->start > addr ||
-			    addr > mapping->last)
-				continue;
-
-			*bo = lobj->bo_va->base.bo;
-			*map = mapping;
-			goto found;
-		}
-
-		list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
-			if (mapping->start > addr ||
-			    addr > mapping->last)
-				continue;
+	mapping = amdgpu_vm_bo_lookup_mapping(vm, addr);
+	if (!mapping || !mapping->bo_va || !mapping->bo_va->base.bo)
+		return -EINVAL;
 
-			*bo = lobj->bo_va->base.bo;
-			*map = mapping;
-			goto found;
-		}
-	}
+	*bo = mapping->bo_va->base.bo;
+	*map = mapping;
 
-	return -EINVAL;
+	/* Double check that the BO is reserved by this CS */
+	if (READ_ONCE((*bo)->tbo.resv->lock.ctx) != &parser->ticket)
+		return -EINVAL;
 
-found:
 	r = amdgpu_ttm_bind(&(*bo)->tbo, &(*bo)->tbo.mem);
 	if (unlikely(r))
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 42492e63b3a2..a4891bea2ca8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -35,6 +35,7 @@
 
 /* bo virtual addresses in a vm */
 struct amdgpu_bo_va_mapping {
+	struct amdgpu_bo_va		*bo_va;
 	struct list_head		list;
 	struct rb_node			rb;
 	uint64_t			start;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 545531db66db..758bbb9e77f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2086,6 +2086,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
 	struct amdgpu_vm *vm = bo_va->base.vm;
 	struct amdgpu_bo *bo = bo_va->base.bo;
 
+	mapping->bo_va = bo_va;
 	list_add(&mapping->list, &bo_va->invalids);
 	amdgpu_vm_it_insert(mapping, &vm->va);
 
@@ -2263,6 +2264,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 
 	list_del(&mapping->list);
 	amdgpu_vm_it_remove(mapping, &vm->va);
+	mapping->bo_va = NULL;
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
 	if (valid)
@@ -2348,6 +2350,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 		if (tmp->last > eaddr)
 		    tmp->last = eaddr;
 
+		tmp->bo_va = NULL;
 		list_add(&tmp->list, &vm->freed);
 		trace_amdgpu_vm_bo_unmap(NULL, tmp);
 	}
@@ -2373,6 +2376,19 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 	return 0;
 }
 
+/**
+ * amdgpu_vm_bo_lookup_mapping - find mapping by address
+ *
+ * @vm: the requested VM
+ *
+ * Find a mapping by it's address.
+ */
+struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
+							 uint64_t addr)
+{
+	return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
+}
+
 /**
  * amdgpu_vm_bo_rmv - remove a bo to a specific vm
  *
@@ -2398,6 +2414,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
 		amdgpu_vm_it_remove(mapping, &vm->va);
+		mapping->bo_va = NULL;
 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 		list_add(&mapping->list, &vm->freed);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 90b7741d024b..c1accd15efc8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -276,6 +276,8 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 				struct amdgpu_vm *vm,
 				uint64_t saddr, uint64_t size);
+struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
+							 uint64_t addr);
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		      struct amdgpu_bo_va *bo_va);
 void amdgpu_vm_set_fragment_size(struct amdgpu_device *adev,
-- 
cgit v1.2.3-70-g09d2


From d5884513a31df072879c89c80306d544467ee770 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Fri, 8 Sep 2017 14:09:41 +0200
Subject: drm/amdgpu: fix VM sync with always valid BOs v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

All users of a VM must always wait for updates with always
valid BOs to be completed.

v2: remove debugging leftovers, rename struct member

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Roger He <Hongbo.He@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 15 ++++++++++-----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  2 +-
 3 files changed, 17 insertions(+), 10 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5f19227b35e9..ff61073b7181 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -761,10 +761,6 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_dir_update);
-	if (r)
-		return r;
-
 	r = amdgpu_vm_clear_freed(adev, vm, NULL);
 	if (r)
 		return r;
@@ -819,6 +815,12 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 	}
 
 	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
+	if (r)
+		return r;
+
+	r = amdgpu_sync_fence(adev, &p->job->sync, vm->last_update);
+	if (r)
+		return r;
 
 	if (amdgpu_vm_debug && p->bo_list) {
 		/* Invalidate all BOs to test for userspace bugs */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 758bbb9e77f3..64baa3138965 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1141,9 +1141,8 @@ static int amdgpu_vm_update_level(struct amdgpu_device *adev,
 				goto error_free;
 
 			amdgpu_bo_fence(parent->base.bo, fence, true);
-			dma_fence_put(vm->last_dir_update);
-			vm->last_dir_update = dma_fence_get(fence);
-			dma_fence_put(fence);
+			dma_fence_put(vm->last_update);
+			vm->last_update = fence;
 		}
 	}
 
@@ -1804,6 +1803,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			trace_amdgpu_vm_bo_mapping(mapping);
 	}
 
+	if (bo_va->base.bo &&
+	    bo_va->base.bo->tbo.resv == vm->root.base.bo->tbo.resv) {
+		dma_fence_put(vm->last_update);
+		vm->last_update = dma_fence_get(bo_va->last_pt_update);
+	}
+
 	return 0;
 }
 
@@ -2587,7 +2592,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			 vm->use_cpu_for_update ? "CPU" : "SDMA");
 	WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
 		  "CPU update of VM recommended only for large BAR system\n");
-	vm->last_dir_update = NULL;
+	vm->last_update = NULL;
 
 	flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 			AMDGPU_GEM_CREATE_VRAM_CLEARED;
@@ -2693,7 +2698,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	}
 
 	amdgpu_vm_free_levels(&vm->root);
-	dma_fence_put(vm->last_dir_update);
+	dma_fence_put(vm->last_update);
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
 		amdgpu_vm_free_reserved_vmid(adev, vm, i);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index c1accd15efc8..cb6a6222fc3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -140,7 +140,7 @@ struct amdgpu_vm {
 
 	/* contains the page directory */
 	struct amdgpu_vm_pt     root;
-	struct dma_fence	*last_dir_update;
+	struct dma_fence	*last_update;
 
 	/* protecting freed */
 	spinlock_t		freed_lock;
-- 
cgit v1.2.3-70-g09d2


From 4e55eb3879fea6d8c7d414cebaa5bff1da58b4a1 Mon Sep 17 00:00:00 2001
From: Christian König <christian.koenig@amd.com>
Date: Mon, 11 Sep 2017 16:54:59 +0200
Subject: drm/amdgpu: fix amdgpu_vm_handle_moved as well v2
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There is no guarantee that the last BO_VA actually needed an update.

Additional to that all command submissions must wait for moved BOs to
be cleared, not just the first one.

v2: Don't overwrite any newer fence.

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Chunming Zhou <david1.zhou@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 24 ++++++++++--------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  3 +--
 3 files changed, 12 insertions(+), 17 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ff61073b7181..9f1202a4182f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -814,7 +814,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
 
 	}
 
-	r = amdgpu_vm_handle_moved(adev, vm, &p->job->sync);
+	r = amdgpu_vm_handle_moved(adev, vm);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 64baa3138965..2df254cc802e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1743,7 +1743,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	dma_addr_t *pages_addr = NULL;
 	struct ttm_mem_reg *mem;
 	struct drm_mm_node *nodes;
-	struct dma_fence *exclusive;
+	struct dma_fence *exclusive, **last_update;
 	uint64_t flags;
 	int r;
 
@@ -1769,6 +1769,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	else
 		flags = 0x0;
 
+	if (clear || (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv))
+		last_update = &vm->last_update;
+	else
+		last_update = &bo_va->last_pt_update;
+
 	if (!clear && bo_va->base.moved) {
 		bo_va->base.moved = false;
 		list_splice_init(&bo_va->valids, &bo_va->invalids);
@@ -1780,7 +1785,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	list_for_each_entry(mapping, &bo_va->invalids, list) {
 		r = amdgpu_vm_bo_split_mapping(adev, exclusive, pages_addr, vm,
 					       mapping, flags, nodes,
-					       &bo_va->last_pt_update);
+					       last_update);
 		if (r)
 			return r;
 	}
@@ -1803,12 +1808,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			trace_amdgpu_vm_bo_mapping(mapping);
 	}
 
-	if (bo_va->base.bo &&
-	    bo_va->base.bo->tbo.resv == vm->root.base.bo->tbo.resv) {
-		dma_fence_put(vm->last_update);
-		vm->last_update = dma_fence_get(bo_va->last_pt_update);
-	}
-
 	return 0;
 }
 
@@ -2006,15 +2005,15 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
  * PTs have to be reserved!
  */
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
-			   struct amdgpu_vm *vm,
-			   struct amdgpu_sync *sync)
+			   struct amdgpu_vm *vm)
 {
-	struct amdgpu_bo_va *bo_va = NULL;
 	bool clear;
 	int r = 0;
 
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->moved)) {
+		struct amdgpu_bo_va *bo_va;
+
 		bo_va = list_first_entry(&vm->moved,
 			struct amdgpu_bo_va, base.vm_status);
 		spin_unlock(&vm->status_lock);
@@ -2030,9 +2029,6 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
 	}
 	spin_unlock(&vm->status_lock);
 
-	if (bo_va)
-		r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
-
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index cb6a6222fc3f..48c58ae4bb3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -250,8 +250,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			  struct amdgpu_vm *vm,
 			  struct dma_fence **fence);
 int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
-			   struct amdgpu_vm *vm,
-			   struct amdgpu_sync *sync);
+			   struct amdgpu_vm *vm);
 int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 			struct amdgpu_bo_va *bo_va,
 			bool clear);
-- 
cgit v1.2.3-70-g09d2


From ca290da8f6345c8e8e180256fbe092c751fa9654 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 25 Aug 2017 20:15:04 -0400
Subject: drm/amdgpu: Fix error handling in amdgpu_vm_init

Make sure vm->root.bo is not left reserved if amdgpu_bo_kmap fails.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2df254cc802e..2196bca7331c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2615,9 +2615,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			goto error_free_root;
 
 		r = amdgpu_bo_kmap(vm->root.base.bo, NULL);
+		amdgpu_bo_unreserve(vm->root.base.bo);
 		if (r)
 			goto error_free_root;
-		amdgpu_bo_unreserve(vm->root.base.bo);
 	}
 
 	return 0;
-- 
cgit v1.2.3-70-g09d2


From 02208441cc3a5110191996bb129db39ff10e7395 Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Fri, 25 Aug 2017 20:40:26 -0400
Subject: drm/amdgpu: Add PASID management

Allows assigning a PASID to a VM for identifying VMs involved in page
faults. The global PASID manager is also exported in the KFD
interface so that AMDGPU and KFD can share the PASID space.

PASIDs of different sizes can be requested. On APUs, the PASID size
is deterined by the capabilities of the IOMMU. So KFD must be able
to allocate PASIDs in a smaller range.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Alex Deucher <alexander.deucher@amd.com>
Reviewed-by: Oded Gabbay <oded.gabbay@gmail.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c |  2 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c           |  2 +-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c            | 75 ++++++++++++++++++++++-
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h            | 14 ++++-
 drivers/gpu/drm/amd/include/kgd_kfd_interface.h   |  6 ++
 6 files changed, 97 insertions(+), 4 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index b9dbbf9cb8b0..dc7e25cce741 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -169,6 +169,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_vmem_size = get_vmem_size,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.alloc_pasid = amdgpu_vm_alloc_pasid,
+	.free_pasid = amdgpu_vm_free_pasid,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 309f2419c6d8..c678c69936a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -128,6 +128,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
 	.get_vmem_size = get_vmem_size,
 	.get_gpu_clock_counter = get_gpu_clock_counter,
 	.get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz,
+	.alloc_pasid = amdgpu_vm_alloc_pasid,
+	.free_pasid = amdgpu_vm_free_pasid,
 	.program_sh_mem_settings = kgd_program_sh_mem_settings,
 	.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
 	.init_pipeline = kgd_init_pipeline,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index e16229000a98..79d9ab43d42c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -825,7 +825,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 	}
 
 	r = amdgpu_vm_init(adev, &fpriv->vm,
-			   AMDGPU_VM_CONTEXT_GFX);
+			   AMDGPU_VM_CONTEXT_GFX, 0);
 	if (r) {
 		kfree(fpriv);
 		goto out_suspend;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2196bca7331c..9b795915cab1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -27,11 +27,58 @@
  */
 #include <linux/dma-fence-array.h>
 #include <linux/interval_tree_generic.h>
+#include <linux/idr.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
 #include "amdgpu_trace.h"
 
+/*
+ * PASID manager
+ *
+ * PASIDs are global address space identifiers that can be shared
+ * between the GPU, an IOMMU and the driver. VMs on different devices
+ * may use the same PASID if they share the same address
+ * space. Therefore PASIDs are allocated using a global IDA. VMs are
+ * looked up from the PASID per amdgpu_device.
+ */
+static DEFINE_IDA(amdgpu_vm_pasid_ida);
+
+/**
+ * amdgpu_vm_alloc_pasid - Allocate a PASID
+ * @bits: Maximum width of the PASID in bits, must be at least 1
+ *
+ * Allocates a PASID of the given width while keeping smaller PASIDs
+ * available if possible.
+ *
+ * Returns a positive integer on success. Returns %-EINVAL if bits==0.
+ * Returns %-ENOSPC if no PASID was available. Returns %-ENOMEM on
+ * memory allocation failure.
+ */
+int amdgpu_vm_alloc_pasid(unsigned int bits)
+{
+	int pasid = -EINVAL;
+
+	for (bits = min(bits, 31U); bits > 0; bits--) {
+		pasid = ida_simple_get(&amdgpu_vm_pasid_ida,
+				       1U << (bits - 1), 1U << bits,
+				       GFP_KERNEL);
+		if (pasid != -ENOSPC)
+			break;
+	}
+
+	return pasid;
+}
+
+/**
+ * amdgpu_vm_free_pasid - Free a PASID
+ * @pasid: PASID to free
+ */
+void amdgpu_vm_free_pasid(unsigned int pasid)
+{
+	ida_simple_remove(&amdgpu_vm_pasid_ida, pasid);
+}
+
 /*
  * GPUVM
  * GPUVM is similar to the legacy gart on older asics, however
@@ -2539,7 +2586,7 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size, uint32_
  * Init @vm fields.
  */
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		   int vm_context)
+		   int vm_context, unsigned int pasid)
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
 		AMDGPU_VM_PTE_COUNT(adev) * 8);
@@ -2620,6 +2667,19 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 			goto error_free_root;
 	}
 
+	if (pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1,
+			      GFP_ATOMIC);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+		if (r < 0)
+			goto error_free_root;
+
+		vm->pasid = pasid;
+	}
+
 	return 0;
 
 error_free_root:
@@ -2673,6 +2733,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
 	int i;
 
+	if (vm->pasid) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
+		idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
+		spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
+	}
+
 	amd_sched_entity_fini(vm->entity.sched, &vm->entity);
 
 	if (!RB_EMPTY_ROOT(&vm->va)) {
@@ -2752,6 +2820,8 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 	adev->vm_manager.vm_update_mode = 0;
 #endif
 
+	idr_init(&adev->vm_manager.pasid_idr);
+	spin_lock_init(&adev->vm_manager.pasid_lock);
 }
 
 /**
@@ -2765,6 +2835,9 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 {
 	unsigned i, j;
 
+	WARN_ON(!idr_is_empty(&adev->vm_manager.pasid_idr));
+	idr_destroy(&adev->vm_manager.pasid_idr);
+
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		struct amdgpu_vm_id_manager *id_mgr =
 			&adev->vm_manager.id_mgr[i];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 48c58ae4bb3a..7873dfa8c0f9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -25,6 +25,7 @@
 #define __AMDGPU_VM_H__
 
 #include <linux/rbtree.h>
+#include <linux/idr.h>
 
 #include "gpu_scheduler.h"
 #include "amdgpu_sync.h"
@@ -148,8 +149,9 @@ struct amdgpu_vm {
 	/* Scheduler entity for page table updates */
 	struct amd_sched_entity	entity;
 
-	/* client id */
+	/* client id and PASID (TODO: replace client_id with PASID) */
 	u64                     client_id;
+	unsigned int		pasid;
 	/* dedicated to vm */
 	struct amdgpu_vm_id	*reserved_vmid[AMDGPU_MAX_VMHUBS];
 
@@ -220,12 +222,20 @@ struct amdgpu_vm_manager {
 	 * BIT1[= 0] Compute updated by SDMA [= 1] by CPU
 	 */
 	int					vm_update_mode;
+
+	/* PASID to VM mapping, will be used in interrupt context to
+	 * look up VM of a page fault
+	 */
+	struct idr				pasid_idr;
+	spinlock_t				pasid_lock;
 };
 
+int amdgpu_vm_alloc_pasid(unsigned int bits);
+void amdgpu_vm_free_pasid(unsigned int pasid);
 void amdgpu_vm_manager_init(struct amdgpu_device *adev);
 void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
-		   int vm_context);
+		   int vm_context, unsigned int pasid);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 94277cb734d2..f516fd10e6ba 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -112,6 +112,9 @@ struct tile_config {
  *
  * @get_max_engine_clock_in_mhz: Retrieves maximum GPU clock in MHz
  *
+ * @alloc_pasid: Allocate a PASID
+ * @free_pasid: Free a PASID
+ *
  * @program_sh_mem_settings: A function that should initiate the memory
  * properties such as main aperture memory type (cache / non cached) and
  * secondary aperture base address, size and memory type.
@@ -160,6 +163,9 @@ struct kfd2kgd_calls {
 
 	uint32_t (*get_max_engine_clock_in_mhz)(struct kgd_dev *kgd);
 
+	int (*alloc_pasid)(unsigned int bits);
+	void (*free_pasid)(unsigned int pasid);
+
 	/* Register access functions */
 	void (*program_sh_mem_settings)(struct kgd_dev *kgd, uint32_t vmid,
 			uint32_t sh_mem_config,	uint32_t sh_mem_ape1_base,
-- 
cgit v1.2.3-70-g09d2


From a2f14820e3493145c25095873d4a510a1b25efdc Mon Sep 17 00:00:00 2001
From: Felix Kuehling <Felix.Kuehling@amd.com>
Date: Sat, 26 Aug 2017 02:43:06 -0400
Subject: drm/amdgpu: Track pending retry faults in IH and VM (v2)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

IH tracks pending retry faults in a hash table for fast lookup in
interrupt context. Each VM has a short FIFO of pending VM faults for
processing in a bottom half.

The IH prescreening stage adds retry faults and filters out repeated
retry interrupts to minimize the impact of interrupt storms.

It's the VM's responsibility remove pending faults once they are
handled. For now this is only done when the VM is destroyed.

v2:
- Made the hash table smaller and the FIFO longer. I never want the
  FIFO to fill up, because that would make prescreen take longer.
  128 pending page faults should be enough to keep migrations busy.

Signed-off-by: Felix Kuehling <Felix.Kuehling@amd.com>
Acked-by: Christian König <christian.koenig@amd.com> (v1)
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/Kconfig                |  1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 76 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 12 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c |  7 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h |  7 +++
 drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 78 +++++++++++++++++++++++++++++++++-
 6 files changed, 180 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 1989c276138c..7fb8492d8e63 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -184,6 +184,7 @@ config DRM_AMDGPU
 	select BACKLIGHT_CLASS_DEVICE
 	select BACKLIGHT_LCD_SUPPORT
 	select INTERVAL_TREE
+	select CHASH
 	help
 	  Choose this option if you have a recent AMD Radeon graphics card.
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
index c834a40cfad6..f5f27e4f0f7f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
@@ -196,3 +196,79 @@ restart_ih:
 
 	return IRQ_HANDLED;
 }
+
+/**
+ * amdgpu_ih_add_fault - Add a page fault record
+ *
+ * @adev: amdgpu device pointer
+ * @key: 64-bit encoding of PASID and address
+ *
+ * This should be called when a retry page fault interrupt is
+ * received. If this is a new page fault, it will be added to a hash
+ * table. The return value indicates whether this is a new fault, or
+ * a fault that was already known and is already being handled.
+ *
+ * If there are too many pending page faults, this will fail. Retry
+ * interrupts should be ignored in this case until there is enough
+ * free space.
+ *
+ * Returns 0 if the fault was added, 1 if the fault was already known,
+ * -ENOSPC if there are too many pending faults.
+ */
+int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key)
+{
+	unsigned long flags;
+	int r = -ENOSPC;
+
+	if (WARN_ON_ONCE(!adev->irq.ih.faults))
+		/* Should be allocated in <IP>_ih_sw_init on GPUs that
+		 * support retry faults and require retry filtering.
+		 */
+		return r;
+
+	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
+
+	/* Only let the hash table fill up to 50% for best performance */
+	if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1)))
+		goto unlock_out;
+
+	r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL);
+	if (!r)
+		adev->irq.ih.faults->count++;
+
+	/* chash_table_copy_in should never fail unless we're losing count */
+	WARN_ON_ONCE(r < 0);
+
+unlock_out:
+	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
+	return r;
+}
+
+/**
+ * amdgpu_ih_clear_fault - Remove a page fault record
+ *
+ * @adev: amdgpu device pointer
+ * @key: 64-bit encoding of PASID and address
+ *
+ * This should be called when a page fault has been handled. Any
+ * future interrupt with this key will be processed as a new
+ * page fault.
+ */
+void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key)
+{
+	unsigned long flags;
+	int r;
+
+	if (!adev->irq.ih.faults)
+		return;
+
+	spin_lock_irqsave(&adev->irq.ih.faults->lock, flags);
+
+	r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL);
+	if (!WARN_ON_ONCE(r < 0)) {
+		adev->irq.ih.faults->count--;
+		WARN_ON_ONCE(adev->irq.ih.faults->count < 0);
+	}
+
+	spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 3de8e74e5b3a..ada89358e220 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -24,6 +24,8 @@
 #ifndef __AMDGPU_IH_H__
 #define __AMDGPU_IH_H__
 
+#include <linux/chash.h>
+
 struct amdgpu_device;
  /*
   * vega10+ IH clients
@@ -69,6 +71,13 @@ enum amdgpu_ih_clientid
 
 #define AMDGPU_IH_CLIENTID_LEGACY 0
 
+#define AMDGPU_PAGEFAULT_HASH_BITS 8
+struct amdgpu_retryfault_hashtable {
+	DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
+	spinlock_t	lock;
+	int		count;
+};
+
 /*
  * R6xx+ IH ring
  */
@@ -87,6 +96,7 @@ struct amdgpu_ih_ring {
 	bool			use_doorbell;
 	bool			use_bus_addr;
 	dma_addr_t		rb_dma_addr; /* only used when use_bus_addr = true */
+	struct amdgpu_retryfault_hashtable *faults;
 };
 
 #define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4
@@ -109,5 +119,7 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size,
 			bool use_bus_addr);
 void amdgpu_ih_ring_fini(struct amdgpu_device *adev);
 int amdgpu_ih_process(struct amdgpu_device *adev);
+int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key);
+void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9b795915cab1..6c1133298b17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2680,6 +2680,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 		vm->pasid = pasid;
 	}
 
+	INIT_KFIFO(vm->faults);
+
 	return 0;
 
 error_free_root:
@@ -2731,8 +2733,13 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	bool prt_fini_needed = !!adev->gart.gart_funcs->set_prt;
+	u64 fault;
 	int i;
 
+	/* Clear pending page faults from IH when the VM is destroyed */
+	while (kfifo_get(&vm->faults, &fault))
+		amdgpu_ih_clear_fault(adev, fault);
+
 	if (vm->pasid) {
 		unsigned long flags;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 7873dfa8c0f9..447ed6e7e586 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -120,6 +120,10 @@ struct amdgpu_vm_pt {
 	unsigned			last_entry_used;
 };
 
+#define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr))
+#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
+#define AMDGPU_VM_FAULT_ADDR(fault)  ((u64)(fault) & 0xfffffffff000ULL)
+
 struct amdgpu_vm {
 	/* tree of virtual addresses mapped */
 	struct rb_root		va;
@@ -160,6 +164,9 @@ struct amdgpu_vm {
 
 	/* Flag to indicate ATS support from PTE for GFX9 */
 	bool			pte_support_ats;
+
+	/* Up to 128 pending page faults */
+	DECLARE_KFIFO(faults, u64, 128);
 };
 
 struct amdgpu_vm_id {
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index eda4771e273f..dd6af2176d3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -235,8 +235,73 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev)
  */
 static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
 {
-	/* TODO: Filter known pending page faults */
+	u32 ring_index = adev->irq.ih.rptr >> 2;
+	u32 dw0, dw3, dw4, dw5;
+	u16 pasid;
+	u64 addr, key;
+	struct amdgpu_vm *vm;
+	int r;
+
+	dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
+	dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
+	dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]);
+	dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]);
+
+	/* Filter retry page faults, let only the first one pass. If
+	 * there are too many outstanding faults, ignore them until
+	 * some faults get cleared.
+	 */
+	switch (dw0 & 0xff) {
+	case AMDGPU_IH_CLIENTID_VMC:
+	case AMDGPU_IH_CLIENTID_UTCL2:
+		break;
+	default:
+		/* Not a VM fault */
+		return true;
+	}
+
+	/* Not a retry fault */
+	if (!(dw5 & 0x80))
+		return true;
+
+	pasid = dw3 & 0xffff;
+	/* No PASID, can't identify faulting process */
+	if (!pasid)
+		return true;
+
+	addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
+	key = AMDGPU_VM_FAULT(pasid, addr);
+	r = amdgpu_ih_add_fault(adev, key);
+
+	/* Hash table is full or the fault is already being processed,
+	 * ignore further page faults
+	 */
+	if (r != 0)
+		goto ignore_iv;
+
+	/* Track retry faults in per-VM fault FIFO. */
+	spin_lock(&adev->vm_manager.pasid_lock);
+	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
+	spin_unlock(&adev->vm_manager.pasid_lock);
+	if (WARN_ON_ONCE(!vm)) {
+		/* VM not found, process it normally */
+		amdgpu_ih_clear_fault(adev, key);
+		return true;
+	}
+	/* No locking required with single writer and single reader */
+	r = kfifo_put(&vm->faults, key);
+	if (!r) {
+		/* FIFO is full. Ignore it until there is space */
+		amdgpu_ih_clear_fault(adev, key);
+		goto ignore_iv;
+	}
+
+	/* It's the first fault for this address, process it normally */
 	return true;
+
+ignore_iv:
+	adev->irq.ih.rptr += 32;
+	return false;
 }
 
 /**
@@ -323,6 +388,14 @@ static int vega10_ih_sw_init(void *handle)
 	adev->irq.ih.use_doorbell = true;
 	adev->irq.ih.doorbell_index = AMDGPU_DOORBELL64_IH << 1;
 
+	adev->irq.ih.faults = kmalloc(sizeof(*adev->irq.ih.faults), GFP_KERNEL);
+	if (!adev->irq.ih.faults)
+		return -ENOMEM;
+	INIT_CHASH_TABLE(adev->irq.ih.faults->hash,
+			 AMDGPU_PAGEFAULT_HASH_BITS, 8, 0);
+	spin_lock_init(&adev->irq.ih.faults->lock);
+	adev->irq.ih.faults->count = 0;
+
 	r = amdgpu_irq_init(adev);
 
 	return r;
@@ -335,6 +408,9 @@ static int vega10_ih_sw_fini(void *handle)
 	amdgpu_irq_fini(adev);
 	amdgpu_ih_ring_fini(adev);
 
+	kfree(adev->irq.ih.faults);
+	adev->irq.ih.faults = NULL;
+
 	return 0;
 }
 
-- 
cgit v1.2.3-70-g09d2


From 7bdc53f925af085ffa0580f10489f82b36cc2f1c Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Fri, 15 Sep 2017 18:20:37 -0400
Subject: drm/amdgpu: Fix a bug in amdgpu_fill_buffer()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When max_bytes is not 8 bytes aligned and bo size is larger than
max_bytes, the last 8 bytes in a ttm node may be left unchanged.
For example, on pre SDMA 4.0, max_bytes = 0x1fffff, and the bo size
is 0x200000, the problem will happen.

In order to fix the problem, we separately store the max nums of
PTEs/PDEs a single operation can set in amdgpu_vm_pte_funcs
structure, rather than inferring it from bytes limit of SDMA
constant fill, i.e. fill_max_bytes.

Together with the fix, we replace the hard code value "10" in
amdgpu_vm_bo_update_mapping() with the corresponding values from
structure amdgpu_vm_pte_funcs.

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 7 +++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 8 ++++----
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 5 +++--
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c   | 3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c  | 3 +++
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c  | 4 ++++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c  | 3 +++
 drivers/gpu/drm/amd/amdgpu/si_dma.c     | 3 +++
 8 files changed, 30 insertions(+), 6 deletions(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f085c8c9f267..1bf42a8ef23e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -302,6 +302,13 @@ struct amdgpu_vm_pte_funcs {
 	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
 			  uint64_t value, unsigned count,
 			  uint32_t incr);
+
+	/* maximum nums of PTEs/PDEs in a single operation */
+	uint32_t	set_max_nums_pte_pde;
+
+	/* number of dw to reserve per operation */
+	unsigned	set_pte_pde_num_dw;
+
 	/* for linear pte/pde updates without addr mapping */
 	void (*set_pte_pde)(struct amdgpu_ib *ib,
 			    uint64_t pe,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index ce435dbbb398..1086f039d8e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1527,8 +1527,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		       struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	/* max_bytes applies to SDMA_OP_PTEPDE as well as SDMA_OP_CONST_FILL*/
-	uint32_t max_bytes = adev->mman.buffer_funcs->fill_max_bytes;
+	uint32_t max_bytes = 8 *
+			adev->vm_manager.vm_pte_funcs->set_max_nums_pte_pde;
 	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
 
 	struct drm_mm_node *mm_node;
@@ -1560,8 +1560,8 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
 		++mm_node;
 	}
 
-	/* 10 double words for each SDMA_OP_PTEPDE cmd */
-	num_dw = num_loops * 10;
+	/* num of dwords for each SDMA_OP_PTEPDE cmd */
+	num_dw = num_loops * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
 
 	/* for IB padding */
 	num_dw += 64;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 6c1133298b17..28d16781377f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1606,10 +1606,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	} else {
 		/* set page commands needed */
-		ndw += ncmds * 10;
+		ndw += ncmds * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw;
 
 		/* extra commands for begin/end fragments */
-		ndw += 2 * 10 * adev->vm_manager.fragment_size;
+		ndw += 2 * adev->vm_manager.vm_pte_funcs->set_pte_pde_num_dw
+				* adev->vm_manager.fragment_size;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index f508f4d01e4a..c64dcd1883b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1389,6 +1389,9 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
 	.copy_pte = cik_sdma_vm_copy_pte,
 	.write_pte = cik_sdma_vm_write_pte,
+
+	.set_max_nums_pte_pde = 0x1fffff >> 3,
+	.set_pte_pde_num_dw = 10,
 	.set_pte_pde = cik_sdma_vm_set_pte_pde,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index f2d0710258cb..c05eb74d3404 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -1326,6 +1326,9 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
 	.copy_pte = sdma_v2_4_vm_copy_pte,
 	.write_pte = sdma_v2_4_vm_write_pte,
+
+	.set_max_nums_pte_pde = 0x1fffff >> 3,
+	.set_pte_pde_num_dw = 10,
 	.set_pte_pde = sdma_v2_4_vm_set_pte_pde,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 4858c9974c86..2079340656d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -1750,6 +1750,10 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
 	.copy_pte = sdma_v3_0_vm_copy_pte,
 	.write_pte = sdma_v3_0_vm_write_pte,
+
+	/* not 0x3fffff due to HW limitation */
+	.set_max_nums_pte_pde = 0x3fffe0 >> 3,
+	.set_pte_pde_num_dw = 10,
 	.set_pte_pde = sdma_v3_0_vm_set_pte_pde,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 3524060f8480..2605faf56dff 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1716,6 +1716,9 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
 	.copy_pte = sdma_v4_0_vm_copy_pte,
 	.write_pte = sdma_v4_0_vm_write_pte,
+
+	.set_max_nums_pte_pde = 0x400000 >> 3,
+	.set_pte_pde_num_dw = 10,
 	.set_pte_pde = sdma_v4_0_vm_set_pte_pde,
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 112969f3301a..adb6ae7d63ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -889,6 +889,9 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
 static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
 	.copy_pte = si_dma_vm_copy_pte,
 	.write_pte = si_dma_vm_write_pte,
+
+	.set_max_nums_pte_pde = 0xffff8 >> 3,
+	.set_pte_pde_num_dw = 9,
 	.set_pte_pde = si_dma_vm_set_pte_pde,
 };
 
-- 
cgit v1.2.3-70-g09d2


From e6d921974a51e607515b39baa8d1c3f1a27d008b Mon Sep 17 00:00:00 2001
From: Yong Zhao <yong.zhao@amd.com>
Date: Tue, 19 Sep 2017 12:58:15 -0400
Subject: drm/amdgpu: Add copy_pte_num_dw member in amdgpu_vm_pte_funcs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use it to replace the hard coded value in amdgpu_vm_bo_update_mapping().

Signed-off-by: Yong Zhao <yong.zhao@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h    | 4 ++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 2 +-
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c  | 2 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 2 ++
 drivers/gpu/drm/amd/amdgpu/si_dma.c    | 2 ++
 7 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 1bf42a8ef23e..7c43add4e444 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -294,10 +294,14 @@ struct amdgpu_buffer_funcs {
 
 /* provided by hw blocks that can write ptes, e.g., sdma */
 struct amdgpu_vm_pte_funcs {
+	/* number of dw to reserve per operation */
+	unsigned	copy_pte_num_dw;
+
 	/* copy pte entries from GART */
 	void (*copy_pte)(struct amdgpu_ib *ib,
 			 uint64_t pe, uint64_t src,
 			 unsigned count);
+
 	/* write pte one entry at a time with addr mapping */
 	void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe,
 			  uint64_t value, unsigned count,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 28d16781377f..8fcc743dfa86 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1597,7 +1597,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 
 	if (pages_addr) {
 		/* copy commands needed */
-		ndw += ncmds * 7;
+		ndw += ncmds * adev->vm_manager.vm_pte_funcs->copy_pte_num_dw;
 
 		/* and also PTEs */
 		ndw += nptes * 2;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c64dcd1883b5..60cecd117705 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -1387,7 +1387,9 @@ static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
+	.copy_pte_num_dw = 7,
 	.copy_pte = cik_sdma_vm_copy_pte,
+
 	.write_pte = cik_sdma_vm_write_pte,
 
 	.set_max_nums_pte_pde = 0x1fffff >> 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index c05eb74d3404..acdee3a4602c 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -1324,7 +1324,9 @@ static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
+	.copy_pte_num_dw = 7,
 	.copy_pte = sdma_v2_4_vm_copy_pte,
+
 	.write_pte = sdma_v2_4_vm_write_pte,
 
 	.set_max_nums_pte_pde = 0x1fffff >> 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 2079340656d2..72f31cc7df00 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -1748,7 +1748,9 @@ static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
+	.copy_pte_num_dw = 7,
 	.copy_pte = sdma_v3_0_vm_copy_pte,
+
 	.write_pte = sdma_v3_0_vm_write_pte,
 
 	/* not 0x3fffff due to HW limitation */
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 2605faf56dff..61572c5d19ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1714,7 +1714,9 @@ static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = {
+	.copy_pte_num_dw = 7,
 	.copy_pte = sdma_v4_0_vm_copy_pte,
+
 	.write_pte = sdma_v4_0_vm_write_pte,
 
 	.set_max_nums_pte_pde = 0x400000 >> 3,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index adb6ae7d63ef..3fa2fbf8c9a1 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -887,7 +887,9 @@ static void si_dma_set_buffer_funcs(struct amdgpu_device *adev)
 }
 
 static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = {
+	.copy_pte_num_dw = 5,
 	.copy_pte = si_dma_vm_copy_pte,
+
 	.write_pte = si_dma_vm_write_pte,
 
 	.set_max_nums_pte_pde = 0xffff8 >> 3,
-- 
cgit v1.2.3-70-g09d2