Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 303
1 file changed, 195 insertions(+), 108 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9a4e3b63f1cb..f68b7cdc370a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -127,16 +127,16 @@ struct amdgpu_bo_list_entry *amdgpu_vm_get_bos(struct amdgpu_device *adev,
 /**
  * amdgpu_vm_grab_id - allocate the next free VMID
  *
- * @ring: ring we want to submit job to
  * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ * @sync: sync object where we add dependencies
  *
- * Allocate an id for the vm (cayman+).
- * Returns the fence we need to sync to (if any).
+ * Allocate an id for the vm, adding fences to the sync obj as necessary.
  *
- * Global and local mutex must be locked!
+ * Global mutex must be locked!
  */
-struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
-				       struct amdgpu_vm *vm)
+int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
+		      struct amdgpu_sync *sync)
 {
 	struct amdgpu_fence *best[AMDGPU_MAX_RINGS] = {};
 	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
@@ -148,7 +148,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
 	/* check if the id is still valid */
 	if (vm_id->id && vm_id->last_id_use &&
 	    vm_id->last_id_use == adev->vm_manager.active[vm_id->id])
-		return NULL;
+		return 0;
 
 	/* we definately need to flush */
 	vm_id->pd_gpu_addr = ~0ll;
@@ -161,7 +161,7 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
 			/* found a free one */
 			vm_id->id = i;
 			trace_amdgpu_vm_grab_id(i, ring->idx);
-			return NULL;
+			return 0;
 		}
 
 		if (amdgpu_fence_is_earlier(fence, best[fence->ring->idx])) {
@@ -172,15 +172,19 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
 
 	for (i = 0; i < 2; ++i) {
 		if (choices[i]) {
+			struct amdgpu_fence *fence;
+
+			fence  = adev->vm_manager.active[choices[i]];
 			vm_id->id = choices[i];
+
 			trace_amdgpu_vm_grab_id(choices[i], ring->idx);
-			return adev->vm_manager.active[choices[i]];
+			return amdgpu_sync_fence(ring->adev, sync, &fence->base);
 		}
 	}
 
 	/* should never happen */
 	BUG();
-	return NULL;
+	return -EINVAL;
 }
 
 /**
@@ -196,17 +200,29 @@ struct amdgpu_fence *amdgpu_vm_grab_id(struct amdgpu_ring *ring,
  */
 void amdgpu_vm_flush(struct amdgpu_ring *ring,
 		     struct amdgpu_vm *vm,
-		     struct amdgpu_fence *updates)
+		     struct fence *updates)
 {
 	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
+	struct fence *flushed_updates = vm_id->flushed_updates;
+	bool is_earlier = false;
+
+	if (flushed_updates && updates) {
+		BUG_ON(flushed_updates->context != updates->context);
+		is_earlier = (updates->seqno - flushed_updates->seqno <=
+			      INT_MAX) ? true : false;
+	}
 
-	if (pd_addr != vm_id->pd_gpu_addr || !vm_id->flushed_updates ||
-	    amdgpu_fence_is_earlier(vm_id->flushed_updates, updates)) {
+	if (pd_addr != vm_id->pd_gpu_addr || !flushed_updates ||
+	    is_earlier) {
 
 		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
-		amdgpu_fence_unref(&vm_id->flushed_updates);
-		vm_id->flushed_updates = amdgpu_fence_ref(updates);
+		if (is_earlier) {
+			vm_id->flushed_updates = fence_get(updates);
+			fence_put(flushed_updates);
+		}
+		if (!flushed_updates)
+			vm_id->flushed_updates = fence_get(updates);
 		vm_id->pd_gpu_addr = pd_addr;
 		amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
 	}
@@ -300,6 +316,15 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
 	}
 }
 
+int amdgpu_vm_free_job(struct amdgpu_job *sched_job)
+{
+	int i;
+	for (i = 0; i < sched_job->num_ibs; i++)
+		amdgpu_ib_free(sched_job->adev, &sched_job->ibs[i]);
+	kfree(sched_job->ibs);
+	return 0;
+}
+
 /**
  * amdgpu_vm_clear_bo - initially clear the page dir/table
  *
@@ -310,7 +335,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 			      struct amdgpu_bo *bo)
 {
 	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
-	struct amdgpu_ib ib;
+	struct fence *fence = NULL;
+	struct amdgpu_ib *ib;
 	unsigned entries;
 	uint64_t addr;
 	int r;
@@ -330,24 +356,33 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 	addr = amdgpu_bo_gpu_offset(bo);
 	entries = amdgpu_bo_size(bo) / 8;
 
-	r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, &ib);
-	if (r)
+	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+	if (!ib)
 		goto error_unreserve;
 
-	ib.length_dw = 0;
-
-	amdgpu_vm_update_pages(adev, &ib, addr, 0, entries, 0, 0, 0);
-	amdgpu_vm_pad_ib(adev, &ib);
-	WARN_ON(ib.length_dw > 64);
-
-	r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
+	r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
 	if (r)
 		goto error_free;
 
-	amdgpu_bo_fence(bo, ib.fence, true);
+	ib->length_dw = 0;
+	amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0);
+	amdgpu_vm_pad_ib(adev, ib);
+	WARN_ON(ib->length_dw > 64);
+	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+						 &amdgpu_vm_free_job,
+						 AMDGPU_FENCE_OWNER_VM,
+						 &fence);
+	if (!r)
+		amdgpu_bo_fence(bo, fence, true);
+	fence_put(fence);
+	if (amdgpu_enable_scheduler) {
+		amdgpu_bo_unreserve(bo);
+		return 0;
+	}
 
 error_free:
-	amdgpu_ib_free(adev, &ib);
+	amdgpu_ib_free(adev, ib);
+	kfree(ib);
 
 error_unreserve:
 	amdgpu_bo_unreserve(bo);
@@ -400,7 +435,9 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
 	uint64_t last_pde = ~0, last_pt = ~0;
 	unsigned count = 0, pt_idx, ndw;
-	struct amdgpu_ib ib;
+	struct amdgpu_ib *ib;
+	struct fence *fence = NULL;
+
 	int r;
 
 	/* padding, etc. */
@@ -413,10 +450,14 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	if (ndw > 0xfffff)
 		return -ENOMEM;
 
-	r = amdgpu_ib_get(ring, NULL, ndw * 4, &ib);
+	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+	if (!ib)
+		return -ENOMEM;
+
+	r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
 	if (r)
 		return r;
-	ib.length_dw = 0;
+	ib->length_dw = 0;
 
 	/* walk over the address space and update the page directory */
 	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
@@ -436,7 +477,7 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 		    ((last_pt + incr * count) != pt)) {
 
 			if (count) {
-				amdgpu_vm_update_pages(adev, &ib, last_pde,
+				amdgpu_vm_update_pages(adev, ib, last_pde,
 						       last_pt, count, incr,
 						       AMDGPU_PTE_VALID, 0);
 			}
@@ -450,23 +491,37 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
 	}
 
 	if (count)
-		amdgpu_vm_update_pages(adev, &ib, last_pde, last_pt, count,
+		amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count,
 				       incr, AMDGPU_PTE_VALID, 0);
 
-	if (ib.length_dw != 0) {
-		amdgpu_vm_pad_ib(adev, &ib);
-		amdgpu_sync_resv(adev, &ib.sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
-		WARN_ON(ib.length_dw > ndw);
-		r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
-		if (r) {
-			amdgpu_ib_free(adev, &ib);
-			return r;
-		}
-		amdgpu_bo_fence(pd, ib.fence, true);
+	if (ib->length_dw != 0) {
+		amdgpu_vm_pad_ib(adev, ib);
+		amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
+		WARN_ON(ib->length_dw > ndw);
+		r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+							 &amdgpu_vm_free_job,
+							 AMDGPU_FENCE_OWNER_VM,
+							 &fence);
+		if (r)
+			goto error_free;
+
+		amdgpu_bo_fence(pd, fence, true);
+		fence_put(vm->page_directory_fence);
+		vm->page_directory_fence = fence_get(fence);
+		fence_put(fence);
+	}
+
+	if (!amdgpu_enable_scheduler || ib->length_dw == 0) {
+		amdgpu_ib_free(adev, ib);
+		kfree(ib);
 	}
 
-	amdgpu_ib_free(adev, &ib);
 	return 0;
+
+error_free:
+	amdgpu_ib_free(adev, ib);
+	kfree(ib);
+	return r;
 }
 
 /**
@@ -572,9 +627,14 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 {
 	uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
 	uint64_t last_pte = ~0, last_dst = ~0;
+	void *owner = AMDGPU_FENCE_OWNER_VM;
 	unsigned count = 0;
 	uint64_t addr;
 
+	/* sync to everything on unmapping */
+	if (!(flags & AMDGPU_PTE_VALID))
+		owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+
 	/* walk over the address space and update the page tables */
 	for (addr = start; addr < end; ) {
 		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
@@ -583,8 +643,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 		uint64_t pte;
 		int r;
 
-		amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv,
-				 AMDGPU_FENCE_OWNER_VM);
+		amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
 		r = reservation_object_reserve_shared(pt->tbo.resv);
 		if (r)
 			return r;
@@ -640,7 +699,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
  */
 static void amdgpu_vm_fence_pts(struct amdgpu_vm *vm,
 				uint64_t start, uint64_t end,
-				struct amdgpu_fence *fence)
+				struct fence *fence)
 {
 	unsigned i;
 
@@ -670,12 +729,13 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 				       struct amdgpu_vm *vm,
 				       struct amdgpu_bo_va_mapping *mapping,
 				       uint64_t addr, uint32_t gtt_flags,
-				       struct amdgpu_fence **fence)
+				       struct fence **fence)
 {
 	struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
 	unsigned nptes, ncmds, ndw;
 	uint32_t flags = gtt_flags;
-	struct amdgpu_ib ib;
+	struct amdgpu_ib *ib;
+	struct fence *f = NULL;
 	int r;
 
 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -722,46 +782,54 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (ndw > 0xfffff)
 		return -ENOMEM;
 
-	r = amdgpu_ib_get(ring, NULL, ndw * 4, &ib);
-	if (r)
-		return r;
-	ib.length_dw = 0;
-
-	if (!(flags & AMDGPU_PTE_VALID)) {
-		unsigned i;
+	ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
+	if (!ib)
+		return -ENOMEM;
 
-		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			struct amdgpu_fence *f = vm->ids[i].last_id_use;
-			amdgpu_sync_fence(&ib.sync, f);
-		}
+	r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
+	if (r) {
+		kfree(ib);
+		return r;
 	}
 
-	r = amdgpu_vm_update_ptes(adev, vm, &ib, mapping->it.start,
+	ib->length_dw = 0;
+
+	r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
 				  mapping->it.last + 1, addr + mapping->offset,
 				  flags, gtt_flags);
 
 	if (r) {
-		amdgpu_ib_free(adev, &ib);
+		amdgpu_ib_free(adev, ib);
+		kfree(ib);
 		return r;
 	}
 
-	amdgpu_vm_pad_ib(adev, &ib);
-	WARN_ON(ib.length_dw > ndw);
+	amdgpu_vm_pad_ib(adev, ib);
+	WARN_ON(ib->length_dw > ndw);
+	r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
+						 &amdgpu_vm_free_job,
+						 AMDGPU_FENCE_OWNER_VM,
+						 &f);
+	if (r)
+		goto error_free;
 
-	r = amdgpu_ib_schedule(adev, 1, &ib, AMDGPU_FENCE_OWNER_VM);
-	if (r) {
-		amdgpu_ib_free(adev, &ib);
-		return r;
-	}
 	amdgpu_vm_fence_pts(vm, mapping->it.start,
-			    mapping->it.last + 1, ib.fence);
+			    mapping->it.last + 1, f);
 	if (fence) {
-		amdgpu_fence_unref(fence);
-		*fence = amdgpu_fence_ref(ib.fence);
+		fence_put(*fence);
+		*fence = fence_get(f);
+	}
+	fence_put(f);
+	if (!amdgpu_enable_scheduler) {
+		amdgpu_ib_free(adev, ib);
+		kfree(ib);
 	}
-	amdgpu_ib_free(adev, &ib);
-
 	return 0;
+
+error_free:
+	amdgpu_ib_free(adev, ib);
+	kfree(ib);
+	return r;
 }
 
 /**
@@ -794,21 +862,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		addr = 0;
 	}
 
-	if (addr == bo_va->addr)
-		return 0;
-
 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
 
-	list_for_each_entry(mapping, &bo_va->mappings, list) {
+	spin_lock(&vm->status_lock);
+	if (!list_empty(&bo_va->vm_status))
+		list_splice_init(&bo_va->valids, &bo_va->invalids);
+	spin_unlock(&vm->status_lock);
+
+	list_for_each_entry(mapping, &bo_va->invalids, list) {
 		r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr,
 						flags, &bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
 
-	bo_va->addr = addr;
 	spin_lock(&vm->status_lock);
+	list_splice_init(&bo_va->invalids, &bo_va->valids);
 	list_del_init(&bo_va->vm_status);
+	if (!mem)
+		list_add(&bo_va->vm_status, &vm->cleared);
 	spin_unlock(&vm->status_lock);
 
 	return 0;
@@ -861,7 +933,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
 			     struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 {
 	struct amdgpu_bo_va *bo_va = NULL;
-	int r;
+	int r = 0;
 
 	spin_lock(&vm->status_lock);
 	while (!list_empty(&vm->invalidated)) {
@@ -878,8 +950,9 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);
 
 	if (bo_va)
-		amdgpu_sync_fence(sync, bo_va->last_pt_update);
-	return 0;
+		r = amdgpu_sync_fence(adev, sync, bo_va->last_pt_update);
+
+	return r;
 }
 
 /**
@@ -907,10 +980,10 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
 	}
 	bo_va->vm = vm;
 	bo_va->bo = bo;
-	bo_va->addr = 0;
 	bo_va->ref_count = 1;
 	INIT_LIST_HEAD(&bo_va->bo_list);
-	INIT_LIST_HEAD(&bo_va->mappings);
+	INIT_LIST_HEAD(&bo_va->valids);
+	INIT_LIST_HEAD(&bo_va->invalids);
 	INIT_LIST_HEAD(&bo_va->vm_status);
 
 	mutex_lock(&vm->mutex);
@@ -999,12 +1072,10 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	mapping->offset = offset;
 	mapping->flags = flags;
 
-	list_add(&mapping->list, &bo_va->mappings);
+	list_add(&mapping->list, &bo_va->invalids);
 	interval_tree_insert(&mapping->it, &vm->va);
 	trace_amdgpu_vm_bo_map(bo_va, mapping);
 
-	bo_va->addr = 0;
-
 	/* Make sure the page tables are allocated */
 	saddr >>= amdgpu_vm_block_size;
 	eaddr >>= amdgpu_vm_block_size;
@@ -1028,7 +1099,9 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 
 		r = amdgpu_bo_create(adev, AMDGPU_VM_PTE_COUNT * 8,
 				     AMDGPU_GPU_PAGE_SIZE, true,
-				     AMDGPU_GEM_DOMAIN_VRAM, 0, NULL, &pt);
+				     AMDGPU_GEM_DOMAIN_VRAM,
+				     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
+				     NULL, &pt);
 		if (r)
 			goto error_free;
 
@@ -1085,17 +1158,27 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 {
 	struct amdgpu_bo_va_mapping *mapping;
 	struct amdgpu_vm *vm = bo_va->vm;
+	bool valid = true;
 
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 
-	list_for_each_entry(mapping, &bo_va->mappings, list) {
+	list_for_each_entry(mapping, &bo_va->valids, list) {
 		if (mapping->it.start == saddr)
 			break;
 	}
 
-	if (&mapping->list == &bo_va->mappings) {
-		amdgpu_bo_unreserve(bo_va->bo);
-		return -ENOENT;
+	if (&mapping->list == &bo_va->valids) {
+		valid = false;
+
+		list_for_each_entry(mapping, &bo_va->invalids, list) {
+			if (mapping->it.start == saddr)
+				break;
+		}
+
+		if (&mapping->list == &bo_va->invalids) {
+			amdgpu_bo_unreserve(bo_va->bo);
+			return -ENOENT;
+		}
 	}
 
 	mutex_lock(&vm->mutex);
@@ -1103,12 +1186,10 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 	interval_tree_remove(&mapping->it, &vm->va);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
-	if (bo_va->addr) {
-		/* clear the old address */
+	if (valid)
 		list_add(&mapping->list, &vm->freed);
-	} else {
+	else
 		kfree(mapping);
-	}
 
 	mutex_unlock(&vm->mutex);
 	amdgpu_bo_unreserve(bo_va->bo);
@@ -1139,16 +1220,19 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 	list_del(&bo_va->vm_status);
 	spin_unlock(&vm->status_lock);
 
-	list_for_each_entry_safe(mapping, next, &bo_va->mappings, list) {
+	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
 		interval_tree_remove(&mapping->it, &vm->va);
 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
-		if (bo_va->addr)
-			list_add(&mapping->list, &vm->freed);
-		else
-			kfree(mapping);
+		list_add(&mapping->list, &vm->freed);
+	}
+	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
+		list_del(&mapping->list);
+		interval_tree_remove(&mapping->it, &vm->va);
+		kfree(mapping);
 	}
-	amdgpu_fence_unref(&bo_va->last_pt_update);
+
+	fence_put(bo_va->last_pt_update);
 	kfree(bo_va);
 
 	mutex_unlock(&vm->mutex);
@@ -1169,12 +1253,10 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	struct amdgpu_bo_va *bo_va;
 
 	list_for_each_entry(bo_va, &bo->va, bo_list) {
-		if (bo_va->addr) {
-			spin_lock(&bo_va->vm->status_lock);
-			list_del(&bo_va->vm_status);
+		spin_lock(&bo_va->vm->status_lock);
+		if (list_empty(&bo_va->vm_status))
 			list_add(&bo_va->vm_status, &bo_va->vm->invalidated);
-			spin_unlock(&bo_va->vm->status_lock);
-		}
+		spin_unlock(&bo_va->vm->status_lock);
 	}
 }
 
@@ -1202,6 +1284,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	vm->va = RB_ROOT;
 	spin_lock_init(&vm->status_lock);
 	INIT_LIST_HEAD(&vm->invalidated);
+	INIT_LIST_HEAD(&vm->cleared);
 	INIT_LIST_HEAD(&vm->freed);
 
 	pd_size = amdgpu_vm_directory_size(adev);
@@ -1215,8 +1298,11 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		return -ENOMEM;
 	}
 
+	vm->page_directory_fence = NULL;
+
 	r = amdgpu_bo_create(adev, pd_size, align, true,
-			     AMDGPU_GEM_DOMAIN_VRAM, 0,
+			     AMDGPU_GEM_DOMAIN_VRAM,
+			     AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
 			     NULL, &vm->page_directory);
 	if (r)
 		return r;
@@ -1263,9 +1349,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	kfree(vm->page_tables);
 
 	amdgpu_bo_unref(&vm->page_directory);
+	fence_put(vm->page_directory_fence);
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		amdgpu_fence_unref(&vm->ids[i].flushed_updates);
+		fence_put(vm->ids[i].flushed_updates);
 		amdgpu_fence_unref(&vm->ids[i].last_id_use);
 	}
