diff options
Diffstat (limited to 'virt/kvm/kvm_main.c')
| -rw-r--r-- | virt/kvm/kvm_main.c | 95 | 
1 files changed, 46 insertions, 49 deletions
| diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 515dfe9d3bcf..584a5bab3af3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -702,30 +702,31 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,  	/*  	 * .change_pte() must be surrounded by .invalidate_range_{start,end}(). -	 * If mmu_notifier_count is zero, then no in-progress invalidations, -	 * including this one, found a relevant memslot at start(); rechecking -	 * memslots here is unnecessary.  Note, a false positive (count elevated -	 * by a different invalidation) is sub-optimal but functionally ok. +	 * If mmu_invalidate_in_progress is zero, then no in-progress +	 * invalidations, including this one, found a relevant memslot at +	 * start(); rechecking memslots here is unnecessary.  Note, a false +	 * positive (count elevated by a different invalidation) is sub-optimal +	 * but functionally ok.  	 */  	WARN_ON_ONCE(!READ_ONCE(kvm->mn_active_invalidate_count)); -	if (!READ_ONCE(kvm->mmu_notifier_count)) +	if (!READ_ONCE(kvm->mmu_invalidate_in_progress))  		return;  	kvm_handle_hva_range(mn, address, address + 1, pte, kvm_set_spte_gfn);  } -void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start, -				   unsigned long end) +void kvm_mmu_invalidate_begin(struct kvm *kvm, unsigned long start, +			      unsigned long end)  {  	/*  	 * The count increase must become visible at unlock time as no  	 * spte can be established without taking the mmu_lock and  	 * count is also read inside the mmu_lock critical section.  	 */ -	kvm->mmu_notifier_count++; -	if (likely(kvm->mmu_notifier_count == 1)) { -		kvm->mmu_notifier_range_start = start; -		kvm->mmu_notifier_range_end = end; +	kvm->mmu_invalidate_in_progress++; +	if (likely(kvm->mmu_invalidate_in_progress == 1)) { +		kvm->mmu_invalidate_range_start = start; +		kvm->mmu_invalidate_range_end = end;  	} else {  		/*  		 * Fully tracking multiple concurrent ranges has diminishing @@ -736,10 +737,10 @@ void kvm_inc_notifier_count(struct kvm *kvm, unsigned long start,  		 * accumulate and persist until all outstanding invalidates  		 * complete.  		 */ -		kvm->mmu_notifier_range_start = -			min(kvm->mmu_notifier_range_start, start); -		kvm->mmu_notifier_range_end = -			max(kvm->mmu_notifier_range_end, end); +		kvm->mmu_invalidate_range_start = +			min(kvm->mmu_invalidate_range_start, start); +		kvm->mmu_invalidate_range_end = +			max(kvm->mmu_invalidate_range_end, end);  	}  } @@ -752,7 +753,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,  		.end		= range->end,  		.pte		= __pte(0),  		.handler	= kvm_unmap_gfn_range, -		.on_lock	= kvm_inc_notifier_count, +		.on_lock	= kvm_mmu_invalidate_begin,  		.on_unlock	= kvm_arch_guest_memory_reclaimed,  		.flush_on_ret	= true,  		.may_block	= mmu_notifier_range_blockable(range), @@ -763,7 +764,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,  	/*  	 * Prevent memslot modification between range_start() and range_end()  	 * so that conditionally locking provides the same result in both -	 * functions.  Without that guarantee, the mmu_notifier_count +	 * functions.  Without that guarantee, the mmu_invalidate_in_progress  	 * adjustments will be imbalanced.  	 *  	 * Pairs with the decrement in range_end(). @@ -779,7 +780,8 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,  	 * any given time, and the caches themselves can check for hva overlap,  	 * i.e. don't need to rely on memslot overlap checks for performance.  	 * Because this runs without holding mmu_lock, the pfn caches must use -	 * mn_active_invalidate_count (see above) instead of mmu_notifier_count. +	 * mn_active_invalidate_count (see above) instead of +	 * mmu_invalidate_in_progress.  	 */  	gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,  					  hva_range.may_block); @@ -789,22 +791,22 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,  	return 0;  } -void kvm_dec_notifier_count(struct kvm *kvm, unsigned long start, -				   unsigned long end) +void kvm_mmu_invalidate_end(struct kvm *kvm, unsigned long start, +			    unsigned long end)  {  	/*  	 * This sequence increase will notify the kvm page fault that  	 * the page that is going to be mapped in the spte could have  	 * been freed.  	 */ -	kvm->mmu_notifier_seq++; +	kvm->mmu_invalidate_seq++;  	smp_wmb();  	/*  	 * The above sequence increase must be visible before the  	 * below count decrease, which is ensured by the smp_wmb above -	 * in conjunction with the smp_rmb in mmu_notifier_retry(). +	 * in conjunction with the smp_rmb in mmu_invalidate_retry().  	 */ -	kvm->mmu_notifier_count--; +	kvm->mmu_invalidate_in_progress--;  }  static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, @@ -816,7 +818,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,  		.end		= range->end,  		.pte		= __pte(0),  		.handler	= (void *)kvm_null_fn, -		.on_lock	= kvm_dec_notifier_count, +		.on_lock	= kvm_mmu_invalidate_end,  		.on_unlock	= (void *)kvm_null_fn,  		.flush_on_ret	= false,  		.may_block	= mmu_notifier_range_blockable(range), @@ -837,7 +839,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,  	if (wake)  		rcuwait_wake_up(&kvm->mn_memslots_update_rcuwait); -	BUG_ON(kvm->mmu_notifier_count < 0); +	BUG_ON(kvm->mmu_invalidate_in_progress < 0);  }  static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn, @@ -1134,6 +1136,9 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)  	if (!kvm)  		return ERR_PTR(-ENOMEM); +	/* KVM is pinned via open("/dev/kvm"), the fd passed to this ioctl(). */ +	__module_get(kvm_chardev_ops.owner); +  	KVM_MMU_LOCK_INIT(kvm);  	mmgrab(current->mm);  	kvm->mm = current->mm; @@ -1211,9 +1216,17 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)  	if (r)  		goto out_err_no_mmu_notifier; +	r = kvm_coalesced_mmio_init(kvm); +	if (r < 0) +		goto out_no_coalesced_mmio; + +	r = kvm_create_vm_debugfs(kvm, fdname); +	if (r) +		goto out_err_no_debugfs; +  	r = kvm_arch_post_init_vm(kvm);  	if (r) -		goto out_err_mmu_notifier; +		goto out_err;  	mutex_lock(&kvm_lock);  	list_add(&kvm->vm_list, &vm_list); @@ -1222,25 +1235,13 @@ static struct kvm *kvm_create_vm(unsigned long type, const char *fdname)  	preempt_notifier_inc();  	kvm_init_pm_notifier(kvm); -	/* -	 * When the fd passed to this ioctl() is opened it pins the module, -	 * but try_module_get() also prevents getting a reference if the module -	 * is in MODULE_STATE_GOING (e.g. if someone ran "rmmod --wait"). -	 */ -	if (!try_module_get(kvm_chardev_ops.owner)) { -		r = -ENODEV; -		goto out_err_mmu_notifier; -	} - -	r = kvm_create_vm_debugfs(kvm, fdname); -	if (r) -		goto out_err; -  	return kvm;  out_err: -	module_put(kvm_chardev_ops.owner); -out_err_mmu_notifier: +	kvm_destroy_vm_debugfs(kvm); +out_err_no_debugfs: +	kvm_coalesced_mmio_free(kvm); +out_no_coalesced_mmio:  #if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER)  	if (kvm->mmu_notifier.ops)  		mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); @@ -1259,6 +1260,7 @@ out_err_no_irq_srcu:  out_err_no_srcu:  	kvm_arch_free_vm(kvm);  	mmdrop(current->mm); +	module_put(kvm_chardev_ops.owner);  	return ERR_PTR(r);  } @@ -2516,7 +2518,7 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,  {  	unsigned int flags = FOLL_HWPOISON;  	struct page *page; -	int npages = 0; +	int npages;  	might_sleep(); @@ -4378,7 +4380,7 @@ void kvm_unregister_device_ops(u32 type)  static int kvm_ioctl_create_device(struct kvm *kvm,  				   struct kvm_create_device *cd)  { -	const struct kvm_device_ops *ops = NULL; +	const struct kvm_device_ops *ops;  	struct kvm_device *dev;  	bool test = cd->flags & KVM_CREATE_DEVICE_TEST;  	int type; @@ -4913,11 +4915,6 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)  		goto put_fd;  	} -#ifdef CONFIG_KVM_MMIO -	r = kvm_coalesced_mmio_init(kvm); -	if (r < 0) -		goto put_kvm; -#endif  	file = anon_inode_getfile("kvm-vm", &kvm_vm_fops, kvm, O_RDWR);  	if (IS_ERR(file)) {  		r = PTR_ERR(file); | 
