summaryrefslogtreecommitdiff
path: root/virt
diff options
context:
space:
mode:
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/dirty_ring.c8
-rw-r--r--virt/kvm/kvm_main.c54
-rw-r--r--virt/kvm/mmu_lock.h23
3 files changed, 60 insertions, 25 deletions
diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c
index 9d01299563ee..7aafefc50aa7 100644
--- a/virt/kvm/dirty_ring.c
+++ b/virt/kvm/dirty_ring.c
@@ -9,6 +9,7 @@
#include <linux/vmalloc.h>
#include <linux/kvm_dirty_ring.h>
#include <trace/events/kvm.h>
+#include "mmu_lock.h"
int __weak kvm_cpu_dirty_log_size(void)
{
@@ -60,17 +61,16 @@ static void kvm_reset_dirty_gfn(struct kvm *kvm, u32 slot, u64 offset, u64 mask)
if (!memslot || (offset + __fls(mask)) >= memslot->npages)
return;
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot, offset, mask);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
}
int kvm_dirty_ring_alloc(struct kvm_dirty_ring *ring, int index, u32 size)
{
- ring->dirty_gfns = vmalloc(size);
+ ring->dirty_gfns = vzalloc(size);
if (!ring->dirty_gfns)
return -ENOMEM;
- memset(ring->dirty_gfns, 0, size);
ring->size = size / sizeof(struct kvm_dirty_gfn);
ring->soft_limit = ring->size - kvm_dirty_ring_get_rsvd_entries();
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 8367d88ce39b..001b9de4e727 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -58,6 +58,7 @@
#include "coalesced_mmio.h"
#include "async_pf.h"
+#include "mmu_lock.h"
#include "vfio.h"
#define CREATE_TRACE_POINTS
@@ -459,13 +460,15 @@ static void kvm_mmu_notifier_change_pte(struct mmu_notifier *mn,
int idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+
+ KVM_MMU_LOCK(kvm);
+
kvm->mmu_notifier_seq++;
if (kvm_set_spte_hva(kvm, address, pte))
kvm_flush_remote_tlbs(kvm);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -476,7 +479,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
int need_tlb_flush = 0, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
/*
* The count increase must become visible at unlock time as no
* spte can be established without taking the mmu_lock and
@@ -489,7 +492,7 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
if (need_tlb_flush || kvm->tlbs_dirty)
kvm_flush_remote_tlbs(kvm);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
return 0;
@@ -500,7 +503,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
/*
* This sequence increase will notify the kvm page fault that
* the page that is going to be mapped in the spte could have
@@ -514,7 +517,7 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
* in conjunction with the smp_rmb in mmu_notifier_retry().
*/
kvm->mmu_notifier_count--;
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
BUG_ON(kvm->mmu_notifier_count < 0);
}
@@ -528,13 +531,13 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
int young, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
young = kvm_age_hva(kvm, start, end);
if (young)
kvm_flush_remote_tlbs(kvm);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
return young;
@@ -549,7 +552,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
int young, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
/*
* Even though we do not flush TLB, this will still adversely
* affect performance on pre-Haswell Intel EPT, where there is
@@ -564,7 +567,7 @@ static int kvm_mmu_notifier_clear_young(struct mmu_notifier *mn,
* more sophisticated heuristic later.
*/
young = kvm_age_hva(kvm, start, end);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
return young;
@@ -578,9 +581,9 @@ static int kvm_mmu_notifier_test_young(struct mmu_notifier *mn,
int young, idx;
idx = srcu_read_lock(&kvm->srcu);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
young = kvm_test_age_hva(kvm, address);
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
srcu_read_unlock(&kvm->srcu, idx);
return young;
@@ -745,7 +748,7 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (!kvm)
return ERR_PTR(-ENOMEM);
- spin_lock_init(&kvm->mmu_lock);
+ KVM_MMU_LOCK_INIT(kvm);
mmgrab(current->mm);
kvm->mm = current->mm;
kvm_eventfd_init(kvm);
@@ -1525,7 +1528,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
dirty_bitmap_buffer = kvm_second_dirty_bitmap(memslot);
memset(dirty_bitmap_buffer, 0, n);
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
for (i = 0; i < n / sizeof(long); i++) {
unsigned long mask;
gfn_t offset;
@@ -1541,7 +1544,7 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log)
kvm_arch_mmu_enable_log_dirty_pt_masked(kvm, memslot,
offset, mask);
}
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
}
if (flush)
@@ -1636,7 +1639,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
if (copy_from_user(dirty_bitmap_buffer, log->dirty_bitmap, n))
return -EFAULT;
- spin_lock(&kvm->mmu_lock);
+ KVM_MMU_LOCK(kvm);
for (offset = log->first_page, i = offset / BITS_PER_LONG,
n = DIV_ROUND_UP(log->num_pages, BITS_PER_LONG); n--;
i++, offset += BITS_PER_LONG) {
@@ -1659,7 +1662,7 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm,
offset, mask);
}
}
- spin_unlock(&kvm->mmu_lock);
+ KVM_MMU_UNLOCK(kvm);
if (flush)
kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
@@ -1903,10 +1906,12 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
bool write_fault, bool *writable,
kvm_pfn_t *p_pfn)
{
- unsigned long pfn;
+ kvm_pfn_t pfn;
+ pte_t *ptep;
+ spinlock_t *ptl;
int r;
- r = follow_pfn(vma, addr, &pfn);
+ r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
if (r) {
/*
* get_user_pages fails for VM_IO and VM_PFNMAP vmas and does
@@ -1921,14 +1926,19 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
if (r)
return r;
- r = follow_pfn(vma, addr, &pfn);
+ r = follow_pte(vma->vm_mm, addr, &ptep, &ptl);
if (r)
return r;
+ }
+ if (write_fault && !pte_write(*ptep)) {
+ pfn = KVM_PFN_ERR_RO_FAULT;
+ goto out;
}
if (writable)
- *writable = true;
+ *writable = pte_write(*ptep);
+ pfn = pte_pfn(*ptep);
/*
* Get a reference here because callers of *hva_to_pfn* and
@@ -1943,6 +1953,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
*/
kvm_get_pfn(pfn);
+out:
+ pte_unmap_unlock(ptep, ptl);
*p_pfn = pfn;
return 0;
}
diff --git a/virt/kvm/mmu_lock.h b/virt/kvm/mmu_lock.h
new file mode 100644
index 000000000000..9e1308f9734c
--- /dev/null
+++ b/virt/kvm/mmu_lock.h
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#ifndef KVM_MMU_LOCK_H
+#define KVM_MMU_LOCK_H 1
+
+/*
+ * Architectures can choose whether to use an rwlock or spinlock
+ * for the mmu_lock. These macros, for use in common code
+ * only, avoids using #ifdefs in places that must deal with
+ * multiple architectures.
+ */
+
+#ifdef KVM_HAVE_MMU_RWLOCK
+#define KVM_MMU_LOCK_INIT(kvm) rwlock_init(&(kvm)->mmu_lock)
+#define KVM_MMU_LOCK(kvm) write_lock(&(kvm)->mmu_lock)
+#define KVM_MMU_UNLOCK(kvm) write_unlock(&(kvm)->mmu_lock)
+#else
+#define KVM_MMU_LOCK_INIT(kvm) spin_lock_init(&(kvm)->mmu_lock)
+#define KVM_MMU_LOCK(kvm) spin_lock(&(kvm)->mmu_lock)
+#define KVM_MMU_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock)
+#endif /* KVM_HAVE_MMU_RWLOCK */
+
+#endif