Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--  arch/arm64/kvm/arch_timer.c | 45
-rw-r--r--  arch/arm64/kvm/hypercalls.c |  2
-rw-r--r--  arch/arm64/kvm/mmu.c        | 99
-rw-r--r--  arch/arm64/kvm/pmu-emul.c   |  3
-rw-r--r--  arch/arm64/kvm/sys_regs.c   | 21
5 files changed, 96 insertions(+), 74 deletions(-)
diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c
index 00610477ec7b..e1af4301b913 100644
--- a/arch/arm64/kvm/arch_timer.c
+++ b/arch/arm64/kvm/arch_timer.c
@@ -84,14 +84,10 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
static u64 timer_get_offset(struct arch_timer_context *ctxt)
{
- struct kvm_vcpu *vcpu = ctxt->vcpu;
+ if (ctxt->offset.vm_offset)
+ return *ctxt->offset.vm_offset;
- switch(arch_timer_ctx_index(ctxt)) {
- case TIMER_VTIMER:
- return __vcpu_sys_reg(vcpu, CNTVOFF_EL2);
- default:
- return 0;
- }
+ return 0;
}
static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
@@ -128,15 +124,12 @@ static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
static void timer_set_offset(struct arch_timer_context *ctxt, u64 offset)
{
- struct kvm_vcpu *vcpu = ctxt->vcpu;
-
- switch(arch_timer_ctx_index(ctxt)) {
- case TIMER_VTIMER:
- __vcpu_sys_reg(vcpu, CNTVOFF_EL2) = offset;
- break;
- default:
+ if (!ctxt->offset.vm_offset) {
WARN(offset, "timer %ld\n", arch_timer_ctx_index(ctxt));
+ return;
}
+
+ WRITE_ONCE(*ctxt->offset.vm_offset, offset);
}
u64 kvm_phys_timer_read(void)
@@ -765,25 +758,6 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
return 0;
}
-/* Make the updates of cntvoff for all vtimer contexts atomic */
-static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff)
-{
- unsigned long i;
- struct kvm *kvm = vcpu->kvm;
- struct kvm_vcpu *tmp;
-
- mutex_lock(&kvm->lock);
- kvm_for_each_vcpu(i, tmp, kvm)
- timer_set_offset(vcpu_vtimer(tmp), cntvoff);
-
- /*
- * When called from the vcpu create path, the CPU being created is not
- * included in the loop above, so we just set it here as well.
- */
- timer_set_offset(vcpu_vtimer(vcpu), cntvoff);
- mutex_unlock(&kvm->lock);
-}
-
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
{
struct arch_timer_cpu *timer = vcpu_timer(vcpu);
@@ -791,10 +765,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
vtimer->vcpu = vcpu;
+ vtimer->offset.vm_offset = &vcpu->kvm->arch.timer_data.voffset;
ptimer->vcpu = vcpu;
/* Synchronize cntvoff across all vtimers of a VM. */
- update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
+ timer_set_offset(vtimer, kvm_phys_timer_read());
timer_set_offset(ptimer, 0);
hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
@@ -840,7 +815,7 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
break;
case KVM_REG_ARM_TIMER_CNT:
timer = vcpu_vtimer(vcpu);
- update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value);
+ timer_set_offset(timer, kvm_phys_timer_read() - value);
break;
case KVM_REG_ARM_TIMER_CVAL:
timer = vcpu_vtimer(vcpu);
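
The arch_timer.c changes above replace the per-vcpu CNTVOFF_EL2 copies (and the locked loop that kept them in sync) with a single per-VM offset that every vtimer context reaches through offset.vm_offset. A minimal userspace sketch of that indirection, using hypothetical stand-in types (vm_timer_data, timer_context) rather than the real kernel structures:

#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel structures involved. */
struct vm_timer_data {
	uint64_t voffset;		/* the single per-VM virtual offset */
};

struct timer_context {
	uint64_t *vm_offset;		/* NULL for timers without an offset */
};

static uint64_t timer_get_offset(struct timer_context *ctxt)
{
	if (ctxt->vm_offset)
		return *ctxt->vm_offset;
	return 0;
}

static void timer_set_offset(struct timer_context *ctxt, uint64_t offset)
{
	if (!ctxt->vm_offset)
		return;			/* the kernel WARNs on a nonzero offset here */
	*ctxt->vm_offset = offset;	/* the kernel uses WRITE_ONCE() */
}

int main(void)
{
	struct vm_timer_data vm = { 0 };
	struct timer_context vtimer = { .vm_offset = &vm.voffset };
	struct timer_context ptimer = { .vm_offset = NULL };

	/* One store is visible through every vtimer context of the VM. */
	timer_set_offset(&vtimer, 12345);
	printf("vtimer: %llu, ptimer: %llu\n",
	       (unsigned long long)timer_get_offset(&vtimer),
	       (unsigned long long)timer_get_offset(&ptimer));
	return 0;
}

One store through any context now updates the whole VM at once, which is why update_vtimer_cntvoff() and its kvm->lock round trip can be deleted.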
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 64c086c02c60..5da884e11337 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -44,7 +44,7 @@ static void kvm_ptp_get_time(struct kvm_vcpu *vcpu, u64 *val)
feature = smccc_get_arg1(vcpu);
switch (feature) {
case KVM_PTP_VIRT_COUNTER:
- cycles = systime_snapshot.cycles - vcpu_read_sys_reg(vcpu, CNTVOFF_EL2);
+ cycles = systime_snapshot.cycles - vcpu->kvm->arch.timer_data.voffset;
break;
case KVM_PTP_PHYS_COUNTER:
cycles = systime_snapshot.cycles;
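
With the offset stored per VM, the PTP service can subtract it directly instead of going through vcpu_read_sys_reg(): the guest's virtual counter is simply the host counter minus the shared offset. Illustrative arithmetic only, with made-up values:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Illustrative values only. */
	uint64_t host_cycles = 1000000;	/* hypothetical counter snapshot */
	uint64_t voffset     = 250000;	/* hypothetical per-VM CNTVOFF */

	/* guest virtual counter = physical counter - CNTVOFF */
	printf("guest virtual counter: %llu\n",
	       (unsigned long long)(host_cycles - voffset));
	return 0;
}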
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index 7113587222ff..3b9d4d24c361 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -666,14 +666,33 @@ static int get_user_mapping_size(struct kvm *kvm, u64 addr)
CONFIG_PGTABLE_LEVELS),
.mm_ops = &kvm_user_mm_ops,
};
+ unsigned long flags;
kvm_pte_t pte = 0; /* Keep GCC quiet... */
u32 level = ~0;
int ret;
+ /*
+ * Disable IRQs so that we hazard against a concurrent
+ * teardown of the userspace page tables (which relies on
+ * IPI-ing threads).
+ */
+ local_irq_save(flags);
ret = kvm_pgtable_get_leaf(&pgt, addr, &pte, &level);
- VM_BUG_ON(ret);
- VM_BUG_ON(level >= KVM_PGTABLE_MAX_LEVELS);
- VM_BUG_ON(!(pte & PTE_VALID));
+ local_irq_restore(flags);
+
+ if (ret)
+ return ret;
+
+ /*
+ * Not seeing an error, but not updating level? Something went
+ * deeply wrong...
+ */
+ if (WARN_ON(level >= KVM_PGTABLE_MAX_LEVELS))
+ return -EFAULT;
+
+ /* Oops, the userspace PTs are gone... Replay the fault */
+ if (!kvm_pte_valid(pte))
+ return -EAGAIN;
return BIT(ARM64_HW_PGTABLE_LEVEL_SHIFT(level));
}
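
The function's return value turns the leaf level into a mapping size via ARM64_HW_PGTABLE_LEVEL_SHIFT(). For a 4K granule that shift works out to (PAGE_SHIFT - 3) * (4 - level) + 3; a standalone check of the sizes it yields, assuming PAGE_SHIFT = 12:

#include <stdio.h>

#define PAGE_SHIFT 12	/* assuming a 4K granule */

/* Mirrors ARM64_HW_PGTABLE_LEVEL_SHIFT() for the 4K case. */
static unsigned int level_shift(unsigned int level)
{
	return (PAGE_SHIFT - 3) * (4 - level) + 3;
}

int main(void)
{
	for (unsigned int level = 1; level <= 3; level++)
		printf("leaf at level %u maps %llu bytes\n",
		       level, 1ULL << level_shift(level));
	return 0;
}

This prints 1GiB, 2MiB and 4KiB for levels 1 to 3, matching the block sizes the stage-2 fault handler can install.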
@@ -1079,7 +1098,7 @@ static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot,
*
* Returns the size of the mapping.
*/
-static unsigned long
+static long
transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long hva, kvm_pfn_t *pfnp,
phys_addr_t *ipap)
@@ -1091,8 +1110,15 @@ transparent_hugepage_adjust(struct kvm *kvm, struct kvm_memory_slot *memslot,
* sure that the HVA and IPA are sufficiently aligned and that the
* block map is contained within the memslot.
*/
- if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE) &&
- get_user_mapping_size(kvm, hva) >= PMD_SIZE) {
+ if (fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) {
+ int sz = get_user_mapping_size(kvm, hva);
+
+ if (sz < 0)
+ return sz;
+
+ if (sz < PMD_SIZE)
+ return PAGE_SIZE;
+
/*
* The address we faulted on is backed by a transparent huge
* page. However, because we map the compound huge page and
@@ -1192,7 +1218,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
{
int ret = 0;
bool write_fault, writable, force_pte = false;
- bool exec_fault;
+ bool exec_fault, mte_allowed;
bool device = false;
unsigned long mmu_seq;
struct kvm *kvm = vcpu->kvm;
@@ -1203,7 +1229,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
kvm_pfn_t pfn;
bool logging_active = memslot_is_logging(memslot);
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
- unsigned long vma_pagesize, fault_granule;
+ long vma_pagesize, fault_granule;
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
struct kvm_pgtable *pgt;
@@ -1218,6 +1244,20 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
}
/*
+ * Permission faults just need to update the existing leaf entry,
+ * and so normally don't require allocations from the memcache. The
+ * only exception to this is when dirty logging is enabled at runtime
+ * and a write fault needs to collapse a block entry into a table.
+ */
+ if (fault_status != ESR_ELx_FSC_PERM ||
+ (logging_active && write_fault)) {
+ ret = kvm_mmu_topup_memory_cache(memcache,
+ kvm_mmu_cache_min_pages(kvm));
+ if (ret)
+ return ret;
+ }
+
+ /*
* Let's check if we will get back a huge page backed by hugetlbfs, or
* get block mapping for device MMIO region.
*/
@@ -1269,37 +1309,21 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
fault_ipa &= ~(vma_pagesize - 1);
gfn = fault_ipa >> PAGE_SHIFT;
- mmap_read_unlock(current->mm);
+ mte_allowed = kvm_vma_mte_allowed(vma);
- /*
- * Permission faults just need to update the existing leaf entry,
- * and so normally don't require allocations from the memcache. The
- * only exception to this is when dirty logging is enabled at runtime
- * and a write fault needs to collapse a block entry into a table.
- */
- if (fault_status != ESR_ELx_FSC_PERM ||
- (logging_active && write_fault)) {
- ret = kvm_mmu_topup_memory_cache(memcache,
- kvm_mmu_cache_min_pages(kvm));
- if (ret)
- return ret;
- }
+ /* Don't use the VMA after the unlock -- it may have vanished */
+ vma = NULL;
- mmu_seq = vcpu->kvm->mmu_invalidate_seq;
/*
- * Ensure the read of mmu_invalidate_seq happens before we call
- * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk
- * the page we just got a reference to gets unmapped before we have a
- * chance to grab the mmu_lock, which ensure that if the page gets
- * unmapped afterwards, the call to kvm_unmap_gfn will take it away
- * from us again properly. This smp_rmb() interacts with the smp_wmb()
- * in kvm_mmu_notifier_invalidate_<page|range_end>.
+ * Read mmu_invalidate_seq so that KVM can detect if the results of
+ * vma_lookup() or __gfn_to_pfn_memslot() become stale prior to
+ * acquiring kvm->mmu_lock.
*
- * Besides, __gfn_to_pfn_memslot() instead of gfn_to_pfn_prot() is
- * used to avoid unnecessary overhead introduced to locate the memory
- * slot because it's always fixed even @gfn is adjusted for huge pages.
+ * Rely on mmap_read_unlock() for an implicit smp_rmb(), which pairs
+ * with the smp_wmb() in kvm_mmu_invalidate_end().
*/
- smp_rmb();
+ mmu_seq = vcpu->kvm->mmu_invalidate_seq;
+ mmap_read_unlock(current->mm);
pfn = __gfn_to_pfn_memslot(memslot, gfn, false, false, NULL,
write_fault, &writable, NULL);
@@ -1350,11 +1374,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
vma_pagesize = transparent_hugepage_adjust(kvm, memslot,
hva, &pfn,
&fault_ipa);
+
+ if (vma_pagesize < 0) {
+ ret = vma_pagesize;
+ goto out_unlock;
+ }
}
if (fault_status != ESR_ELx_FSC_PERM && !device && kvm_has_mte(kvm)) {
/* Check the VMM hasn't introduced a new disallowed VMA */
- if (kvm_vma_mte_allowed(vma)) {
+ if (mte_allowed) {
sanitise_mte_tags(kvm, pfn, vma_pagesize);
} else {
ret = -EFAULT;
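
The reordering in user_mem_abort() hinges on the mmu_invalidate_seq protocol: sample the sequence before the lockless pfn lookup, then recheck it under mmu_lock and replay the fault if an invalidation raced in between. A single-threaded sketch of that pattern, with hypothetical names standing in for the KVM fields:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for the KVM fields; single-threaded on purpose. */
struct mmu {
	uint64_t invalidate_seq;
};

/* In KVM the recheck runs under mmu_lock; a changed sequence means an
 * invalidation raced with the lockless lookup. */
static bool mmu_invalidate_retry(struct mmu *mmu, uint64_t seq)
{
	return mmu->invalidate_seq != seq;
}

int main(void)
{
	struct mmu mmu = { .invalidate_seq = 0 };

	/* 1. Sample the sequence before the lockless gfn->pfn lookup. */
	uint64_t seq = mmu.invalidate_seq;

	/* 2. ... lockless translation would happen here ... */

	/* 3. An invalidation fires in between (simulated). */
	mmu.invalidate_seq++;

	/* 4. Under the lock: detect staleness and replay the fault. */
	if (mmu_invalidate_retry(&mmu, seq))
		printf("stale translation, replay the fault\n");
	return 0;
}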
diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index 24908400e190..c243b10f3e15 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -538,7 +538,8 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
if (!kvm_pmu_is_3p5(vcpu))
val &= ~ARMV8_PMU_PMCR_LP;
- __vcpu_sys_reg(vcpu, PMCR_EL0) = val;
+ /* The reset bits don't indicate any state, and shouldn't be saved. */
+ __vcpu_sys_reg(vcpu, PMCR_EL0) = val & ~(ARMV8_PMU_PMCR_C | ARMV8_PMU_PMCR_P);
if (val & ARMV8_PMU_PMCR_E) {
kvm_pmu_enable_counter_mask(vcpu,
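
PMCR_EL0.C and PMCR_EL0.P are write-only actions (writing 1 zeroes the cycle counter and the event counters, respectively) and read back as zero, hence the masking before the value is saved. A standalone illustration, using the architectural bit positions:

#include <stdint.h>
#include <stdio.h>

/* Architectural PMCR_EL0 bit positions. */
#define PMCR_E (1u << 0)	/* enable */
#define PMCR_P (1u << 1)	/* event counter reset: write-only action */
#define PMCR_C (1u << 2)	/* cycle counter reset: write-only action */

int main(void)
{
	uint32_t val = PMCR_E | PMCR_P | PMCR_C;

	/* C and P act when written but always read back as zero, so only
	 * the remaining bits are state worth saving. */
	uint32_t saved = val & ~(PMCR_C | PMCR_P);

	printf("written 0x%x, saved 0x%x\n", val, saved);
	return 0;
}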
diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
index 53749d3a0996..1b2c161120be 100644
--- a/arch/arm64/kvm/sys_regs.c
+++ b/arch/arm64/kvm/sys_regs.c
@@ -856,6 +856,22 @@ static bool pmu_counter_idx_valid(struct kvm_vcpu *vcpu, u64 idx)
return true;
}
+static int get_pmu_evcntr(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r,
+ u64 *val)
+{
+ u64 idx;
+
+ if (r->CRn == 9 && r->CRm == 13 && r->Op2 == 0)
+ /* PMCCNTR_EL0 */
+ idx = ARMV8_PMU_CYCLE_IDX;
+ else
+ /* PMEVCNTRn_EL0 */
+ idx = ((r->CRm & 3) << 3) | (r->Op2 & 7);
+
+ *val = kvm_pmu_get_counter_value(vcpu, idx);
+ return 0;
+}
+
static bool access_pmu_evcntr(struct kvm_vcpu *vcpu,
struct sys_reg_params *p,
const struct sys_reg_desc *r)
@@ -1072,7 +1088,7 @@ static bool access_pmuserenr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
/* Macro to expand the PMEVCNTRn_EL0 register */
#define PMU_PMEVCNTR_EL0(n) \
{ PMU_SYS_REG(SYS_PMEVCNTRn_EL0(n)), \
- .reset = reset_pmevcntr, \
+ .reset = reset_pmevcntr, .get_user = get_pmu_evcntr, \
.access = access_pmu_evcntr, .reg = (PMEVCNTR0_EL0 + n), }
/* Macro to expand the PMEVTYPERn_EL0 register */
@@ -1982,7 +1998,8 @@ static const struct sys_reg_desc sys_reg_descs[] = {
{ PMU_SYS_REG(SYS_PMCEID1_EL0),
.access = access_pmceid, .reset = NULL },
{ PMU_SYS_REG(SYS_PMCCNTR_EL0),
- .access = access_pmu_evcntr, .reset = reset_unknown, .reg = PMCCNTR_EL0 },
+ .access = access_pmu_evcntr, .reset = reset_unknown,
+ .reg = PMCCNTR_EL0, .get_user = get_pmu_evcntr},
{ PMU_SYS_REG(SYS_PMXEVTYPER_EL0),
.access = access_pmu_evtyper, .reset = NULL },
{ PMU_SYS_REG(SYS_PMXEVCNTR_EL0),
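
get_pmu_evcntr() recovers the counter index from the register encoding itself: PMEVCNTR<n>_EL0 spreads n across CRm[1:0] and Op2[2:0], while PMCCNTR_EL0 (CRn=9, CRm=13, Op2=0) maps to the dedicated cycle counter index. A standalone check of the event-counter decoding:

#include <stdio.h>

/* PMEVCNTR<n>_EL0 encodes n across CRm[1:0] and Op2[2:0]. */
static unsigned int pmevcntr_index(unsigned int crm, unsigned int op2)
{
	return ((crm & 3) << 3) | (op2 & 7);
}

int main(void)
{
	/* PMEVCNTR0_EL0 sits at CRm=0b1000, Op2=0;
	 * PMEVCNTR30_EL0 at CRm=0b1011, Op2=6. */
	printf("n = %u\n", pmevcntr_index(0x8, 0));	/* 0 */
	printf("n = %u\n", pmevcntr_index(0xb, 6));	/* 30 */
	return 0;
}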