summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm
diff options
context:
space:
mode:
authorMarc Zyngier <maz@kernel.org>2023-08-28 09:29:02 +0100
committerMarc Zyngier <maz@kernel.org>2023-08-28 09:29:02 +0100
commitd58335d10fd7617addb48b3c4f06c373c2f76529 (patch)
tree24cce346ac5c2808af3ff437e1f9f8e2f1cbd0e6 /arch/arm64/kvm
parentc1907626dddc0e1c9aaea6af6c2ae49c861177ce (diff)
parent7657ea920c54218f123ddc1b572821695b669c13 (diff)
Merge branch kvm-arm64/tlbi-range into kvmarm-master/next
* kvm-arm64/tlbi-range: : . : FEAT_TLBIRANGE support, courtesy of Raghavendra Rao Ananta. : From the cover letter: : : "In certain code paths, KVM/ARM currently invalidates the entire VM's : page-tables instead of just invalidating a necessary range. For example, : when collapsing a table PTE to a block PTE, instead of iterating over : each PTE and flushing them, KVM uses 'vmalls12e1is' TLBI operation to : flush all the entries. This is inefficient since the guest would have : to refill the TLBs again, even for the addresses that aren't covered : by the table entry. The performance impact would scale poorly if many : addresses in the VM is going through this remapping. : : For architectures that implement FEAT_TLBIRANGE, KVM can replace such : inefficient paths by performing the invalidations only on the range of : addresses that are in scope. This series tries to achieve the same in : the areas of stage-2 map, unmap and write-protecting the pages." : . KVM: arm64: Use TLBI range-based instructions for unmap KVM: arm64: Invalidate the table entries upon a range KVM: arm64: Flush only the memslot after write-protect KVM: arm64: Implement kvm_arch_flush_remote_tlbs_range() KVM: arm64: Define kvm_tlb_flush_vmid_range() KVM: arm64: Implement __kvm_tlb_flush_vmid_range() arm64: tlb: Implement __flush_s2_tlb_range_op() arm64: tlb: Refactor the core flush algorithm of __flush_tlb_range KVM: Move kvm_arch_flush_remote_tlbs_memslot() to common code KVM: Allow range-based TLB invalidation from common code KVM: Remove CONFIG_HAVE_KVM_ARCH_TLB_FLUSH_ALL KVM: arm64: Use kvm_arch_flush_remote_tlbs() KVM: Declare kvm_arch_flush_remote_tlbs() globally KVM: Rename kvm_arch_flush_remote_tlb() to kvm_arch_flush_remote_tlbs() Signed-off-by: Marc Zyngier <maz@kernel.org>
Diffstat (limited to 'arch/arm64/kvm')
-rw-r--r--arch/arm64/kvm/Kconfig1
-rw-r--r--arch/arm64/kvm/arm.c6
-rw-r--r--arch/arm64/kvm/hyp/nvhe/hyp-main.c11
-rw-r--r--arch/arm64/kvm/hyp/nvhe/tlb.c30
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c63
-rw-r--r--arch/arm64/kvm/hyp/vhe/tlb.c28
-rw-r--r--arch/arm64/kvm/mmu.c16
7 files changed, 136 insertions, 19 deletions
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index f4f0b19d755b..83c1e09be42e 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -25,7 +25,6 @@ menuconfig KVM
select MMU_NOTIFIER
select PREEMPT_NOTIFIERS
select HAVE_KVM_CPU_RELAX_INTERCEPT
- select HAVE_KVM_ARCH_TLB_FLUSH_ALL
select KVM_MMIO
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_XFER_TO_GUEST_WORK
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 183786fc553c..1cad7368aedf 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -1534,12 +1534,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
}
-void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm,
- const struct kvm_memory_slot *memslot)
-{
- kvm_flush_remote_tlbs(kvm);
-}
-
static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
struct kvm_arm_device_addr *dev_addr)
{
diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
index a169c619db60..857d9bc04fd4 100644
--- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c
+++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c
@@ -135,6 +135,16 @@ static void handle___kvm_tlb_flush_vmid_ipa_nsh(struct kvm_cpu_context *host_ctx
__kvm_tlb_flush_vmid_ipa_nsh(kern_hyp_va(mmu), ipa, level);
}
+static void
+handle___kvm_tlb_flush_vmid_range(struct kvm_cpu_context *host_ctxt)
+{
+ DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
+ DECLARE_REG(phys_addr_t, start, host_ctxt, 2);
+ DECLARE_REG(unsigned long, pages, host_ctxt, 3);
+
+ __kvm_tlb_flush_vmid_range(kern_hyp_va(mmu), start, pages);
+}
+
static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
{
DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
@@ -327,6 +337,7 @@ static const hcall_t host_hcall[] = {
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa_nsh),
HANDLE_FUNC(__kvm_tlb_flush_vmid),
+ HANDLE_FUNC(__kvm_tlb_flush_vmid_range),
HANDLE_FUNC(__kvm_flush_cpu_context),
HANDLE_FUNC(__kvm_timer_set_cntvoff),
HANDLE_FUNC(__vgic_v3_read_vmcr),
diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c
index b9991bbd8e3f..1b265713d6be 100644
--- a/arch/arm64/kvm/hyp/nvhe/tlb.c
+++ b/arch/arm64/kvm/hyp/nvhe/tlb.c
@@ -182,6 +182,36 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
__tlb_switch_to_host(&cxt);
}
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages)
+{
+ struct tlb_inv_context cxt;
+ unsigned long stride;
+
+ /*
+ * Since the range of addresses may not be mapped at
+ * the same level, assume the worst case as PAGE_SIZE
+ */
+ stride = PAGE_SIZE;
+ start = round_down(start, stride);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt, false);
+
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ /* See the comment in __kvm_tlb_flush_vmid_ipa() */
+ if (icache_is_vpipt())
+ icache_inval_all_pou();
+
+ __tlb_switch_to_host(&cxt);
+}
+
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index f7a93ef29250..f155b8c9e98c 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -670,6 +670,26 @@ static bool stage2_has_fwb(struct kvm_pgtable *pgt)
return !(pgt->flags & KVM_PGTABLE_S2_NOFWB);
}
+void kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t addr, size_t size)
+{
+ unsigned long pages, inval_pages;
+
+ if (!system_supports_tlb_range()) {
+ kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+ return;
+ }
+
+ pages = size >> PAGE_SHIFT;
+ while (pages > 0) {
+ inval_pages = min(pages, MAX_TLBI_RANGE_PAGES);
+ kvm_call_hyp(__kvm_tlb_flush_vmid_range, mmu, addr, inval_pages);
+
+ addr += inval_pages << PAGE_SHIFT;
+ pages -= inval_pages;
+ }
+}
+
#define KVM_S2_MEMATTR(pgt, attr) PAGE_S2_MEMATTR(attr, stage2_has_fwb(pgt))
static int stage2_set_prot_attr(struct kvm_pgtable *pgt, enum kvm_pgtable_prot prot,
@@ -786,7 +806,8 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx,
* evicted pte value (if any).
*/
if (kvm_pte_table(ctx->old, ctx->level))
- kvm_call_hyp(__kvm_tlb_flush_vmid, mmu);
+ kvm_tlb_flush_vmid_range(mmu, ctx->addr,
+ kvm_granule_size(ctx->level));
else if (kvm_pte_valid(ctx->old))
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
ctx->addr, ctx->level);
@@ -810,16 +831,36 @@ static void stage2_make_pte(const struct kvm_pgtable_visit_ctx *ctx, kvm_pte_t n
smp_store_release(ctx->ptep, new);
}
-static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
- struct kvm_pgtable_mm_ops *mm_ops)
+static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
+{
+ /*
+ * If FEAT_TLBIRANGE is implemented, defer the individual
+ * TLB invalidations until the entire walk is finished, and
+ * then use the range-based TLBI instructions to do the
+ * invalidations. Condition deferred TLB invalidation on the
+ * system supporting FWB as the optimization is entirely
+ * pointless when the unmap walker needs to perform CMOs.
+ */
+ return system_supports_tlb_range() && stage2_has_fwb(pgt);
+}
+
+static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+ struct kvm_s2_mmu *mmu,
+ struct kvm_pgtable_mm_ops *mm_ops)
{
+ struct kvm_pgtable *pgt = ctx->arg;
+
/*
- * Clear the existing PTE, and perform break-before-make with
- * TLB maintenance if it was valid.
+ * Clear the existing PTE, and perform break-before-make if it was
+ * valid. Depending on the system support, defer the TLB maintenance
+ * for the same until the entire unmap walk is completed.
*/
if (kvm_pte_valid(ctx->old)) {
kvm_clear_pte(ctx->ptep);
- kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+ if (!stage2_unmap_defer_tlb_flush(pgt))
+ kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+ ctx->addr, ctx->level);
}
mm_ops->put_page(ctx->ptep);
@@ -1077,7 +1118,7 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
* block entry and rely on the remaining portions being faulted
* back lazily.
*/
- stage2_put_pte(ctx, mmu, mm_ops);
+ stage2_unmap_put_pte(ctx, mmu, mm_ops);
if (need_flush && mm_ops->dcache_clean_inval_poc)
mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
@@ -1091,13 +1132,19 @@ static int stage2_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx,
int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
+ int ret;
struct kvm_pgtable_walker walker = {
.cb = stage2_unmap_walker,
.arg = pgt,
.flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
};
- return kvm_pgtable_walk(pgt, addr, size, &walker);
+ ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+ if (stage2_unmap_defer_tlb_flush(pgt))
+ /* Perform the deferred TLB invalidations */
+ kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
+
+ return ret;
}
struct stage2_attr_data {
diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c
index e69da550cdc5..46bd43f61d76 100644
--- a/arch/arm64/kvm/hyp/vhe/tlb.c
+++ b/arch/arm64/kvm/hyp/vhe/tlb.c
@@ -143,6 +143,34 @@ void __kvm_tlb_flush_vmid_ipa_nsh(struct kvm_s2_mmu *mmu,
__tlb_switch_to_host(&cxt);
}
+void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
+ phys_addr_t start, unsigned long pages)
+{
+ struct tlb_inv_context cxt;
+ unsigned long stride;
+
+ /*
+ * Since the range of addresses may not be mapped at
+ * the same level, assume the worst case as PAGE_SIZE
+ */
+ stride = PAGE_SIZE;
+ start = round_down(start, stride);
+
+ dsb(ishst);
+
+ /* Switch to requested VMID */
+ __tlb_switch_to_guest(mmu, &cxt);
+
+ __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0);
+
+ dsb(ish);
+ __tlbi(vmalle1is);
+ dsb(ish);
+ isb();
+
+ __tlb_switch_to_host(&cxt);
+}
+
void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
{
struct tlb_inv_context cxt;
diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c
index d3b4feed460c..b16aff3f65f6 100644
--- a/arch/arm64/kvm/mmu.c
+++ b/arch/arm64/kvm/mmu.c
@@ -161,15 +161,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
}
/**
- * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8
+ * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries for v7/8
* @kvm: pointer to kvm structure.
*
* Interface to HYP function to flush all VM TLB entries
*/
-void kvm_flush_remote_tlbs(struct kvm *kvm)
+int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
{
- ++kvm->stat.generic.remote_tlb_flush_requests;
kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+ return 0;
+}
+
+int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
+ gfn_t gfn, u64 nr_pages)
+{
+ kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
+ gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+ return 0;
}
static bool kvm_is_device_pfn(unsigned long pfn)
@@ -1075,7 +1083,7 @@ static void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot)
write_lock(&kvm->mmu_lock);
stage2_wp_range(&kvm->arch.mmu, start, end);
write_unlock(&kvm->mmu_lock);
- kvm_flush_remote_tlbs(kvm);
+ kvm_flush_remote_tlbs_memslot(kvm, memslot);
}
/**