author     Marc Zyngier <maz@kernel.org>              2021-12-16 13:06:09 +0000
committer  Marc Zyngier <maz@kernel.org>              2021-12-16 13:06:09 +0000
commit     43d8ac22125e365c2df85ffff503129567350c21 (patch)
tree       b5b3b608c25d28716cc705714f30cbb28636c4fb
parent     ce5b5b05c16802a27cbf12fbce1446eb4998f975 (diff)
parent     52b28657ebd7cd20e931ce71190f235d0fa018a6 (diff)
Merge branch kvm-arm64/pkvm-hyp-sharing into kvmarm-master/next
* kvm-arm64/pkvm-hyp-sharing:
: .
: Series from Quentin Perret, implementing HYP page share/unshare:
:
: This series implements an unshare hypercall at EL2 in nVHE
: protected mode, and makes use of it to unmap guest-specific
: data structures from EL2 stage-1 during guest tear-down.
: Crucially, the share and unshare routines use page refcounts
: in the host kernel to avoid accidentally unmapping data
: structures that overlap a common page.
: [...]
: .
KVM: arm64: pkvm: Unshare guest structs during teardown
KVM: arm64: Expose unshare hypercall to the host
KVM: arm64: Implement do_unshare() helper for unsharing memory
KVM: arm64: Implement __pkvm_host_share_hyp() using do_share()
KVM: arm64: Implement do_share() helper for sharing memory
KVM: arm64: Introduce wrappers for host and hyp spin lock accessors
KVM: arm64: Extend pkvm_page_state enumeration to handle absent pages
KVM: arm64: pkvm: Refcount the pages shared with EL2
KVM: arm64: Introduce kvm_share_hyp()
KVM: arm64: Implement kvm_pgtable_hyp_unmap() at EL2
KVM: arm64: Hook up ->page_count() for hypervisor stage-1 page-table
KVM: arm64: Fixup hyp stage-1 refcount
KVM: arm64: Refcount hyp stage-1 pgtable pages
KVM: arm64: Provide {get,put}_page() stubs for early hyp allocator
Signed-off-by: Marc Zyngier <maz@kernel.org>
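For readers heading into the 739-line diff below, the core of the series is a two-phase transition protocol in arch/arm64/kvm/hyp/nvhe/mem_protect.c: every share or unshare first runs a side-effect-free check against both the host stage-2 and hyp stage-1 page-tables, and only then commits, with both component locks held throughout. A condensed sketch of the state machine, distilled from the patches below (the transition table and the do_share() body are taken from the diff; this is not standalone code):

```c
/*
 * Condensed view of the protocol added in
 * arch/arm64/kvm/hyp/nvhe/mem_protect.c below. Page states live in
 * the two software bits of each PTE:
 *
 *   do_share():    initiator  OWNED           -> SHARED_OWNED
 *                  completer  NOPAGE          -> SHARED_BORROWED
 *   do_unshare():  initiator  SHARED_OWNED    -> OWNED
 *                  completer  SHARED_BORROWED -> NOPAGE
 *
 * Both operations run under the host and hyp locks and split into a
 * side-effect-free check phase and a commit phase, e.g. for sharing:
 */
static int do_share(struct pkvm_mem_share *share)
{
	int ret;

	ret = check_share(share);		/* validate both page-tables */
	if (ret)
		return ret;

	return WARN_ON(__do_share(share));	/* commit; cannot fail */
}
```

The commit phase is WARN_ON()-wrapped because, once the checks have passed under the locks, a failure would indicate a broken invariant rather than a recoverable error.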
-rw-r--r--   arch/arm64/include/asm/kvm_asm.h                 1
-rw-r--r--   arch/arm64/include/asm/kvm_host.h                2
-rw-r--r--   arch/arm64/include/asm/kvm_mmu.h                 2
-rw-r--r--   arch/arm64/include/asm/kvm_pgtable.h            21
-rw-r--r--   arch/arm64/kvm/arm.c                             6
-rw-r--r--   arch/arm64/kvm/fpsimd.c                         36
-rw-r--r--   arch/arm64/kvm/hyp/include/nvhe/mem_protect.h    6
-rw-r--r--   arch/arm64/kvm/hyp/nvhe/early_alloc.c            5
-rw-r--r--   arch/arm64/kvm/hyp/nvhe/hyp-main.c               8
-rw-r--r--   arch/arm64/kvm/hyp/nvhe/mem_protect.c          500
-rw-r--r--   arch/arm64/kvm/hyp/nvhe/setup.c                 22
-rw-r--r--   arch/arm64/kvm/hyp/pgtable.c                   102
-rw-r--r--   arch/arm64/kvm/mmu.c                           137
-rw-r--r--   arch/arm64/kvm/reset.c                          10
14 files changed, 739 insertions, 119 deletions
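Before diving into the per-file changes: at the host level the series replaces open-coded create_hyp_mappings(obj, obj + 1, PAGE_HYP) calls with a symmetric, refcounted pair, kvm_share_hyp() and kvm_unshare_hyp(). A minimal usage sketch with a hypothetical caller (the pattern mirrors the arm.c, fpsimd.c and reset.c hunks below):

```c
/*
 * Hypothetical usage sketch of the kvm_share_hyp()/kvm_unshare_hyp()
 * pair introduced below; 'buf' stands for any physically contiguous
 * kernel object that EL2 needs to access (vmalloc/module memory is
 * rejected with -EINVAL by kvm_share_hyp()).
 */
static int example_share_with_hyp(void *buf, size_t size)
{
	int ret;

	ret = kvm_share_hyp(buf, buf + size);	/* map in hyp stage-1 */
	if (ret)
		return ret;

	/* ... EL2 can now reach the object via kern_hyp_va(buf) ... */

	kvm_unshare_hyp(buf, buf + size);	/* tear the mapping down */
	return 0;
}
```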
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 50d5e4de244c..d5b0386ef765 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -63,6 +63,7 @@ enum __kvm_host_smccc_func { /* Hypercalls available after pKVM finalisation */ __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, + __KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp, __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run, __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context, diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index cf858a7e3533..9360a2804df1 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -321,6 +321,7 @@ struct kvm_vcpu_arch { struct kvm_guest_debug_arch external_debug_state; struct user_fpsimd_state *host_fpsimd_state; /* hyp VA */ + struct task_struct *parent_task; struct { /* {Break,watch}point registers */ @@ -737,6 +738,7 @@ void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxflush_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_ctxsync_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_put_fp(struct kvm_vcpu *vcpu); +void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu); static inline bool kvm_pmu_counter_deferred(struct perf_event_attr *attr) { diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 02d378887743..81839e9a8a24 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -150,6 +150,8 @@ static __always_inline unsigned long __kern_hyp_va(unsigned long v) #include <asm/kvm_pgtable.h> #include <asm/stage2_pgtable.h> +int kvm_share_hyp(void *from, void *to); +void kvm_unshare_hyp(void *from, void *to); int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot); int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, void __iomem **kaddr, diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h index 027783829584..9d076f36401d 100644 --- a/arch/arm64/include/asm/kvm_pgtable.h +++ b/arch/arm64/include/asm/kvm_pgtable.h @@ -252,6 +252,27 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, enum kvm_pgtable_prot prot); /** + * kvm_pgtable_hyp_unmap() - Remove a mapping from a hypervisor stage-1 page-table. + * @pgt: Page-table structure initialised by kvm_pgtable_hyp_init(). + * @addr: Virtual address from which to remove the mapping. + * @size: Size of the mapping. + * + * The offset of @addr within a page is ignored, @size is rounded-up to + * the next page boundary and @phys is rounded-down to the previous page + * boundary. + * + * TLB invalidation is performed for each page-table entry cleared during the + * unmapping operation and the reference count for the page-table page + * containing the cleared entry is decremented, with unreferenced pages being + * freed. The unmapping operation will stop early if it encounters either an + * invalid page-table entry or a valid block mapping which maps beyond the range + * being unmapped. + * + * Return: Number of bytes unmapped, which may be 0. + */ +u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size); + +/** * kvm_get_vtcr() - Helper to construct VTCR_EL2 * @mmfr0: Sanitized value of SYS_ID_AA64MMFR0_EL1 register. * @mmfr1: Sanitized value of SYS_ID_AA64MMFR1_EL1 register. 
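Note the return contract in the kvm_pgtable_hyp_unmap() kernel-doc just added above: it reports the number of bytes actually unmapped rather than an error code, and the walk may stop early. A caller that needs all-or-nothing semantics must therefore compare the result against the requested size, as hyp_complete_unshare() does further down in this diff; an illustrative wrapper:

```c
/* Illustrative wrapper (mirrors hyp_complete_unshare() below):
 * convert the bytes-unmapped return value into an error code. */
static int hyp_unmap_exact(struct kvm_pgtable *pgt, u64 addr, u64 size)
{
	u64 unmapped = kvm_pgtable_hyp_unmap(pgt, addr, size);

	/* The walker stops early on an invalid entry or on a block
	 * mapping that extends past the range, so a short count means
	 * part of [addr, addr + size) is still mapped. */
	return (unmapped != size) ? -EFAULT : 0;
}
```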
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9b745d2bc89a..6057f3c5aafe 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -146,7 +146,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) return ret; - ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); + ret = kvm_share_hyp(kvm, kvm + 1); if (ret) goto out_free_stage2_pgd; @@ -188,6 +188,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) } } atomic_set(&kvm->online_vcpus, 0); + + kvm_unshare_hyp(kvm, kvm + 1); } int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) @@ -342,7 +344,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) if (err) return err; - return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); + return kvm_share_hyp(vcpu, vcpu + 1); } void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) diff --git a/arch/arm64/kvm/fpsimd.c b/arch/arm64/kvm/fpsimd.c index 5526d79c7b47..2f48fd362a8c 100644 --- a/arch/arm64/kvm/fpsimd.c +++ b/arch/arm64/kvm/fpsimd.c @@ -14,6 +14,19 @@ #include <asm/kvm_mmu.h> #include <asm/sysreg.h> +void kvm_vcpu_unshare_task_fp(struct kvm_vcpu *vcpu) +{ + struct task_struct *p = vcpu->arch.parent_task; + struct user_fpsimd_state *fpsimd; + + if (!is_protected_kvm_enabled() || !p) + return; + + fpsimd = &p->thread.uw.fpsimd_state; + kvm_unshare_hyp(fpsimd, fpsimd + 1); + put_task_struct(p); +} + /* * Called on entry to KVM_RUN unless this vcpu previously ran at least * once and the most recent prior KVM_RUN for this vcpu was called from @@ -29,12 +42,27 @@ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu) struct user_fpsimd_state *fpsimd = ¤t->thread.uw.fpsimd_state; + kvm_vcpu_unshare_task_fp(vcpu); + /* Make sure the host task fpsimd state is visible to hyp: */ - ret = create_hyp_mappings(fpsimd, fpsimd + 1, PAGE_HYP); - if (!ret) - vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); + ret = kvm_share_hyp(fpsimd, fpsimd + 1); + if (ret) + return ret; + + vcpu->arch.host_fpsimd_state = kern_hyp_va(fpsimd); + + /* + * We need to keep current's task_struct pinned until its data has been + * unshared with the hypervisor to make sure it is not re-used by the + * kernel and donated to someone else while already shared -- see + * kvm_vcpu_unshare_task_fp() for the matching put_task_struct(). 
+ */ + if (is_protected_kvm_enabled()) { + get_task_struct(current); + vcpu->arch.parent_task = current; + } - return ret; + return 0; } /* diff --git a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h index b58c910babaf..80e99836eac7 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h +++ b/arch/arm64/kvm/hyp/include/nvhe/mem_protect.h @@ -24,6 +24,11 @@ enum pkvm_page_state { PKVM_PAGE_OWNED = 0ULL, PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0, PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1, + __PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 | + KVM_PGTABLE_PROT_SW1, + + /* Meta-states which aren't encoded directly in the PTE's SW bits */ + PKVM_NOPAGE, }; #define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1) @@ -50,6 +55,7 @@ extern const u8 pkvm_hyp_id; int __pkvm_prot_finalize(void); int __pkvm_host_share_hyp(u64 pfn); +int __pkvm_host_unshare_hyp(u64 pfn); bool addr_is_memory(phys_addr_t phys); int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot); diff --git a/arch/arm64/kvm/hyp/nvhe/early_alloc.c b/arch/arm64/kvm/hyp/nvhe/early_alloc.c index 1306c430ab87..00de04153cc6 100644 --- a/arch/arm64/kvm/hyp/nvhe/early_alloc.c +++ b/arch/arm64/kvm/hyp/nvhe/early_alloc.c @@ -43,6 +43,9 @@ void *hyp_early_alloc_page(void *arg) return hyp_early_alloc_contig(1); } +static void hyp_early_alloc_get_page(void *addr) { } +static void hyp_early_alloc_put_page(void *addr) { } + void hyp_early_alloc_init(void *virt, unsigned long size) { base = cur = (unsigned long)virt; @@ -51,4 +54,6 @@ void hyp_early_alloc_init(void *virt, unsigned long size) hyp_early_alloc_mm_ops.zalloc_page = hyp_early_alloc_page; hyp_early_alloc_mm_ops.phys_to_virt = hyp_phys_to_virt; hyp_early_alloc_mm_ops.virt_to_phys = hyp_virt_to_phys; + hyp_early_alloc_mm_ops.get_page = hyp_early_alloc_get_page; + hyp_early_alloc_mm_ops.put_page = hyp_early_alloc_put_page; } diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index b096bf009144..5e2197db0d32 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -147,6 +147,13 @@ static void handle___pkvm_host_share_hyp(struct kvm_cpu_context *host_ctxt) cpu_reg(host_ctxt, 1) = __pkvm_host_share_hyp(pfn); } +static void handle___pkvm_host_unshare_hyp(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(u64, pfn, host_ctxt, 1); + + cpu_reg(host_ctxt, 1) = __pkvm_host_unshare_hyp(pfn); +} + static void handle___pkvm_create_private_mapping(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(phys_addr_t, phys, host_ctxt, 1); @@ -184,6 +191,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_prot_finalize), HANDLE_FUNC(__pkvm_host_share_hyp), + HANDLE_FUNC(__pkvm_host_unshare_hyp), HANDLE_FUNC(__kvm_adjust_pc), HANDLE_FUNC(__kvm_vcpu_run), HANDLE_FUNC(__kvm_flush_vm_context), diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 92262e89672d..16776d1d6151 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -28,6 +28,26 @@ static struct hyp_pool host_s2_pool; const u8 pkvm_hyp_id = 1; +static void host_lock_component(void) +{ + hyp_spin_lock(&host_kvm.lock); +} + +static void host_unlock_component(void) +{ + hyp_spin_unlock(&host_kvm.lock); +} + +static void hyp_lock_component(void) +{ + hyp_spin_lock(&pkvm_pgd_lock); +} + +static void hyp_unlock_component(void) +{ + hyp_spin_unlock(&pkvm_pgd_lock); +} + static void 
*host_s2_zalloc_pages_exact(size_t size) { void *addr = hyp_alloc_pages(&host_s2_pool, get_order(size)); @@ -339,116 +359,446 @@ static int host_stage2_idmap(u64 addr) prot = is_memory ? PKVM_HOST_MEM_PROT : PKVM_HOST_MMIO_PROT; - hyp_spin_lock(&host_kvm.lock); + host_lock_component(); ret = host_stage2_adjust_range(addr, &range); if (ret) goto unlock; ret = host_stage2_idmap_locked(range.start, range.end - range.start, prot); unlock: - hyp_spin_unlock(&host_kvm.lock); + host_unlock_component(); return ret; } -static inline bool check_prot(enum kvm_pgtable_prot prot, - enum kvm_pgtable_prot required, - enum kvm_pgtable_prot denied) +void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) { - return (prot & (required | denied)) == required; + struct kvm_vcpu_fault_info fault; + u64 esr, addr; + int ret = 0; + + esr = read_sysreg_el2(SYS_ESR); + BUG_ON(!__get_fault_info(esr, &fault)); + + addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; + ret = host_stage2_idmap(addr); + BUG_ON(ret && ret != -EAGAIN); } -int __pkvm_host_share_hyp(u64 pfn) +/* This corresponds to locking order */ +enum pkvm_component_id { + PKVM_ID_HOST, + PKVM_ID_HYP, +}; + +struct pkvm_mem_transition { + u64 nr_pages; + + struct { + enum pkvm_component_id id; + /* Address in the initiator's address space */ + u64 addr; + + union { + struct { + /* Address in the completer's address space */ + u64 completer_addr; + } host; + }; + } initiator; + + struct { + enum pkvm_component_id id; + } completer; +}; + +struct pkvm_mem_share { + const struct pkvm_mem_transition tx; + const enum kvm_pgtable_prot completer_prot; +}; + +struct check_walk_data { + enum pkvm_page_state desired; + enum pkvm_page_state (*get_page_state)(kvm_pte_t pte); +}; + +static int __check_page_state_visitor(u64 addr, u64 end, u32 level, + kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, + void * const arg) { - phys_addr_t addr = hyp_pfn_to_phys(pfn); - enum kvm_pgtable_prot prot, cur; - void *virt = __hyp_va(addr); - enum pkvm_page_state state; - kvm_pte_t pte; - int ret; + struct check_walk_data *d = arg; + kvm_pte_t pte = *ptep; - if (!addr_is_memory(addr)) + if (kvm_pte_valid(pte) && !addr_is_memory(kvm_pte_to_phys(pte))) return -EINVAL; - hyp_spin_lock(&host_kvm.lock); - hyp_spin_lock(&pkvm_pgd_lock); + return d->get_page_state(pte) == d->desired ? 
0 : -EPERM; +} + +static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size, + struct check_walk_data *data) +{ + struct kvm_pgtable_walker walker = { + .cb = __check_page_state_visitor, + .arg = data, + .flags = KVM_PGTABLE_WALK_LEAF, + }; + + return kvm_pgtable_walk(pgt, addr, size, &walker); +} + +static enum pkvm_page_state host_get_page_state(kvm_pte_t pte) +{ + if (!kvm_pte_valid(pte) && pte) + return PKVM_NOPAGE; + + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); +} + +static int __host_check_page_state_range(u64 addr, u64 size, + enum pkvm_page_state state) +{ + struct check_walk_data d = { + .desired = state, + .get_page_state = host_get_page_state, + }; + + hyp_assert_lock_held(&host_kvm.lock); + return check_page_state_range(&host_kvm.pgt, addr, size, &d); +} + +static int __host_set_page_state_range(u64 addr, u64 size, + enum pkvm_page_state state) +{ + enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state); + + return host_stage2_idmap_locked(addr, size, prot); +} + +static int host_request_owned_transition(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_check_page_state_range(addr, size, PKVM_PAGE_OWNED); +} + +static int host_request_unshare(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_check_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); +} + +static int host_initiate_share(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_set_page_state_range(addr, size, PKVM_PAGE_SHARED_OWNED); +} + +static int host_initiate_unshare(u64 *completer_addr, + const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + u64 addr = tx->initiator.addr; + + *completer_addr = tx->initiator.host.completer_addr; + return __host_set_page_state_range(addr, size, PKVM_PAGE_OWNED); +} + +static enum pkvm_page_state hyp_get_page_state(kvm_pte_t pte) +{ + if (!kvm_pte_valid(pte)) + return PKVM_NOPAGE; + + return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte)); +} + +static int __hyp_check_page_state_range(u64 addr, u64 size, + enum pkvm_page_state state) +{ + struct check_walk_data d = { + .desired = state, + .get_page_state = hyp_get_page_state, + }; + + hyp_assert_lock_held(&pkvm_pgd_lock); + return check_page_state_range(&pkvm_pgtable, addr, size, &d); +} + +static bool __hyp_ack_skip_pgtable_check(const struct pkvm_mem_transition *tx) +{ + return !(IS_ENABLED(CONFIG_NVHE_EL2_DEBUG) || + tx->initiator.id != PKVM_ID_HOST); +} + +static int hyp_ack_share(u64 addr, const struct pkvm_mem_transition *tx, + enum kvm_pgtable_prot perms) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (perms != PAGE_HYP) + return -EPERM; + + if (__hyp_ack_skip_pgtable_check(tx)) + return 0; + + return __hyp_check_page_state_range(addr, size, PKVM_NOPAGE); +} + +static int hyp_ack_unshare(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + + if (__hyp_ack_skip_pgtable_check(tx)) + return 0; + + return __hyp_check_page_state_range(addr, size, + PKVM_PAGE_SHARED_BORROWED); +} + +static int hyp_complete_share(u64 addr, const struct 
pkvm_mem_transition *tx, + enum kvm_pgtable_prot perms) +{ + void *start = (void *)addr, *end = start + (tx->nr_pages * PAGE_SIZE); + enum kvm_pgtable_prot prot; + + prot = pkvm_mkstate(perms, PKVM_PAGE_SHARED_BORROWED); + return pkvm_create_mappings_locked(start, end, prot); +} + +static int hyp_complete_unshare(u64 addr, const struct pkvm_mem_transition *tx) +{ + u64 size = tx->nr_pages * PAGE_SIZE; + int ret = kvm_pgtable_hyp_unmap(&pkvm_pgtable, addr, size); + + return (ret != size) ? -EFAULT : 0; +} + +static int check_share(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_request_owned_transition(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } - ret = kvm_pgtable_get_leaf(&host_kvm.pgt, addr, &pte, NULL); if (ret) - goto unlock; - if (!pte) - goto map_shared; + return ret; - /* - * Check attributes in the host stage-2 PTE. We need the page to be: - * - mapped RWX as we're sharing memory; - * - not borrowed, as that implies absence of ownership. - * Otherwise, we can't let it got through - */ - cur = kvm_pgtable_stage2_pte_prot(pte); - prot = pkvm_mkstate(0, PKVM_PAGE_SHARED_BORROWED); - if (!check_prot(cur, PKVM_HOST_MEM_PROT, prot)) { - ret = -EPERM; - goto unlock; + switch (tx->completer.id) { + case PKVM_ID_HYP: + ret = hyp_ack_share(completer_addr, tx, share->completer_prot); + break; + default: + ret = -EINVAL; } - state = pkvm_getstate(cur); - if (state == PKVM_PAGE_OWNED) - goto map_shared; + return ret; +} - /* - * Tolerate double-sharing the same page, but this requires - * cross-checking the hypervisor stage-1. - */ - if (state != PKVM_PAGE_SHARED_OWNED) { - ret = -EPERM; - goto unlock; +static int __do_share(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_initiate_share(&completer_addr, tx); + break; + default: + ret = -EINVAL; } - ret = kvm_pgtable_get_leaf(&pkvm_pgtable, (u64)virt, &pte, NULL); if (ret) - goto unlock; + return ret; - /* - * If the page has been shared with the hypervisor, it must be - * already mapped as SHARED_BORROWED in its stage-1. - */ - cur = kvm_pgtable_hyp_pte_prot(pte); - prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); - if (!check_prot(cur, prot, ~prot)) - ret = -EPERM; - goto unlock; + switch (tx->completer.id) { + case PKVM_ID_HYP: + ret = hyp_complete_share(completer_addr, tx, share->completer_prot); + break; + default: + ret = -EINVAL; + } -map_shared: - /* - * If the page is not yet shared, adjust mappings in both page-tables - * while both locks are held. - */ - prot = pkvm_mkstate(PAGE_HYP, PKVM_PAGE_SHARED_BORROWED); - ret = pkvm_create_mappings_locked(virt, virt + PAGE_SIZE, prot); - BUG_ON(ret); + return ret; +} - prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED); - ret = host_stage2_idmap_locked(addr, PAGE_SIZE, prot); - BUG_ON(ret); +/* + * do_share(): + * + * The page owner grants access to another component with a given set + * of permissions. 
+ * + * Initiator: OWNED => SHARED_OWNED + * Completer: NOPAGE => SHARED_BORROWED + */ +static int do_share(struct pkvm_mem_share *share) +{ + int ret; -unlock: - hyp_spin_unlock(&pkvm_pgd_lock); - hyp_spin_unlock(&host_kvm.lock); + ret = check_share(share); + if (ret) + return ret; + + return WARN_ON(__do_share(share)); +} + +static int check_unshare(struct pkvm_mem_share *share) +{ + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; + + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_request_unshare(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HYP: + ret = hyp_ack_unshare(completer_addr, tx); + break; + default: + ret = -EINVAL; + } return ret; } -void handle_host_mem_abort(struct kvm_cpu_context *host_ctxt) +static int __do_unshare(struct pkvm_mem_share *share) { - struct kvm_vcpu_fault_info fault; - u64 esr, addr; - int ret = 0; + const struct pkvm_mem_transition *tx = &share->tx; + u64 completer_addr; + int ret; - esr = read_sysreg_el2(SYS_ESR); - BUG_ON(!__get_fault_info(esr, &fault)); + switch (tx->initiator.id) { + case PKVM_ID_HOST: + ret = host_initiate_unshare(&completer_addr, tx); + break; + default: + ret = -EINVAL; + } - addr = (fault.hpfar_el2 & HPFAR_MASK) << 8; - ret = host_stage2_idmap(addr); - BUG_ON(ret && ret != -EAGAIN); + if (ret) + return ret; + + switch (tx->completer.id) { + case PKVM_ID_HYP: + ret = hyp_complete_unshare(completer_addr, tx); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/* + * do_unshare(): + * + * The page owner revokes access from another component for a range of + * pages which were previously shared using do_share(). + * + * Initiator: SHARED_OWNED => OWNED + * Completer: SHARED_BORROWED => NOPAGE + */ +static int do_unshare(struct pkvm_mem_share *share) +{ + int ret; + + ret = check_unshare(share); + if (ret) + return ret; + + return WARN_ON(__do_unshare(share)); +} + +int __pkvm_host_share_hyp(u64 pfn) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_share share = { + .tx = { + .nr_pages = 1, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = hyp_addr, + }, + }, + .completer = { + .id = PKVM_ID_HYP, + }, + }, + .completer_prot = PAGE_HYP, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_share(&share); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; +} + +int __pkvm_host_unshare_hyp(u64 pfn) +{ + int ret; + u64 host_addr = hyp_pfn_to_phys(pfn); + u64 hyp_addr = (u64)__hyp_va(host_addr); + struct pkvm_mem_share share = { + .tx = { + .nr_pages = 1, + .initiator = { + .id = PKVM_ID_HOST, + .addr = host_addr, + .host = { + .completer_addr = hyp_addr, + }, + }, + .completer = { + .id = PKVM_ID_HYP, + }, + }, + .completer_prot = PAGE_HYP, + }; + + host_lock_component(); + hyp_lock_component(); + + ret = do_unshare(&share); + + hyp_unlock_component(); + host_unlock_component(); + + return ret; } diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index e31149965204..27af337f9fea 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -166,6 +166,7 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, enum kvm_pgtable_walk_flags flag, void * const arg) { + struct kvm_pgtable_mm_ops *mm_ops = arg; enum kvm_pgtable_prot prot; enum pkvm_page_state state; kvm_pte_t pte = *ptep; 
@@ -174,6 +175,15 @@ static int finalize_host_mappings_walker(u64 addr, u64 end, u32 level, if (!kvm_pte_valid(pte)) return 0; + /* + * Fix-up the refcount for the page-table pages as the early allocator + * was unable to access the hyp_vmemmap and so the buddy allocator has + * initialised the refcount to '1'. + */ + mm_ops->get_page(ptep); + if (flag != KVM_PGTABLE_WALK_LEAF) + return 0; + if (level != (KVM_PGTABLE_MAX_LEVELS - 1)) return -EINVAL; @@ -206,7 +216,8 @@ static int finalize_host_mappings(void) { struct kvm_pgtable_walker walker = { .cb = finalize_host_mappings_walker, - .flags = KVM_PGTABLE_WALK_LEAF, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + .arg = pkvm_pgtable.mm_ops, }; int i, ret; @@ -241,19 +252,20 @@ void __noreturn __pkvm_init_finalise(void) if (ret) goto out; - ret = finalize_host_mappings(); - if (ret) - goto out; - pkvm_pgtable_mm_ops = (struct kvm_pgtable_mm_ops) { .zalloc_page = hyp_zalloc_hyp_page, .phys_to_virt = hyp_phys_to_virt, .virt_to_phys = hyp_virt_to_phys, .get_page = hpool_get_page, .put_page = hpool_put_page, + .page_count = hyp_page_count, }; pkvm_pgtable.mm_ops = &pkvm_pgtable_mm_ops; + ret = finalize_host_mappings(); + if (ret) + goto out; + out: /* * We tail-called to here from handle___pkvm_init() and will not return, diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index f8ceebe4982e..adc73f8cd24f 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -383,21 +383,6 @@ enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte) return prot; } -static bool hyp_pte_needs_update(kvm_pte_t old, kvm_pte_t new) -{ - /* - * Tolerate KVM recreating the exact same mapping, or changing software - * bits if the existing mapping was valid. - */ - if (old == new) - return false; - - if (!kvm_pte_valid(old)) - return true; - - return !WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW); -} - static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, struct hyp_map_data *data) { @@ -407,11 +392,16 @@ static bool hyp_map_walker_try_leaf(u64 addr, u64 end, u32 level, if (!kvm_block_mapping_supported(addr, end, phys, level)) return false; + data->phys += granule; new = kvm_init_valid_leaf_pte(phys, data->attr, level); - if (hyp_pte_needs_update(old, new)) - smp_store_release(ptep, new); + if (old == new) + return true; + if (!kvm_pte_valid(old)) + data->mm_ops->get_page(ptep); + else if (WARN_ON((old ^ new) & ~KVM_PTE_LEAF_ATTR_HI_SW)) + return false; - data->phys += granule; + smp_store_release(ptep, new); return true; } @@ -433,6 +423,7 @@ static int hyp_map_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, return -ENOMEM; kvm_set_table_pte(ptep, childp, mm_ops); + mm_ops->get_page(ptep); return 0; } @@ -460,6 +451,69 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys, return ret; } +struct hyp_unmap_data { + u64 unmapped; + struct kvm_pgtable_mm_ops *mm_ops; +}; + +static int hyp_unmap_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, + enum kvm_pgtable_walk_flags flag, void * const arg) +{ + kvm_pte_t pte = *ptep, *childp = NULL; + u64 granule = kvm_granule_size(level); + struct hyp_unmap_data *data = arg; + struct kvm_pgtable_mm_ops *mm_ops = data->mm_ops; + + if (!kvm_pte_valid(pte)) + return -EINVAL; + + if (kvm_pte_table(pte, level)) { + childp = kvm_pte_follow(pte, mm_ops); + + if (mm_ops->page_count(childp) != 1) + return 0; + + kvm_clear_pte(ptep); + dsb(ishst); + __tlbi_level(vae2is, __TLBI_VADDR(addr, 0), level); + } 
else { + if (end - addr < granule) + return -EINVAL; + + kvm_clear_pte(ptep); + dsb(ishst); + __tlbi_level(vale2is, __TLBI_VADDR(addr, 0), level); + data->unmapped += granule; + } + + dsb(ish); + isb(); + mm_ops->put_page(ptep); + + if (childp) + mm_ops->put_page(childp); + + return 0; +} + +u64 kvm_pgtable_hyp_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size) +{ + struct hyp_unmap_data unmap_data = { + .mm_ops = pgt->mm_ops, + }; + struct kvm_pgtable_walker walker = { + .cb = hyp_unmap_walker, + .arg = &unmap_data, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, + }; + + if (!pgt->mm_ops->page_count) + return 0; + + kvm_pgtable_walk(pgt, addr, size, &walker); + return unmap_data.unmapped; +} + int kvm_pgtable_hyp_init(struct kvm_pgtable *pgt, u32 va_bits, struct kvm_pgtable_mm_ops *mm_ops) { @@ -482,8 +536,16 @@ static int hyp_free_walker(u64 addr, u64 end, u32 level, kvm_pte_t *ptep, enum kvm_pgtable_walk_flags flag, void * const arg) { struct kvm_pgtable_mm_ops *mm_ops = arg; + kvm_pte_t pte = *ptep; + + if (!kvm_pte_valid(pte)) + return 0; + + mm_ops->put_page(ptep); + + if (kvm_pte_table(pte, level)) + mm_ops->put_page(kvm_pte_follow(pte, mm_ops)); - mm_ops->put_page((void *)kvm_pte_follow(*ptep, mm_ops)); return 0; } @@ -491,7 +553,7 @@ void kvm_pgtable_hyp_destroy(struct kvm_pgtable *pgt) { struct kvm_pgtable_walker walker = { .cb = hyp_free_walker, - .flags = KVM_PGTABLE_WALK_TABLE_POST, + .flags = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST, .arg = pgt->mm_ops, }; diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index ea840fa223b5..b53e5bc3f4c3 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -284,14 +284,117 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr) } } -static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end) +struct hyp_shared_pfn { + u64 pfn; + int count; + struct rb_node node; +}; + +static DEFINE_MUTEX(hyp_shared_pfns_lock); +static struct rb_root hyp_shared_pfns = RB_ROOT; + +static struct hyp_shared_pfn *find_shared_pfn(u64 pfn, struct rb_node ***node, + struct rb_node **parent) { - phys_addr_t addr; + struct hyp_shared_pfn *this; + + *node = &hyp_shared_pfns.rb_node; + *parent = NULL; + while (**node) { + this = container_of(**node, struct hyp_shared_pfn, node); + *parent = **node; + if (this->pfn < pfn) + *node = &((**node)->rb_left); + else if (this->pfn > pfn) + *node = &((**node)->rb_right); + else + return this; + } + + return NULL; +} + +static int share_pfn_hyp(u64 pfn) +{ + struct rb_node **node, *parent; + struct hyp_shared_pfn *this; + int ret = 0; + + mutex_lock(&hyp_shared_pfns_lock); + this = find_shared_pfn(pfn, &node, &parent); + if (this) { + this->count++; + goto unlock; + } + + this = kzalloc(sizeof(*this), GFP_KERNEL); + if (!this) { + ret = -ENOMEM; + goto unlock; + } + + this->pfn = pfn; + this->count = 1; + rb_link_node(&this->node, parent, node); + rb_insert_color(&this->node, &hyp_shared_pfns); + ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, pfn, 1); +unlock: + mutex_unlock(&hyp_shared_pfns_lock); + + return ret; +} + +static int unshare_pfn_hyp(u64 pfn) +{ + struct rb_node **node, *parent; + struct hyp_shared_pfn *this; + int ret = 0; + + mutex_lock(&hyp_shared_pfns_lock); + this = find_shared_pfn(pfn, &node, &parent); + if (WARN_ON(!this)) { + ret = -ENOENT; + goto unlock; + } + + this->count--; + if (this->count) + goto unlock; + + rb_erase(&this->node, &hyp_shared_pfns); + kfree(this); + ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_hyp, pfn, 1); +unlock: + 
mutex_unlock(&hyp_shared_pfns_lock); + + return ret; +} + +int kvm_share_hyp(void *from, void *to) +{ + phys_addr_t start, end, cur; + u64 pfn; int ret; - for (addr = ALIGN_DOWN(start, PAGE_SIZE); addr < end; addr += PAGE_SIZE) { - ret = kvm_call_hyp_nvhe(__pkvm_host_share_hyp, - __phys_to_pfn(addr)); + if (is_kernel_in_hyp_mode()) + return 0; + + /* + * The share hcall maps things in the 'fixed-offset' region of the hyp + * VA space, so we can only share physically contiguous data-structures + * for now. + */ + if (is_vmalloc_or_module_addr(from) || is_vmalloc_or_module_addr(to)) + return -EINVAL; + + if (kvm_host_owns_hyp_mappings()) + return create_hyp_mappings(from, to, PAGE_HYP); + + start = ALIGN_DOWN(__pa(from), PAGE_SIZE); + end = PAGE_ALIGN(__pa(to)); + for (cur = start; cur < end; cur += PAGE_SIZE) { + pfn = __phys_to_pfn(cur); + ret = share_pfn_hyp(pfn); if (ret) return ret; } @@ -299,6 +402,22 @@ static int pkvm_share_hyp(phys_addr_t start, phys_addr_t end) return 0; } +void kvm_unshare_hyp(void *from, void *to) +{ + phys_addr_t start, end, cur; + u64 pfn; + + if (is_kernel_in_hyp_mode() || kvm_host_owns_hyp_mappings() || !from) + return; + + start = ALIGN_DOWN(__pa(from), PAGE_SIZE); + end = PAGE_ALIGN(__pa(to)); + for (cur = start; cur < end; cur += PAGE_SIZE) { + pfn = __phys_to_pfn(cur); + WARN_ON(unshare_pfn_hyp(pfn)); + } +} + /** * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range @@ -319,12 +438,8 @@ int create_hyp_mappings(void *from, void *to, enum kvm_pgtable_prot prot) if (is_kernel_in_hyp_mode()) return 0; - if (!kvm_host_owns_hyp_mappings()) { - if (WARN_ON(prot != PAGE_HYP)) - return -EPERM; - return pkvm_share_hyp(kvm_kaddr_to_phys(from), - kvm_kaddr_to_phys(to)); - } + if (!kvm_host_owns_hyp_mappings()) + return -EPERM; start = start & PAGE_MASK; end = PAGE_ALIGN(end); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index c7a0249df840..798a84eddbde 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -113,7 +113,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) if (!buf) return -ENOMEM; - ret = create_hyp_mappings(buf, buf + reg_sz, PAGE_HYP); + ret = kvm_share_hyp(buf, buf + reg_sz); if (ret) { kfree(buf); return ret; @@ -150,7 +150,13 @@ bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu) void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu) { - kfree(vcpu->arch.sve_state); + void *sve_state = vcpu->arch.sve_state; + + kvm_vcpu_unshare_task_fp(vcpu); + kvm_unshare_hyp(vcpu, vcpu + 1); + if (sve_state) + kvm_unshare_hyp(sve_state, sve_state + vcpu_sve_state_size(vcpu)); + kfree(sve_state); } static void kvm_vcpu_reset_sve(struct kvm_vcpu *vcpu) |
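A closing note on the mmu.c changes above: the rb-tree of hyp_shared_pfn nodes exists because two independently shared objects can overlap the same physical page, the exact hazard the cover letter calls out. A small self-contained model of the counting behaviour (hypothetical, userspace-style; the real code keeps one node per pfn under hyp_shared_pfns_lock):

```c
/*
 * Hypothetical model of the pfn refcounting done by share_pfn_hyp()
 * and unshare_pfn_hyp() above: the EL2 hypercall is only issued on
 * the 0->1 (share) and 1->0 (unshare) transitions, so objects that
 * overlap a common page cannot unmap each other's data.
 */
#include <assert.h>

static int count;	/* stands in for hyp_shared_pfn::count */

static int model_share_pfn(void)
{
	return count++ == 0;	/* 1: would call __pkvm_host_share_hyp */
}

static int model_unshare_pfn(void)
{
	assert(count > 0);
	return --count == 0;	/* 1: would call __pkvm_host_unshare_hyp */
}

int main(void)
{
	assert(model_share_pfn() == 1);		/* first object: share with EL2 */
	assert(model_share_pfn() == 0);		/* overlapping object: count only */
	assert(model_unshare_pfn() == 0);	/* page still in use at EL2 */
	assert(model_unshare_pfn() == 1);	/* last user gone: unshare */
	return 0;
}
```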