From 78b497f2e62d8c7514de5f83c80837bbb120e93e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 3 Sep 2021 15:08:05 +0200 Subject: kvm: use kvfree() in kvm_arch_free_vm() By switching from kfree() to kvfree() in kvm_arch_free_vm() Arm64 can use the common variant. This can be accomplished by adding another macro __KVM_HAVE_ARCH_VM_FREE, which will be used only by x86 for now. Further simplification can be achieved by adding __kvm_arch_free_vm() doing the common part. Suggested-by: Paolo Bonzini Signed-off-by: Juergen Gross Message-Id: <20210903130808.30142-5-jgross@suse.com> Signed-off-by: Paolo Bonzini --- arch/arm64/include/asm/kvm_host.h | 1 - arch/arm64/kvm/arm.c | 8 -------- 2 files changed, 9 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f8be56d5342b..369c30e28301 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -771,7 +771,6 @@ int kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); -void kvm_arch_free_vm(struct kvm *kvm); int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe102cd2e518..7838e9fb693e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -297,14 +297,6 @@ struct kvm *kvm_arch_alloc_vm(void) return vzalloc(sizeof(struct kvm)); } -void kvm_arch_free_vm(struct kvm *kvm) -{ - if (!has_vhe()) - kfree(kvm); - else - vfree(kvm); -} - int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) { if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) -- cgit v1.2.3-70-g09d2 From a78738ed1d9bf40d09109599b884508c69d188b8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 8 Oct 2021 14:58:34 +0100 Subject: KVM: arm64: Turn __KVM_HOST_SMCCC_FUNC_* into an enum (mostly) __KVM_HOST_SMCCC_FUNC_* is a royal pain, as there is a fair amount of churn around these #defines, and we avoid making it an enum only for the sake of the early init, low level code that requires __KVM_HOST_SMCCC_FUNC___kvm_hyp_init to be usable from assembly. Let's be brave and turn everything but this symbol into an enum, using a bit of arithmetic to avoid any overlap. Acked-by: Will Deacon Link: https://lore.kernel.org/r/877depq9gw.wl-maz@kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon Link: https://lore.kernel.org/r/20211008135839.1193-2-will@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 44 ++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index e86045ac43ba..43b5e213ae43 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -44,31 +44,35 @@ #define KVM_HOST_SMCCC_FUNC(name) KVM_HOST_SMCCC_ID(__KVM_HOST_SMCCC_FUNC_##name) #define __KVM_HOST_SMCCC_FUNC___kvm_hyp_init 0 -#define __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run 1 -#define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context 2 -#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa 3 -#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid 4 -#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context 5 -#define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff 6 -#define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs 7 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config 8 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr 9 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr 10 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs 11 -#define __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 12 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs 13 -#define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs 14 -#define __KVM_HOST_SMCCC_FUNC___pkvm_init 15 -#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp 16 -#define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping 17 -#define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 -#define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 -#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 20 #ifndef __ASSEMBLY__ #include +enum __kvm_host_smccc_func { + /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */ + __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, + __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context, + __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa, + __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid, + __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, + __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, + __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config, + __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, + __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr, + __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs, + __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2, + __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, + __KVM_HOST_SMCCC_FUNC___pkvm_init, + __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, + __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping, + __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector, + __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, + __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, +}; + #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] #define DECLARE_KVM_NVHE_SYM(sym) extern char kvm_nvhe_sym(sym)[] -- cgit v1.2.3-70-g09d2 From 8f4566f18db5d1257fc2d5442e16274424a529c1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Oct 2021 14:58:35 +0100 Subject: arm64: Prevent kexec and hibernation if is_protected_kvm_enabled() When pKVM is enabled, the hypervisor code at EL2 and its data structures are inaccessible to the host kernel and cannot be torn down or replaced as this would defeat the integrity properies which pKVM aims to provide. Furthermore, the ABI between the host and EL2 is flexible and private to whatever the current implementation of KVM requires and so booting a new kernel with an old EL2 component is very likely to end in disaster. In preparation for uninstalling the hyp stub calls which are relied upon to reset EL2, disable kexec and hibernation in the host when protected KVM is enabled. Cc: Marc Zyngier Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211008135839.1193-3-will@kernel.org --- arch/arm64/kernel/smp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 6f6ff072acbd..44369b99a57e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1128,5 +1128,6 @@ bool cpus_are_stuck_in_kernel(void) { bool smp_spin_tables = (num_possible_cpus() > 1 && !have_cpu_die()); - return !!cpus_stuck_in_kernel || smp_spin_tables; + return !!cpus_stuck_in_kernel || smp_spin_tables || + is_protected_kvm_enabled(); } -- cgit v1.2.3-70-g09d2 From 8579a185bacaa64c65e43e251ceede2f7600f7e2 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Oct 2021 14:58:36 +0100 Subject: KVM: arm64: Reject stub hypercalls after pKVM has been initialised The stub hypercalls provide mechanisms to reset and replace the EL2 code, so uninstall them once pKVM has been initialised in order to ensure the integrity of the hypervisor code. To ensure pKVM initialisation remains functional, split cpu_hyp_reinit() into two helper functions to separate usage of the stub from usage of pkvm hypercalls either side of __pkvm_init on the boot CPU. Cc: Marc Zyngier Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211008135839.1193-4-will@kernel.org --- arch/arm64/kvm/arm.c | 31 +++++++++++++++++++++++-------- arch/arm64/kvm/hyp/nvhe/host.S | 26 +++++++++++++++++--------- 2 files changed, 40 insertions(+), 17 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe102cd2e518..9506cf88fa0e 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1579,25 +1579,33 @@ static void cpu_set_hyp_vector(void) kvm_call_hyp_nvhe(__pkvm_cpu_set_vector, data->slot); } -static void cpu_hyp_reinit(void) +static void cpu_hyp_init_context(void) { kvm_init_host_cpu_context(&this_cpu_ptr_hyp_sym(kvm_host_data)->host_ctxt); - cpu_hyp_reset(); - - if (is_kernel_in_hyp_mode()) - kvm_timer_init_vhe(); - else + if (!is_kernel_in_hyp_mode()) cpu_init_hyp_mode(); +} +static void cpu_hyp_init_features(void) +{ cpu_set_hyp_vector(); - kvm_arm_init_debug(); + if (is_kernel_in_hyp_mode()) + kvm_timer_init_vhe(); + if (vgic_present) kvm_vgic_init_cpu_hardware(); } +static void cpu_hyp_reinit(void) +{ + cpu_hyp_reset(); + cpu_hyp_init_context(); + cpu_hyp_init_features(); +} + static void _kvm_arch_hardware_enable(void *discard) { if (!__this_cpu_read(kvm_arm_hardware_enabled)) { @@ -1788,10 +1796,17 @@ static int do_pkvm_init(u32 hyp_va_bits) int ret; preempt_disable(); - hyp_install_host_vector(); + cpu_hyp_init_context(); ret = kvm_call_hyp_nvhe(__pkvm_init, hyp_mem_base, hyp_mem_size, num_possible_cpus(), kern_hyp_va(per_cpu_base), hyp_va_bits); + cpu_hyp_init_features(); + + /* + * The stub hypercalls are now disabled, so set our local flag to + * prevent a later re-init attempt in kvm_arch_hardware_enable(). + */ + __this_cpu_write(kvm_arm_hardware_enabled, 1); preempt_enable(); return ret; diff --git a/arch/arm64/kvm/hyp/nvhe/host.S b/arch/arm64/kvm/hyp/nvhe/host.S index 4b652ffb591d..0c6116d34e18 100644 --- a/arch/arm64/kvm/hyp/nvhe/host.S +++ b/arch/arm64/kvm/hyp/nvhe/host.S @@ -110,17 +110,14 @@ SYM_FUNC_START(__hyp_do_panic) b __host_enter_for_panic SYM_FUNC_END(__hyp_do_panic) -.macro host_el1_sync_vect - .align 7 -.L__vect_start\@: - stp x0, x1, [sp, #-16]! - mrs x0, esr_el2 - lsr x0, x0, #ESR_ELx_EC_SHIFT - cmp x0, #ESR_ELx_EC_HVC64 - b.ne __host_exit - +SYM_FUNC_START(__host_hvc) ldp x0, x1, [sp] // Don't fixup the stack yet + /* No stub for you, sonny Jim */ +alternative_if ARM64_KVM_PROTECTED_MODE + b __host_exit +alternative_else_nop_endif + /* Check for a stub HVC call */ cmp x0, #HVC_STUB_HCALL_NR b.hs __host_exit @@ -137,6 +134,17 @@ SYM_FUNC_END(__hyp_do_panic) ldr x5, =__kvm_handle_stub_hvc hyp_pa x5, x6 br x5 +SYM_FUNC_END(__host_hvc) + +.macro host_el1_sync_vect + .align 7 +.L__vect_start\@: + stp x0, x1, [sp, #-16]! + mrs x0, esr_el2 + lsr x0, x0, #ESR_ELx_EC_SHIFT + cmp x0, #ESR_ELx_EC_HVC64 + b.eq __host_hvc + b __host_exit .L__vect_end\@: .if ((.L__vect_end\@ - .L__vect_start\@) > 0x80) .error "host_el1_sync_vect larger than vector entry" -- cgit v1.2.3-70-g09d2 From 2f2e1a5069679491d18cf9021da19b40c56a17f3 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Oct 2021 14:58:37 +0100 Subject: KVM: arm64: Propagate errors from __pkvm_prot_finalize hypercall If the __pkvm_prot_finalize hypercall returns an error, we WARN but fail to propagate the failure code back to kvm_arch_init(). Pass a pointer to a zero-initialised return variable so that failure to finalise the pKVM protections on a host CPU can be reported back to KVM. Cc: Marc Zyngier Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211008135839.1193-5-will@kernel.org --- arch/arm64/kvm/arm.c | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 9506cf88fa0e..13bbf35896cd 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1986,9 +1986,25 @@ out_err: return err; } -static void _kvm_host_prot_finalize(void *discard) +static void _kvm_host_prot_finalize(void *arg) { - WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize)); + int *err = arg; + + if (WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize))) + WRITE_ONCE(*err, -EINVAL); +} + +static int pkvm_drop_host_privileges(void) +{ + int ret = 0; + + /* + * Flip the static key upfront as that may no longer be possible + * once the host stage 2 is installed. + */ + static_branch_enable(&kvm_protected_mode_initialized); + on_each_cpu(_kvm_host_prot_finalize, &ret, 1); + return ret; } static int finalize_hyp_mode(void) @@ -2002,15 +2018,7 @@ static int finalize_hyp_mode(void) * None of other sections should ever be introspected. */ kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start); - - /* - * Flip the static key upfront as that may no longer be possible - * once the host stage 2 is installed. - */ - static_branch_enable(&kvm_protected_mode_initialized); - on_each_cpu(_kvm_host_prot_finalize, NULL, 1); - - return 0; + return pkvm_drop_host_privileges(); } struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) -- cgit v1.2.3-70-g09d2 From 07036cffe17ec07e8fb630d86f8ea21832d9e57d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Oct 2021 14:58:38 +0100 Subject: KVM: arm64: Prevent re-finalisation of pKVM for a given CPU __pkvm_prot_finalize() completes the deprivilege of the host when pKVM is in use by installing a stage-2 translation table for the calling CPU. Issuing the hypercall multiple times for a given CPU makes little sense, but in such a case just return early with -EPERM rather than go through the whole page-table dance again. Cc: Marc Zyngier Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211008135839.1193-6-will@kernel.org --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index bacd493a4eac..cafe17e5fa8f 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -123,6 +123,9 @@ int __pkvm_prot_finalize(void) struct kvm_s2_mmu *mmu = &host_kvm.arch.mmu; struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params); + if (params->hcr_el2 & HCR_VM) + return -EPERM; + params->vttbr = kvm_get_vttbr(mmu); params->vtcr = host_kvm.arch.vtcr; params->hcr_el2 |= HCR_VM; -- cgit v1.2.3-70-g09d2 From 057bed206f70d624c2eacb43ec56551950a26832 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 8 Oct 2021 14:58:39 +0100 Subject: KVM: arm64: Disable privileged hypercalls after pKVM finalisation After pKVM has been 'finalised' using the __pkvm_prot_finalize hypercall, the calling CPU will have a Stage-2 translation enabled to prevent access to memory pages owned by EL2. Although this forms a significant part of the process to deprivilege the host kernel, we also need to ensure that the hypercall interface is reduced so that the EL2 code cannot, for example, be re-initialised using a new set of vectors. Re-order the hypercalls so that only a suffix remains available after finalisation of pKVM. Cc: Marc Zyngier Cc: Quentin Perret Signed-off-by: Will Deacon Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211008135839.1193-7-will@kernel.org --- arch/arm64/include/asm/kvm_asm.h | 25 ++++++++++++++----------- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 37 ++++++++++++++++++++++++++----------- 2 files changed, 40 insertions(+), 22 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 43b5e213ae43..4654d27fd221 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -50,27 +50,30 @@ #include enum __kvm_host_smccc_func { + /* Hypercalls available only prior to pKVM finalisation */ /* __KVM_HOST_SMCCC_FUNC___kvm_hyp_init */ - __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, + __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2 = __KVM_HOST_SMCCC_FUNC___kvm_hyp_init + 1, + __KVM_HOST_SMCCC_FUNC___pkvm_init, + __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping, + __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector, + __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs, + __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config, + __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, + + /* Hypercalls available after pKVM finalisation */ + __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, + __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, + __KVM_HOST_SMCCC_FUNC___kvm_vcpu_run, __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context, __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa, __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid, __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context, __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff, - __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs, - __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config, __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr, __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr, - __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs, - __KVM_HOST_SMCCC_FUNC___kvm_get_mdcr_el2, __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs, __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs, - __KVM_HOST_SMCCC_FUNC___pkvm_init, - __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp, - __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping, - __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector, - __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize, - __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc, }; #define DECLARE_KVM_VHE_SYM(sym) extern char sym[] diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2da6aa8da868..8566805ef62c 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -165,36 +165,51 @@ typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x static const hcall_t host_hcall[] = { - HANDLE_FUNC(__kvm_vcpu_run), + /* ___kvm_hyp_init */ + HANDLE_FUNC(__kvm_get_mdcr_el2), + HANDLE_FUNC(__pkvm_init), + HANDLE_FUNC(__pkvm_create_private_mapping), + HANDLE_FUNC(__pkvm_cpu_set_vector), + HANDLE_FUNC(__kvm_enable_ssbs), + HANDLE_FUNC(__vgic_v3_init_lrs), + HANDLE_FUNC(__vgic_v3_get_gic_config), + HANDLE_FUNC(__pkvm_prot_finalize), + + HANDLE_FUNC(__pkvm_host_share_hyp), HANDLE_FUNC(__kvm_adjust_pc), + HANDLE_FUNC(__kvm_vcpu_run), HANDLE_FUNC(__kvm_flush_vm_context), HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa), HANDLE_FUNC(__kvm_tlb_flush_vmid), HANDLE_FUNC(__kvm_flush_cpu_context), HANDLE_FUNC(__kvm_timer_set_cntvoff), - HANDLE_FUNC(__kvm_enable_ssbs), - HANDLE_FUNC(__vgic_v3_get_gic_config), HANDLE_FUNC(__vgic_v3_read_vmcr), HANDLE_FUNC(__vgic_v3_write_vmcr), - HANDLE_FUNC(__vgic_v3_init_lrs), - HANDLE_FUNC(__kvm_get_mdcr_el2), HANDLE_FUNC(__vgic_v3_save_aprs), HANDLE_FUNC(__vgic_v3_restore_aprs), - HANDLE_FUNC(__pkvm_init), - HANDLE_FUNC(__pkvm_cpu_set_vector), - HANDLE_FUNC(__pkvm_host_share_hyp), - HANDLE_FUNC(__pkvm_create_private_mapping), - HANDLE_FUNC(__pkvm_prot_finalize), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) { DECLARE_REG(unsigned long, id, host_ctxt, 0); + unsigned long hcall_min = 0; hcall_t hfn; + /* + * If pKVM has been initialised then reject any calls to the + * early "privileged" hypercalls. Note that we cannot reject + * calls to __pkvm_prot_finalize for two reasons: (1) The static + * key used to determine initialisation must be toggled prior to + * finalisation and (2) finalisation is performed on a per-CPU + * basis. This is all fine, however, since __pkvm_prot_finalize + * returns -EPERM after the first call for a given CPU. + */ + if (static_branch_unlikely(&kvm_protected_mode_initialized)) + hcall_min = __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize; + id -= KVM_HOST_SMCCC_ID(0); - if (unlikely(id >= ARRAY_SIZE(host_hcall))) + if (unlikely(id < hcall_min || id >= ARRAY_SIZE(host_hcall))) goto inval; hfn = host_hcall[id]; -- cgit v1.2.3-70-g09d2 From f25c5e4dafd859b941a4654cbab9eb83ff994bcd Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 4 Oct 2021 18:19:11 -0700 Subject: kvm: arm64: vgic: Introduce vgic_check_iorange Add the new vgic_check_iorange helper that checks that an iorange is sane: the start address and size have valid alignments, the range is within the addressable PA range, start+size doesn't overflow, and the start wasn't already defined. No functional change. Reviewed-by: Eric Auger Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005011921.437353-2-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 22 ++++++++++++++++++++++ arch/arm64/kvm/vgic/vgic.h | 4 ++++ 2 files changed, 26 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 7740995de982..cc0ad227b380 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -29,6 +29,28 @@ int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, return 0; } +int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, + phys_addr_t addr, phys_addr_t alignment, + phys_addr_t size) +{ + int ret; + + ret = vgic_check_ioaddr(kvm, &ioaddr, addr, alignment); + if (ret) + return ret; + + if (!IS_ALIGNED(size, alignment)) + return -EINVAL; + + if (addr + size < addr) + return -EINVAL; + + if (addr + size > kvm_phys_size(kvm)) + return -E2BIG; + + return 0; +} + static int vgic_check_type(struct kvm *kvm, int type_needed) { if (kvm->arch.vgic.vgic_model != type_needed) diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 14a9218641f5..4be01c38e8f1 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -175,6 +175,10 @@ void vgic_irq_handle_resampling(struct vgic_irq *irq, int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, phys_addr_t addr, phys_addr_t alignment); +int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, + phys_addr_t addr, phys_addr_t alignment, + phys_addr_t size); + void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); -- cgit v1.2.3-70-g09d2 From 4612d98f58c73ad63928200fd332f75c8e524dae Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 4 Oct 2021 18:19:12 -0700 Subject: KVM: arm64: vgic-v3: Check redist region is not above the VM IPA size Verify that the redistributor regions do not extend beyond the VM-specified IPA range (phys_size). This can happen when using KVM_VGIC_V3_ADDR_TYPE_REDIST or KVM_VGIC_V3_ADDR_TYPE_REDIST_REGIONS with: base + size > phys_size AND base < phys_size Add the missing check into vgic_v3_alloc_redist_region() which is called when setting the regions, and into vgic_v3_check_base() which is called when attempting the first vcpu-run. The vcpu-run check does not apply to KVM_VGIC_V3_ADDR_TYPE_REDIST_REGIONS because the regions size is known before the first vcpu-run. Note that using the REDIST_REGIONS API results in a different check, which already exists, at first vcpu run: that the number of redist regions is enough for all vcpus. Finally, this patch also enables some extra tests in vgic_v3_alloc_redist_region() by calculating "size" early for the legacy redist api: like checking that the REDIST region can fit all the already created vcpus. Reviewed-by: Eric Auger Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005011921.437353-3-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 6 ++++-- arch/arm64/kvm/vgic/vgic-v3.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index a09cdc0b953c..a9642fc71fdf 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -796,7 +796,9 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index, struct vgic_dist *d = &kvm->arch.vgic; struct vgic_redist_region *rdreg; struct list_head *rd_regions = &d->rd_regions; - size_t size = count * KVM_VGIC_V3_REDIST_SIZE; + int nr_vcpus = atomic_read(&kvm->online_vcpus); + size_t size = count ? count * KVM_VGIC_V3_REDIST_SIZE + : nr_vcpus * KVM_VGIC_V3_REDIST_SIZE; int ret; /* cross the end of memory ? */ @@ -840,7 +842,7 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index, rdreg->base = VGIC_ADDR_UNDEF; - ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K); + ret = vgic_check_iorange(kvm, rdreg->base, base, SZ_64K, size); if (ret) goto free; diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 21a6207fb2ee..960f51a8691f 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -483,8 +483,10 @@ bool vgic_v3_check_base(struct kvm *kvm) return false; list_for_each_entry(rdreg, &d->rd_regions, list) { - if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) < - rdreg->base) + size_t sz = vgic_v3_rd_region_size(kvm, rdreg); + + if (vgic_check_iorange(kvm, VGIC_ADDR_UNDEF, + rdreg->base, SZ_64K, sz)) return false; } -- cgit v1.2.3-70-g09d2 From c56a87da0a7fa14180082249ac954c7ebc9e74e1 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 4 Oct 2021 18:19:13 -0700 Subject: KVM: arm64: vgic-v2: Check cpu interface region is not above the VM IPA size Verify that the GICv2 CPU interface does not extend beyond the VM-specified IPA range (phys_size). base + size > phys_size AND base < phys_size Add the missing check into kvm_vgic_addr() which is called when setting the region. This patch also enables some superfluous checks for the distributor (vgic_check_ioaddr was enough as alignment == size for the distributors). Reviewed-by: Eric Auger Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005011921.437353-4-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index cc0ad227b380..08ae34b1a986 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -79,7 +79,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) { int r = 0; struct vgic_dist *vgic = &kvm->arch.vgic; - phys_addr_t *addr_ptr, alignment; + phys_addr_t *addr_ptr, alignment, size; u64 undef_value = VGIC_ADDR_UNDEF; mutex_lock(&kvm->lock); @@ -88,16 +88,19 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); addr_ptr = &vgic->vgic_dist_base; alignment = SZ_4K; + size = KVM_VGIC_V2_DIST_SIZE; break; case KVM_VGIC_V2_ADDR_TYPE_CPU: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); addr_ptr = &vgic->vgic_cpu_base; alignment = SZ_4K; + size = KVM_VGIC_V2_CPU_SIZE; break; case KVM_VGIC_V3_ADDR_TYPE_DIST: r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); addr_ptr = &vgic->vgic_dist_base; alignment = SZ_64K; + size = KVM_VGIC_V3_DIST_SIZE; break; case KVM_VGIC_V3_ADDR_TYPE_REDIST: { struct vgic_redist_region *rdreg; @@ -162,7 +165,7 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) goto out; if (write) { - r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment); + r = vgic_check_iorange(kvm, *addr_ptr, *addr, alignment, size); if (!r) *addr_ptr = *addr; } else { -- cgit v1.2.3-70-g09d2 From 2ec02f6c64f043a249850c835ca7975c3a155d8b Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 4 Oct 2021 18:19:14 -0700 Subject: KVM: arm64: vgic-v3: Check ITS region is not above the VM IPA size Verify that the ITS region does not extend beyond the VM-specified IPA range (phys_size). base + size > phys_size AND base < phys_size Add the missing check into vgic_its_set_attr() which is called when setting the region. Reviewed-by: Eric Auger Signed-off-by: Ricardo Koller Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005011921.437353-5-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic-its.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 61728c543eb9..ad55bb8cd30f 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -2710,8 +2710,8 @@ static int vgic_its_set_attr(struct kvm_device *dev, if (copy_from_user(&addr, uaddr, sizeof(addr))) return -EFAULT; - ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base, - addr, SZ_64K); + ret = vgic_check_iorange(dev->kvm, its->vgic_its_base, + addr, SZ_64K, KVM_VGIC_V3_ITS_SIZE); if (ret) return ret; -- cgit v1.2.3-70-g09d2 From 96e903896969679104c7fef2c776ed1b5b09584f Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Mon, 4 Oct 2021 18:19:15 -0700 Subject: KVM: arm64: vgic: Drop vgic_check_ioaddr() There are no more users of vgic_check_ioaddr(). Move its checks to vgic_check_iorange() and then remove it. Signed-off-by: Ricardo Koller Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211005011921.437353-6-ricarkol@google.com --- arch/arm64/kvm/vgic/vgic-kvm-device.c | 26 ++++---------------------- arch/arm64/kvm/vgic/vgic.h | 3 --- 2 files changed, 4 insertions(+), 25 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c index 08ae34b1a986..0d000d2fe8d2 100644 --- a/arch/arm64/kvm/vgic/vgic-kvm-device.c +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -14,38 +14,20 @@ /* common helpers */ -int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t alignment) -{ - if (addr & ~kvm_phys_mask(kvm)) - return -E2BIG; - - if (!IS_ALIGNED(addr, alignment)) - return -EINVAL; - - if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) - return -EEXIST; - - return 0; -} - int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, phys_addr_t addr, phys_addr_t alignment, phys_addr_t size) { - int ret; - - ret = vgic_check_ioaddr(kvm, &ioaddr, addr, alignment); - if (ret) - return ret; + if (!IS_VGIC_ADDR_UNDEF(ioaddr)) + return -EEXIST; - if (!IS_ALIGNED(size, alignment)) + if (!IS_ALIGNED(addr, alignment) || !IS_ALIGNED(size, alignment)) return -EINVAL; if (addr + size < addr) return -EINVAL; - if (addr + size > kvm_phys_size(kvm)) + if (addr & ~kvm_phys_mask(kvm) || addr + size > kvm_phys_size(kvm)) return -E2BIG; return 0; diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h index 4be01c38e8f1..3fd6c86a7ef3 100644 --- a/arch/arm64/kvm/vgic/vgic.h +++ b/arch/arm64/kvm/vgic/vgic.h @@ -172,9 +172,6 @@ void vgic_kick_vcpus(struct kvm *kvm); void vgic_irq_handle_resampling(struct vgic_irq *irq, bool lr_deactivated, bool lr_pending); -int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t alignment); - int vgic_check_iorange(struct kvm *kvm, phys_addr_t ioaddr, phys_addr_t addr, phys_addr_t alignment, phys_addr_t size); -- cgit v1.2.3-70-g09d2 From b6a68b97af23cc75781bed38221ce73144ac2e39 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 1 Oct 2021 18:05:53 +0100 Subject: KVM: arm64: Allow KVM to be disabled from the command line Although KVM can be compiled out of the kernel, it cannot be disabled at runtime. Allow this possibility by introducing a new mode that will prevent KVM from initialising. This is useful in the (limited) circumstances where you don't want KVM to be available (what is wrong with you?), or when you want to install another hypervisor instead (good luck with that). Reviewed-by: David Brazdil Acked-by: Will Deacon Acked-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Reviewed-by: Andrew Scull Link: https://lore.kernel.org/r/20211001170553.3062988-1-maz@kernel.org --- Documentation/admin-guide/kernel-parameters.txt | 2 ++ arch/arm64/include/asm/kvm_host.h | 1 + arch/arm64/kvm/arm.c | 14 +++++++++++++- 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 91ba391f9b32..f268731a3d4d 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2365,6 +2365,8 @@ kvm-arm.mode= [KVM,ARM] Select one of KVM/arm64's modes of operation. + none: Forcefully disable KVM. + nvhe: Standard nVHE-based mode, without support for protected guests. diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f8be56d5342b..019490c67976 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -58,6 +58,7 @@ enum kvm_mode { KVM_MODE_DEFAULT, KVM_MODE_PROTECTED, + KVM_MODE_NONE, }; enum kvm_mode kvm_get_mode(void); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe102cd2e518..658171231af9 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2064,6 +2064,11 @@ int kvm_arch_init(void *opaque) return -ENODEV; } + if (kvm_get_mode() == KVM_MODE_NONE) { + kvm_info("KVM disabled from command line\n"); + return -ENODEV; + } + in_hyp_mode = is_kernel_in_hyp_mode(); if (cpus_have_final_cap(ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE) || @@ -2137,8 +2142,15 @@ static int __init early_kvm_mode_cfg(char *arg) return 0; } - if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) + if (strcmp(arg, "nvhe") == 0 && !WARN_ON(is_kernel_in_hyp_mode())) { + kvm_mode = KVM_MODE_DEFAULT; return 0; + } + + if (strcmp(arg, "none") == 0) { + kvm_mode = KVM_MODE_NONE; + return 0; + } return -EINVAL; } -- cgit v1.2.3-70-g09d2 From c8f1e96734069c788b10545f4fd82bcbb6b55dfa Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 21 Sep 2021 15:22:30 -0700 Subject: KVM: arm64: Unconditionally include generic KVM's Kconfig Unconditionally "source" the generic KVM Kconfig instead of wrapping it with KVM=y. A future patch will select HAVE_KVM so that referencing HAVE_KVM in common kernel code doesn't break, and because KVM=y and HAVE_KVM=n is weird. Source the generic KVM Kconfig unconditionally so that HAVE_KVM and KVM don't end up with a circular dependency. Note, all but one of generic KVM's "configs" are of the HAVE_XYZ nature, and the one outlier correctly takes a dependency on CONFIG_KVM, i.e. the generic Kconfig is intended to be included unconditionally. No functional change intended. Signed-off-by: Sean Christopherson [maz: made NVHE_EL2_DEBUG depend on KVM] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210921222231.518092-2-seanjc@google.com --- arch/arm64/kvm/Kconfig | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index d7eec0b43744..1170b20d68a7 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -4,6 +4,7 @@ # source "virt/lib/Kconfig" +source "virt/kvm/Kconfig" menuconfig VIRTUALIZATION bool "Virtualization" @@ -43,12 +44,9 @@ menuconfig KVM If unsure, say N. -if KVM - -source "virt/kvm/Kconfig" - config NVHE_EL2_DEBUG bool "Debug mode for non-VHE EL2 object" + depends on KVM help Say Y here to enable the debug mode for the non-VHE KVM EL2 object. Failure reports will BUG() in the hypervisor. This is intended for @@ -56,6 +54,4 @@ config NVHE_EL2_DEBUG If unsure, say N. -endif # KVM - endif # VIRTUALIZATION -- cgit v1.2.3-70-g09d2 From e26bb75aa2f17fc079e6a24dff653b098e1f5d37 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 21 Sep 2021 15:22:31 -0700 Subject: KVM: arm64: Depend on HAVE_KVM instead of OF Select HAVE_KVM at all times on arm64, as the OF requirement is always there (even in the case of an ACPI system, we still depend on some of the OF infrastructure), and won't fo away. No functional change intended. Signed-off-by: Sean Christopherson Acked-by: Will Deacon [maz: Drop the "HAVE_KVM if OF" dependency, as OF is always there on arm64, new commit message] Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210921222231.518092-3-seanjc@google.com --- arch/arm64/Kconfig | 1 + arch/arm64/kvm/Kconfig | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5c7ae4c3954b..0d921d21fd35 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -186,6 +186,7 @@ config ARM64 select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_IRQ_TIME_ACCOUNTING + select HAVE_KVM select HAVE_NMI select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 1170b20d68a7..8ffcbe29395e 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -20,7 +20,7 @@ if VIRTUALIZATION menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" - depends on OF + depends on HAVE_KVM select MMU_NOTIFIER select PREEMPT_NOTIFIERS select HAVE_KVM_CPU_RELAX_INTERCEPT -- cgit v1.2.3-70-g09d2 From 00d5101b254b77c35a8d55fe46331b19192866f3 Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 11 Oct 2021 11:58:38 +0100 Subject: KVM: arm64: Return early from read_id_reg() if register is RAZ If read_id_reg() is called for an ID register which is Read-As-Zero (RAZ), it initializes the return value to zero, then goes through a list of registers which require special handling before returning the final value. By not returning as soon as it checks that the register should be RAZ, the function creates the opportunity for bugs, if, for example, a patch changes a register to RAZ (like has happened with PMSWINC_EL0 in commit 11663111cd49), but doesn't remove the special handling from read_id_reg(); or if a register is RAZ in certain situations, but readable in others. Return early to make it impossible for a RAZ register to be anything other than zero. Reviewed-by: Andrew Jones Signed-off-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211011105840.155815-2-alexandru.elisei@arm.com --- arch/arm64/kvm/sys_regs.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1d46e185f31e..4adda8bf3168 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1064,7 +1064,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, struct sys_reg_desc const *r, bool raz) { u32 id = reg_to_encoding(r); - u64 val = raz ? 0 : read_sanitised_ftr_reg(id); + u64 val; + + if (raz) + return 0; + + val = read_sanitised_ftr_reg(id); switch (id) { case SYS_ID_AA64PFR0_EL1: -- cgit v1.2.3-70-g09d2 From 5a4309762356f05df4c92629e5df15ab75c42c0d Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 11 Oct 2021 11:58:39 +0100 Subject: KVM: arm64: Use get_raz_reg() for userspace reads of PMSWINC_EL0 PMSWINC_EL0 is a write-only register and was initially part of the VCPU register state, but was later removed in commit 7a3ba3095a32 ("KVM: arm64: Remove PMSWINC_EL0 shadow register"). To prevent regressions, the register was kept accessible from userspace as Read-As-Zero (RAZ). The read function that is used to handle userspace reads of this register is get_raz_id_reg(), which, while technically correct, as it returns 0, it is not semantically correct, as PMSWINC_EL0 is not an ID register as the function name suggests. Add a new function, get_raz_reg(), to use it as the accessor for PMSWINC_EL0, as to not conflate get_raz_id_reg() to handle other types of registers. No functional change intended. Signed-off-by: Alexandru Elisei Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211011105840.155815-3-alexandru.elisei@arm.com --- arch/arm64/kvm/sys_regs.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4adda8bf3168..1be827740f87 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1285,6 +1285,15 @@ static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return __set_id_reg(vcpu, rd, uaddr, true); } +static int get_raz_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, + const struct kvm_one_reg *reg, void __user *uaddr) +{ + const u64 id = sys_reg_to_index(rd); + const u64 val = 0; + + return reg_to_user(uaddr, &val, id); +} + static int set_wi_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { @@ -1647,7 +1656,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { * previously (and pointlessly) advertised in the past... */ { PMU_SYS_REG(SYS_PMSWINC_EL0), - .get_user = get_raz_id_reg, .set_user = set_wi_reg, + .get_user = get_raz_reg, .set_user = set_wi_reg, .access = access_pmswinc, .reset = NULL }, { PMU_SYS_REG(SYS_PMSELR_EL0), .access = access_pmselr, .reset = reset_pmselr, .reg = PMSELR_EL0 }, -- cgit v1.2.3-70-g09d2 From ebf6aa8c047352fe43b1e20e580f12d5564da28e Mon Sep 17 00:00:00 2001 From: Alexandru Elisei Date: Mon, 11 Oct 2021 11:58:40 +0100 Subject: KVM: arm64: Replace get_raz_id_reg() with get_raz_reg() Reading a RAZ ID register isn't different from reading any other RAZ register, so get rid of get_raz_id_reg() and replace it with get_raz_reg(), which does the same thing, but does it without going through two layers of indirection. No functional change. Suggested-by: Andrew Jones Signed-off-by: Alexandru Elisei Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211011105840.155815-4-alexandru.elisei@arm.com --- arch/arm64/kvm/sys_regs.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1be827740f87..3aff06aafd0c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1273,12 +1273,6 @@ static int set_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, return __set_id_reg(vcpu, rd, uaddr, raz); } -static int get_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, - const struct kvm_one_reg *reg, void __user *uaddr) -{ - return __get_id_reg(vcpu, rd, uaddr, true); -} - static int set_raz_id_reg(struct kvm_vcpu *vcpu, const struct sys_reg_desc *rd, const struct kvm_one_reg *reg, void __user *uaddr) { @@ -1402,7 +1396,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, #define ID_UNALLOCATED(crm, op2) { \ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \ .access = access_raz_id_reg, \ - .get_user = get_raz_id_reg, \ + .get_user = get_raz_reg, \ .set_user = set_raz_id_reg, \ } @@ -1414,7 +1408,7 @@ static unsigned int mte_visibility(const struct kvm_vcpu *vcpu, #define ID_HIDDEN(name) { \ SYS_DESC(SYS_##name), \ .access = access_raz_id_reg, \ - .get_user = get_raz_id_reg, \ + .get_user = get_raz_reg, \ .set_user = set_raz_id_reg, \ } -- cgit v1.2.3-70-g09d2 From 7dd9b5a157485ae8c48f76f087b1867ace016613 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 15:56:26 +0100 Subject: KVM: arm64: Move __get_fault_info() and co into their own include file In order to avoid including the whole of the switching helpers in unrelated files, move the __get_fault_info() and related helpers into their own include file. Signed-off-by: Marc Zyngier Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20211010145636.1950948-2-tabba@google.com --- arch/arm64/kvm/hyp/include/hyp/fault.h | 75 +++++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/include/hyp/switch.h | 61 +-------------------------- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 2 +- 3 files changed, 77 insertions(+), 61 deletions(-) create mode 100644 arch/arm64/kvm/hyp/include/hyp/fault.h (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/include/hyp/fault.h b/arch/arm64/kvm/hyp/include/hyp/fault.h new file mode 100644 index 000000000000..1b8a2dcd712f --- /dev/null +++ b/arch/arm64/kvm/hyp/include/hyp/fault.h @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 - ARM Ltd + * Author: Marc Zyngier + */ + +#ifndef __ARM64_KVM_HYP_FAULT_H__ +#define __ARM64_KVM_HYP_FAULT_H__ + +#include +#include +#include +#include + +static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) +{ + u64 par, tmp; + + /* + * Resolve the IPA the hard way using the guest VA. + * + * Stage-1 translation already validated the memory access + * rights. As such, we can use the EL1 translation regime, and + * don't have to distinguish between EL0 and EL1 access. + * + * We do need to save/restore PAR_EL1 though, as we haven't + * saved the guest context yet, and we may return early... + */ + par = read_sysreg_par(); + if (!__kvm_at("s1e1r", far)) + tmp = read_sysreg_par(); + else + tmp = SYS_PAR_EL1_F; /* back to the guest */ + write_sysreg(par, par_el1); + + if (unlikely(tmp & SYS_PAR_EL1_F)) + return false; /* Translation failed, back to guest */ + + /* Convert PAR to HPFAR format */ + *hpfar = PAR_TO_HPFAR(tmp); + return true; +} + +static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) +{ + u64 hpfar, far; + + far = read_sysreg_el2(SYS_FAR); + + /* + * The HPFAR can be invalid if the stage 2 fault did not + * happen during a stage 1 page table walk (the ESR_EL2.S1PTW + * bit is clear) and one of the two following cases are true: + * 1. The fault was due to a permission fault + * 2. The processor carries errata 834220 + * + * Therefore, for all non S1PTW faults where we either have a + * permission fault or the errata workaround is enabled, we + * resolve the IPA using the AT instruction. + */ + if (!(esr & ESR_ELx_S1PTW) && + (cpus_have_final_cap(ARM64_WORKAROUND_834220) || + (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { + if (!__translate_far_to_hpfar(far, &hpfar)) + return false; + } else { + hpfar = read_sysreg(hpfar_el2); + } + + fault->far_el2 = far; + fault->hpfar_el2 = hpfar; + return true; +} + +#endif diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index a0e78a6027be..54abc8298ec3 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -8,6 +8,7 @@ #define __ARM64_KVM_HYP_SWITCH_H__ #include +#include #include #include @@ -133,66 +134,6 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) } } -static inline bool __translate_far_to_hpfar(u64 far, u64 *hpfar) -{ - u64 par, tmp; - - /* - * Resolve the IPA the hard way using the guest VA. - * - * Stage-1 translation already validated the memory access - * rights. As such, we can use the EL1 translation regime, and - * don't have to distinguish between EL0 and EL1 access. - * - * We do need to save/restore PAR_EL1 though, as we haven't - * saved the guest context yet, and we may return early... - */ - par = read_sysreg_par(); - if (!__kvm_at("s1e1r", far)) - tmp = read_sysreg_par(); - else - tmp = SYS_PAR_EL1_F; /* back to the guest */ - write_sysreg(par, par_el1); - - if (unlikely(tmp & SYS_PAR_EL1_F)) - return false; /* Translation failed, back to guest */ - - /* Convert PAR to HPFAR format */ - *hpfar = PAR_TO_HPFAR(tmp); - return true; -} - -static inline bool __get_fault_info(u64 esr, struct kvm_vcpu_fault_info *fault) -{ - u64 hpfar, far; - - far = read_sysreg_el2(SYS_FAR); - - /* - * The HPFAR can be invalid if the stage 2 fault did not - * happen during a stage 1 page table walk (the ESR_EL2.S1PTW - * bit is clear) and one of the two following cases are true: - * 1. The fault was due to a permission fault - * 2. The processor carries errata 834220 - * - * Therefore, for all non S1PTW faults where we either have a - * permission fault or the errata workaround is enabled, we - * resolve the IPA using the AT instruction. - */ - if (!(esr & ESR_ELx_S1PTW) && - (cpus_have_final_cap(ARM64_WORKAROUND_834220) || - (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) { - if (!__translate_far_to_hpfar(far, &hpfar)) - return false; - } else { - hpfar = read_sysreg(hpfar_el2); - } - - fault->far_el2 = far; - fault->hpfar_el2 = hpfar; - return true; -} - static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) { u8 ec; diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index bacd493a4eac..2a07d63b8498 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include -- cgit v1.2.3-70-g09d2 From cc1e6fdfa92b82902883b70dafa729d3bd427b80 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 15:56:27 +0100 Subject: KVM: arm64: Don't include switch.h into nvhe/kvm-main.c hyp-main.c includes switch.h while it only requires adjust-pc.h. Fix it to remove an unnecessary dependency. Signed-off-by: Marc Zyngier Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20211010145636.1950948-3-tabba@google.com --- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 2da6aa8da868..8ca1104f4774 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -4,7 +4,7 @@ * Author: Andrew Scull */ -#include +#include #include #include -- cgit v1.2.3-70-g09d2 From 8fb2046180a0ad347f2e5bcae760dca67e65aa73 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 15:56:28 +0100 Subject: KVM: arm64: Move early handlers to per-EC handlers Simplify the early exception handling by slicing the gigantic decoding tree into a more manageable set of functions, similar to what we have in handle_exit.c. This will also make the structure reusable for pKVM's own early exit handling. Signed-off-by: Marc Zyngier Signed-off-by: Fuad Tabba Link: https://lore.kernel.org/r/20211010145636.1950948-4-tabba@google.com --- arch/arm64/kvm/hyp/include/hyp/switch.h | 160 ++++++++++++++++++-------------- arch/arm64/kvm/hyp/nvhe/switch.c | 16 ++++ arch/arm64/kvm/hyp/vhe/switch.c | 16 ++++ 3 files changed, 124 insertions(+), 68 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 54abc8298ec3..1e4177322be7 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -136,16 +136,7 @@ static inline void ___deactivate_traps(struct kvm_vcpu *vcpu) static inline bool __populate_fault_info(struct kvm_vcpu *vcpu) { - u8 ec; - u64 esr; - - esr = vcpu->arch.fault.esr_el2; - ec = ESR_ELx_EC(esr); - - if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW) - return true; - - return __get_fault_info(esr, &vcpu->arch.fault); + return __get_fault_info(vcpu->arch.fault.esr_el2, &vcpu->arch.fault); } static inline void __hyp_sve_save_host(struct kvm_vcpu *vcpu) @@ -166,8 +157,13 @@ static inline void __hyp_sve_restore_guest(struct kvm_vcpu *vcpu) write_sysreg_el1(__vcpu_sys_reg(vcpu, ZCR_EL1), SYS_ZCR); } -/* Check for an FPSIMD/SVE trap and handle as appropriate */ -static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) +/* + * We trap the first access to the FP/SIMD to save the host context and + * restore the guest context lazily. + * If FP/SIMD is not implemented, handle the trap and inject an undefined + * instruction exception to the guest. Similarly for trapped SVE accesses. + */ +static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) { bool sve_guest, sve_host; u8 esr_ec; @@ -185,9 +181,6 @@ static inline bool __hyp_handle_fpsimd(struct kvm_vcpu *vcpu) } esr_ec = kvm_vcpu_trap_get_class(vcpu); - if (esr_ec != ESR_ELx_EC_FP_ASIMD && - esr_ec != ESR_ELx_EC_SVE) - return false; /* Don't handle SVE traps for non-SVE vcpus here: */ if (!sve_guest && esr_ec != ESR_ELx_EC_FP_ASIMD) @@ -325,7 +318,7 @@ static inline bool esr_is_ptrauth_trap(u32 esr) DECLARE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); -static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) +static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) { struct kvm_cpu_context *ctxt; u64 val; @@ -350,6 +343,87 @@ static inline bool __hyp_handle_ptrauth(struct kvm_vcpu *vcpu) return true; } +static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && + handle_tx2_tvm(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + __vgic_v3_perform_cpuif_access(vcpu) == 1) + return true; + + return false; +} + +static bool kvm_hyp_handle_cp15_32(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + __vgic_v3_perform_cpuif_access(vcpu) == 1) + return true; + + return false; +} + +static bool kvm_hyp_handle_iabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (!__populate_fault_info(vcpu)) + return true; + + return false; +} + +static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (!__populate_fault_info(vcpu)) + return true; + + if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { + bool valid; + + valid = kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + kvm_vcpu_dabt_isvalid(vcpu) && + !kvm_vcpu_abt_issea(vcpu) && + !kvm_vcpu_abt_iss1tw(vcpu); + + if (valid) { + int ret = __vgic_v2_perform_cpuif_access(vcpu); + + if (ret == 1) + return true; + + /* Promote an illegal access to an SError.*/ + if (ret == -1) + *exit_code = ARM_EXCEPTION_EL1_SERROR; + } + } + + return false; +} + +typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); + +static const exit_handler_fn *kvm_get_exit_handler_array(void); + +/* + * Allow the hypervisor to handle the exit with an exit handler if it has one. + * + * Returns true if the hypervisor handled the exit, and control should go back + * to the guest, or false if it hasn't. + */ +static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + const exit_handler_fn *handlers = kvm_get_exit_handler_array(); + exit_handler_fn fn; + + fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; + + if (fn) + return fn(vcpu, exit_code); + + return false; +} + /* * Return true when we were able to fixup the guest exit and should return to * the guest, false when we should restore the host state and return to the @@ -384,59 +458,9 @@ static inline bool fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) if (*exit_code != ARM_EXCEPTION_TRAP) goto exit; - if (cpus_have_final_cap(ARM64_WORKAROUND_CAVIUM_TX2_219_TVM) && - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 && - handle_tx2_tvm(vcpu)) + /* Check if there's an exit handler and allow it to handle the exit. */ + if (kvm_hyp_handle_exit(vcpu, exit_code)) goto guest; - - /* - * We trap the first access to the FP/SIMD to save the host context - * and restore the guest context lazily. - * If FP/SIMD is not implemented, handle the trap and inject an - * undefined instruction exception to the guest. - * Similarly for trapped SVE accesses. - */ - if (__hyp_handle_fpsimd(vcpu)) - goto guest; - - if (__hyp_handle_ptrauth(vcpu)) - goto guest; - - if (!__populate_fault_info(vcpu)) - goto guest; - - if (static_branch_unlikely(&vgic_v2_cpuif_trap)) { - bool valid; - - valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && - kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && - kvm_vcpu_dabt_isvalid(vcpu) && - !kvm_vcpu_abt_issea(vcpu) && - !kvm_vcpu_abt_iss1tw(vcpu); - - if (valid) { - int ret = __vgic_v2_perform_cpuif_access(vcpu); - - if (ret == 1) - goto guest; - - /* Promote an illegal access to an SError.*/ - if (ret == -1) - *exit_code = ARM_EXCEPTION_EL1_SERROR; - - goto exit; - } - } - - if (static_branch_unlikely(&vgic_v3_cpuif_trap) && - (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || - kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { - int ret = __vgic_v3_perform_cpuif_access(vcpu); - - if (ret == 1) - goto guest; - } - exit: /* Return to the host kernel and handle the exit */ return false; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index a34b01cc8ab9..4f3992a1aabd 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -158,6 +158,22 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) write_sysreg(pmu->events_host, pmcntenset_el0); } +static const exit_handler_fn hyp_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, + [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg, + [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, + [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, +}; + +static const exit_handler_fn *kvm_get_exit_handler_array(void) +{ + return hyp_exit_handlers; +} + /* Switch to the guest for legacy non-VHE systems */ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) { diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index ded2c66675f0..9aedc8afc8b9 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -96,6 +96,22 @@ void deactivate_traps_vhe_put(struct kvm_vcpu *vcpu) __deactivate_traps_common(vcpu); } +static const exit_handler_fn hyp_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, + [ESR_ELx_EC_SYS64] = kvm_hyp_handle_sysreg, + [ESR_ELx_EC_SVE] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_FP_ASIMD] = kvm_hyp_handle_fpsimd, + [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, + [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, +}; + +static const exit_handler_fn *kvm_get_exit_handler_array(void) +{ + return hyp_exit_handlers; +} + /* Switch to the guest for VHE systems running in EL2 */ static int __kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { -- cgit v1.2.3-70-g09d2 From 3b1a690eda0dc1891e8fc93991b122bff6fabf8c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:29 +0100 Subject: KVM: arm64: Pass struct kvm to per-EC handlers We need struct kvm to check for protected VMs to be able to pick the right handlers for them in subsequent patches. Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-5-tabba@google.com --- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- arch/arm64/kvm/hyp/nvhe/switch.c | 2 +- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 1e4177322be7..481399bf9b94 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -403,7 +403,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); -static const exit_handler_fn *kvm_get_exit_handler_array(void); +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm); /* * Allow the hypervisor to handle the exit with an exit handler if it has one. @@ -413,7 +413,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(void); */ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { - const exit_handler_fn *handlers = kvm_get_exit_handler_array(); + const exit_handler_fn *handlers = kvm_get_exit_handler_array(kern_hyp_va(vcpu->kvm)); exit_handler_fn fn; fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 4f3992a1aabd..8c9a0464be00 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -169,7 +169,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; -static const exit_handler_fn *kvm_get_exit_handler_array(void) +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) { return hyp_exit_handlers; } diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 9aedc8afc8b9..f6fb97accf65 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -107,7 +107,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; -static const exit_handler_fn *kvm_get_exit_handler_array(void) +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) { return hyp_exit_handlers; } -- cgit v1.2.3-70-g09d2 From 53868390778270f2890621f4498a53587719a3ff Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:30 +0100 Subject: KVM: arm64: Add missing field descriptor for MDCR_EL2 It's not currently used. Added for completeness. No functional change intended. Suggested-by: Marc Zyngier Signed-off-by: Fuad Tabba Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-6-tabba@google.com --- arch/arm64/include/asm/kvm_arm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 327120c0089f..a39fcf318c77 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -295,6 +295,7 @@ #define MDCR_EL2_HPMFZO (UL(1) << 29) #define MDCR_EL2_MTPME (UL(1) << 28) #define MDCR_EL2_TDCC (UL(1) << 27) +#define MDCR_EL2_HLP (UL(1) << 26) #define MDCR_EL2_HCCD (UL(1) << 23) #define MDCR_EL2_TTRF (UL(1) << 19) #define MDCR_EL2_HPMD (UL(1) << 17) -- cgit v1.2.3-70-g09d2 From 16dd1fbb12f72effcd3539561c2a94aed3ab6581 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:31 +0100 Subject: KVM: arm64: Simplify masking out MTE in feature id reg Simplify code for hiding MTE support in feature id register when MTE is not enabled/supported by KVM. No functional change intended. Signed-off-by: Fuad Tabba Reviewed-by: Andrew Jones Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-7-tabba@google.com --- arch/arm64/kvm/sys_regs.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1d46e185f31e..447acce9ca84 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1077,14 +1077,8 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); break; case SYS_ID_AA64PFR1_EL1: - val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); - if (kvm_has_mte(vcpu->kvm)) { - u64 pfr, mte; - - pfr = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); - mte = cpuid_feature_extract_unsigned_field(pfr, ID_AA64PFR1_MTE_SHIFT); - val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), mte); - } + if (!kvm_has_mte(vcpu->kvm)) + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); break; case SYS_ID_AA64ISAR1_EL1: if (!vcpu_has_ptrauth(vcpu)) -- cgit v1.2.3-70-g09d2 From 6c30bfb18d0b7d09593f204c936493cfcd153956 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:32 +0100 Subject: KVM: arm64: Add handlers for protected VM System Registers Add system register handlers for protected VMs. These cover Sys64 registers (including feature id registers), and debug. No functional change intended as these are not hooked in yet to the guest exit handlers introduced earlier. So when trapping is triggered, the exit handlers let the host handle it, as before. Reviewed-by: Andrew Jones Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-8-tabba@google.com --- arch/arm64/include/asm/kvm_fixed_config.h | 195 +++++++++++ arch/arm64/include/asm/kvm_hyp.h | 5 + arch/arm64/kvm/arm.c | 5 + arch/arm64/kvm/hyp/include/nvhe/sys_regs.h | 29 ++ arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/setup.c | 3 + arch/arm64/kvm/hyp/nvhe/switch.c | 1 + arch/arm64/kvm/hyp/nvhe/sys_regs.c | 498 +++++++++++++++++++++++++++++ 8 files changed, 737 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/include/asm/kvm_fixed_config.h create mode 100644 arch/arm64/kvm/hyp/include/nvhe/sys_regs.h create mode 100644 arch/arm64/kvm/hyp/nvhe/sys_regs.c (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/kvm_fixed_config.h b/arch/arm64/include/asm/kvm_fixed_config.h new file mode 100644 index 000000000000..0ed06923f7e9 --- /dev/null +++ b/arch/arm64/include/asm/kvm_fixed_config.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#ifndef __ARM64_KVM_FIXED_CONFIG_H__ +#define __ARM64_KVM_FIXED_CONFIG_H__ + +#include + +/* + * This file contains definitions for features to be allowed or restricted for + * guest virtual machines, depending on the mode KVM is running in and on the + * type of guest that is running. + * + * The ALLOW masks represent a bitmask of feature fields that are allowed + * without any restrictions as long as they are supported by the system. + * + * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for + * features that are restricted to support at most the specified feature. + * + * If a feature field is not present in either, than it is not supported. + * + * The approach taken for protected VMs is to allow features that are: + * - Needed by common Linux distributions (e.g., floating point) + * - Trivial to support, e.g., supporting the feature does not introduce or + * require tracking of additional state in KVM + * - Cannot be trapped or prevent the guest from using anyway + */ + +/* + * Allow for protected VMs: + * - Floating-point and Advanced SIMD + * - Data Independent Timing + */ +#define PVM_ID_AA64PFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - AArch64 guests only (no support for AArch32 guests): + * AArch32 adds complexity in trap handling, emulation, condition codes, + * etc... + * - RAS (v1) + * Supported by KVM + */ +#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \ + ) + +/* + * Allow for protected VMs: + * - Branch Target Identification + * - Speculative Store Bypassing + */ +#define PVM_ID_AA64PFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \ + ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \ + ) + +/* + * Allow for protected VMs: + * - Mixed-endian + * - Distinction between Secure and Non-secure Memory + * - Mixed-endian at EL0 only + * - Non-context synchronizing exception entry and exit + */ +#define PVM_ID_AA64MMFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - 40-bit IPA + * - 16-bit ASID + */ +#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \ + ) + +/* + * Allow for protected VMs: + * - Hardware translation table updates to Access flag and Dirty state + * - Number of VMID bits from CPU + * - Hierarchical Permission Disables + * - Privileged Access Never + * - SError interrupt exceptions from speculative reads + * - Enhanced Translation Synchronization + */ +#define PVM_ID_AA64MMFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \ + ) + +/* + * Allow for protected VMs: + * - Common not Private translations + * - User Access Override + * - IESB bit in the SCTLR_ELx registers + * - Unaligned single-copy atomicity and atomic functions + * - ESR_ELx.EC value on an exception by read access to feature ID space + * - TTL field in address operations. + * - Break-before-make sequences when changing translation block size + * - E0PDx mechanism + */ +#define PVM_ID_AA64MMFR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \ + ) + +/* + * No support for Scalable Vectors for protected VMs: + * Requires additional support from KVM, e.g., context-switching and + * trapping at EL2 + */ +#define PVM_ID_AA64ZFR0_ALLOW (0ULL) + +/* + * No support for debug, including breakpoints, and watchpoints for protected + * VMs: + * The Arm architecture mandates support for at least the Armv8 debug + * architecture, which would include at least 2 hardware breakpoints and + * watchpoints. Providing that support to protected guests adds + * considerable state and complexity. Therefore, the reserved value of 0 is + * used for debug-related fields. + */ +#define PVM_ID_AA64DFR0_ALLOW (0ULL) +#define PVM_ID_AA64DFR1_ALLOW (0ULL) + +/* + * No support for implementation defined features. + */ +#define PVM_ID_AA64AFR0_ALLOW (0ULL) +#define PVM_ID_AA64AFR1_ALLOW (0ULL) + +/* + * No restrictions on instructions implemented in AArch64. + */ +#define PVM_ID_AA64ISAR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \ + ) + +#define PVM_ID_AA64ISAR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ + ) + +#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 657d0c94cf82..5afd14ab15b9 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -115,7 +115,12 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, void __noreturn __host_enter(struct kvm_cpu_context *host_ctxt); #endif +extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe102cd2e518..6aa7b0c5bf21 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1802,8 +1802,13 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) void *addr = phys_to_virt(hyp_mem_base); int ret; + kvm_nvhe_sym(id_aa64pfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); + kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); + kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); ret = create_hyp_mappings(addr, addr + hyp_mem_size, PAGE_HYP); if (ret) diff --git a/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h b/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h new file mode 100644 index 000000000000..3288128738aa --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#ifndef __ARM64_KVM_NVHE_SYS_REGS_H__ +#define __ARM64_KVM_NVHE_SYS_REGS_H__ + +#include + +u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu); +u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu); + +bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); +int kvm_check_pvm_sysreg_table(void); +void inject_undef64(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_NVHE_SYS_REGS_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 8d741f71377f..0bbe37a18d5d 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \ - cache.o setup.o mm.o mem_protect.o + cache.o setup.o mm.o mem_protect.o sys_regs.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index 57c27846320f..c85ff64e63f2 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include struct hyp_pool hpool; @@ -260,6 +261,8 @@ int __pkvm_init(phys_addr_t phys, unsigned long size, unsigned long nr_cpus, void (*fn)(phys_addr_t params_pa, void *finalize_fn_va); int ret; + BUG_ON(kvm_check_pvm_sysreg_table()); + if (!PAGE_ALIGNED(phys) || !PAGE_ALIGNED(size)) return -EINVAL; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 8c9a0464be00..17d1a9512507 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -28,6 +28,7 @@ #include #include +#include /* Non-VHE specific context */ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c new file mode 100644 index 000000000000..6e3ea49af302 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -0,0 +1,498 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#include +#include +#include + +#include + +#include + +#include "../../sys_regs.h" + +/* + * Copies of the host's CPU features registers holding sanitized values at hyp. + */ +u64 id_aa64pfr0_el1_sys_val; +u64 id_aa64pfr1_el1_sys_val; +u64 id_aa64isar0_el1_sys_val; +u64 id_aa64isar1_el1_sys_val; +u64 id_aa64mmfr2_el1_sys_val; + +/* + * Inject an unknown/undefined exception to an AArch64 guest while most of its + * sysregs are live. + */ +void inject_undef64(struct kvm_vcpu *vcpu) +{ + u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); + + *vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR); + *vcpu_cpsr(vcpu) = read_sysreg_el2(SYS_SPSR); + + vcpu->arch.flags |= (KVM_ARM64_EXCEPT_AA64_EL1 | + KVM_ARM64_EXCEPT_AA64_ELx_SYNC | + KVM_ARM64_PENDING_EXCEPTION); + + __kvm_adjust_pc(vcpu); + + write_sysreg_el1(esr, SYS_ESR); + write_sysreg_el1(read_sysreg_el2(SYS_ELR), SYS_ELR); + write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR); + write_sysreg_el2(*vcpu_cpsr(vcpu), SYS_SPSR); +} + +/* + * Returns the restricted features values of the feature register based on the + * limitations in restrict_fields. + * A feature id field value of 0b0000 does not impose any restrictions. + * Note: Use only for unsigned feature field values. + */ +static u64 get_restricted_features_unsigned(u64 sys_reg_val, + u64 restrict_fields) +{ + u64 value = 0UL; + u64 mask = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0); + + /* + * According to the Arm Architecture Reference Manual, feature fields + * use increasing values to indicate increases in functionality. + * Iterate over the restricted feature fields and calculate the minimum + * unsigned value between the one supported by the system, and what the + * value is being restricted to. + */ + while (sys_reg_val && restrict_fields) { + value |= min(sys_reg_val & mask, restrict_fields & mask); + sys_reg_val &= ~mask; + restrict_fields &= ~mask; + mask <<= ARM64_FEATURE_FIELD_BITS; + } + + return value; +} + +/* + * Functions that return the value of feature id registers for protected VMs + * based on allowed features, system features, and KVM support. + */ + +u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) +{ + const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); + u64 set_mask = 0; + u64 allow_mask = PVM_ID_AA64PFR0_ALLOW; + + if (!vcpu_has_sve(vcpu)) + allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_SVE); + + set_mask |= get_restricted_features_unsigned(id_aa64pfr0_el1_sys_val, + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED); + + /* Spectre and Meltdown mitigation in KVM */ + set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), + (u64)kvm->arch.pfr0_csv2); + set_mask |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), + (u64)kvm->arch.pfr0_csv3); + + return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask; +} + +u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) +{ + const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); + u64 allow_mask = PVM_ID_AA64PFR1_ALLOW; + + if (!kvm_has_mte(kvm)) + allow_mask &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); + + return id_aa64pfr1_el1_sys_val & allow_mask; +} + +u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) +{ + /* + * No support for Scalable Vectors, therefore, hyp has no sanitized + * copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64ZFR0_ALLOW != 0ULL); + return 0; +} + +u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) +{ + /* + * No support for debug, including breakpoints, and watchpoints, + * therefore, pKVM has no sanitized copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64DFR0_ALLOW != 0ULL); + return 0; +} + +u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) +{ + /* + * No support for debug, therefore, hyp has no sanitized copy of the + * feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64DFR1_ALLOW != 0ULL); + return 0; +} + +u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) +{ + /* + * No support for implementation defined features, therefore, hyp has no + * sanitized copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64AFR0_ALLOW != 0ULL); + return 0; +} + +u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) +{ + /* + * No support for implementation defined features, therefore, hyp has no + * sanitized copy of the feature id register. + */ + BUILD_BUG_ON(PVM_ID_AA64AFR1_ALLOW != 0ULL); + return 0; +} + +u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu) +{ + return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW; +} + +u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) +{ + u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW; + + if (!vcpu_has_ptrauth(vcpu)) + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); + + return id_aa64isar1_el1_sys_val & allow_mask; +} + +u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) +{ + u64 set_mask; + + set_mask = get_restricted_features_unsigned(id_aa64mmfr0_el1_sys_val, + PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED); + + return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask; +} + +u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu) +{ + return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW; +} + +u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu) +{ + return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW; +} + +/* Read a sanitized cpufeature ID register by its sys_reg_desc. */ +static u64 read_id_reg(const struct kvm_vcpu *vcpu, + struct sys_reg_desc const *r) +{ + u32 id = reg_to_encoding(r); + + switch (id) { + case SYS_ID_AA64PFR0_EL1: + return get_pvm_id_aa64pfr0(vcpu); + case SYS_ID_AA64PFR1_EL1: + return get_pvm_id_aa64pfr1(vcpu); + case SYS_ID_AA64ZFR0_EL1: + return get_pvm_id_aa64zfr0(vcpu); + case SYS_ID_AA64DFR0_EL1: + return get_pvm_id_aa64dfr0(vcpu); + case SYS_ID_AA64DFR1_EL1: + return get_pvm_id_aa64dfr1(vcpu); + case SYS_ID_AA64AFR0_EL1: + return get_pvm_id_aa64afr0(vcpu); + case SYS_ID_AA64AFR1_EL1: + return get_pvm_id_aa64afr1(vcpu); + case SYS_ID_AA64ISAR0_EL1: + return get_pvm_id_aa64isar0(vcpu); + case SYS_ID_AA64ISAR1_EL1: + return get_pvm_id_aa64isar1(vcpu); + case SYS_ID_AA64MMFR0_EL1: + return get_pvm_id_aa64mmfr0(vcpu); + case SYS_ID_AA64MMFR1_EL1: + return get_pvm_id_aa64mmfr1(vcpu); + case SYS_ID_AA64MMFR2_EL1: + return get_pvm_id_aa64mmfr2(vcpu); + default: + /* + * Should never happen because all cases are covered in + * pvm_sys_reg_descs[]. + */ + WARN_ON(1); + break; + } + + return 0; +} + +/* + * Accessor for AArch32 feature id registers. + * + * The value of these registers is "unknown" according to the spec if AArch32 + * isn't supported. + */ +static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + inject_undef64(vcpu); + return false; + } + + /* + * No support for AArch32 guests, therefore, pKVM has no sanitized copy + * of AArch32 feature id registers. + */ + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY); + + /* Use 0 for architecturally "unknown" values. */ + p->regval = 0; + return true; +} + +/* + * Accessor for AArch64 feature id registers. + * + * If access is allowed, set the regval to the protected VM's view of the + * register and return true. + * Otherwise, inject an undefined exception and return false. + */ +static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (p->is_write) { + inject_undef64(vcpu); + return false; + } + + p->regval = read_id_reg(vcpu, r); + return true; +} + +/* Mark the specified system register as an AArch32 feature id register. */ +#define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 } + +/* Mark the specified system register as an AArch64 feature id register. */ +#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 } + +/* Mark the specified system register as not being handled in hyp. */ +#define HOST_HANDLED(REG) { SYS_DESC(REG), .access = NULL } + +/* + * Architected system registers. + * Important: Must be sorted ascending by Op0, Op1, CRn, CRm, Op2 + * + * NOTE: Anything not explicitly listed here is *restricted by default*, i.e., + * it will lead to injecting an exception into the guest. + */ +static const struct sys_reg_desc pvm_sys_reg_descs[] = { + /* Cache maintenance by set/way operations are restricted. */ + + /* Debug and Trace Registers are restricted. */ + + /* AArch64 mappings of the AArch32 ID registers */ + /* CRm=1 */ + AARCH32(SYS_ID_PFR0_EL1), + AARCH32(SYS_ID_PFR1_EL1), + AARCH32(SYS_ID_DFR0_EL1), + AARCH32(SYS_ID_AFR0_EL1), + AARCH32(SYS_ID_MMFR0_EL1), + AARCH32(SYS_ID_MMFR1_EL1), + AARCH32(SYS_ID_MMFR2_EL1), + AARCH32(SYS_ID_MMFR3_EL1), + + /* CRm=2 */ + AARCH32(SYS_ID_ISAR0_EL1), + AARCH32(SYS_ID_ISAR1_EL1), + AARCH32(SYS_ID_ISAR2_EL1), + AARCH32(SYS_ID_ISAR3_EL1), + AARCH32(SYS_ID_ISAR4_EL1), + AARCH32(SYS_ID_ISAR5_EL1), + AARCH32(SYS_ID_MMFR4_EL1), + AARCH32(SYS_ID_ISAR6_EL1), + + /* CRm=3 */ + AARCH32(SYS_MVFR0_EL1), + AARCH32(SYS_MVFR1_EL1), + AARCH32(SYS_MVFR2_EL1), + AARCH32(SYS_ID_PFR2_EL1), + AARCH32(SYS_ID_DFR1_EL1), + AARCH32(SYS_ID_MMFR5_EL1), + + /* AArch64 ID registers */ + /* CRm=4 */ + AARCH64(SYS_ID_AA64PFR0_EL1), + AARCH64(SYS_ID_AA64PFR1_EL1), + AARCH64(SYS_ID_AA64ZFR0_EL1), + AARCH64(SYS_ID_AA64DFR0_EL1), + AARCH64(SYS_ID_AA64DFR1_EL1), + AARCH64(SYS_ID_AA64AFR0_EL1), + AARCH64(SYS_ID_AA64AFR1_EL1), + AARCH64(SYS_ID_AA64ISAR0_EL1), + AARCH64(SYS_ID_AA64ISAR1_EL1), + AARCH64(SYS_ID_AA64MMFR0_EL1), + AARCH64(SYS_ID_AA64MMFR1_EL1), + AARCH64(SYS_ID_AA64MMFR2_EL1), + + HOST_HANDLED(SYS_SCTLR_EL1), + HOST_HANDLED(SYS_ACTLR_EL1), + HOST_HANDLED(SYS_CPACR_EL1), + + HOST_HANDLED(SYS_RGSR_EL1), + HOST_HANDLED(SYS_GCR_EL1), + + /* Scalable Vector Registers are restricted. */ + + HOST_HANDLED(SYS_TTBR0_EL1), + HOST_HANDLED(SYS_TTBR1_EL1), + HOST_HANDLED(SYS_TCR_EL1), + + HOST_HANDLED(SYS_APIAKEYLO_EL1), + HOST_HANDLED(SYS_APIAKEYHI_EL1), + HOST_HANDLED(SYS_APIBKEYLO_EL1), + HOST_HANDLED(SYS_APIBKEYHI_EL1), + HOST_HANDLED(SYS_APDAKEYLO_EL1), + HOST_HANDLED(SYS_APDAKEYHI_EL1), + HOST_HANDLED(SYS_APDBKEYLO_EL1), + HOST_HANDLED(SYS_APDBKEYHI_EL1), + HOST_HANDLED(SYS_APGAKEYLO_EL1), + HOST_HANDLED(SYS_APGAKEYHI_EL1), + + HOST_HANDLED(SYS_AFSR0_EL1), + HOST_HANDLED(SYS_AFSR1_EL1), + HOST_HANDLED(SYS_ESR_EL1), + + HOST_HANDLED(SYS_ERRIDR_EL1), + HOST_HANDLED(SYS_ERRSELR_EL1), + HOST_HANDLED(SYS_ERXFR_EL1), + HOST_HANDLED(SYS_ERXCTLR_EL1), + HOST_HANDLED(SYS_ERXSTATUS_EL1), + HOST_HANDLED(SYS_ERXADDR_EL1), + HOST_HANDLED(SYS_ERXMISC0_EL1), + HOST_HANDLED(SYS_ERXMISC1_EL1), + + HOST_HANDLED(SYS_TFSR_EL1), + HOST_HANDLED(SYS_TFSRE0_EL1), + + HOST_HANDLED(SYS_FAR_EL1), + HOST_HANDLED(SYS_PAR_EL1), + + /* Performance Monitoring Registers are restricted. */ + + HOST_HANDLED(SYS_MAIR_EL1), + HOST_HANDLED(SYS_AMAIR_EL1), + + /* Limited Ordering Regions Registers are restricted. */ + + HOST_HANDLED(SYS_VBAR_EL1), + HOST_HANDLED(SYS_DISR_EL1), + + /* GIC CPU Interface registers are restricted. */ + + HOST_HANDLED(SYS_CONTEXTIDR_EL1), + HOST_HANDLED(SYS_TPIDR_EL1), + + HOST_HANDLED(SYS_SCXTNUM_EL1), + + HOST_HANDLED(SYS_CNTKCTL_EL1), + + HOST_HANDLED(SYS_CCSIDR_EL1), + HOST_HANDLED(SYS_CLIDR_EL1), + HOST_HANDLED(SYS_CSSELR_EL1), + HOST_HANDLED(SYS_CTR_EL0), + + /* Performance Monitoring Registers are restricted. */ + + HOST_HANDLED(SYS_TPIDR_EL0), + HOST_HANDLED(SYS_TPIDRRO_EL0), + + HOST_HANDLED(SYS_SCXTNUM_EL0), + + /* Activity Monitoring Registers are restricted. */ + + HOST_HANDLED(SYS_CNTP_TVAL_EL0), + HOST_HANDLED(SYS_CNTP_CTL_EL0), + HOST_HANDLED(SYS_CNTP_CVAL_EL0), + + /* Performance Monitoring Registers are restricted. */ + + HOST_HANDLED(SYS_DACR32_EL2), + HOST_HANDLED(SYS_IFSR32_EL2), + HOST_HANDLED(SYS_FPEXC32_EL2), +}; + +/* + * Checks that the sysreg table is unique and in-order. + * + * Returns 0 if the table is consistent, or 1 otherwise. + */ +int kvm_check_pvm_sysreg_table(void) +{ + unsigned int i; + + for (i = 1; i < ARRAY_SIZE(pvm_sys_reg_descs); i++) { + if (cmp_sys_reg(&pvm_sys_reg_descs[i-1], &pvm_sys_reg_descs[i]) >= 0) + return 1; + } + + return 0; +} + +/* + * Handler for protected VM MSR, MRS or System instruction execution. + * + * Returns true if the hypervisor has handled the exit, and control should go + * back to the guest, or false if it hasn't, to be handled by the host. + */ +bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + const struct sys_reg_desc *r; + struct sys_reg_params params; + unsigned long esr = kvm_vcpu_get_esr(vcpu); + int Rt = kvm_vcpu_sys_get_rt(vcpu); + + params = esr_sys64_to_params(esr); + params.regval = vcpu_get_reg(vcpu, Rt); + + r = find_reg(¶ms, pvm_sys_reg_descs, ARRAY_SIZE(pvm_sys_reg_descs)); + + /* Undefined (RESTRICTED). */ + if (r == NULL) { + inject_undef64(vcpu); + return true; + } + + /* Handled by the host (HOST_HANDLED) */ + if (r->access == NULL) + return false; + + /* Handled by hyp: skip instruction if instructed to do so. */ + if (r->access(vcpu, ¶ms, r)) + __kvm_skip_instr(vcpu); + + if (!params.is_write) + vcpu_set_reg(vcpu, Rt, params.regval); + + return true; +} -- cgit v1.2.3-70-g09d2 From 2a0c343386ae1a6826e1b9d751bfc14f4711c2de Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:33 +0100 Subject: KVM: arm64: Initialize trap registers for protected VMs Protected VMs have more restricted features that need to be trapped. Moreover, the host should not be trusted to set the appropriate trapping registers and their values. Initialize the trapping registers, i.e., hcr_el2, mdcr_el2, and cptr_el2 at EL2 for protected guests, based on the values of the guest's feature id registers. No functional change intended as trap handlers introduced in the previous patch are still not hooked in to the guest exit handlers. Reviewed-by: Andrew Jones Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-9-tabba@google.com --- arch/arm64/include/asm/kvm_asm.h | 1 + arch/arm64/include/asm/kvm_host.h | 2 + arch/arm64/kvm/arm.c | 8 ++ arch/arm64/kvm/hyp/include/nvhe/trap_handler.h | 2 + arch/arm64/kvm/hyp/nvhe/Makefile | 2 +- arch/arm64/kvm/hyp/nvhe/hyp-main.c | 9 ++ arch/arm64/kvm/hyp/nvhe/pkvm.c | 186 +++++++++++++++++++++++++ 7 files changed, 209 insertions(+), 1 deletion(-) create mode 100644 arch/arm64/kvm/hyp/nvhe/pkvm.c (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index e86045ac43ba..a460e1243cef 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -64,6 +64,7 @@ #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector 18 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize 19 #define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc 20 +#define __KVM_HOST_SMCCC_FUNC___pkvm_vcpu_init_traps 21 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f8be56d5342b..4a323aa27a6b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -780,6 +780,8 @@ static inline bool kvm_vm_is_protected(struct kvm *kvm) return false; } +void kvm_init_protected_traps(struct kvm_vcpu *vcpu); + int kvm_arm_vcpu_finalize(struct kvm_vcpu *vcpu, int feature); bool kvm_arm_vcpu_is_finalized(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 6aa7b0c5bf21..3af6d59d1919 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -620,6 +620,14 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) ret = kvm_arm_pmu_v3_enable(vcpu); + /* + * Initialize traps for protected VMs. + * NOTE: Move to run in EL2 directly, rather than via a hypercall, once + * the code is in place for first run initialization at EL2. + */ + if (kvm_vm_is_protected(kvm)) + kvm_call_hyp_nvhe(__pkvm_vcpu_init_traps, vcpu); + return ret; } diff --git a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h index 1e6d995968a1..45a84f0ade04 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h +++ b/arch/arm64/kvm/hyp/include/nvhe/trap_handler.h @@ -15,4 +15,6 @@ #define DECLARE_REG(type, name, ctxt, reg) \ type name = (type)cpu_reg(ctxt, (reg)) +void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu); + #endif /* __ARM64_KVM_NVHE_TRAP_HANDLER_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 0bbe37a18d5d..c3c11974fa3b 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -14,7 +14,7 @@ lib-objs := $(addprefix ../../../lib/, $(lib-objs)) obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o \ hyp-main.o hyp-smp.o psci-relay.o early_alloc.o stub.o page_alloc.o \ - cache.o setup.o mm.o mem_protect.o sys_regs.o + cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o obj-y += $(lib-objs) diff --git a/arch/arm64/kvm/hyp/nvhe/hyp-main.c b/arch/arm64/kvm/hyp/nvhe/hyp-main.c index 8ca1104f4774..a6303db09cd6 100644 --- a/arch/arm64/kvm/hyp/nvhe/hyp-main.c +++ b/arch/arm64/kvm/hyp/nvhe/hyp-main.c @@ -160,6 +160,14 @@ static void handle___pkvm_prot_finalize(struct kvm_cpu_context *host_ctxt) { cpu_reg(host_ctxt, 1) = __pkvm_prot_finalize(); } + +static void handle___pkvm_vcpu_init_traps(struct kvm_cpu_context *host_ctxt) +{ + DECLARE_REG(struct kvm_vcpu *, vcpu, host_ctxt, 1); + + __pkvm_vcpu_init_traps(kern_hyp_va(vcpu)); +} + typedef void (*hcall_t)(struct kvm_cpu_context *); #define HANDLE_FUNC(x) [__KVM_HOST_SMCCC_FUNC_##x] = (hcall_t)handle_##x @@ -185,6 +193,7 @@ static const hcall_t host_hcall[] = { HANDLE_FUNC(__pkvm_host_share_hyp), HANDLE_FUNC(__pkvm_create_private_mapping), HANDLE_FUNC(__pkvm_prot_finalize), + HANDLE_FUNC(__pkvm_vcpu_init_traps), }; static void handle_host_hcall(struct kvm_cpu_context *host_ctxt) diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c new file mode 100644 index 000000000000..633547cc1033 --- /dev/null +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#include +#include +#include +#include +#include + +/* + * Set trap register values based on features in ID_AA64PFR0. + */ +static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = get_pvm_id_aa64pfr0(vcpu); + u64 hcr_set = HCR_RW; + u64 hcr_clear = 0; + u64 cptr_set = 0; + + /* Protected KVM does not support AArch32 guests. */ + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY); + BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), + PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) != ID_AA64PFR0_ELx_64BIT_ONLY); + + /* + * Linux guests assume support for floating-point and Advanced SIMD. Do + * not change the trapping behavior for these from the KVM default. + */ + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP), + PVM_ID_AA64PFR0_ALLOW)); + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD), + PVM_ID_AA64PFR0_ALLOW)); + + /* Trap RAS unless all current versions are supported */ + if (FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), feature_ids) < + ID_AA64PFR0_RAS_V1P1) { + hcr_set |= HCR_TERR | HCR_TEA; + hcr_clear |= HCR_FIEN; + } + + /* Trap AMU */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_AMU), feature_ids)) { + hcr_clear |= HCR_AMVOFFEN; + cptr_set |= CPTR_EL2_TAM; + } + + /* Trap SVE */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_SVE), feature_ids)) + cptr_set |= CPTR_EL2_TZ; + + vcpu->arch.hcr_el2 |= hcr_set; + vcpu->arch.hcr_el2 &= ~hcr_clear; + vcpu->arch.cptr_el2 |= cptr_set; +} + +/* + * Set trap register values based on features in ID_AA64PFR1. + */ +static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = get_pvm_id_aa64pfr1(vcpu); + u64 hcr_set = 0; + u64 hcr_clear = 0; + + /* Memory Tagging: Trap and Treat as Untagged if not supported. */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR1_MTE), feature_ids)) { + hcr_set |= HCR_TID5; + hcr_clear |= HCR_DCT | HCR_ATA; + } + + vcpu->arch.hcr_el2 |= hcr_set; + vcpu->arch.hcr_el2 &= ~hcr_clear; +} + +/* + * Set trap register values based on features in ID_AA64DFR0. + */ +static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = get_pvm_id_aa64dfr0(vcpu); + u64 mdcr_set = 0; + u64 mdcr_clear = 0; + u64 cptr_set = 0; + + /* Trap/constrain PMU */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMUVER), feature_ids)) { + mdcr_set |= MDCR_EL2_TPM | MDCR_EL2_TPMCR; + mdcr_clear |= MDCR_EL2_HPME | MDCR_EL2_MTPME | + MDCR_EL2_HPMN_MASK; + } + + /* Trap Debug */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER), feature_ids)) + mdcr_set |= MDCR_EL2_TDRA | MDCR_EL2_TDA | MDCR_EL2_TDE; + + /* Trap OS Double Lock */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_DOUBLELOCK), feature_ids)) + mdcr_set |= MDCR_EL2_TDOSA; + + /* Trap SPE */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_PMSVER), feature_ids)) { + mdcr_set |= MDCR_EL2_TPMS; + mdcr_clear |= MDCR_EL2_E2PB_MASK << MDCR_EL2_E2PB_SHIFT; + } + + /* Trap Trace Filter */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACE_FILT), feature_ids)) + mdcr_set |= MDCR_EL2_TTRF; + + /* Trap Trace */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_TRACEVER), feature_ids)) + cptr_set |= CPTR_EL2_TTA; + + vcpu->arch.mdcr_el2 |= mdcr_set; + vcpu->arch.mdcr_el2 &= ~mdcr_clear; + vcpu->arch.cptr_el2 |= cptr_set; +} + +/* + * Set trap register values based on features in ID_AA64MMFR0. + */ +static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = get_pvm_id_aa64mmfr0(vcpu); + u64 mdcr_set = 0; + + /* Trap Debug Communications Channel registers */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_FGT), feature_ids)) + mdcr_set |= MDCR_EL2_TDCC; + + vcpu->arch.mdcr_el2 |= mdcr_set; +} + +/* + * Set trap register values based on features in ID_AA64MMFR1. + */ +static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu) +{ + const u64 feature_ids = get_pvm_id_aa64mmfr1(vcpu); + u64 hcr_set = 0; + + /* Trap LOR */ + if (!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_LOR), feature_ids)) + hcr_set |= HCR_TLOR; + + vcpu->arch.hcr_el2 |= hcr_set; +} + +/* + * Set baseline trap register values. + */ +static void pvm_init_trap_regs(struct kvm_vcpu *vcpu) +{ + const u64 hcr_trap_feat_regs = HCR_TID3; + const u64 hcr_trap_impdef = HCR_TACR | HCR_TIDCP | HCR_TID1; + + /* + * Always trap: + * - Feature id registers: to control features exposed to guests + * - Implementation-defined features + */ + vcpu->arch.hcr_el2 |= hcr_trap_feat_regs | hcr_trap_impdef; + + /* Clear res0 and set res1 bits to trap potential new features. */ + vcpu->arch.hcr_el2 &= ~(HCR_RES0); + vcpu->arch.mdcr_el2 &= ~(MDCR_EL2_RES0); + vcpu->arch.cptr_el2 |= CPTR_NVHE_EL2_RES1; + vcpu->arch.cptr_el2 &= ~(CPTR_NVHE_EL2_RES0); +} + +/* + * Initialize trap register values for protected VMs. + */ +void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu) +{ + pvm_init_trap_regs(vcpu); + pvm_init_traps_aa64pfr0(vcpu); + pvm_init_traps_aa64pfr1(vcpu); + pvm_init_traps_aa64dfr0(vcpu); + pvm_init_traps_aa64mmfr0(vcpu); + pvm_init_traps_aa64mmfr1(vcpu); +} -- cgit v1.2.3-70-g09d2 From 72e1be120eaaf82a58c81fcf173cdb1d7a5dcfbb Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:34 +0100 Subject: KVM: arm64: Move sanitized copies of CPU features Move the sanitized copies of the CPU feature registers to the recently created sys_regs.c. This consolidates all copies in a more relevant file. No functional change intended. Acked-by: Will Deacon Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-10-tabba@google.com --- arch/arm64/kvm/hyp/nvhe/mem_protect.c | 6 ------ arch/arm64/kvm/hyp/nvhe/sys_regs.c | 2 ++ 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/mem_protect.c b/arch/arm64/kvm/hyp/nvhe/mem_protect.c index 2a07d63b8498..f6d96e60b323 100644 --- a/arch/arm64/kvm/hyp/nvhe/mem_protect.c +++ b/arch/arm64/kvm/hyp/nvhe/mem_protect.c @@ -25,12 +25,6 @@ struct host_kvm host_kvm; static struct hyp_pool host_s2_pool; -/* - * Copies of the host's CPU features registers holding sanitized values. - */ -u64 id_aa64mmfr0_el1_sys_val; -u64 id_aa64mmfr1_el1_sys_val; - const u8 pkvm_hyp_id = 1; static void *host_s2_zalloc_pages_exact(size_t size) diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 6e3ea49af302..6bde2dc5205c 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -21,6 +21,8 @@ u64 id_aa64pfr0_el1_sys_val; u64 id_aa64pfr1_el1_sys_val; u64 id_aa64isar0_el1_sys_val; u64 id_aa64isar1_el1_sys_val; +u64 id_aa64mmfr0_el1_sys_val; +u64 id_aa64mmfr1_el1_sys_val; u64 id_aa64mmfr2_el1_sys_val; /* -- cgit v1.2.3-70-g09d2 From 1423afcb411780c7a6a68f801fdcfb6920ad6f06 Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:35 +0100 Subject: KVM: arm64: Trap access to pVM restricted features Trap accesses to restricted features for VMs running in protected mode. Access to feature registers are emulated, and only supported features are exposed to protected VMs. Accesses to restricted registers as well as restricted instructions are trapped, and an undefined exception is injected into the protected guests, i.e., with EC = 0x0 (unknown reason). This EC is the one used, according to the Arm Architecture Reference Manual, for unallocated or undefined system registers or instructions. Only affects the functionality of protected VMs. Otherwise, should not affect non-protected VMs when KVM is running in protected mode. Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-11-tabba@google.com --- arch/arm64/kvm/hyp/nvhe/switch.c | 57 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 17d1a9512507..2c72c31e516e 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -159,6 +160,49 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) write_sysreg(pmu->events_host, pmcntenset_el0); } +/** + * Handler for protected VM restricted exceptions. + * + * Inject an undefined exception into the guest and return true to indicate that + * the hypervisor has handled the exit, and control should go back to the guest. + */ +static bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + inject_undef64(vcpu); + return true; +} + +/** + * Handler for protected VM MSR, MRS or System instruction execution in AArch64. + * + * Returns true if the hypervisor has handled the exit, and control should go + * back to the guest, or false if it hasn't. + */ +static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + if (kvm_handle_pvm_sysreg(vcpu, exit_code)) + return true; + + return kvm_hyp_handle_sysreg(vcpu, exit_code); +} + +/** + * Handler for protected floating-point and Advanced SIMD accesses. + * + * Returns true if the hypervisor has handled the exit, and control should go + * back to the guest, or false if it hasn't. + */ +static bool kvm_handle_pvm_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + /* Linux guests assume support for floating-point and Advanced SIMD. */ + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_FP), + PVM_ID_AA64PFR0_ALLOW)); + BUILD_BUG_ON(!FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD), + PVM_ID_AA64PFR0_ALLOW)); + + return kvm_hyp_handle_fpsimd(vcpu, exit_code); +} + static const exit_handler_fn hyp_exit_handlers[] = { [0 ... ESR_ELx_EC_MAX] = NULL, [ESR_ELx_EC_CP15_32] = kvm_hyp_handle_cp15_32, @@ -170,8 +214,21 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; +static const exit_handler_fn pvm_exit_handlers[] = { + [0 ... ESR_ELx_EC_MAX] = NULL, + [ESR_ELx_EC_SYS64] = kvm_handle_pvm_sys64, + [ESR_ELx_EC_SVE] = kvm_handle_pvm_restricted, + [ESR_ELx_EC_FP_ASIMD] = kvm_handle_pvm_fpsimd, + [ESR_ELx_EC_IABT_LOW] = kvm_hyp_handle_iabt_low, + [ESR_ELx_EC_DABT_LOW] = kvm_hyp_handle_dabt_low, + [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, +}; + static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) { + if (unlikely(kvm_vm_is_protected(kvm))) + return pvm_exit_handlers; + return hyp_exit_handlers; } -- cgit v1.2.3-70-g09d2 From 5f39efc42052b042c4d7ba6fd77934e8de43e10c Mon Sep 17 00:00:00 2001 From: Fuad Tabba Date: Sun, 10 Oct 2021 15:56:36 +0100 Subject: KVM: arm64: Handle protected guests at 32 bits Protected KVM does not support protected AArch32 guests. However, it is possible for the guest to force run AArch32, potentially causing problems. Add an extra check so that if the hypervisor catches the guest doing that, it can prevent the guest from running again by resetting vcpu->arch.target and returning ARM_EXCEPTION_IL. If this were to happen, The VMM can try and fix it by re- initializing the vcpu with KVM_ARM_VCPU_INIT, however, this is likely not possible for protected VMs. Adapted from commit 22f553842b14 ("KVM: arm64: Handle Asymmetric AArch32 systems") Signed-off-by: Fuad Tabba Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010145636.1950948-12-tabba@google.com --- arch/arm64/kvm/hyp/nvhe/switch.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 2c72c31e516e..f25b6353a598 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -232,6 +232,37 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) return hyp_exit_handlers; } +/* + * Some guests (e.g., protected VMs) are not be allowed to run in AArch32. + * The ARMv8 architecture does not give the hypervisor a mechanism to prevent a + * guest from dropping to AArch32 EL0 if implemented by the CPU. If the + * hypervisor spots a guest in such a state ensure it is handled, and don't + * trust the host to spot or fix it. The check below is based on the one in + * kvm_arch_vcpu_ioctl_run(). + * + * Returns false if the guest ran in AArch32 when it shouldn't have, and + * thus should exit to the host, or true if a the guest run loop can continue. + */ +static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + + if (kvm_vm_is_protected(kvm) && vcpu_mode_is_32bit(vcpu)) { + /* + * As we have caught the guest red-handed, decide that it isn't + * fit for purpose anymore by making the vcpu invalid. The VMM + * can try and fix it by re-initializing the vcpu with + * KVM_ARM_VCPU_INIT, however, this is likely not possible for + * protected VMs. + */ + vcpu->arch.target = -1; + *exit_code = ARM_EXCEPTION_IL; + return false; + } + + return true; +} + /* Switch to the guest for legacy non-VHE systems */ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) { @@ -294,6 +325,9 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu) /* Jump in the fire! */ exit_code = __guest_enter(vcpu); + if (unlikely(!handle_aarch32_guest(vcpu, &exit_code))) + break; + /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); -- cgit v1.2.3-70-g09d2 From 69adec18e94ff3ca20447916a3bd23ab1d06b878 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 12 Oct 2021 12:23:12 +0100 Subject: KVM: arm64: Fix reporting of endianess when the access originates at EL0 We currently check SCTLR_EL1.EE when computing the address of a faulting guest access. However, the fault could have occured at EL0, in which case the right bit to check would be SCTLR_EL1.E0E. This is pretty unlikely to cause any issue in practice: You'd have to have a guest with a LE EL1 and a BE EL0 (or the other way around), and have mapped a device into the EL0 page tables. Good luck with that! Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Link: https://lore.kernel.org/r/20211012112312.1247467-1-maz@kernel.org --- arch/arm64/include/asm/kvm_emulate.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fd418955e31e..f4871e47b2d0 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -396,7 +396,10 @@ static inline bool kvm_vcpu_is_be(struct kvm_vcpu *vcpu) if (vcpu_mode_is_32bit(vcpu)) return !!(*vcpu_cpsr(vcpu) & PSR_AA32_E_BIT); - return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & (1 << 25)); + if (vcpu_mode_priv(vcpu)) + return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_ELx_EE); + else + return !!(vcpu_read_sys_reg(vcpu, SCTLR_EL1) & SCTLR_EL1_E0E); } static inline unsigned long vcpu_data_guest_to_host(struct kvm_vcpu *vcpu, -- cgit v1.2.3-70-g09d2 From 562e530fd7707aad7fed953692d1835612238966 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 16:09:06 +0100 Subject: KVM: arm64: Force ID_AA64PFR0_EL1.GIC=1 when exposing a virtual GICv3 Until now, we always let ID_AA64PFR0_EL1.GIC reflect the value visible on the host, even if we were running a GICv2-enabled VM on a GICv3+compat host. That's fine, but we also now have the case of a host that does not expose ID_AA64PFR0_EL1.GIC==1 despite having a vGIC. Yes, this is confusing. Thank you M1. Let's go back to first principles and expose ID_AA64PFR0_EL1.GIC=1 when a GICv3 is exposed to the guest. This also hides a GICv4.1 CPU interface from the guest which has no business knowing about the v4.1 extension. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010150910.2911495-2-maz@kernel.org --- arch/arm64/kvm/sys_regs.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 1d46e185f31e..0e8fc29df19c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1075,6 +1075,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV2), (u64)vcpu->kvm->arch.pfr0_csv2); val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3); val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_CSV3), (u64)vcpu->kvm->arch.pfr0_csv3); + if (irqchip_in_kernel(vcpu->kvm) && + vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + val &= ~ARM64_FEATURE_MASK(ID_AA64PFR0_GIC); + val |= FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_GIC), 1); + } break; case SYS_ID_AA64PFR1_EL1: val &= ~ARM64_FEATURE_MASK(ID_AA64PFR1_MTE); -- cgit v1.2.3-70-g09d2 From df652bcf1136db7f16e486a204ba4b4fc4181759 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 16:09:07 +0100 Subject: KVM: arm64: vgic-v3: Work around GICv3 locally generated SErrors The infamous M1 has a feature nobody else ever implemented, in the form of the "GIC locally generated SError interrupts", also known as SEIS for short. These SErrors are generated when a guest does something that violates the GIC state machine. It would have been simpler to just *ignore* the damned thing, but that's not what this HW does. Oh well. This part of of the architecture is also amazingly under-specified. There is a whole 10 lines that describe the feature in a spec that is 930 pages long, and some of these lines are factually wrong. Oh, and it is deprecated, so the insentive to clarify it is low. Now, the spec says that this should be a *virtual* SError when HCR_EL2.AMO is set. As it turns out, that's not always the case on this CPU, and the SError sometimes fires on the host as a physical SError. Goodbye, cruel world. This clearly is a HW bug, and it means that a guest can easily take the host down, on demand. Thankfully, we have seen systems that were just as broken in the past, and we have the perfect vaccine for it. Apple M1, please meet the Cavium ThunderX workaround. All your GIC accesses will be trapped, sanitised, and emulated. Only the signalling aspect of the HW will be used. It won't be super speedy, but it will at least be safe. You're most welcome. Given that this has only ever been seen on this single implementation, that the spec is unclear at best and that we cannot trust it to ever be implemented correctly, gate the workaround solely on ICH_VTR_EL2.SEIS being set. Tested-by: Joey Gouly Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010150910.2911495-3-maz@kernel.org --- arch/arm64/kvm/vgic/vgic-v3.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index 21a6207fb2ee..ae59e2580bf5 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -671,6 +671,14 @@ int vgic_v3_probe(const struct gic_kvm_info *info) group1_trap = true; } + if (kvm_vgic_global_state.ich_vtr_el2 & ICH_VTR_SEIS_MASK) { + kvm_info("GICv3 with locally generated SEI\n"); + + group0_trap = true; + group1_trap = true; + common_trap = true; + } + if (group0_trap || group1_trap || common_trap) { kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", group0_trap ? "G0" : "", -- cgit v1.2.3-70-g09d2 From 0924729b21bffdd0e13f29ea6256d299fc807cff Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 16:09:08 +0100 Subject: KVM: arm64: vgic-v3: Reduce common group trapping to ICV_DIR_EL1 when possible On systems that advertise ICH_VTR_EL2.SEIS, we trap all GICv3 sysreg accesses from the guest. From a performance perspective, this is OK as long as the guest doesn't hammer the GICv3 CPU interface. In most cases, this is fine, unless the guest actively uses priorities and switches PMR_EL1 very often. Which is exactly what happens when a Linux guest runs with irqchip.gicv3_pseudo_nmi=1. In these condition, the performance plumets as we hit PMR each time we mask/unmask interrupts. Not good. There is however an opportunity for improvement. Careful reading of the architecture specification indicates that the only GICv3 sysreg belonging to the common group (which contains the SGI registers, PMR, DIR, CTLR and RPR) that is allowed to generate a SError is DIR. Everything else is safe. It is thus possible to substitute the trapping of all the common group with just that of DIR if it supported by the implementation. Yes, that's yet another optional bit of the architecture. So let's just do that, as it leads to some impressive result on the M1: Without this change: bash-5.1# /host/home/maz/hackbench 100 process 1000 Running with 100*40 (== 4000) tasks. Time: 56.596 With this change: bash-5.1# /host/home/maz/hackbench 100 process 1000 Running with 100*40 (== 4000) tasks. Time: 8.649 which is a pretty convincing result. Signed-off-by: Marc Zyngier Reviewed-by: Alexandru Elisei Link: https://lore.kernel.org/r/20211010150910.2911495-4-maz@kernel.org --- arch/arm64/include/asm/sysreg.h | 3 +++ arch/arm64/kvm/vgic/vgic-v3.c | 15 +++++++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b268082d67ed..9412a645a1c0 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -1152,6 +1152,7 @@ #define ICH_HCR_TC (1 << 10) #define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) +#define ICH_HCR_TDIR (1 << 14) #define ICH_HCR_EOIcount_SHIFT 27 #define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) @@ -1184,6 +1185,8 @@ #define ICH_VTR_SEIS_MASK (1 << ICH_VTR_SEIS_SHIFT) #define ICH_VTR_A3V_SHIFT 21 #define ICH_VTR_A3V_MASK (1 << ICH_VTR_A3V_SHIFT) +#define ICH_VTR_TDS_SHIFT 19 +#define ICH_VTR_TDS_MASK (1 << ICH_VTR_TDS_SHIFT) #define ARM64_FEATURE_FIELD_BITS 4 diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c index ae59e2580bf5..467c22bbade6 100644 --- a/arch/arm64/kvm/vgic/vgic-v3.c +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -15,6 +15,7 @@ static bool group0_trap; static bool group1_trap; static bool common_trap; +static bool dir_trap; static bool gicv4_enable; void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) @@ -296,6 +297,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) vgic_v3->vgic_hcr |= ICH_HCR_TALL1; if (common_trap) vgic_v3->vgic_hcr |= ICH_HCR_TC; + if (dir_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TDIR; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) @@ -676,14 +679,18 @@ int vgic_v3_probe(const struct gic_kvm_info *info) group0_trap = true; group1_trap = true; - common_trap = true; + if (ich_vtr_el2 & ICH_VTR_TDS_MASK) + dir_trap = true; + else + common_trap = true; } - if (group0_trap || group1_trap || common_trap) { - kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", + if (group0_trap || group1_trap || common_trap | dir_trap) { + kvm_info("GICv3 sysreg trapping enabled ([%s%s%s%s], reduced performance)\n", group0_trap ? "G0" : "", group1_trap ? "G1" : "", - common_trap ? "C" : ""); + common_trap ? "C" : "", + dir_trap ? "D" : ""); static_branch_enable(&vgic_v3_cpuif_trap); } -- cgit v1.2.3-70-g09d2 From f87ab682722299cddf8cf5f7bc17053d70300ee0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 16:09:09 +0100 Subject: KVM: arm64: vgic-v3: Don't advertise ICC_CTLR_EL1.SEIS Since we are trapping all sysreg accesses when ICH_VTR_EL2.SEIS is set, and that we never deliver an SError when emulating any of the GICv3 sysregs, don't advertise ICC_CTLR_EL1.SEIS. Reviewed-by: Alexandru Elisei Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010150910.2911495-5-maz@kernel.org --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index 39f8f7f9227c..b3b50de496a3 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -987,8 +987,6 @@ static void __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; /* IDbits */ val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; - /* SEIS */ - val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT; /* A3V */ val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; /* EOImode */ -- cgit v1.2.3-70-g09d2 From 9d449c71bd8f74282e84213c8f0b8328293ab0a7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 10 Oct 2021 16:09:10 +0100 Subject: KVM: arm64: vgic-v3: Align emulated cpuif LPI state machine with the pseudocode Having realised that a virtual LPI does transition through an active state that does not exist on bare metal, align the CPU interface emulation with the behaviour specified in the architecture pseudocode. The LPIs now transition to active on IAR read, and to inactive on EOI write. Special care is taken not to increment the EOIcount for an LPI that isn't present in the LRs. Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20211010150910.2911495-6-maz@kernel.org --- arch/arm64/kvm/hyp/vgic-v3-sr.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c index b3b50de496a3..20db2f281cf2 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -695,9 +695,7 @@ static void __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) goto spurious; lr_val &= ~ICH_LR_STATE; - /* No active state for LPIs */ - if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI) - lr_val |= ICH_LR_ACTIVE_BIT; + lr_val |= ICH_LR_ACTIVE_BIT; __gic_v3_set_lr(lr_val, lr); __vgic_v3_set_active_priority(lr_prio, vmcr, grp); vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); @@ -764,20 +762,18 @@ static void __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) /* Drop priority in any case */ act_prio = __vgic_v3_clear_highest_active_priority(); - /* If EOIing an LPI, no deactivate to be performed */ - if (vid >= VGIC_MIN_LPI) - return; - - /* EOImode == 1, nothing to be done here */ - if (vmcr & ICH_VMCR_EOIM_MASK) - return; - lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); if (lr == -1) { - __vgic_v3_bump_eoicount(); + /* Do not bump EOIcount for LPIs that aren't in the LRs */ + if (!(vid >= VGIC_MIN_LPI)) + __vgic_v3_bump_eoicount(); return; } + /* EOImode == 1 and not an LPI, nothing to be done here */ + if ((vmcr & ICH_VMCR_EOIM_MASK) && !(vid >= VGIC_MIN_LPI)) + return; + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; /* If priorities or group do not match, the guest has fscked-up. */ -- cgit v1.2.3-70-g09d2 From 3ef231670b9e9001316a426e794b2c74b8f6b4f6 Mon Sep 17 00:00:00 2001 From: Jia He Date: Tue, 7 Sep 2021 20:31:11 +0800 Subject: KVM: arm64: vgic: Add memcg accounting to vgic allocations Inspired by commit 254272ce6505 ("kvm: x86: Add memcg accounting to KVM allocations"), it would be better to make arm64 vgic consistent with common kvm codes. The memory allocations of VM scope should be charged into VM process cgroup, hence change GFP_KERNEL to GFP_KERNEL_ACCOUNT. There remain a few cases since these allocations are global, not in VM scope. Signed-off-by: Jia He Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210907123112.10232-2-justin.he@arm.com --- arch/arm64/kvm/vgic/vgic-init.c | 2 +- arch/arm64/kvm/vgic/vgic-irqfd.c | 2 +- arch/arm64/kvm/vgic/vgic-its.c | 14 +++++++------- arch/arm64/kvm/vgic/vgic-mmio-v3.c | 2 +- arch/arm64/kvm/vgic/vgic-v4.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c index 340c51d87677..0a06d0648970 100644 --- a/arch/arm64/kvm/vgic/vgic-init.c +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -134,7 +134,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); int i; - dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); + dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT); if (!dist->spis) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c index 79f8899b234c..475059bacedf 100644 --- a/arch/arm64/kvm/vgic/vgic-irqfd.c +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -139,7 +139,7 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) u32 nr = dist->nr_spis; int i, ret; - entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL_ACCOUNT); if (!entries) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c index 61728c543eb9..b99f47103056 100644 --- a/arch/arm64/kvm/vgic/vgic-its.c +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -48,7 +48,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, if (irq) return irq; - irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); + irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL_ACCOUNT); if (!irq) return ERR_PTR(-ENOMEM); @@ -332,7 +332,7 @@ int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) * we must be careful not to overrun the array. */ irq_count = READ_ONCE(dist->lpi_list_count); - intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); + intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL_ACCOUNT); if (!intids) return -ENOMEM; @@ -985,7 +985,7 @@ static int vgic_its_alloc_collection(struct vgic_its *its, if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) return E_ITS_MAPC_COLLECTION_OOR; - collection = kzalloc(sizeof(*collection), GFP_KERNEL); + collection = kzalloc(sizeof(*collection), GFP_KERNEL_ACCOUNT); if (!collection) return -ENOMEM; @@ -1029,7 +1029,7 @@ static struct its_ite *vgic_its_alloc_ite(struct its_device *device, { struct its_ite *ite; - ite = kzalloc(sizeof(*ite), GFP_KERNEL); + ite = kzalloc(sizeof(*ite), GFP_KERNEL_ACCOUNT); if (!ite) return ERR_PTR(-ENOMEM); @@ -1150,7 +1150,7 @@ static struct its_device *vgic_its_alloc_device(struct vgic_its *its, { struct its_device *device; - device = kzalloc(sizeof(*device), GFP_KERNEL); + device = kzalloc(sizeof(*device), GFP_KERNEL_ACCOUNT); if (!device) return ERR_PTR(-ENOMEM); @@ -1847,7 +1847,7 @@ void vgic_lpi_translation_cache_init(struct kvm *kvm) struct vgic_translation_cache_entry *cte; /* An allocation failure is not fatal */ - cte = kzalloc(sizeof(*cte), GFP_KERNEL); + cte = kzalloc(sizeof(*cte), GFP_KERNEL_ACCOUNT); if (WARN_ON(!cte)) break; @@ -1888,7 +1888,7 @@ static int vgic_its_create(struct kvm_device *dev, u32 type) if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) return -ENODEV; - its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL); + its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL_ACCOUNT); if (!its) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c index a09cdc0b953c..d3fd8a6c0c9a 100644 --- a/arch/arm64/kvm/vgic/vgic-mmio-v3.c +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -834,7 +834,7 @@ static int vgic_v3_alloc_redist_region(struct kvm *kvm, uint32_t index, if (vgic_v3_rdist_overlap(kvm, base, size)) return -EINVAL; - rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL); + rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL_ACCOUNT); if (!rdreg) return -ENOMEM; diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index c1845d8f5f7e..772dd15a22c7 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -246,7 +246,7 @@ int vgic_v4_init(struct kvm *kvm) nr_vcpus = atomic_read(&kvm->online_vcpus); dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes), - GFP_KERNEL); + GFP_KERNEL_ACCOUNT); if (!dist->its_vm.vpes) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 115bae923ac8bb29ee635e0ed6b4d5a3eec9371e Mon Sep 17 00:00:00 2001 From: Jia He Date: Tue, 7 Sep 2021 20:31:12 +0800 Subject: KVM: arm64: Add memcg accounting to KVM allocations Inspired by commit 254272ce6505 ("kvm: x86: Add memcg accounting to KVM allocations"), it would be better to make arm64 KVM consistent with common kvm codes. The memory allocations of VM scope should be charged into VM process cgroup, hence change GFP_KERNEL to GFP_KERNEL_ACCOUNT. There remain a few cases since these allocations are global, not in VM scope. Signed-off-by: Jia He Reviewed-by: Oliver Upton Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20210907123112.10232-3-justin.he@arm.com --- arch/arm64/kvm/arm.c | 6 ++++-- arch/arm64/kvm/mmu.c | 2 +- arch/arm64/kvm/pmu-emul.c | 2 +- arch/arm64/kvm/reset.c | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index fe102cd2e518..e3d903e1b7da 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -291,10 +291,12 @@ long kvm_arch_dev_ioctl(struct file *filp, struct kvm *kvm_arch_alloc_vm(void) { + size_t sz = sizeof(struct kvm); + if (!has_vhe()) - return kzalloc(sizeof(struct kvm), GFP_KERNEL); + return kzalloc(sz, GFP_KERNEL_ACCOUNT); - return vzalloc(sizeof(struct kvm)); + return __vmalloc(sz, GFP_KERNEL_ACCOUNT | __GFP_HIGHMEM | __GFP_ZERO); } void kvm_arch_free_vm(struct kvm *kvm) diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 1a94a7ca48f2..da10996dcdf1 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -512,7 +512,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu) return -EINVAL; } - pgt = kzalloc(sizeof(*pgt), GFP_KERNEL); + pgt = kzalloc(sizeof(*pgt), GFP_KERNEL_ACCOUNT); if (!pgt) return -ENOMEM; diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c index 2af3c37445e0..a5e4bbf5e68f 100644 --- a/arch/arm64/kvm/pmu-emul.c +++ b/arch/arm64/kvm/pmu-emul.c @@ -978,7 +978,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) mutex_lock(&vcpu->kvm->lock); if (!vcpu->kvm->arch.pmu_filter) { - vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL); + vcpu->kvm->arch.pmu_filter = bitmap_alloc(nr_events, GFP_KERNEL_ACCOUNT); if (!vcpu->kvm->arch.pmu_filter) { mutex_unlock(&vcpu->kvm->lock); return -ENOMEM; diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 5ce36b0a3343..acad22ebac12 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -106,7 +106,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) vl > SVE_VL_ARCH_MAX)) return -EIO; - buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL); + buf = kzalloc(SVE_SIG_REGS_SIZE(sve_vq_from_vl(vl)), GFP_KERNEL_ACCOUNT); if (!buf) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 8a049862c38f0c78b0e01ab5d36db1bffc832675 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:36 +0100 Subject: KVM: arm64: Fix early exit ptrauth handling The previous rework of the early exit code to provide an EC-based decoding tree missed the fact that we have two trap paths for ptrauth: the instructions (EC_PAC) and the sysregs (EC_SYS64). Rework the handlers to call the ptrauth handling code on both paths. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-2-maz@kernel.org --- arch/arm64/kvm/hyp/include/hyp/switch.h | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 481399bf9b94..4126926c3e06 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -282,14 +282,6 @@ static inline bool handle_tx2_tvm(struct kvm_vcpu *vcpu) static inline bool esr_is_ptrauth_trap(u32 esr) { - u32 ec = ESR_ELx_EC(esr); - - if (ec == ESR_ELx_EC_PAC) - return true; - - if (ec != ESR_ELx_EC_SYS64) - return false; - switch (esr_sys64_to_sysreg(esr)) { case SYS_APIAKEYLO_EL1: case SYS_APIAKEYHI_EL1: @@ -323,8 +315,7 @@ static bool kvm_hyp_handle_ptrauth(struct kvm_vcpu *vcpu, u64 *exit_code) struct kvm_cpu_context *ctxt; u64 val; - if (!vcpu_has_ptrauth(vcpu) || - !esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + if (!vcpu_has_ptrauth(vcpu)) return false; ctxt = this_cpu_ptr(&kvm_hyp_ctxt); @@ -353,6 +344,9 @@ static bool kvm_hyp_handle_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) __vgic_v3_perform_cpuif_access(vcpu) == 1) return true; + if (esr_is_ptrauth_trap(kvm_vcpu_get_esr(vcpu))) + return kvm_hyp_handle_ptrauth(vcpu, exit_code); + return false; } -- cgit v1.2.3-70-g09d2 From ce75916749b8cb5ec795f1157a5c426f6765a48c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:37 +0100 Subject: KVM: arm64: pkvm: Use a single function to expose all id-regs Rather than exposing a whole set of helper functions to retrieve individual ID registers, use the existing decoding tree and expose a single helper instead. This allow a number of functions to be made static, and we now have a single entry point to maintain. Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-3-maz@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/sys_regs.h | 14 +---------- arch/arm64/kvm/hyp/nvhe/pkvm.c | 10 ++++---- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 37 ++++++++++++++++-------------- 3 files changed, 26 insertions(+), 35 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h b/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h index 3288128738aa..8adc13227b1a 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h +++ b/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h @@ -9,19 +9,7 @@ #include -u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu); -u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu); - +u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); int kvm_check_pvm_sysreg_table(void); void inject_undef64(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 633547cc1033..62377fa8a4cb 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -15,7 +15,7 @@ */ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) { - const u64 feature_ids = get_pvm_id_aa64pfr0(vcpu); + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR0_EL1); u64 hcr_set = HCR_RW; u64 hcr_clear = 0; u64 cptr_set = 0; @@ -62,7 +62,7 @@ static void pvm_init_traps_aa64pfr0(struct kvm_vcpu *vcpu) */ static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu) { - const u64 feature_ids = get_pvm_id_aa64pfr1(vcpu); + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64PFR1_EL1); u64 hcr_set = 0; u64 hcr_clear = 0; @@ -81,7 +81,7 @@ static void pvm_init_traps_aa64pfr1(struct kvm_vcpu *vcpu) */ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) { - const u64 feature_ids = get_pvm_id_aa64dfr0(vcpu); + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64DFR0_EL1); u64 mdcr_set = 0; u64 mdcr_clear = 0; u64 cptr_set = 0; @@ -125,7 +125,7 @@ static void pvm_init_traps_aa64dfr0(struct kvm_vcpu *vcpu) */ static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu) { - const u64 feature_ids = get_pvm_id_aa64mmfr0(vcpu); + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR0_EL1); u64 mdcr_set = 0; /* Trap Debug Communications Channel registers */ @@ -140,7 +140,7 @@ static void pvm_init_traps_aa64mmfr0(struct kvm_vcpu *vcpu) */ static void pvm_init_traps_aa64mmfr1(struct kvm_vcpu *vcpu) { - const u64 feature_ids = get_pvm_id_aa64mmfr1(vcpu); + const u64 feature_ids = pvm_read_id_reg(vcpu, SYS_ID_AA64MMFR1_EL1); u64 hcr_set = 0; /* Trap LOR */ diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 6bde2dc5205c..f125d6a52880 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -82,7 +82,7 @@ static u64 get_restricted_features_unsigned(u64 sys_reg_val, * based on allowed features, system features, and KVM support. */ -u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) { const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); u64 set_mask = 0; @@ -103,7 +103,7 @@ u64 get_pvm_id_aa64pfr0(const struct kvm_vcpu *vcpu) return (id_aa64pfr0_el1_sys_val & allow_mask) | set_mask; } -u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) { const struct kvm *kvm = (const struct kvm *)kern_hyp_va(vcpu->kvm); u64 allow_mask = PVM_ID_AA64PFR1_ALLOW; @@ -114,7 +114,7 @@ u64 get_pvm_id_aa64pfr1(const struct kvm_vcpu *vcpu) return id_aa64pfr1_el1_sys_val & allow_mask; } -u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) { /* * No support for Scalable Vectors, therefore, hyp has no sanitized @@ -124,7 +124,7 @@ u64 get_pvm_id_aa64zfr0(const struct kvm_vcpu *vcpu) return 0; } -u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) { /* * No support for debug, including breakpoints, and watchpoints, @@ -134,7 +134,7 @@ u64 get_pvm_id_aa64dfr0(const struct kvm_vcpu *vcpu) return 0; } -u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) { /* * No support for debug, therefore, hyp has no sanitized copy of the @@ -144,7 +144,7 @@ u64 get_pvm_id_aa64dfr1(const struct kvm_vcpu *vcpu) return 0; } -u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) { /* * No support for implementation defined features, therefore, hyp has no @@ -154,7 +154,7 @@ u64 get_pvm_id_aa64afr0(const struct kvm_vcpu *vcpu) return 0; } -u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) { /* * No support for implementation defined features, therefore, hyp has no @@ -164,12 +164,12 @@ u64 get_pvm_id_aa64afr1(const struct kvm_vcpu *vcpu) return 0; } -u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64isar0(const struct kvm_vcpu *vcpu) { return id_aa64isar0_el1_sys_val & PVM_ID_AA64ISAR0_ALLOW; } -u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) { u64 allow_mask = PVM_ID_AA64ISAR1_ALLOW; @@ -182,7 +182,7 @@ u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) return id_aa64isar1_el1_sys_val & allow_mask; } -u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) { u64 set_mask; @@ -192,22 +192,19 @@ u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) return (id_aa64mmfr0_el1_sys_val & PVM_ID_AA64MMFR0_ALLOW) | set_mask; } -u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64mmfr1(const struct kvm_vcpu *vcpu) { return id_aa64mmfr1_el1_sys_val & PVM_ID_AA64MMFR1_ALLOW; } -u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu) +static u64 get_pvm_id_aa64mmfr2(const struct kvm_vcpu *vcpu) { return id_aa64mmfr2_el1_sys_val & PVM_ID_AA64MMFR2_ALLOW; } -/* Read a sanitized cpufeature ID register by its sys_reg_desc. */ -static u64 read_id_reg(const struct kvm_vcpu *vcpu, - struct sys_reg_desc const *r) +/* Read a sanitized cpufeature ID register by its encoding */ +u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) { - u32 id = reg_to_encoding(r); - switch (id) { case SYS_ID_AA64PFR0_EL1: return get_pvm_id_aa64pfr0(vcpu); @@ -245,6 +242,12 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, return 0; } +static u64 read_id_reg(const struct kvm_vcpu *vcpu, + struct sys_reg_desc const *r) +{ + return pvm_read_id_reg(vcpu, reg_to_encoding(r)); +} + /* * Accessor for AArch32 feature id registers. * -- cgit v1.2.3-70-g09d2 From 8ffb41888334c1247bd9b4d6ff6c092a90e8d0b8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:38 +0100 Subject: KVM: arm64: pkvm: Make the ERR/ERX*_EL1 registers RAZ/WI The ERR*/ERX* registers should be handled as RAZ/WI, and there should be no need to involve EL1 for that. Add a helper that handles such registers, and repaint the sysreg table to declare these registers as RAZ/WI. Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-4-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 33 ++++++++++++++++++++++----------- 1 file changed, 22 insertions(+), 11 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index f125d6a52880..042a1c0be7e0 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -248,6 +248,16 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, return pvm_read_id_reg(vcpu, reg_to_encoding(r)); } +/* Handler to RAZ/WI sysregs */ +static bool pvm_access_raz_wi(struct kvm_vcpu *vcpu, struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + if (!p->is_write) + p->regval = 0; + + return true; +} + /* * Accessor for AArch32 feature id registers. * @@ -270,9 +280,7 @@ static bool pvm_access_id_aarch32(struct kvm_vcpu *vcpu, BUILD_BUG_ON(FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), PVM_ID_AA64PFR0_RESTRICT_UNSIGNED) > ID_AA64PFR0_ELx_64BIT_ONLY); - /* Use 0 for architecturally "unknown" values. */ - p->regval = 0; - return true; + return pvm_access_raz_wi(vcpu, p, r); } /* @@ -301,6 +309,9 @@ static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu, /* Mark the specified system register as an AArch64 feature id register. */ #define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 } +/* Mark the specified system register as Read-As-Zero/Write-Ignored */ +#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi } + /* Mark the specified system register as not being handled in hyp. */ #define HOST_HANDLED(REG) { SYS_DESC(REG), .access = NULL } @@ -388,14 +399,14 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { HOST_HANDLED(SYS_AFSR1_EL1), HOST_HANDLED(SYS_ESR_EL1), - HOST_HANDLED(SYS_ERRIDR_EL1), - HOST_HANDLED(SYS_ERRSELR_EL1), - HOST_HANDLED(SYS_ERXFR_EL1), - HOST_HANDLED(SYS_ERXCTLR_EL1), - HOST_HANDLED(SYS_ERXSTATUS_EL1), - HOST_HANDLED(SYS_ERXADDR_EL1), - HOST_HANDLED(SYS_ERXMISC0_EL1), - HOST_HANDLED(SYS_ERXMISC1_EL1), + RAZ_WI(SYS_ERRIDR_EL1), + RAZ_WI(SYS_ERRSELR_EL1), + RAZ_WI(SYS_ERXFR_EL1), + RAZ_WI(SYS_ERXCTLR_EL1), + RAZ_WI(SYS_ERXSTATUS_EL1), + RAZ_WI(SYS_ERXADDR_EL1), + RAZ_WI(SYS_ERXMISC0_EL1), + RAZ_WI(SYS_ERXMISC1_EL1), HOST_HANDLED(SYS_TFSR_EL1), HOST_HANDLED(SYS_TFSRE0_EL1), -- cgit v1.2.3-70-g09d2 From 3c90cb15e2e66bcc526d25133747b2af747f6cd8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:39 +0100 Subject: KVM: arm64: pkvm: Drop AArch32-specific registers All the SYS_*32_EL2 registers are AArch32-specific. Since we forbid AArch32, there is no need to handle those in any way. Signed-off-by: Marc Zyngier Reviewed-by: Andrew Jones Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-5-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 042a1c0be7e0..e2b3a9e167da 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -452,10 +452,6 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { HOST_HANDLED(SYS_CNTP_CVAL_EL0), /* Performance Monitoring Registers are restricted. */ - - HOST_HANDLED(SYS_DACR32_EL2), - HOST_HANDLED(SYS_IFSR32_EL2), - HOST_HANDLED(SYS_FPEXC32_EL2), }; /* -- cgit v1.2.3-70-g09d2 From f3d5ccabab20c1be5838831f460f320a12e5e2c9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:40 +0100 Subject: KVM: arm64: pkvm: Drop sysregs that should never be routed to the host A bunch of system registers (most of them MM related) should never trap to the host under any circumstance. Keep them close to our chest. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-6-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 50 -------------------------------------- 1 file changed, 50 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index e2b3a9e167da..eb4ee2589316 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -371,34 +371,8 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { AARCH64(SYS_ID_AA64MMFR1_EL1), AARCH64(SYS_ID_AA64MMFR2_EL1), - HOST_HANDLED(SYS_SCTLR_EL1), - HOST_HANDLED(SYS_ACTLR_EL1), - HOST_HANDLED(SYS_CPACR_EL1), - - HOST_HANDLED(SYS_RGSR_EL1), - HOST_HANDLED(SYS_GCR_EL1), - /* Scalable Vector Registers are restricted. */ - HOST_HANDLED(SYS_TTBR0_EL1), - HOST_HANDLED(SYS_TTBR1_EL1), - HOST_HANDLED(SYS_TCR_EL1), - - HOST_HANDLED(SYS_APIAKEYLO_EL1), - HOST_HANDLED(SYS_APIAKEYHI_EL1), - HOST_HANDLED(SYS_APIBKEYLO_EL1), - HOST_HANDLED(SYS_APIBKEYHI_EL1), - HOST_HANDLED(SYS_APDAKEYLO_EL1), - HOST_HANDLED(SYS_APDAKEYHI_EL1), - HOST_HANDLED(SYS_APDBKEYLO_EL1), - HOST_HANDLED(SYS_APDBKEYHI_EL1), - HOST_HANDLED(SYS_APGAKEYLO_EL1), - HOST_HANDLED(SYS_APGAKEYHI_EL1), - - HOST_HANDLED(SYS_AFSR0_EL1), - HOST_HANDLED(SYS_AFSR1_EL1), - HOST_HANDLED(SYS_ESR_EL1), - RAZ_WI(SYS_ERRIDR_EL1), RAZ_WI(SYS_ERRSELR_EL1), RAZ_WI(SYS_ERXFR_EL1), @@ -408,31 +382,12 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { RAZ_WI(SYS_ERXMISC0_EL1), RAZ_WI(SYS_ERXMISC1_EL1), - HOST_HANDLED(SYS_TFSR_EL1), - HOST_HANDLED(SYS_TFSRE0_EL1), - - HOST_HANDLED(SYS_FAR_EL1), - HOST_HANDLED(SYS_PAR_EL1), - /* Performance Monitoring Registers are restricted. */ - HOST_HANDLED(SYS_MAIR_EL1), - HOST_HANDLED(SYS_AMAIR_EL1), - /* Limited Ordering Regions Registers are restricted. */ - HOST_HANDLED(SYS_VBAR_EL1), - HOST_HANDLED(SYS_DISR_EL1), - /* GIC CPU Interface registers are restricted. */ - HOST_HANDLED(SYS_CONTEXTIDR_EL1), - HOST_HANDLED(SYS_TPIDR_EL1), - - HOST_HANDLED(SYS_SCXTNUM_EL1), - - HOST_HANDLED(SYS_CNTKCTL_EL1), - HOST_HANDLED(SYS_CCSIDR_EL1), HOST_HANDLED(SYS_CLIDR_EL1), HOST_HANDLED(SYS_CSSELR_EL1), @@ -440,11 +395,6 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { /* Performance Monitoring Registers are restricted. */ - HOST_HANDLED(SYS_TPIDR_EL0), - HOST_HANDLED(SYS_TPIDRRO_EL0), - - HOST_HANDLED(SYS_SCXTNUM_EL0), - /* Activity Monitoring Registers are restricted. */ HOST_HANDLED(SYS_CNTP_TVAL_EL0), -- cgit v1.2.3-70-g09d2 From cbca19738472be8156d854663ed724b01255c932 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:41 +0100 Subject: KVM: arm64: pkvm: Handle GICv3 traps as required Forward accesses to the ICV_*SGI*_EL1 registers to EL1, and emulate ICV_SRE_EL1 by returning a fixed value. This should be enough to support GICv3 in a protected guest. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-7-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index eb4ee2589316..a341bd8ef252 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -4,6 +4,8 @@ * Author: Fuad Tabba */ +#include + #include #include #include @@ -303,6 +305,17 @@ static bool pvm_access_id_aarch64(struct kvm_vcpu *vcpu, return true; } +static bool pvm_gic_read_sre(struct kvm_vcpu *vcpu, + struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + /* pVMs only support GICv3. 'nuf said. */ + if (!p->is_write) + p->regval = ICC_SRE_EL1_DIB | ICC_SRE_EL1_DFB | ICC_SRE_EL1_SRE; + + return true; +} + /* Mark the specified system register as an AArch32 feature id register. */ #define AARCH32(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch32 } @@ -386,7 +399,10 @@ static const struct sys_reg_desc pvm_sys_reg_descs[] = { /* Limited Ordering Regions Registers are restricted. */ - /* GIC CPU Interface registers are restricted. */ + HOST_HANDLED(SYS_ICC_SGI1R_EL1), + HOST_HANDLED(SYS_ICC_ASGI1R_EL1), + HOST_HANDLED(SYS_ICC_SGI0R_EL1), + { SYS_DESC(SYS_ICC_SRE_EL1), .access = pvm_gic_read_sre, }, HOST_HANDLED(SYS_CCSIDR_EL1), HOST_HANDLED(SYS_CLIDR_EL1), -- cgit v1.2.3-70-g09d2 From 271b7286058da636ab6f5f47722e098ca3a0478b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:42 +0100 Subject: KVM: arm64: pkvm: Preserve pending SError on exit from AArch32 Don't drop a potential SError when a guest gets caught red-handed running AArch32 code. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-8-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/switch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index f25b6353a598..481c365ef144 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -256,7 +256,8 @@ static bool handle_aarch32_guest(struct kvm_vcpu *vcpu, u64 *exit_code) * protected VMs. */ vcpu->arch.target = -1; - *exit_code = ARM_EXCEPTION_IL; + *exit_code &= BIT(ARM_EXIT_WITH_SERROR_BIT); + *exit_code |= ARM_EXCEPTION_IL; return false; } -- cgit v1.2.3-70-g09d2 From 3061725d162cad0589b012fc6413c9dd0da8f02a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:43 +0100 Subject: KVM: arm64: pkvm: Consolidate include files kvm_fixed_config.h is pkvm specific, and would be better placed near its users. At the same time, include/nvhe/sys_regs.h is now almost empty. Merge the two into arch/arm64/kvm/hyp/include/nvhe/fixed_config.h. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-9-maz@kernel.org --- arch/arm64/include/asm/kvm_fixed_config.h | 195 ------------------------ arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 200 +++++++++++++++++++++++++ arch/arm64/kvm/hyp/include/nvhe/sys_regs.h | 17 --- arch/arm64/kvm/hyp/nvhe/pkvm.c | 3 +- arch/arm64/kvm/hyp/nvhe/setup.c | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 3 +- arch/arm64/kvm/hyp/nvhe/sys_regs.c | 3 +- 7 files changed, 204 insertions(+), 219 deletions(-) delete mode 100644 arch/arm64/include/asm/kvm_fixed_config.h create mode 100644 arch/arm64/kvm/hyp/include/nvhe/fixed_config.h delete mode 100644 arch/arm64/kvm/hyp/include/nvhe/sys_regs.h (limited to 'arch/arm64') diff --git a/arch/arm64/include/asm/kvm_fixed_config.h b/arch/arm64/include/asm/kvm_fixed_config.h deleted file mode 100644 index 0ed06923f7e9..000000000000 --- a/arch/arm64/include/asm/kvm_fixed_config.h +++ /dev/null @@ -1,195 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2021 Google LLC - * Author: Fuad Tabba - */ - -#ifndef __ARM64_KVM_FIXED_CONFIG_H__ -#define __ARM64_KVM_FIXED_CONFIG_H__ - -#include - -/* - * This file contains definitions for features to be allowed or restricted for - * guest virtual machines, depending on the mode KVM is running in and on the - * type of guest that is running. - * - * The ALLOW masks represent a bitmask of feature fields that are allowed - * without any restrictions as long as they are supported by the system. - * - * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for - * features that are restricted to support at most the specified feature. - * - * If a feature field is not present in either, than it is not supported. - * - * The approach taken for protected VMs is to allow features that are: - * - Needed by common Linux distributions (e.g., floating point) - * - Trivial to support, e.g., supporting the feature does not introduce or - * require tracking of additional state in KVM - * - Cannot be trapped or prevent the guest from using anyway - */ - -/* - * Allow for protected VMs: - * - Floating-point and Advanced SIMD - * - Data Independent Timing - */ -#define PVM_ID_AA64PFR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \ - ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \ - ) - -/* - * Restrict to the following *unsigned* features for protected VMs: - * - AArch64 guests only (no support for AArch32 guests): - * AArch32 adds complexity in trap handling, emulation, condition codes, - * etc... - * - RAS (v1) - * Supported by KVM - */ -#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \ - ) - -/* - * Allow for protected VMs: - * - Branch Target Identification - * - Speculative Store Bypassing - */ -#define PVM_ID_AA64PFR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \ - ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \ - ) - -/* - * Allow for protected VMs: - * - Mixed-endian - * - Distinction between Secure and Non-secure Memory - * - Mixed-endian at EL0 only - * - Non-context synchronizing exception entry and exit - */ -#define PVM_ID_AA64MMFR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \ - ) - -/* - * Restrict to the following *unsigned* features for protected VMs: - * - 40-bit IPA - * - 16-bit ASID - */ -#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \ - FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \ - ) - -/* - * Allow for protected VMs: - * - Hardware translation table updates to Access flag and Dirty state - * - Number of VMID bits from CPU - * - Hierarchical Permission Disables - * - Privileged Access Never - * - SError interrupt exceptions from speculative reads - * - Enhanced Translation Synchronization - */ -#define PVM_ID_AA64MMFR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \ - ) - -/* - * Allow for protected VMs: - * - Common not Private translations - * - User Access Override - * - IESB bit in the SCTLR_ELx registers - * - Unaligned single-copy atomicity and atomic functions - * - ESR_ELx.EC value on an exception by read access to feature ID space - * - TTL field in address operations. - * - Break-before-make sequences when changing translation block size - * - E0PDx mechanism - */ -#define PVM_ID_AA64MMFR2_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \ - ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \ - ) - -/* - * No support for Scalable Vectors for protected VMs: - * Requires additional support from KVM, e.g., context-switching and - * trapping at EL2 - */ -#define PVM_ID_AA64ZFR0_ALLOW (0ULL) - -/* - * No support for debug, including breakpoints, and watchpoints for protected - * VMs: - * The Arm architecture mandates support for at least the Armv8 debug - * architecture, which would include at least 2 hardware breakpoints and - * watchpoints. Providing that support to protected guests adds - * considerable state and complexity. Therefore, the reserved value of 0 is - * used for debug-related fields. - */ -#define PVM_ID_AA64DFR0_ALLOW (0ULL) -#define PVM_ID_AA64DFR1_ALLOW (0ULL) - -/* - * No support for implementation defined features. - */ -#define PVM_ID_AA64AFR0_ALLOW (0ULL) -#define PVM_ID_AA64AFR1_ALLOW (0ULL) - -/* - * No restrictions on instructions implemented in AArch64. - */ -#define PVM_ID_AA64ISAR0_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \ - ) - -#define PVM_ID_AA64ISAR1_ALLOW (\ - ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \ - ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ - ) - -#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h new file mode 100644 index 000000000000..747fc79ae784 --- /dev/null +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -0,0 +1,200 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Google LLC + * Author: Fuad Tabba + */ + +#ifndef __ARM64_KVM_FIXED_CONFIG_H__ +#define __ARM64_KVM_FIXED_CONFIG_H__ + +#include + +/* + * This file contains definitions for features to be allowed or restricted for + * guest virtual machines, depending on the mode KVM is running in and on the + * type of guest that is running. + * + * The ALLOW masks represent a bitmask of feature fields that are allowed + * without any restrictions as long as they are supported by the system. + * + * The RESTRICT_UNSIGNED masks, if present, represent unsigned fields for + * features that are restricted to support at most the specified feature. + * + * If a feature field is not present in either, than it is not supported. + * + * The approach taken for protected VMs is to allow features that are: + * - Needed by common Linux distributions (e.g., floating point) + * - Trivial to support, e.g., supporting the feature does not introduce or + * require tracking of additional state in KVM + * - Cannot be trapped or prevent the guest from using anyway + */ + +/* + * Allow for protected VMs: + * - Floating-point and Advanced SIMD + * - Data Independent Timing + */ +#define PVM_ID_AA64PFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR0_FP) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_ASIMD) | \ + ARM64_FEATURE_MASK(ID_AA64PFR0_DIT) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - AArch64 guests only (no support for AArch32 guests): + * AArch32 adds complexity in trap handling, emulation, condition codes, + * etc... + * - RAS (v1) + * Supported by KVM + */ +#define PVM_ID_AA64PFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL0), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL2), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_EL3), ID_AA64PFR0_ELx_64BIT_ONLY) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64PFR0_RAS), ID_AA64PFR0_RAS_V1) \ + ) + +/* + * Allow for protected VMs: + * - Branch Target Identification + * - Speculative Store Bypassing + */ +#define PVM_ID_AA64PFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64PFR1_BT) | \ + ARM64_FEATURE_MASK(ID_AA64PFR1_SSBS) \ + ) + +/* + * Allow for protected VMs: + * - Mixed-endian + * - Distinction between Secure and Non-secure Memory + * - Mixed-endian at EL0 only + * - Non-context synchronizing exception entry and exit + */ +#define PVM_ID_AA64MMFR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_SNSMEM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_BIGENDEL0) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR0_EXS) \ + ) + +/* + * Restrict to the following *unsigned* features for protected VMs: + * - 40-bit IPA + * - 16-bit ASID + */ +#define PVM_ID_AA64MMFR0_RESTRICT_UNSIGNED (\ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_PARANGE), ID_AA64MMFR0_PARANGE_40) | \ + FIELD_PREP(ARM64_FEATURE_MASK(ID_AA64MMFR0_ASID), ID_AA64MMFR0_ASID_16) \ + ) + +/* + * Allow for protected VMs: + * - Hardware translation table updates to Access flag and Dirty state + * - Number of VMID bits from CPU + * - Hierarchical Permission Disables + * - Privileged Access Never + * - SError interrupt exceptions from speculative reads + * - Enhanced Translation Synchronization + */ +#define PVM_ID_AA64MMFR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR1_HADBS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_VMIDBITS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_HPD) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_PAN) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_SPECSEI) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR1_ETS) \ + ) + +/* + * Allow for protected VMs: + * - Common not Private translations + * - User Access Override + * - IESB bit in the SCTLR_ELx registers + * - Unaligned single-copy atomicity and atomic functions + * - ESR_ELx.EC value on an exception by read access to feature ID space + * - TTL field in address operations. + * - Break-before-make sequences when changing translation block size + * - E0PDx mechanism + */ +#define PVM_ID_AA64MMFR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64MMFR2_CNP) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_UAO) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_IESB) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_AT) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_IDS) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_TTL) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_BBM) | \ + ARM64_FEATURE_MASK(ID_AA64MMFR2_E0PD) \ + ) + +/* + * No support for Scalable Vectors for protected VMs: + * Requires additional support from KVM, e.g., context-switching and + * trapping at EL2 + */ +#define PVM_ID_AA64ZFR0_ALLOW (0ULL) + +/* + * No support for debug, including breakpoints, and watchpoints for protected + * VMs: + * The Arm architecture mandates support for at least the Armv8 debug + * architecture, which would include at least 2 hardware breakpoints and + * watchpoints. Providing that support to protected guests adds + * considerable state and complexity. Therefore, the reserved value of 0 is + * used for debug-related fields. + */ +#define PVM_ID_AA64DFR0_ALLOW (0ULL) +#define PVM_ID_AA64DFR1_ALLOW (0ULL) + +/* + * No support for implementation defined features. + */ +#define PVM_ID_AA64AFR0_ALLOW (0ULL) +#define PVM_ID_AA64AFR1_ALLOW (0ULL) + +/* + * No restrictions on instructions implemented in AArch64. + */ +#define PVM_ID_AA64ISAR0_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR0_AES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA1) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA2) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_CRC32) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_ATOMICS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_RDM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SHA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SM3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_SM4) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_DP) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_FHM) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_TS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_TLB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR0_RNDR) \ + ) + +#define PVM_ID_AA64ISAR1_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR1_DPB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_APA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_API) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_JSCVT) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_FCMA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_LRCPC) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_FRINTTS) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_SB) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_SPECRES) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_BF16) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_DGH) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ + ) + +u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); +bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); +int kvm_check_pvm_sysreg_table(void); +void inject_undef64(struct kvm_vcpu *vcpu); + +#endif /* __ARM64_KVM_FIXED_CONFIG_H__ */ diff --git a/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h b/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h deleted file mode 100644 index 8adc13227b1a..000000000000 --- a/arch/arm64/kvm/hyp/include/nvhe/sys_regs.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2021 Google LLC - * Author: Fuad Tabba - */ - -#ifndef __ARM64_KVM_NVHE_SYS_REGS_H__ -#define __ARM64_KVM_NVHE_SYS_REGS_H__ - -#include - -u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); -bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); -int kvm_check_pvm_sysreg_table(void); -void inject_undef64(struct kvm_vcpu *vcpu); - -#endif /* __ARM64_KVM_NVHE_SYS_REGS_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c index 62377fa8a4cb..99c8d8b73e70 100644 --- a/arch/arm64/kvm/hyp/nvhe/pkvm.c +++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c @@ -6,8 +6,7 @@ #include #include -#include -#include +#include #include /* diff --git a/arch/arm64/kvm/hyp/nvhe/setup.c b/arch/arm64/kvm/hyp/nvhe/setup.c index c85ff64e63f2..862c7b514e20 100644 --- a/arch/arm64/kvm/hyp/nvhe/setup.c +++ b/arch/arm64/kvm/hyp/nvhe/setup.c @@ -10,11 +10,11 @@ #include #include +#include #include #include #include #include -#include #include struct hyp_pool hpool; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 481c365ef144..317dba6a018d 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -28,8 +27,8 @@ #include #include +#include #include -#include /* Non-VHE specific context */ DEFINE_PER_CPU(struct kvm_host_data, kvm_host_data); diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index a341bd8ef252..052f885e65b2 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -7,12 +7,11 @@ #include #include -#include #include #include -#include +#include #include "../../sys_regs.h" -- cgit v1.2.3-70-g09d2 From 746bdeadc53b0d58fddea6442591f5ec3eeabe7d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:44 +0100 Subject: KVM: arm64: pkvm: Move kvm_handle_pvm_restricted around Place kvm_handle_pvm_restricted() next to its little friends such as kvm_handle_pvm_sysreg(). This allows to make inject_undef64() static. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-10-maz@kernel.org --- arch/arm64/kvm/hyp/include/nvhe/fixed_config.h | 2 +- arch/arm64/kvm/hyp/nvhe/switch.c | 12 ------------ arch/arm64/kvm/hyp/nvhe/sys_regs.c | 14 +++++++++++++- 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index 747fc79ae784..eea1f6a53723 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -194,7 +194,7 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); +bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); int kvm_check_pvm_sysreg_table(void); -void inject_undef64(struct kvm_vcpu *vcpu); #endif /* __ARM64_KVM_FIXED_CONFIG_H__ */ diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 317dba6a018d..be6889e33b2b 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -159,18 +159,6 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) write_sysreg(pmu->events_host, pmcntenset_el0); } -/** - * Handler for protected VM restricted exceptions. - * - * Inject an undefined exception into the guest and return true to indicate that - * the hypervisor has handled the exit, and control should go back to the guest. - */ -static bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code) -{ - inject_undef64(vcpu); - return true; -} - /** * Handler for protected VM MSR, MRS or System instruction execution in AArch64. * diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 052f885e65b2..3787ee6fb1a2 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -30,7 +30,7 @@ u64 id_aa64mmfr2_el1_sys_val; * Inject an unknown/undefined exception to an AArch64 guest while most of its * sysregs are live. */ -void inject_undef64(struct kvm_vcpu *vcpu) +static void inject_undef64(struct kvm_vcpu *vcpu) { u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); @@ -473,3 +473,15 @@ bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code) return true; } + +/** + * Handler for protected VM restricted exceptions. + * + * Inject an undefined exception into the guest and return true to indicate that + * the hypervisor has handled the exit, and control should go back to the guest. + */ +bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code) +{ + inject_undef64(vcpu); + return true; +} -- cgit v1.2.3-70-g09d2 From 0c7639cc838263b6e38b3af76755d574f15cdf41 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:45 +0100 Subject: KVM: arm64: pkvm: Pass vpcu instead of kvm to kvm_get_exit_handler_array() Passing a VM pointer around is odd, and results in extra work on VHE. Follow the rest of the design that uses the vcpu instead, and let the nVHE code look into the struct kvm as required. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-11-maz@kernel.org --- arch/arm64/kvm/hyp/include/hyp/switch.h | 4 ++-- arch/arm64/kvm/hyp/nvhe/switch.c | 4 ++-- arch/arm64/kvm/hyp/vhe/switch.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 4126926c3e06..c6e98c7e918b 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -397,7 +397,7 @@ static bool kvm_hyp_handle_dabt_low(struct kvm_vcpu *vcpu, u64 *exit_code) typedef bool (*exit_handler_fn)(struct kvm_vcpu *, u64 *); -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm); +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu); /* * Allow the hypervisor to handle the exit with an exit handler if it has one. @@ -407,7 +407,7 @@ static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm); */ static inline bool kvm_hyp_handle_exit(struct kvm_vcpu *vcpu, u64 *exit_code) { - const exit_handler_fn *handlers = kvm_get_exit_handler_array(kern_hyp_va(vcpu->kvm)); + const exit_handler_fn *handlers = kvm_get_exit_handler_array(vcpu); exit_handler_fn fn; fn = handlers[kvm_vcpu_trap_get_class(vcpu)]; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index be6889e33b2b..50c7d48e0fa0 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -211,9 +211,9 @@ static const exit_handler_fn pvm_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) { - if (unlikely(kvm_vm_is_protected(kvm))) + if (unlikely(kvm_vm_is_protected(kern_hyp_va(vcpu->kvm)))) return pvm_exit_handlers; return hyp_exit_handlers; diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index f6fb97accf65..5a2cb5d9bc4b 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -107,7 +107,7 @@ static const exit_handler_fn hyp_exit_handlers[] = { [ESR_ELx_EC_PAC] = kvm_hyp_handle_ptrauth, }; -static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm *kvm) +static const exit_handler_fn *kvm_get_exit_handler_array(struct kvm_vcpu *vcpu) { return hyp_exit_handlers; } -- cgit v1.2.3-70-g09d2 From 07305590114af81817148d181f1eb0af294e40d6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 13 Oct 2021 13:03:46 +0100 Subject: KVM: arm64: pkvm: Give priority to standard traps over pvm handling Checking for pvm handling first means that we cannot handle ptrauth traps or apply any of the workarounds (GICv3 or TX2 #219). Flip the order around. Signed-off-by: Marc Zyngier Reviewed-by: Fuad Tabba Tested-by: Fuad Tabba Link: https://lore.kernel.org/r/20211013120346.2926621-12-maz@kernel.org --- arch/arm64/kvm/hyp/nvhe/switch.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/arm64') diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 50c7d48e0fa0..c0e3fed26d93 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -167,10 +167,13 @@ static void __pmu_switch_to_host(struct kvm_cpu_context *host_ctxt) */ static bool kvm_handle_pvm_sys64(struct kvm_vcpu *vcpu, u64 *exit_code) { - if (kvm_handle_pvm_sysreg(vcpu, exit_code)) - return true; - - return kvm_hyp_handle_sysreg(vcpu, exit_code); + /* + * Make sure we handle the exit for workarounds and ptrauth + * before the pKVM handling, as the latter could decide to + * UNDEF. + */ + return (kvm_hyp_handle_sysreg(vcpu, exit_code) || + kvm_handle_pvm_sysreg(vcpu, exit_code)); } /** -- cgit v1.2.3-70-g09d2