From 0e3b70aa137cb29a407de38e5b660d939ab462a3 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 13:04:20 -0700 Subject: KVM: x86: Document an erratum in KVM_SET_VCPU_EVENTS on Intel CPUs Document a flaw in KVM's ABI which lets userspace attempt to inject a "bad" hardware exception event, and thus induce VM-Fail on Intel CPUs. Fixing the flaw is a fool's errand, as AMD doesn't sanity check the validity of the error code, Intel CPUs that support CET relax the check for Protected Mode, userspace can change the mode after queueing an exception, KVM ignores the error code when emulating Real Mode exceptions, and so on and so forth. The VM-Fail itself doesn't harm KVM or the kernel beyond triggering a ratelimited pr_warn(), so just document the oddity. Link: https://lore.kernel.org/r/20240802200420.330769-1-seanjc@google.com Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/x86/errata.rst | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'Documentation/virt/kvm') diff --git a/Documentation/virt/kvm/x86/errata.rst b/Documentation/virt/kvm/x86/errata.rst index 4116045a8744..37c79362a48f 100644 --- a/Documentation/virt/kvm/x86/errata.rst +++ b/Documentation/virt/kvm/x86/errata.rst @@ -33,6 +33,18 @@ Note however that any software (e.g ``WIN87EM.DLL``) expecting these features to be present likely predates these CPUID feature bits, and therefore doesn't know to check for them anyway. +``KVM_SET_VCPU_EVENTS`` issue +----------------------------- + +Invalid KVM_SET_VCPU_EVENTS input with respect to error codes *may* result in +failed VM-Entry on Intel CPUs. Pre-CET Intel CPUs require that exception +injection through the VMCS correctly set the "error code valid" flag, e.g. +require the flag be set when injecting a #GP, clear when injecting a #UD, +clear when injecting a soft exception, etc. Intel CPUs that enumerate +IA32_VMX_BASIC[56] as '1' relax VMX's consistency checks, and AMD CPUs have no +restrictions whatsoever. KVM_SET_VCPU_EVENTS doesn't sanity check the vector +versus "has_error_code", i.e. KVM's ABI follows AMD behavior. + Nested virtualization features ------------------------------ -- cgit v1.3.1 From dcb988cdac85bad177de86fbf409524eda4f9467 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 2 Aug 2024 11:55:05 -0700 Subject: KVM: x86: Quirk initialization of feature MSRs to KVM's max configuration Add a quirk to control KVM's misguided initialization of select feature MSRs to KVM's max configuration, as enabling features by default violates KVM's approach of letting userspace own the vCPU model, and is actively problematic for MSRs that are conditionally supported, as the vCPU will end up with an MSR value that userspace can't restore. E.g. if the vCPU is configured with PDCM=0, userspace will save and attempt to restore a non-zero PERF_CAPABILITIES, thanks to KVM's meddling. Link: https://lore.kernel.org/r/20240802185511.305849-4-seanjc@google.com Signed-off-by: Sean Christopherson --- Documentation/virt/kvm/api.rst | 22 ++++++++++++++++++++++ arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/include/uapi/asm/kvm.h | 1 + arch/x86/kvm/svm/svm.c | 4 +++- arch/x86/kvm/vmx/vmx.c | 9 ++++++--- arch/x86/kvm/x86.c | 8 +++++--- 6 files changed, 39 insertions(+), 8 deletions(-) (limited to 'Documentation/virt/kvm') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index edc070c6e19b..061ec93d9ecb 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -8107,6 +8107,28 @@ KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM or moved memslot isn't reachable, i.e KVM _may_ invalidate only SPTEs related to the memslot. + +KVM_X86_QUIRK_STUFF_FEATURE_MSRS By default, at vCPU creation, KVM sets the + vCPU's MSR_IA32_PERF_CAPABILITIES (0x345), + MSR_IA32_ARCH_CAPABILITIES (0x10a), + MSR_PLATFORM_INFO (0xce), and all VMX MSRs + (0x480..0x492) to the maximal capabilities + supported by KVM. KVM also sets + MSR_IA32_UCODE_REV (0x8b) to an arbitrary + value (which is different for Intel vs. + AMD). Lastly, when guest CPUID is set (by + userspace), KVM modifies select VMX MSR + fields to force consistency between guest + CPUID and L2's effective ISA. When this + quirk is disabled, KVM zeroes the vCPU's MSR + values (with two exceptions, see below), + i.e. treats the feature MSRs like CPUID + leaves and gives userspace full control of + the vCPU model definition. This quirk does + not affect VMX MSRs CR0/CR4_FIXED1 (0x487 + and 0x489), as KVM does now allow them to + be set by userspace (KVM sets them based on + guest CPUID, for safety purposes). =================================== ============================================ 7.32 KVM_CAP_MAX_VCPU_ID diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3ae90df0a177..dbe0ea509642 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -2360,7 +2360,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages); KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \ KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \ KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \ - KVM_X86_QUIRK_SLOT_ZAP_ALL) + KVM_X86_QUIRK_SLOT_ZAP_ALL | \ + KVM_X86_QUIRK_STUFF_FEATURE_MSRS) /* * KVM previously used a u32 field in kvm_run to indicate the hypercall was diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index a8debbf2f702..88585c1de416 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -440,6 +440,7 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5) #define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6) #define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7) +#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8) #define KVM_STATE_NESTED_FORMAT_VMX 0 #define KVM_STATE_NESTED_FORMAT_SVM 1 diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 50f6b0e03d04..237e72b8a999 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1390,7 +1390,9 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu) svm_vcpu_init_msrpm(vcpu, svm->msrpm); svm_init_osvw(vcpu); - vcpu->arch.microcode_version = 0x01000065; + + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + vcpu->arch.microcode_version = 0x01000065; svm->tsc_ratio_msr = kvm_caps.default_tsc_scaling_ratio; svm->nmi_masked = false; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 115ec4617a5f..f0326927d4e1 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -4572,7 +4572,8 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control, * Update the nested MSR settings so that a nested VMM can/can't set * controls for features that are/aren't exposed to the guest. */ - if (nested) { + if (nested && + kvm_check_has_quirk(vmx->vcpu.kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) { /* * All features that can be added or removed to VMX MSRs must * be supported in the first place for nested virtualization. @@ -4862,7 +4863,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) init_vmcs(vmx); - if (nested) + if (nested && + kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs)); vcpu_setup_sgx_lepubkeyhash(vcpu); @@ -4875,7 +4877,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu) vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID; #endif - vcpu->arch.microcode_version = 0x100000000ULL; + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) + vcpu->arch.microcode_version = 0x100000000ULL; vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED; /* diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 16804637ba97..34a7359d2bf3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12314,9 +12314,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) kvm_async_pf_hash_reset(vcpu); - vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); - vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; - vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; + if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) { + vcpu->arch.arch_capabilities = kvm_get_arch_capabilities(); + vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT; + vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap; + } kvm_pmu_init(vcpu); vcpu->arch.pending_external_vector = -1; -- cgit v1.3.1