diff options
Diffstat (limited to 'arch/x86/kvm/x86.c')
-rw-r--r-- | arch/x86/kvm/x86.c | 248 |
1 files changed, 155 insertions, 93 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 88c593f83b28..12ea77f99ff3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -56,6 +56,7 @@ #include <linux/sched/stat.h> #include <linux/sched/isolation.h> #include <linux/mem_encrypt.h> +#include <linux/entry-kvm.h> #include <trace/events/kvm.h> @@ -187,6 +188,9 @@ static struct kvm_shared_msrs __percpu *shared_msrs; u64 __read_mostly host_efer; EXPORT_SYMBOL_GPL(host_efer); +bool __read_mostly allow_smaller_maxphyaddr; +EXPORT_SYMBOL_GPL(allow_smaller_maxphyaddr); + static u64 __read_mostly host_xss; u64 __read_mostly supported_xss; EXPORT_SYMBOL_GPL(supported_xss); @@ -243,6 +247,29 @@ static struct kmem_cache *x86_fpu_cache; static struct kmem_cache *x86_emulator_cache; +/* + * When called, it means the previous get/set msr reached an invalid msr. + * Return 0 if we want to ignore/silent this failed msr access, or 1 if we want + * to fail the caller. + */ +static int kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr, + u64 data, bool write) +{ + const char *op = write ? "wrmsr" : "rdmsr"; + + if (ignore_msrs) { + if (report_ignored_msrs) + vcpu_unimpl(vcpu, "ignored %s: 0x%x data 0x%llx\n", + op, msr, data); + /* Mask the error */ + return 0; + } else { + vcpu_debug_ratelimited(vcpu, "unhandled %s: 0x%x data 0x%llx\n", + op, msr, data); + return 1; + } +} + static struct kmem_cache *kvm_alloc_emulator_cache(void) { unsigned int useroffset = offsetof(struct x86_emulate_ctxt, src); @@ -379,7 +406,7 @@ int kvm_set_apic_base(struct kvm_vcpu *vcpu, struct msr_data *msr_info) } EXPORT_SYMBOL_GPL(kvm_set_apic_base); -asmlinkage __visible void kvm_spurious_fault(void) +asmlinkage __visible noinstr void kvm_spurious_fault(void) { /* Fault while not rebooting. We want the trace. */ BUG_ON(!kvm_rebooting); @@ -775,6 +802,7 @@ EXPORT_SYMBOL_GPL(pdptrs_changed); int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) { unsigned long old_cr0 = kvm_read_cr0(vcpu); + unsigned long pdptr_bits = X86_CR0_CD | X86_CR0_NW | X86_CR0_PG; unsigned long update_bits = X86_CR0_PG | X86_CR0_WP; cr0 |= X86_CR0_ET; @@ -792,9 +820,9 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) return 1; - if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { + if (cr0 & X86_CR0_PG) { #ifdef CONFIG_X86_64 - if ((vcpu->arch.efer & EFER_LME)) { + if (!is_paging(vcpu) && (vcpu->arch.efer & EFER_LME)) { int cs_db, cs_l; if (!is_pae(vcpu)) @@ -804,8 +832,8 @@ int kvm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) return 1; } else #endif - if (is_pae(vcpu) && !load_pdptrs(vcpu, vcpu->arch.walk_mmu, - kvm_read_cr3(vcpu))) + if (is_pae(vcpu) && ((cr0 ^ old_cr0) & pdptr_bits) && + !load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu))) return 1; } @@ -916,7 +944,7 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) vcpu->arch.xcr0 = xcr0; if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND) - kvm_update_cpuid(vcpu); + kvm_update_cpuid_runtime(vcpu); return 0; } @@ -931,37 +959,17 @@ int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr) } EXPORT_SYMBOL_GPL(kvm_set_xcr); -#define __cr4_reserved_bits(__cpu_has, __c) \ -({ \ - u64 __reserved_bits = CR4_RESERVED_BITS; \ - \ - if (!__cpu_has(__c, X86_FEATURE_XSAVE)) \ - __reserved_bits |= X86_CR4_OSXSAVE; \ - if (!__cpu_has(__c, X86_FEATURE_SMEP)) \ - __reserved_bits |= X86_CR4_SMEP; \ - if (!__cpu_has(__c, X86_FEATURE_SMAP)) \ - __reserved_bits |= X86_CR4_SMAP; \ - if (!__cpu_has(__c, X86_FEATURE_FSGSBASE)) \ - __reserved_bits |= X86_CR4_FSGSBASE; \ - if (!__cpu_has(__c, X86_FEATURE_PKU)) \ - __reserved_bits |= X86_CR4_PKE; \ - if (!__cpu_has(__c, X86_FEATURE_LA57)) \ - __reserved_bits |= X86_CR4_LA57; \ - if (!__cpu_has(__c, X86_FEATURE_UMIP)) \ - __reserved_bits |= X86_CR4_UMIP; \ - __reserved_bits; \ -}) - -static int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) +int kvm_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { if (cr4 & cr4_reserved_bits) return -EINVAL; - if (cr4 & __cr4_reserved_bits(guest_cpuid_has, vcpu)) + if (cr4 & vcpu->arch.cr4_guest_rsvd_bits) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(kvm_valid_cr4); int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) { @@ -1000,7 +1008,7 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) kvm_mmu_reset_context(vcpu); if ((cr4 ^ old_cr4) & (X86_CR4_OSXSAVE | X86_CR4_PKE)) - kvm_update_cpuid(vcpu); + kvm_update_cpuid_runtime(vcpu); return 0; } @@ -1110,7 +1118,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) case 4: /* fall through */ case 6: - if (val & 0xffffffff00000000ULL) + if (!kvm_dr6_valid(val)) return -1; /* #GP */ vcpu->arch.dr6 = (val & DR6_VOLATILE) | kvm_dr6_fixed(vcpu); break; @@ -1389,8 +1397,7 @@ static int kvm_get_msr_feature(struct kvm_msr_entry *msr) rdmsrl_safe(msr->index, &msr->data); break; default: - if (kvm_x86_ops.get_msr_feature(msr)) - return 1; + return kvm_x86_ops.get_msr_feature(msr); } return 0; } @@ -1402,6 +1409,13 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data) msr.index = index; r = kvm_get_msr_feature(&msr); + + if (r == KVM_MSR_RET_INVALID) { + /* Unconditionally clear the output for simplicity */ + *data = 0; + r = kvm_msr_ignored_check(vcpu, index, 0, false); + } + if (r) return r; @@ -1516,6 +1530,17 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, return kvm_x86_ops.set_msr(vcpu, &msr); } +static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu, + u32 index, u64 data, bool host_initiated) +{ + int ret = __kvm_set_msr(vcpu, index, data, host_initiated); + + if (ret == KVM_MSR_RET_INVALID) + ret = kvm_msr_ignored_check(vcpu, index, data, true); + + return ret; +} + /* * Read the MSR specified by @index into @data. Select MSR specific fault * checks are bypassed if @host_initiated is %true. @@ -1537,15 +1562,29 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data, return ret; } +static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu, + u32 index, u64 *data, bool host_initiated) +{ + int ret = __kvm_get_msr(vcpu, index, data, host_initiated); + + if (ret == KVM_MSR_RET_INVALID) { + /* Unconditionally clear *data for simplicity */ + *data = 0; + ret = kvm_msr_ignored_check(vcpu, index, 0, false); + } + + return ret; +} + int kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data) { - return __kvm_get_msr(vcpu, index, data, false); + return kvm_get_msr_ignored_check(vcpu, index, data, false); } EXPORT_SYMBOL_GPL(kvm_get_msr); int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data) { - return __kvm_set_msr(vcpu, index, data, false); + return kvm_set_msr_ignored_check(vcpu, index, data, false); } EXPORT_SYMBOL_GPL(kvm_set_msr); @@ -1587,7 +1626,7 @@ EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) { return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || - need_resched() || signal_pending(current); + xfer_to_guest_mode_work_pending(); } EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request); @@ -1665,12 +1704,12 @@ EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); */ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { - return __kvm_get_msr(vcpu, index, data, true); + return kvm_get_msr_ignored_check(vcpu, index, data, true); } static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { - return __kvm_set_msr(vcpu, index, *data, true); + return kvm_set_msr_ignored_check(vcpu, index, *data, true); } #ifdef CONFIG_X86_64 @@ -2822,6 +2861,20 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; vcpu->arch.arch_capabilities = data; break; + case MSR_IA32_PERF_CAPABILITIES: { + struct kvm_msr_entry msr_ent = {.index = msr, .data = 0}; + + if (!msr_info->host_initiated) + return 1; + if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) && kvm_get_msr_feature(&msr_ent)) + return 1; + if (data & ~msr_ent.data) + return 1; + + vcpu->arch.perf_capabilities = data; + + return 0; + } case MSR_EFER: return set_efer(vcpu, msr_info); case MSR_K7_HWCR: @@ -2881,7 +2934,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3)) return 1; vcpu->arch.ia32_misc_enable_msr = data; - kvm_update_cpuid(vcpu); + kvm_update_cpuid_runtime(vcpu); } else { vcpu->arch.ia32_misc_enable_msr = data; } @@ -3066,17 +3119,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return xen_hvm_config(vcpu, data); if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); - if (!ignore_msrs) { - vcpu_debug_ratelimited(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n", - msr, data); - return 1; - } else { - if (report_ignored_msrs) - vcpu_unimpl(vcpu, - "ignored wrmsr: 0x%x data 0x%llx\n", - msr, data); - break; - } + return KVM_MSR_RET_INVALID; } return 0; } @@ -3172,6 +3215,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; msr_info->data = vcpu->arch.arch_capabilities; break; + case MSR_IA32_PERF_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + return 1; + msr_info->data = vcpu->arch.perf_capabilities; + break; case MSR_IA32_POWER_CTL: msr_info->data = vcpu->arch.msr_ia32_power_ctl; break; @@ -3331,17 +3380,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) default: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) return kvm_pmu_get_msr(vcpu, msr_info); - if (!ignore_msrs) { - vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n", - msr_info->index); - return 1; - } else { - if (report_ignored_msrs) - vcpu_unimpl(vcpu, "ignored rdmsr: 0x%x\n", - msr_info->index); - msr_info->data = 0; - } - break; + return KVM_MSR_RET_INVALID; } return 0; } @@ -3476,6 +3515,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_MSR_PLATFORM_INFO: case KVM_CAP_EXCEPTION_PAYLOAD: case KVM_CAP_SET_GUEST_DEBUG: + case KVM_CAP_LAST_CPU: r = 1; break; case KVM_CAP_SYNC_REGS: @@ -3538,6 +3578,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: r = kvm_x86_ops.nested_ops->enable_evmcs != NULL; break; + case KVM_CAP_SMALLER_MAXPHYADDR: + r = (int) allow_smaller_maxphyaddr; + break; default: break; } @@ -8154,7 +8197,7 @@ static void enter_smm(struct kvm_vcpu *vcpu) kvm_x86_ops.set_efer(vcpu, 0); #endif - kvm_update_cpuid(vcpu); + kvm_update_cpuid_runtime(vcpu); kvm_mmu_reset_context(vcpu); } @@ -8506,7 +8549,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } trace_kvm_entry(vcpu->vcpu_id); - guest_enter_irqoff(); fpregs_assert_state_consistent(); if (test_thread_flag(TIF_NEED_FPU_LOAD)) @@ -8548,6 +8590,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (hw_breakpoint_active()) hw_breakpoint_restore(); + vcpu->arch.last_vmentry_cpu = vcpu->cpu; vcpu->arch.last_guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc()); vcpu->mode = OUTSIDE_GUEST_MODE; @@ -8568,7 +8611,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) local_irq_disable(); kvm_after_interrupt(vcpu); - guest_exit_irqoff(); if (lapic_in_kernel(vcpu)) { s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta; if (delta != S64_MIN) { @@ -8681,15 +8723,11 @@ static int vcpu_run(struct kvm_vcpu *vcpu) break; } - if (signal_pending(current)) { - r = -EINTR; - vcpu->run->exit_reason = KVM_EXIT_INTR; - ++vcpu->stat.signal_exits; - break; - } - if (need_resched()) { + if (__xfer_to_guest_mode_work_pending()) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - cond_resched(); + r = xfer_to_guest_mode_handle_work(vcpu); + if (r) + return r; vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); } } @@ -9177,7 +9215,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) (X86_CR4_OSXSAVE | X86_CR4_PKE)); kvm_x86_ops.set_cr4(vcpu, sregs->cr4); if (cpuid_update_needed) - kvm_update_cpuid(vcpu); + kvm_update_cpuid_runtime(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); if (is_pae_paging(vcpu)) { @@ -9281,7 +9319,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, */ kvm_set_rflags(vcpu, rflags); - kvm_x86_ops.update_bp_intercept(vcpu); + kvm_x86_ops.update_exception_bitmap(vcpu); r = 0; @@ -9479,7 +9517,6 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) fx_init(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); - vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu); vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; @@ -9932,7 +9969,7 @@ void kvm_arch_sync_events(struct kvm *kvm) int __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa, u32 size) { int i, r; - unsigned long hva, uninitialized_var(old_npages); + unsigned long hva, old_npages; struct kvm_memslots *slots = kvm_memslots(kvm); struct kvm_memory_slot *slot; @@ -10676,28 +10713,53 @@ bool kvm_arch_no_poll(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_arch_no_poll); -u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu) + +int kvm_spec_ctrl_test_value(u64 value) { - uint64_t bits = SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD; + /* + * test that setting IA32_SPEC_CTRL to given value + * is allowed by the host processor + */ + + u64 saved_value; + unsigned long flags; + int ret = 0; + + local_irq_save(flags); - /* The STIBP bit doesn't fault even if it's not advertised */ - if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) - bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP); - if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL) && - !boot_cpu_has(X86_FEATURE_AMD_IBRS)) - bits &= ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP); + if (rdmsrl_safe(MSR_IA32_SPEC_CTRL, &saved_value)) + ret = 1; + else if (wrmsrl_safe(MSR_IA32_SPEC_CTRL, value)) + ret = 1; + else + wrmsrl(MSR_IA32_SPEC_CTRL, saved_value); + + local_irq_restore(flags); - if (!guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL_SSBD) && - !guest_cpuid_has(vcpu, X86_FEATURE_AMD_SSBD)) - bits &= ~SPEC_CTRL_SSBD; - if (!boot_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) && - !boot_cpu_has(X86_FEATURE_AMD_SSBD)) - bits &= ~SPEC_CTRL_SSBD; + return ret; +} +EXPORT_SYMBOL_GPL(kvm_spec_ctrl_test_value); - return bits; +void kvm_fixup_and_inject_pf_error(struct kvm_vcpu *vcpu, gva_t gva, u16 error_code) +{ + struct x86_exception fault; + + if (!(error_code & PFERR_PRESENT_MASK) || + vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, error_code, &fault) != UNMAPPED_GVA) { + /* + * If vcpu->arch.walk_mmu->gva_to_gpa succeeded, the page + * tables probably do not match the TLB. Just proceed + * with the error code that the processor gave. + */ + fault.vector = PF_VECTOR; + fault.error_code_valid = true; + fault.error_code = error_code; + fault.nested_page_fault = false; + fault.address = gva; + } + vcpu->arch.walk_mmu->inject_page_fault(vcpu, &fault); } -EXPORT_SYMBOL_GPL(kvm_spec_ctrl_valid_bits); +EXPORT_SYMBOL_GPL(kvm_fixup_and_inject_pf_error); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit); EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio); |