diff options
Diffstat (limited to 'arch/x86/kvm/svm/svm.c')
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 106 |
1 files changed, 72 insertions, 34 deletions
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index e088086f3de6..8834822c00cd 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -43,6 +43,9 @@ #include "svm.h" #include "svm_ops.h" +#include "kvm_onhyperv.h" +#include "svm_onhyperv.h" + #define __ex(x) __kvm_handle_fault_on_reboot(x) MODULE_AUTHOR("Qumranet"); @@ -185,6 +188,13 @@ module_param(vls, int, 0444); static int vgif = true; module_param(vgif, int, 0444); +/* + * enable / disable AVIC. Because the defaults differ for APICv + * support between VMX and SVM we cannot use module_param_named. + */ +static bool avic; +module_param(avic, bool, 0444); + bool __read_mostly dump_invalid_vmcb; module_param(dump_invalid_vmcb, bool, 0644); @@ -673,6 +683,9 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm, write ? clear_bit(bit_write, &tmp) : set_bit(bit_write, &tmp); msrpm[offset] = tmp; + + svm_hv_vmcb_dirty_nested_enlightenments(vcpu); + } void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, @@ -939,6 +952,16 @@ static __init int svm_hardware_setup(void) int r; unsigned int order = get_order(IOPM_SIZE); + /* + * NX is required for shadow paging and for NPT if the NX huge pages + * mitigation is enabled. + */ + if (!boot_cpu_has(X86_FEATURE_NX)) { + pr_err_ratelimited("NX (Execute Disable) not supported\n"); + return -EOPNOTSUPP; + } + kvm_enable_efer_bits(EFER_NX); + iopm_pages = alloc_pages(GFP_KERNEL, order); if (!iopm_pages) @@ -952,9 +975,6 @@ static __init int svm_hardware_setup(void) supported_xcr0 &= ~(XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR); - if (boot_cpu_has(X86_FEATURE_NX)) - kvm_enable_efer_bits(EFER_NX); - if (boot_cpu_has(X86_FEATURE_FXSR_OPT)) kvm_enable_efer_bits(EFER_FFXSR); @@ -996,6 +1016,8 @@ static __init int svm_hardware_setup(void) /* Note, SEV setup consumes npt_enabled. */ sev_hardware_setup(); + svm_hv_hardware_setup(); + svm_adjust_mmio_mask(); for_each_possible_cpu(cpu) { @@ -1009,14 +1031,12 @@ static __init int svm_hardware_setup(void) nrips = false; } - if (avic) { - if (!npt_enabled || !boot_cpu_has(X86_FEATURE_AVIC)) { - avic = false; - } else { - pr_info("AVIC enabled\n"); + enable_apicv = avic = avic && npt_enabled && boot_cpu_has(X86_FEATURE_AVIC); - amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); - } + if (enable_apicv) { + pr_info("AVIC enabled\n"); + + amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); } if (vls) { @@ -1080,26 +1100,30 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +static u64 svm_get_l2_tsc_offset(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - u64 g_tsc_offset = 0; - if (is_guest_mode(vcpu)) { - /* Write L1's TSC offset. */ - g_tsc_offset = svm->vmcb->control.tsc_offset - - svm->vmcb01.ptr->control.tsc_offset; - svm->vmcb01.ptr->control.tsc_offset = offset; - } + return svm->nested.ctl.tsc_offset; +} - trace_kvm_write_tsc_offset(vcpu->vcpu_id, - svm->vmcb->control.tsc_offset - g_tsc_offset, - offset); +static u64 svm_get_l2_tsc_multiplier(struct kvm_vcpu *vcpu) +{ + return kvm_default_tsc_scaling_ratio; +} - svm->vmcb->control.tsc_offset = offset + g_tsc_offset; +static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +{ + struct vcpu_svm *svm = to_svm(vcpu); + svm->vmcb01.ptr->control.tsc_offset = vcpu->arch.l1_tsc_offset; + svm->vmcb->control.tsc_offset = offset; vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); - return svm->vmcb->control.tsc_offset; +} + +static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier) +{ + wrmsrl(MSR_AMD64_TSC_RATIO, multiplier); } /* Evaluate instruction intercepts that depend on guest CPUID features. */ @@ -1287,6 +1311,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu) } } + svm_hv_init_vmcb(svm->vmcb); + vmcb_mark_all_dirty(svm->vmcb); enable_gif(svm); @@ -3106,6 +3132,8 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) return; } + pr_err("VMCB %p, last attempted VMRUN on CPU %d\n", + svm->current_vmcb->ptr, vcpu->arch.last_vmentry_cpu); pr_err("VMCB Control Area:\n"); pr_err("%-20s%04x\n", "cr_read:", control->intercepts[INTERCEPT_CR] & 0xffff); pr_err("%-20s%04x\n", "cr_write:", control->intercepts[INTERCEPT_CR] >> 16); @@ -3762,6 +3790,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) } svm->vmcb->save.cr2 = vcpu->arch.cr2; + svm_hv_update_vp_id(svm->vmcb, vcpu); + /* * Run with all-zero DR6 unless needed, so that we can get the exact cause * of a #DB. @@ -3835,6 +3865,12 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) svm->next_rip = 0; if (is_guest_mode(vcpu)) { nested_sync_control_from_vmcb02(svm); + + /* Track VMRUNs that have made past consistency checking */ + if (svm->nested.nested_run_pending && + svm->vmcb->control.exit_code != SVM_EXIT_ERR) + ++vcpu->stat.nested_run; + svm->nested.nested_run_pending = 0; } @@ -3846,10 +3882,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) vcpu->arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags(); - if (npt_enabled) { - vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); - vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR); - } + if (npt_enabled) + kvm_register_clear_available(vcpu, VCPU_EXREG_PDPTR); /* * We need to handle MC intercepts here before the vcpu has a chance to @@ -3877,6 +3911,8 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, hpa_t root_hpa, svm->vmcb->control.nested_cr3 = __sme_set(root_hpa); vmcb_mark_dirty(svm->vmcb, VMCB_NPT); + hv_track_root_tdp(vcpu, root_hpa); + /* Loading L2's CR3 is handled by enter_svm_guest_mode. */ if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) return; @@ -4249,7 +4285,7 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) return !svm_smi_blocked(vcpu); } -static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) +static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); int ret; @@ -4271,7 +4307,7 @@ static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) return 0; } -static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) +static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_host_map map; @@ -4427,13 +4463,12 @@ static int svm_vm_init(struct kvm *kvm) if (!pause_filter_count || !pause_filter_thresh) kvm->arch.pause_in_guest = true; - if (avic) { + if (enable_apicv) { int ret = avic_vm_init(kvm); if (ret) return ret; } - kvm_apicv_init(kvm, avic); return 0; } @@ -4524,7 +4559,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .has_wbinvd_exit = svm_has_wbinvd_exit, - .write_l1_tsc_offset = svm_write_l1_tsc_offset, + .get_l2_tsc_offset = svm_get_l2_tsc_offset, + .get_l2_tsc_multiplier = svm_get_l2_tsc_multiplier, + .write_tsc_offset = svm_write_tsc_offset, + .write_tsc_multiplier = svm_write_tsc_multiplier, .load_mmu_pgd = svm_load_mmu_pgd, @@ -4544,8 +4582,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .setup_mce = svm_setup_mce, .smi_allowed = svm_smi_allowed, - .pre_enter_smm = svm_pre_enter_smm, - .pre_leave_smm = svm_pre_leave_smm, + .enter_smm = svm_enter_smm, + .leave_smm = svm_leave_smm, .enable_smi_window = svm_enable_smi_window, .mem_enc_op = svm_mem_enc_op, |