diff options
Diffstat (limited to 'arch/x86/kvm/svm')
-rw-r--r-- | arch/x86/kvm/svm/avic.c | 181 | ||||
-rw-r--r-- | arch/x86/kvm/svm/hyperv.h | 35 | ||||
-rw-r--r-- | arch/x86/kvm/svm/nested.c | 51 | ||||
-rw-r--r-- | arch/x86/kvm/svm/pmu.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/sev.c | 121 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 134 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.h | 69 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm_onhyperv.h | 25 |
8 files changed, 335 insertions, 283 deletions
diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c index fb3e20791338..b37b353ec086 100644 --- a/arch/x86/kvm/svm/avic.c +++ b/arch/x86/kvm/svm/avic.c @@ -324,18 +324,18 @@ int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu) switch (id) { case AVIC_IPI_FAILURE_INVALID_INT_TYPE: /* - * AVIC hardware handles the generation of - * IPIs when the specified Message Type is Fixed - * (also known as fixed delivery mode) and - * the Trigger Mode is edge-triggered. The hardware - * also supports self and broadcast delivery modes - * specified via the Destination Shorthand(DSH) - * field of the ICRL. Logical and physical APIC ID - * formats are supported. All other IPI types cause - * a #VMEXIT, which needs to emulated. + * Emulate IPIs that are not handled by AVIC hardware, which + * only virtualizes Fixed, Edge-Triggered INTRs. The exit is + * a trap, e.g. ICR holds the correct value and RIP has been + * advanced, KVM is responsible only for emulating the IPI. + * Sadly, hardware may sometimes leave the BUSY flag set, in + * which case KVM needs to emulate the ICR write as well in + * order to clear the BUSY flag. */ - kvm_lapic_reg_write(apic, APIC_ICR2, icrh); - kvm_lapic_reg_write(apic, APIC_ICR, icrl); + if (icrl & APIC_ICR_BUSY) + kvm_apic_write_nodecode(vcpu, APIC_ICR); + else + kvm_apic_send_ipi(apic, icrl, icrh); break; case AVIC_IPI_FAILURE_TARGET_NOT_RUNNING: /* @@ -477,30 +477,28 @@ static void avic_handle_dfr_update(struct kvm_vcpu *vcpu) svm->dfr_reg = dfr; } -static int avic_unaccel_trap_write(struct vcpu_svm *svm) +static int avic_unaccel_trap_write(struct kvm_vcpu *vcpu) { - struct kvm_lapic *apic = svm->vcpu.arch.apic; - u32 offset = svm->vmcb->control.exit_info_1 & + u32 offset = to_svm(vcpu)->vmcb->control.exit_info_1 & AVIC_UNACCEL_ACCESS_OFFSET_MASK; switch (offset) { case APIC_ID: - if (avic_handle_apic_id_update(&svm->vcpu)) + if (avic_handle_apic_id_update(vcpu)) return 0; break; case APIC_LDR: - if (avic_handle_ldr_update(&svm->vcpu)) + if (avic_handle_ldr_update(vcpu)) return 0; break; case APIC_DFR: - avic_handle_dfr_update(&svm->vcpu); + avic_handle_dfr_update(vcpu); break; default: break; } - kvm_lapic_reg_write(apic, offset, kvm_lapic_get_reg(apic, offset)); - + kvm_apic_write_nodecode(vcpu, offset); return 1; } @@ -550,7 +548,7 @@ int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu) if (trap) { /* Handling Trap */ WARN_ONCE(!write, "svm: Handling trap read.\n"); - ret = avic_unaccel_trap_write(svm); + ret = avic_unaccel_trap_write(vcpu); } else { /* Handling Fault */ ret = kvm_emulate_instruction(vcpu, 0); @@ -578,7 +576,7 @@ int avic_init_vcpu(struct vcpu_svm *svm) return ret; } -void avic_post_state_restore(struct kvm_vcpu *vcpu) +void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu) { if (avic_handle_apic_id_update(vcpu) != 0) return; @@ -586,20 +584,7 @@ void avic_post_state_restore(struct kvm_vcpu *vcpu) avic_handle_ldr_update(vcpu); } -void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu) -{ - return; -} - -void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) -{ -} - -void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) -{ -} - -static int svm_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate) +static int avic_set_pi_irte_mode(struct kvm_vcpu *vcpu, bool activate) { int ret = 0; unsigned long flags; @@ -631,48 +616,6 @@ out: return ret; } -void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) -{ - struct vcpu_svm *svm = to_svm(vcpu); - struct vmcb *vmcb = svm->vmcb01.ptr; - bool activated = kvm_vcpu_apicv_active(vcpu); - - if (!enable_apicv) - return; - - if (activated) { - /** - * During AVIC temporary deactivation, guest could update - * APIC ID, DFR and LDR registers, which would not be trapped - * by avic_unaccelerated_access_interception(). In this case, - * we need to check and update the AVIC logical APIC ID table - * accordingly before re-activating. - */ - avic_post_state_restore(vcpu); - vmcb->control.int_ctl |= AVIC_ENABLE_MASK; - } else { - vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; - } - vmcb_mark_dirty(vmcb, VMCB_AVIC); - - if (activated) - avic_vcpu_load(vcpu, vcpu->cpu); - else - avic_vcpu_put(vcpu); - - svm_set_pi_irte_mode(vcpu, activated); -} - -void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) -{ - return; -} - -bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu) -{ - return false; -} - static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) { unsigned long flags; @@ -770,7 +713,7 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, } /* - * svm_update_pi_irte - set IRTE for Posted-Interrupts + * avic_pi_update_irte - set IRTE for Posted-Interrupts * * @kvm: kvm * @host_irq: host irq of the interrupt @@ -778,8 +721,8 @@ get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, * @set: set or unset PI * returns 0 on success, < 0 on failure */ -int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set) +int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) { struct kvm_kernel_irq_routing_entry *e; struct kvm_irq_routing_table *irq_rt; @@ -879,7 +822,7 @@ out: return ret; } -bool svm_check_apicv_inhibit_reasons(ulong bit) +bool avic_check_apicv_inhibit_reasons(ulong bit) { ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) | BIT(APICV_INHIBIT_REASON_ABSENT) | @@ -924,20 +867,15 @@ out: return ret; } -void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { u64 entry; - /* ID = 0xff (broadcast), ID > 0xff (reserved) */ int h_physical_id = kvm_cpu_get_apicid(cpu); struct vcpu_svm *svm = to_svm(vcpu); lockdep_assert_preemption_disabled(); - /* - * Since the host physical APIC id is 8 bits, - * we can support host APIC ID upto 255. - */ - if (WARN_ON(h_physical_id > AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) + if (WARN_ON(h_physical_id & ~AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK)) return; /* @@ -961,7 +899,7 @@ void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu) avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true); } -void avic_vcpu_put(struct kvm_vcpu *vcpu) +void __avic_vcpu_put(struct kvm_vcpu *vcpu) { u64 entry; struct vcpu_svm *svm = to_svm(vcpu); @@ -980,13 +918,63 @@ void avic_vcpu_put(struct kvm_vcpu *vcpu) WRITE_ONCE(*(svm->avic_physical_id_cache), entry); } +static void avic_vcpu_load(struct kvm_vcpu *vcpu) +{ + int cpu = get_cpu(); + + WARN_ON(cpu != vcpu->cpu); + + __avic_vcpu_load(vcpu, cpu); + + put_cpu(); +} + +static void avic_vcpu_put(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + + __avic_vcpu_put(vcpu); + + preempt_enable(); +} + +void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *vmcb = svm->vmcb01.ptr; + bool activated = kvm_vcpu_apicv_active(vcpu); + + if (!enable_apicv) + return; + + if (activated) { + /** + * During AVIC temporary deactivation, guest could update + * APIC ID, DFR and LDR registers, which would not be trapped + * by avic_unaccelerated_access_interception(). In this case, + * we need to check and update the AVIC logical APIC ID table + * accordingly before re-activating. + */ + avic_apicv_post_state_restore(vcpu); + vmcb->control.int_ctl |= AVIC_ENABLE_MASK; + } else { + vmcb->control.int_ctl &= ~AVIC_ENABLE_MASK; + } + vmcb_mark_dirty(vmcb, VMCB_AVIC); + + if (activated) + avic_vcpu_load(vcpu); + else + avic_vcpu_put(vcpu); + + avic_set_pi_irte_mode(vcpu, activated); +} + void avic_vcpu_blocking(struct kvm_vcpu *vcpu) { if (!kvm_vcpu_apicv_active(vcpu)) return; - preempt_disable(); - /* * Unload the AVIC when the vCPU is about to block, _before_ * the vCPU actually blocks. @@ -1001,21 +989,12 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu) * the cause of errata #1235). */ avic_vcpu_put(vcpu); - - preempt_enable(); } void avic_vcpu_unblocking(struct kvm_vcpu *vcpu) { - int cpu; - if (!kvm_vcpu_apicv_active(vcpu)) return; - cpu = get_cpu(); - WARN_ON(cpu != vcpu->cpu); - - avic_vcpu_load(vcpu, cpu); - - put_cpu(); + avic_vcpu_load(vcpu); } diff --git a/arch/x86/kvm/svm/hyperv.h b/arch/x86/kvm/svm/hyperv.h new file mode 100644 index 000000000000..7d6d97968fb9 --- /dev/null +++ b/arch/x86/kvm/svm/hyperv.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Common Hyper-V on KVM and KVM on Hyper-V definitions (SVM). + */ + +#ifndef __ARCH_X86_KVM_SVM_HYPERV_H__ +#define __ARCH_X86_KVM_SVM_HYPERV_H__ + +#include <asm/mshyperv.h> + +#include "../hyperv.h" + +/* + * Hyper-V uses the software reserved 32 bytes in VMCB + * control area to expose SVM enlightenments to guests. + */ +struct hv_enlightenments { + struct __packed hv_enlightenments_control { + u32 nested_flush_hypercall:1; + u32 msr_bitmap:1; + u32 enlightened_npt_tlb: 1; + u32 reserved:29; + } __packed hv_enlightenments_control; + u32 hv_vp_id; + u64 hv_vm_id; + u64 partition_assist_page; + u64 reserved; +} __packed; + +/* + * Hyper-V uses the software reserved clean bit in VMCB + */ +#define VMCB_HV_NESTED_ENLIGHTENMENTS VMCB_SW + +#endif /* __ARCH_X86_KVM_SVM_HYPERV_H__ */ diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 39d280e7e80e..96bab464967f 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -28,6 +28,7 @@ #include "cpuid.h" #include "lapic.h" #include "svm.h" +#include "hyperv.h" #define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK @@ -165,14 +166,30 @@ void recalc_intercepts(struct vcpu_svm *svm) vmcb_set_intercept(c, INTERCEPT_VMSAVE); } +/* + * Merge L0's (KVM) and L1's (Nested VMCB) MSR permission bitmaps. The function + * is optimized in that it only merges the parts where KVM MSR permission bitmap + * may contain zero bits. + */ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) { + struct hv_enlightenments *hve = + (struct hv_enlightenments *)svm->nested.ctl.reserved_sw; + int i; + /* - * This function merges the msr permission bitmaps of kvm and the - * nested vmcb. It is optimized in that it only merges the parts where - * the kvm msr permission bitmap may contain zero bits + * MSR bitmap update can be skipped when: + * - MSR bitmap for L1 hasn't changed. + * - Nested hypervisor (L1) is attempting to launch the same L2 as + * before. + * - Nested hypervisor (L1) is using Hyper-V emulation interface and + * tells KVM (L0) there were no changes in MSR bitmap for L2. */ - int i; + if (!svm->nested.force_msr_bitmap_recalc && + kvm_hv_hypercall_enabled(&svm->vcpu) && + hve->hv_enlightenments_control.msr_bitmap && + (svm->nested.ctl.clean & BIT(VMCB_HV_NESTED_ENLIGHTENMENTS))) + goto set_msrpm_base_pa; if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))) return true; @@ -193,6 +210,9 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) svm->nested.msrpm[p] = svm->msrpm[p] | value; } + svm->nested.force_msr_bitmap_recalc = false; + +set_msrpm_base_pa: svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm)); return true; @@ -298,7 +318,8 @@ static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu) } static -void __nested_copy_vmcb_control_to_cache(struct vmcb_ctrl_area_cached *to, +void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu, + struct vmcb_ctrl_area_cached *to, struct vmcb_control_area *from) { unsigned int i; @@ -331,12 +352,19 @@ void __nested_copy_vmcb_control_to_cache(struct vmcb_ctrl_area_cached *to, to->asid = from->asid; to->msrpm_base_pa &= ~0x0fffULL; to->iopm_base_pa &= ~0x0fffULL; + + /* Hyper-V extensions (Enlightened VMCB) */ + if (kvm_hv_hypercall_enabled(vcpu)) { + to->clean = from->clean; + memcpy(to->reserved_sw, from->reserved_sw, + sizeof(struct hv_enlightenments)); + } } void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm, struct vmcb_control_area *control) { - __nested_copy_vmcb_control_to_cache(&svm->nested.ctl, control); + __nested_copy_vmcb_control_to_cache(&svm->vcpu, &svm->nested.ctl, control); } static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to, @@ -464,14 +492,14 @@ static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, CC(!load_pdptrs(vcpu, cr3))) return -EINVAL; - if (!nested_npt) - kvm_mmu_new_pgd(vcpu, cr3); - vcpu->arch.cr3 = cr3; /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */ kvm_init_mmu(vcpu); + if (!nested_npt) + kvm_mmu_new_pgd(vcpu, cr3); + return 0; } @@ -494,6 +522,7 @@ static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12 if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) { new_vmcb12 = true; svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa; + svm->nested.force_msr_bitmap_recalc = true; } if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) { @@ -1302,6 +1331,7 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst, dst->virt_ext = from->virt_ext; dst->pause_filter_count = from->pause_filter_count; dst->pause_filter_thresh = from->pause_filter_thresh; + /* 'clean' and 'reserved_sw' are not changed by KVM */ } static int svm_get_nested_state(struct kvm_vcpu *vcpu, @@ -1434,7 +1464,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, goto out_free; ret = -EINVAL; - __nested_copy_vmcb_control_to_cache(&ctl_cached, ctl); + __nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl); if (!__nested_vmcb_check_controls(vcpu, &ctl_cached)) goto out_free; @@ -1495,6 +1525,7 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu, if (WARN_ON_ONCE(ret)) goto out_free; + svm->nested.force_msr_bitmap_recalc = true; kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); ret = 0; diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index 5aa45f13b16d..d4de52409335 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -101,7 +101,7 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr, { struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu); - if (!enable_pmu) + if (!vcpu->kvm->arch.enable_pmu) return NULL; switch (msr) { diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 17b53457d866..75fa6dd268f0 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -258,6 +258,7 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) goto e_free; INIT_LIST_HEAD(&sev->regions_list); + INIT_LIST_HEAD(&sev->mirror_vms); return 0; @@ -1623,9 +1624,12 @@ static void sev_unlock_vcpus_for_migration(struct kvm *kvm) } } -static void sev_migrate_from(struct kvm_sev_info *dst, - struct kvm_sev_info *src) +static void sev_migrate_from(struct kvm *dst_kvm, struct kvm *src_kvm) { + struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info; + struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info; + struct kvm_sev_info *mirror; + dst->active = true; dst->asid = src->asid; dst->handle = src->handle; @@ -1639,6 +1643,30 @@ static void sev_migrate_from(struct kvm_sev_info *dst, src->enc_context_owner = NULL; list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list); + + /* + * If this VM has mirrors, "transfer" each mirror's refcount of the + * source to the destination (this KVM). The caller holds a reference + * to the source, so there's no danger of use-after-free. + */ + list_cut_before(&dst->mirror_vms, &src->mirror_vms, &src->mirror_vms); + list_for_each_entry(mirror, &dst->mirror_vms, mirror_entry) { + kvm_get_kvm(dst_kvm); + kvm_put_kvm(src_kvm); + mirror->enc_context_owner = dst_kvm; + } + + /* + * If this VM is a mirror, remove the old mirror from the owners list + * and add the new mirror to the list. + */ + if (is_mirroring_enc_context(dst_kvm)) { + struct kvm_sev_info *owner_sev_info = + &to_kvm_svm(dst->enc_context_owner)->sev_info; + + list_del(&src->mirror_entry); + list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms); + } } static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) @@ -1681,7 +1709,7 @@ static int sev_es_migrate_from(struct kvm *dst, struct kvm *src) return 0; } -int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd) +int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd) { struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info; struct kvm_sev_info *src_sev, *cg_cleanup_sev; @@ -1708,15 +1736,6 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd) src_sev = &to_kvm_svm(source_kvm)->sev_info; - /* - * VMs mirroring src's encryption context rely on it to keep the - * ASID allocated, but below we are clearing src_sev->asid. - */ - if (src_sev->num_mirrored_vms) { - ret = -EBUSY; - goto out_unlock; - } - dst_sev->misc_cg = get_current_misc_cg(); cg_cleanup_sev = dst_sev; if (dst_sev->misc_cg != src_sev->misc_cg) { @@ -1738,7 +1757,8 @@ int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd) if (ret) goto out_source_vcpu; } - sev_migrate_from(dst_sev, src_sev); + + sev_migrate_from(kvm, source_kvm); kvm_vm_dead(source_kvm); cg_cleanup_sev = src_sev; ret = 0; @@ -1761,7 +1781,7 @@ out_fput: return ret; } -int svm_mem_enc_op(struct kvm *kvm, void __user *argp) +int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp) { struct kvm_sev_cmd sev_cmd; int r; @@ -1858,8 +1878,8 @@ out: return r; } -int svm_register_enc_region(struct kvm *kvm, - struct kvm_enc_region *range) +int sev_mem_enc_register_region(struct kvm *kvm, + struct kvm_enc_region *range) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct enc_region *region; @@ -1932,8 +1952,8 @@ static void __unregister_enc_region_locked(struct kvm *kvm, kfree(region); } -int svm_unregister_enc_region(struct kvm *kvm, - struct kvm_enc_region *range) +int sev_mem_enc_unregister_region(struct kvm *kvm, + struct kvm_enc_region *range) { struct enc_region *region; int ret; @@ -1972,7 +1992,7 @@ failed: return ret; } -int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd) +int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd) { struct file *source_kvm_file; struct kvm *source_kvm; @@ -2008,10 +2028,10 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd) */ source_sev = &to_kvm_svm(source_kvm)->sev_info; kvm_get_kvm(source_kvm); - source_sev->num_mirrored_vms++; + mirror_sev = &to_kvm_svm(kvm)->sev_info; + list_add_tail(&mirror_sev->mirror_entry, &source_sev->mirror_vms); /* Set enc_context_owner and copy its encryption context over */ - mirror_sev = &to_kvm_svm(kvm)->sev_info; mirror_sev->enc_context_owner = source_kvm; mirror_sev->active = true; mirror_sev->asid = source_sev->asid; @@ -2019,6 +2039,7 @@ int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd) mirror_sev->es_active = source_sev->es_active; mirror_sev->handle = source_sev->handle; INIT_LIST_HEAD(&mirror_sev->regions_list); + INIT_LIST_HEAD(&mirror_sev->mirror_vms); ret = 0; /* @@ -2041,19 +2062,17 @@ void sev_vm_destroy(struct kvm *kvm) struct list_head *head = &sev->regions_list; struct list_head *pos, *q; - WARN_ON(sev->num_mirrored_vms); - if (!sev_guest(kvm)) return; + WARN_ON(!list_empty(&sev->mirror_vms)); + /* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */ if (is_mirroring_enc_context(kvm)) { struct kvm *owner_kvm = sev->enc_context_owner; - struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info; mutex_lock(&owner_kvm->lock); - if (!WARN_ON(!owner_sev->num_mirrored_vms)) - owner_sev->num_mirrored_vms--; + list_del(&sev->mirror_entry); mutex_unlock(&owner_kvm->lock); kvm_put_kvm(owner_kvm); return; @@ -2173,7 +2192,7 @@ out: #endif } -void sev_hardware_teardown(void) +void sev_hardware_unsetup(void) { if (!sev_enabled) return; @@ -2358,7 +2377,7 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } -static bool sev_es_validate_vmgexit(struct vcpu_svm *svm) +static int sev_es_validate_vmgexit(struct vcpu_svm *svm) { struct kvm_vcpu *vcpu; struct ghcb *ghcb; @@ -2463,7 +2482,7 @@ static bool sev_es_validate_vmgexit(struct vcpu_svm *svm) goto vmgexit_err; } - return true; + return 0; vmgexit_err: vcpu = &svm->vcpu; @@ -2486,7 +2505,8 @@ vmgexit_err: ghcb_set_sw_exit_info_1(ghcb, 2); ghcb_set_sw_exit_info_2(ghcb, reason); - return false; + /* Resume the guest to "return" the error code. */ + return 1; } void sev_es_unmap_ghcb(struct vcpu_svm *svm) @@ -2545,7 +2565,7 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) } #define GHCB_SCRATCH_AREA_LIMIT (16ULL * PAGE_SIZE) -static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) +static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) { struct vmcb_control_area *control = &svm->vmcb->control; struct ghcb *ghcb = svm->sev_es.ghcb; @@ -2598,14 +2618,14 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) } scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT); if (!scratch_va) - goto e_scratch; + return -ENOMEM; if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) { /* Unable to copy scratch area from guest */ pr_err("vmgexit: kvm_read_guest for scratch area failed\n"); kvfree(scratch_va); - goto e_scratch; + return -EFAULT; } /* @@ -2621,13 +2641,13 @@ static bool setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) svm->sev_es.ghcb_sa = scratch_va; svm->sev_es.ghcb_sa_len = len; - return true; + return 0; e_scratch: ghcb_set_sw_exit_info_1(ghcb, 2); ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA); - return false; + return 1; } static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask, @@ -2765,17 +2785,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) exit_code = ghcb_get_sw_exit_code(ghcb); - if (!sev_es_validate_vmgexit(svm)) - return 1; + ret = sev_es_validate_vmgexit(svm); + if (ret) + return ret; sev_es_sync_from_ghcb(svm); ghcb_set_sw_exit_info_1(ghcb, 0); ghcb_set_sw_exit_info_2(ghcb, 0); - ret = 1; switch (exit_code) { case SVM_VMGEXIT_MMIO_READ: - if (!setup_vmgexit_scratch(svm, true, control->exit_info_2)) + ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); + if (ret) break; ret = kvm_sev_es_mmio_read(vcpu, @@ -2784,7 +2805,8 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) svm->sev_es.ghcb_sa); break; case SVM_VMGEXIT_MMIO_WRITE: - if (!setup_vmgexit_scratch(svm, false, control->exit_info_2)) + ret = setup_vmgexit_scratch(svm, false, control->exit_info_2); + if (ret) break; ret = kvm_sev_es_mmio_write(vcpu, @@ -2817,6 +2839,7 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT); } + ret = 1; break; } case SVM_VMGEXIT_UNSUPPORTED_EVENT: @@ -2836,6 +2859,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) { int count; int bytes; + int r; if (svm->vmcb->control.exit_info_2 > INT_MAX) return -EINVAL; @@ -2844,8 +2868,9 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in) if (unlikely(check_mul_overflow(count, size, &bytes))) return -EINVAL; - if (!setup_vmgexit_scratch(svm, in, bytes)) - return 1; + r = setup_vmgexit_scratch(svm, in, bytes); + if (r) + return r; return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa, count, in); @@ -2907,20 +2932,16 @@ void sev_es_vcpu_reset(struct vcpu_svm *svm) sev_enc_bit)); } -void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu) +void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa) { - struct svm_cpu_data *sd = per_cpu(svm_data, cpu); - struct vmcb_save_area *hostsa; - /* * As an SEV-ES guest, hardware will restore the host state on VMEXIT, - * of which one step is to perform a VMLOAD. Since hardware does not - * perform a VMSAVE on VMRUN, the host savearea must be updated. + * of which one step is to perform a VMLOAD. KVM performs the + * corresponding VMSAVE in svm_prepare_guest_switch for both + * traditional and SEV-ES guests. */ - vmsave(__sme_page_pa(sd->save_area)); /* XCR0 is restored on VMEXIT, save the current host value */ - hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400); hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); /* PKRU is restored on VMEXIT, save the current host value */ diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index fd3a00c892c7..0884c3414a1b 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -263,7 +263,7 @@ u32 svm_msrpm_offset(u32 msr) return MSR_INVALID; } -#define MAX_INST_SIZE 15 +static void svm_flush_tlb_current(struct kvm_vcpu *vcpu); static int get_npt_level(void) { @@ -353,7 +353,7 @@ static void svm_set_interrupt_shadow(struct kvm_vcpu *vcpu, int mask) } -static int skip_emulated_instruction(struct kvm_vcpu *vcpu) +static int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -401,7 +401,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) * raises a fault that is not intercepted. Still better than * failing in all cases. */ - (void)skip_emulated_instruction(vcpu); + (void)svm_skip_emulated_instruction(vcpu); rip = kvm_rip_read(vcpu); svm->int3_rip = rip + svm->vmcb->save.cs.base; svm->int3_injected = rip - old_rip; @@ -668,6 +668,7 @@ static bool msr_write_intercepted(struct kvm_vcpu *vcpu, u32 msr) static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, int read, int write) { + struct vcpu_svm *svm = to_svm(vcpu); u8 bit_read, bit_write; unsigned long tmp; u32 offset; @@ -698,7 +699,7 @@ static void set_msr_interception_bitmap(struct kvm_vcpu *vcpu, u32 *msrpm, msrpm[offset] = tmp; svm_hv_vmcb_dirty_nested_enlightenments(vcpu); - + svm->nested.force_msr_bitmap_recalc = true; } void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr, @@ -873,11 +874,11 @@ static void shrink_ple_window(struct kvm_vcpu *vcpu) } } -static void svm_hardware_teardown(void) +static void svm_hardware_unsetup(void) { int cpu; - sev_hardware_teardown(); + sev_hardware_unsetup(); for_each_possible_cpu(cpu) svm_cpu_uninit(cpu); @@ -1175,7 +1176,7 @@ void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb) svm->vmcb = target_vmcb->ptr; } -static int svm_create_vcpu(struct kvm_vcpu *vcpu) +static int svm_vcpu_create(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm; struct page *vmcb01_page; @@ -1246,7 +1247,7 @@ static void svm_clear_current_vmcb(struct vmcb *vmcb) cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); } -static void svm_free_vcpu(struct kvm_vcpu *vcpu) +static void svm_vcpu_free(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1265,7 +1266,7 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE)); } -static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) +static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu); @@ -1280,10 +1281,12 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu) * Save additional host state that will be restored on VMEXIT (sev-es) * or subsequent vmload of host save area. */ + vmsave(__sme_page_pa(sd->save_area)); if (sev_es_guest(vcpu->kvm)) { - sev_es_prepare_guest_switch(svm, vcpu->cpu); - } else { - vmsave(__sme_page_pa(sd->save_area)); + struct vmcb_save_area *hostsa; + hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400); + + sev_es_prepare_switch_to_guest(hostsa); } if (tsc_scaling) { @@ -1315,13 +1318,13 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) indirect_branch_prediction_barrier(); } if (kvm_vcpu_apicv_active(vcpu)) - avic_vcpu_load(vcpu, cpu); + __avic_vcpu_load(vcpu, cpu); } static void svm_vcpu_put(struct kvm_vcpu *vcpu) { if (kvm_vcpu_apicv_active(vcpu)) - avic_vcpu_put(vcpu); + __avic_vcpu_put(vcpu); svm_prepare_host_switch(vcpu); @@ -1529,6 +1532,15 @@ static int svm_get_cpl(struct kvm_vcpu *vcpu) return save->cpl; } +static void svm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l) +{ + struct kvm_segment cs; + + svm_get_segment(vcpu, &cs, VCPU_SREG_CS); + *db = cs.db; + *l = cs.l; +} + static void svm_get_idt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1563,7 +1575,7 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) vmcb_mark_dirty(svm->vmcb, VMCB_DT); } -static void svm_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) +static void sev_post_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1647,7 +1659,7 @@ void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) unsigned long old_cr4 = vcpu->arch.cr4; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) - svm_flush_tlb(vcpu); + svm_flush_tlb_current(vcpu); vcpu->arch.cr4 = cr4; if (!npt_enabled) { @@ -2269,7 +2281,7 @@ static int task_switch_interception(struct kvm_vcpu *vcpu) int_type == SVM_EXITINTINFO_TYPE_SOFT || (int_type == SVM_EXITINTINFO_TYPE_EXEPT && (int_vec == OF_VECTOR || int_vec == BP_VECTOR))) { - if (!skip_emulated_instruction(vcpu)) + if (!svm_skip_emulated_instruction(vcpu)) return 0; } @@ -3149,7 +3161,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu) "excp_to:", save->last_excp_to); } -static bool svm_check_exit_valid(struct kvm_vcpu *vcpu, u64 exit_code) +static bool svm_check_exit_valid(u64 exit_code) { return (exit_code < ARRAY_SIZE(svm_exit_handlers) && svm_exit_handlers[exit_code]); @@ -3169,7 +3181,7 @@ static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code) int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code) { - if (!svm_check_exit_valid(vcpu, exit_code)) + if (!svm_check_exit_valid(exit_code)) return svm_handle_invalid_exit(vcpu, exit_code); #ifdef CONFIG_RETPOLINE @@ -3204,7 +3216,7 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u32 *reason, *error_code = 0; } -static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) +static int svm_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_run *kvm_run = vcpu->run; @@ -3301,7 +3313,7 @@ static void svm_inject_nmi(struct kvm_vcpu *vcpu) ++vcpu->stat.nmi_injections; } -static void svm_set_irq(struct kvm_vcpu *vcpu) +static void svm_inject_irq(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3531,17 +3543,7 @@ static void svm_enable_nmi_window(struct kvm_vcpu *vcpu) svm->vmcb->save.rflags |= (X86_EFLAGS_TF | X86_EFLAGS_RF); } -static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr) -{ - return 0; -} - -static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) -{ - return 0; -} - -void svm_flush_tlb(struct kvm_vcpu *vcpu) +static void svm_flush_tlb_current(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3915,11 +3917,6 @@ static int __init svm_check_processor_compat(void) return 0; } -static bool svm_cpu_has_accelerated_tpr(void) -{ - return false; -} - /* * The kvm parameter can be NULL (module initialization, or invocation before * VM creation). Be sure to check the kvm parameter before using it. @@ -4254,7 +4251,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) * by 0x400 (matches the offset of 'struct vmcb_save_area' * within 'struct vmcb'). Note: HSAVE area may also be used by * L1 hypervisor to save additional host context (e.g. KVM does - * that, see svm_prepare_guest_switch()) which must be + * that, see svm_prepare_switch_to_guest()) which must be * preserved. */ if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), @@ -4529,21 +4526,20 @@ static int svm_vm_init(struct kvm *kvm) static struct kvm_x86_ops svm_x86_ops __initdata = { .name = "kvm_amd", - .hardware_unsetup = svm_hardware_teardown, + .hardware_unsetup = svm_hardware_unsetup, .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, - .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, .has_emulated_msr = svm_has_emulated_msr, - .vcpu_create = svm_create_vcpu, - .vcpu_free = svm_free_vcpu, + .vcpu_create = svm_vcpu_create, + .vcpu_free = svm_vcpu_free, .vcpu_reset = svm_vcpu_reset, .vm_size = sizeof(struct kvm_svm), .vm_init = svm_vm_init, .vm_destroy = svm_vm_destroy, - .prepare_guest_switch = svm_prepare_guest_switch, + .prepare_switch_to_guest = svm_prepare_switch_to_guest, .vcpu_load = svm_vcpu_load, .vcpu_put = svm_vcpu_put, .vcpu_blocking = avic_vcpu_blocking, @@ -4557,9 +4553,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .get_segment = svm_get_segment, .set_segment = svm_set_segment, .get_cpl = svm_get_cpl, - .get_cs_db_l_bits = kvm_get_cs_db_l_bits, + .get_cs_db_l_bits = svm_get_cs_db_l_bits, .set_cr0 = svm_set_cr0, - .post_set_cr3 = svm_post_set_cr3, + .post_set_cr3 = sev_post_set_cr3, .is_valid_cr4 = svm_is_valid_cr4, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, @@ -4574,21 +4570,21 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_rflags = svm_set_rflags, .get_if_flag = svm_get_if_flag, - .tlb_flush_all = svm_flush_tlb, - .tlb_flush_current = svm_flush_tlb, - .tlb_flush_gva = svm_flush_tlb_gva, - .tlb_flush_guest = svm_flush_tlb, + .flush_tlb_all = svm_flush_tlb_current, + .flush_tlb_current = svm_flush_tlb_current, + .flush_tlb_gva = svm_flush_tlb_gva, + .flush_tlb_guest = svm_flush_tlb_current, .vcpu_pre_run = svm_vcpu_pre_run, - .run = svm_vcpu_run, - .handle_exit = handle_exit, - .skip_emulated_instruction = skip_emulated_instruction, + .vcpu_run = svm_vcpu_run, + .handle_exit = svm_handle_exit, + .skip_emulated_instruction = svm_skip_emulated_instruction, .update_emulated_instruction = NULL, .set_interrupt_shadow = svm_set_interrupt_shadow, .get_interrupt_shadow = svm_get_interrupt_shadow, .patch_hypercall = svm_patch_hypercall, - .set_irq = svm_set_irq, - .set_nmi = svm_inject_nmi, + .inject_irq = svm_inject_irq, + .inject_nmi = svm_inject_nmi, .queue_exception = svm_queue_exception, .cancel_injection = svm_cancel_injection, .interrupt_allowed = svm_interrupt_allowed, @@ -4598,18 +4594,11 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .enable_nmi_window = svm_enable_nmi_window, .enable_irq_window = svm_enable_irq_window, .update_cr8_intercept = svm_update_cr8_intercept, - .set_virtual_apic_mode = svm_set_virtual_apic_mode, - .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, - .check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons, - .load_eoi_exitmap = svm_load_eoi_exitmap, - .hwapic_irr_update = svm_hwapic_irr_update, - .hwapic_isr_update = svm_hwapic_isr_update, - .apicv_post_state_restore = avic_post_state_restore, - - .set_tss_addr = svm_set_tss_addr, - .set_identity_map_addr = svm_set_identity_map_addr, - .get_mt_mask = svm_get_mt_mask, + .refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl, + .check_apicv_inhibit_reasons = avic_check_apicv_inhibit_reasons, + .apicv_post_state_restore = avic_apicv_post_state_restore, + .get_mt_mask = svm_get_mt_mask, .get_exit_info = svm_get_exit_info, .vcpu_after_set_cpuid = svm_vcpu_after_set_cpuid, @@ -4634,8 +4623,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .nested_ops = &svm_nested_ops, .deliver_interrupt = svm_deliver_interrupt, - .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, - .update_pi_irte = svm_update_pi_irte, + .pi_update_irte = avic_pi_update_irte, .setup_mce = svm_setup_mce, .smi_allowed = svm_smi_allowed, @@ -4643,12 +4631,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .leave_smm = svm_leave_smm, .enable_smi_window = svm_enable_smi_window, - .mem_enc_op = svm_mem_enc_op, - .mem_enc_reg_region = svm_register_enc_region, - .mem_enc_unreg_region = svm_unregister_enc_region, + .mem_enc_ioctl = sev_mem_enc_ioctl, + .mem_enc_register_region = sev_mem_enc_register_region, + .mem_enc_unregister_region = sev_mem_enc_unregister_region, - .vm_copy_enc_context_from = svm_vm_copy_asid_from, - .vm_move_enc_context_from = svm_vm_migrate_from, + .vm_copy_enc_context_from = sev_vm_copy_enc_context_from, + .vm_move_enc_context_from = sev_vm_move_enc_context_from, .can_emulate_instruction = svm_can_emulate_instruction, @@ -4893,7 +4881,7 @@ static __init int svm_hardware_setup(void) return 0; err: - svm_hardware_teardown(); + svm_hardware_unsetup(); return r; } diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index fa98d6844728..e37bb3508cfa 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -79,7 +79,8 @@ struct kvm_sev_info { struct list_head regions_list; /* List of registered regions */ u64 ap_jump_table; /* SEV-ES AP Jump Table address */ struct kvm *enc_context_owner; /* Owner of copied encryption context */ - unsigned long num_mirrored_vms; /* Number of VMs sharing this ASID */ + struct list_head mirror_vms; /* List of VMs mirroring */ + struct list_head mirror_entry; /* Use as a list entry of mirrors */ struct misc_cg *misc_cg; /* For misc cgroup accounting */ atomic_t migration_in_progress; }; @@ -137,6 +138,8 @@ struct vmcb_ctrl_area_cached { u32 event_inj_err; u64 nested_cr3; u64 virt_ext; + u32 clean; + u8 reserved_sw[32]; }; struct svm_nested_state { @@ -163,6 +166,15 @@ struct svm_nested_state { struct vmcb_save_area_cached save; bool initialized; + + /* + * Indicates whether MSR bitmap for L2 needs to be rebuilt due to + * changes in MSR bitmap for L1 or switching to a different L2. Note, + * this flag can only be used reliably in conjunction with a paravirt L1 + * which informs L0 whether any changes to MSR bitmap for L2 were done + * on its side. + */ + bool force_msr_bitmap_recalc; }; struct vcpu_sev_es_state { @@ -321,7 +333,7 @@ static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu) /* * Only the PDPTRs are loaded on demand into the shadow MMU. All other - * fields are synchronized in handle_exit, because accessing the VMCB is cheap. + * fields are synchronized on VM-Exit, because accessing the VMCB is cheap. * * CR3 might be out of date in the VMCB but it is not marked dirty; instead, * KVM_REQ_LOAD_MMU_PGD is always requested when the cached vcpu->arch.cr3 @@ -480,7 +492,6 @@ void svm_vcpu_free_msrpm(u32 *msrpm); int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer); void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); -void svm_flush_tlb(struct kvm_vcpu *vcpu); void disable_nmi_singlestep(struct vcpu_svm *svm); bool svm_smi_blocked(struct kvm_vcpu *vcpu); bool svm_nmi_blocked(struct kvm_vcpu *vcpu); @@ -558,6 +569,17 @@ extern struct kvm_x86_nested_ops svm_nested_ops; /* avic.c */ +#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF) +#define AVIC_LOGICAL_ID_ENTRY_VALID_BIT 31 +#define AVIC_LOGICAL_ID_ENTRY_VALID_MASK (1 << 31) + +#define AVIC_PHYSICAL_ID_ENTRY_HOST_PHYSICAL_ID_MASK GENMASK_ULL(11, 0) +#define AVIC_PHYSICAL_ID_ENTRY_BACKING_PAGE_MASK (0xFFFFFFFFFFULL << 12) +#define AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK (1ULL << 62) +#define AVIC_PHYSICAL_ID_ENTRY_VALID_MASK (1ULL << 63) + +#define VMCB_AVIC_APIC_BAR_MASK 0xFFFFFFFFFF000ULL + int avic_ga_log_notifier(u32 ga_tag); void avic_vm_destroy(struct kvm *kvm); int avic_vm_init(struct kvm *kvm); @@ -565,18 +587,17 @@ void avic_init_vmcb(struct vcpu_svm *svm); int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu); int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu); int avic_init_vcpu(struct vcpu_svm *svm); -void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -void avic_vcpu_put(struct kvm_vcpu *vcpu); -void avic_post_state_restore(struct kvm_vcpu *vcpu); -void svm_set_virtual_apic_mode(struct kvm_vcpu *vcpu); -void svm_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); -bool svm_check_apicv_inhibit_reasons(ulong bit); -void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); -void svm_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); -void svm_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr); -bool svm_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu); -int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, - uint32_t guest_irq, bool set); +void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void __avic_vcpu_put(struct kvm_vcpu *vcpu); +void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu); +void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu); +void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu); +bool avic_check_apicv_inhibit_reasons(ulong bit); +void avic_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr); +void avic_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr); +bool avic_dy_apicv_has_pending_interrupt(struct kvm_vcpu *vcpu); +int avic_pi_update_irte(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); void avic_vcpu_blocking(struct kvm_vcpu *vcpu); void avic_vcpu_unblocking(struct kvm_vcpu *vcpu); void avic_ring_doorbell(struct kvm_vcpu *vcpu); @@ -590,17 +611,17 @@ void avic_ring_doorbell(struct kvm_vcpu *vcpu); extern unsigned int max_sev_asid; void sev_vm_destroy(struct kvm *kvm); -int svm_mem_enc_op(struct kvm *kvm, void __user *argp); -int svm_register_enc_region(struct kvm *kvm, - struct kvm_enc_region *range); -int svm_unregister_enc_region(struct kvm *kvm, - struct kvm_enc_region *range); -int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd); -int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd); +int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp); +int sev_mem_enc_register_region(struct kvm *kvm, + struct kvm_enc_region *range); +int sev_mem_enc_unregister_region(struct kvm *kvm, + struct kvm_enc_region *range); +int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd); +int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd); void pre_sev_run(struct vcpu_svm *svm, int cpu); void __init sev_set_cpu_caps(void); void __init sev_hardware_setup(void); -void sev_hardware_teardown(void); +void sev_hardware_unsetup(void); int sev_cpu_init(struct svm_cpu_data *sd); void sev_free_vcpu(struct kvm_vcpu *vcpu); int sev_handle_vmgexit(struct kvm_vcpu *vcpu); @@ -608,7 +629,7 @@ int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in); void sev_es_init_vmcb(struct vcpu_svm *svm); void sev_es_vcpu_reset(struct vcpu_svm *svm); void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); -void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu); +void sev_es_prepare_switch_to_guest(struct vmcb_save_area *hostsa); void sev_es_unmap_ghcb(struct vcpu_svm *svm); /* vmenter.S */ diff --git a/arch/x86/kvm/svm/svm_onhyperv.h b/arch/x86/kvm/svm/svm_onhyperv.h index 489ca56212c6..e2fc59380465 100644 --- a/arch/x86/kvm/svm/svm_onhyperv.h +++ b/arch/x86/kvm/svm/svm_onhyperv.h @@ -7,35 +7,12 @@ #define __ARCH_X86_KVM_SVM_ONHYPERV_H__ #if IS_ENABLED(CONFIG_HYPERV) -#include <asm/mshyperv.h> -#include "hyperv.h" #include "kvm_onhyperv.h" +#include "svm/hyperv.h" static struct kvm_x86_ops svm_x86_ops; -/* - * Hyper-V uses the software reserved 32 bytes in VMCB - * control area to expose SVM enlightenments to guests. - */ -struct hv_enlightenments { - struct __packed hv_enlightenments_control { - u32 nested_flush_hypercall:1; - u32 msr_bitmap:1; - u32 enlightened_npt_tlb: 1; - u32 reserved:29; - } __packed hv_enlightenments_control; - u32 hv_vp_id; - u64 hv_vm_id; - u64 partition_assist_page; - u64 reserved; -} __packed; - -/* - * Hyper-V uses the software reserved clean bit in VMCB - */ -#define VMCB_HV_NESTED_ENLIGHTENMENTS VMCB_SW - int svm_hv_enable_direct_tlbflush(struct kvm_vcpu *vcpu); static inline void svm_hv_init_vmcb(struct vmcb *vmcb) |