summaryrefslogtreecommitdiff
path: root/arch/x86/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm')
-rw-r--r--arch/x86/kvm/cpuid.c2
-rw-r--r--arch/x86/kvm/emulate.c2
-rw-r--r--arch/x86/kvm/kvm_cache_regs.h51
-rw-r--r--arch/x86/kvm/lapic.c4
-rw-r--r--arch/x86/kvm/mmu.h11
-rw-r--r--arch/x86/kvm/mmu/mmu.c53
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c119
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.h4
-rw-r--r--arch/x86/kvm/svm/nested.c24
-rw-r--r--arch/x86/kvm/svm/sev.c56
-rw-r--r--arch/x86/kvm/svm/svm.c19
-rw-r--r--arch/x86/kvm/svm/svm.h5
-rw-r--r--arch/x86/kvm/vmx/nested.c46
-rw-r--r--arch/x86/kvm/vmx/pmu_intel.c6
-rw-r--r--arch/x86/kvm/vmx/vmx.c21
-rw-r--r--arch/x86/kvm/x86.c64
-rw-r--r--arch/x86/kvm/x86.h2
17 files changed, 307 insertions, 182 deletions
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 13036cf0b912..38172ca627d3 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -321,7 +321,7 @@ int kvm_vcpu_ioctl_get_cpuid2(struct kvm_vcpu *vcpu,
if (cpuid->nent < vcpu->arch.cpuid_nent)
goto out;
r = -EFAULT;
- if (copy_to_user(entries, &vcpu->arch.cpuid_entries,
+ if (copy_to_user(entries, vcpu->arch.cpuid_entries,
vcpu->arch.cpuid_nent * sizeof(struct kvm_cpuid_entry2)))
goto out;
return 0;
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 56cae1ff9e3f..66a08322988f 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2879,6 +2879,8 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt)
ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data);
*reg_write(ctxt, VCPU_REGS_RSP) = (efer & EFER_LMA) ? msr_data :
(u32)msr_data;
+ if (efer & EFER_LMA)
+ ctxt->mode = X86EMUL_MODE_PROT64;
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index f15bc16de07c..a889563ad02d 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,31 +9,6 @@
(X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \
| X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE)
-static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
-{
- return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
-{
- return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
-static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
-{
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
-}
-
-static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
- enum kvm_reg reg)
-{
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
- __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
-}
-
#define BUILD_KVM_GPR_ACCESSORS(lname, uname) \
static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu)\
{ \
@@ -43,7 +18,6 @@ static __always_inline void kvm_##lname##_write(struct kvm_vcpu *vcpu, \
unsigned long val) \
{ \
vcpu->arch.regs[VCPU_REGS_##uname] = val; \
- kvm_register_mark_dirty(vcpu, VCPU_REGS_##uname); \
}
BUILD_KVM_GPR_ACCESSORS(rax, RAX)
BUILD_KVM_GPR_ACCESSORS(rbx, RBX)
@@ -63,6 +37,31 @@ BUILD_KVM_GPR_ACCESSORS(r14, R14)
BUILD_KVM_GPR_ACCESSORS(r15, R15)
#endif
+static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ return test_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ return test_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
+static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+}
+
+static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu,
+ enum kvm_reg reg)
+{
+ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_avail);
+ __set_bit(reg, (unsigned long *)&vcpu->arch.regs_dirty);
+}
+
static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg)
{
if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_REGS))
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 3136e05831cf..43cceadd073e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -674,7 +674,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
(unsigned long long)vcpu->arch.pv_eoi.msr_val);
return false;
}
- return val & 0x1;
+ return val & KVM_PV_EOI_ENABLED;
}
static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
@@ -2898,7 +2898,7 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
/* evaluate pending_events before reading the vector */
smp_rmb();
sipi_vector = apic->sipi_vector;
- kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
+ kvm_x86_ops.vcpu_deliver_sipi_vector(vcpu, sipi_vector);
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
}
}
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 9c4a9c8e43d9..261be1d2032b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -44,12 +44,19 @@
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
-static inline u64 rsvd_bits(int s, int e)
+static __always_inline u64 rsvd_bits(int s, int e)
{
+ BUILD_BUG_ON(__builtin_constant_p(e) && __builtin_constant_p(s) && e < s);
+
+ if (__builtin_constant_p(e))
+ BUILD_BUG_ON(e > 63);
+ else
+ e &= 63;
+
if (e < s)
return 0;
- return ((1ULL << (e - s + 1)) - 1) << s;
+ return ((2ULL << (e - s)) - 1) << s;
}
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask);
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c478904af518..6d16481aa29d 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3493,26 +3493,25 @@ static bool mmio_info_in_cache(struct kvm_vcpu *vcpu, u64 addr, bool direct)
* Return the level of the lowest level SPTE added to sptes.
* That SPTE may be non-present.
*/
-static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes, int *root_level)
{
struct kvm_shadow_walk_iterator iterator;
- int leaf = vcpu->arch.mmu->root_level;
+ int leaf = -1;
u64 spte;
-
walk_shadow_page_lockless_begin(vcpu);
- for (shadow_walk_init(&iterator, vcpu, addr);
+ for (shadow_walk_init(&iterator, vcpu, addr),
+ *root_level = iterator.level;
shadow_walk_okay(&iterator);
__shadow_walk_next(&iterator, spte)) {
leaf = iterator.level;
spte = mmu_spte_get_lockless(iterator.sptep);
- sptes[leaf - 1] = spte;
+ sptes[leaf] = spte;
if (!is_shadow_present_pte(spte))
break;
-
}
walk_shadow_page_lockless_end(vcpu);
@@ -3520,14 +3519,12 @@ static int get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
return leaf;
}
-/* return true if reserved bit is detected on spte. */
+/* return true if reserved bit(s) are detected on a valid, non-MMIO SPTE. */
static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
{
- u64 sptes[PT64_ROOT_MAX_LEVEL];
+ u64 sptes[PT64_ROOT_MAX_LEVEL + 1];
struct rsvd_bits_validate *rsvd_check;
- int root = vcpu->arch.mmu->shadow_root_level;
- int leaf;
- int level;
+ int root, leaf, level;
bool reserved = false;
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) {
@@ -3536,35 +3533,45 @@ static bool get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep)
}
if (is_tdp_mmu_root(vcpu->kvm, vcpu->arch.mmu->root_hpa))
- leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes);
+ leaf = kvm_tdp_mmu_get_walk(vcpu, addr, sptes, &root);
else
- leaf = get_walk(vcpu, addr, sptes);
+ leaf = get_walk(vcpu, addr, sptes, &root);
+
+ if (unlikely(leaf < 0)) {
+ *sptep = 0ull;
+ return reserved;
+ }
+
+ *sptep = sptes[leaf];
+
+ /*
+ * Skip reserved bits checks on the terminal leaf if it's not a valid
+ * SPTE. Note, this also (intentionally) skips MMIO SPTEs, which, by
+ * design, always have reserved bits set. The purpose of the checks is
+ * to detect reserved bits on non-MMIO SPTEs. i.e. buggy SPTEs.
+ */
+ if (!is_shadow_present_pte(sptes[leaf]))
+ leaf++;
rsvd_check = &vcpu->arch.mmu->shadow_zero_check;
- for (level = root; level >= leaf; level--) {
- if (!is_shadow_present_pte(sptes[level - 1]))
- break;
+ for (level = root; level >= leaf; level--)
/*
* Use a bitwise-OR instead of a logical-OR to aggregate the
* reserved bit and EPT's invalid memtype/XWR checks to avoid
* adding a Jcc in the loop.
*/
- reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level - 1]) |
- __is_rsvd_bits_set(rsvd_check, sptes[level - 1],
- level);
- }
+ reserved |= __is_bad_mt_xwr(rsvd_check, sptes[level]) |
+ __is_rsvd_bits_set(rsvd_check, sptes[level], level);
if (reserved) {
pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n",
__func__, addr);
for (level = root; level >= leaf; level--)
pr_err("------ spte 0x%llx level %d.\n",
- sptes[level - 1], level);
+ sptes[level], level);
}
- *sptep = sptes[leaf - 1];
-
return reserved;
}
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 4bd2f1dc0172..b56d604809b8 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -44,7 +44,48 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
}
-#define for_each_tdp_mmu_root(_kvm, _root) \
+static void tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root)
+{
+ if (kvm_mmu_put_root(kvm, root))
+ kvm_tdp_mmu_free_root(kvm, root);
+}
+
+static inline bool tdp_mmu_next_root_valid(struct kvm *kvm,
+ struct kvm_mmu_page *root)
+{
+ lockdep_assert_held(&kvm->mmu_lock);
+
+ if (list_entry_is_head(root, &kvm->arch.tdp_mmu_roots, link))
+ return false;
+
+ kvm_mmu_get_root(kvm, root);
+ return true;
+
+}
+
+static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
+ struct kvm_mmu_page *root)
+{
+ struct kvm_mmu_page *next_root;
+
+ next_root = list_next_entry(root, link);
+ tdp_mmu_put_root(kvm, root);
+ return next_root;
+}
+
+/*
+ * Note: this iterator gets and puts references to the roots it iterates over.
+ * This makes it safe to release the MMU lock and yield within the loop, but
+ * if exiting the loop early, the caller must drop the reference to the most
+ * recent root. (Unless keeping a live reference is desirable.)
+ */
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root) \
+ for (_root = list_first_entry(&_kvm->arch.tdp_mmu_roots, \
+ typeof(*_root), link); \
+ tdp_mmu_next_root_valid(_kvm, _root); \
+ _root = tdp_mmu_next_root(_kvm, _root))
+
+#define for_each_tdp_mmu_root(_kvm, _root) \
list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
bool is_tdp_mmu_root(struct kvm *kvm, hpa_t hpa)
@@ -447,18 +488,9 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
struct kvm_mmu_page *root;
bool flush = false;
- for_each_tdp_mmu_root(kvm, root) {
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
+ for_each_tdp_mmu_root_yield_safe(kvm, root)
flush |= zap_gfn_range(kvm, root, start, end, true);
- kvm_mmu_put_root(kvm, root);
- }
-
return flush;
}
@@ -619,13 +651,7 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
int ret = 0;
int as_id;
- for_each_tdp_mmu_root(kvm, root) {
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
as_id = kvm_mmu_page_as_id(root);
slots = __kvm_memslots(kvm, as_id);
kvm_for_each_memslot(memslot, slots) {
@@ -647,8 +673,6 @@ static int kvm_tdp_mmu_handle_hva_range(struct kvm *kvm, unsigned long start,
ret |= handler(kvm, memslot, root, gfn_start,
gfn_end, data);
}
-
- kvm_mmu_put_root(kvm, root);
}
return ret;
@@ -838,21 +862,13 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct kvm_memory_slot *slot,
int root_as_id;
bool spte_set = false;
- for_each_tdp_mmu_root(kvm, root) {
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
root_as_id = kvm_mmu_page_as_id(root);
if (root_as_id != slot->as_id)
continue;
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
spte_set |= wrprot_gfn_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages, min_level);
-
- kvm_mmu_put_root(kvm, root);
}
return spte_set;
@@ -906,21 +922,13 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
int root_as_id;
bool spte_set = false;
- for_each_tdp_mmu_root(kvm, root) {
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
root_as_id = kvm_mmu_page_as_id(root);
if (root_as_id != slot->as_id)
continue;
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
spte_set |= clear_dirty_gfn_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages);
-
- kvm_mmu_put_root(kvm, root);
}
return spte_set;
@@ -1029,28 +1037,20 @@ bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
int root_as_id;
bool spte_set = false;
- for_each_tdp_mmu_root(kvm, root) {
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
root_as_id = kvm_mmu_page_as_id(root);
if (root_as_id != slot->as_id)
continue;
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages);
-
- kvm_mmu_put_root(kvm, root);
}
return spte_set;
}
/*
- * Clear non-leaf entries (and free associated page tables) which could
- * be replaced by large mappings, for GFNs within the slot.
+ * Clear leaf entries which could be replaced by large mappings, for
+ * GFNs within the slot.
*/
static void zap_collapsible_spte_range(struct kvm *kvm,
struct kvm_mmu_page *root,
@@ -1062,7 +1062,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
tdp_root_for_each_pte(iter, root, start, end) {
if (!is_shadow_present_pte(iter.old_spte) ||
- is_last_spte(iter.old_spte, iter.level))
+ !is_last_spte(iter.old_spte, iter.level))
continue;
pfn = spte_to_pfn(iter.old_spte);
@@ -1089,21 +1089,13 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
struct kvm_mmu_page *root;
int root_as_id;
- for_each_tdp_mmu_root(kvm, root) {
+ for_each_tdp_mmu_root_yield_safe(kvm, root) {
root_as_id = kvm_mmu_page_as_id(root);
if (root_as_id != slot->as_id)
continue;
- /*
- * Take a reference on the root so that it cannot be freed if
- * this thread releases the MMU lock and yields in this loop.
- */
- kvm_mmu_get_root(kvm, root);
-
zap_collapsible_spte_range(kvm, root, slot->base_gfn,
slot->base_gfn + slot->npages);
-
- kvm_mmu_put_root(kvm, root);
}
}
@@ -1160,16 +1152,19 @@ bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
* Return the level of the lowest level SPTE added to sptes.
* That SPTE may be non-present.
*/
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes)
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ int *root_level)
{
struct tdp_iter iter;
struct kvm_mmu *mmu = vcpu->arch.mmu;
- int leaf = vcpu->arch.mmu->shadow_root_level;
gfn_t gfn = addr >> PAGE_SHIFT;
+ int leaf = -1;
+
+ *root_level = vcpu->arch.mmu->shadow_root_level;
tdp_mmu_for_each_pte(iter, mmu, gfn, gfn + 1) {
leaf = iter.level;
- sptes[leaf - 1] = iter.old_spte;
+ sptes[leaf] = iter.old_spte;
}
return leaf;
diff --git a/arch/x86/kvm/mmu/tdp_mmu.h b/arch/x86/kvm/mmu/tdp_mmu.h
index 556e065503f6..cbbdbadd1526 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.h
+++ b/arch/x86/kvm/mmu/tdp_mmu.h
@@ -44,5 +44,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
struct kvm_memory_slot *slot, gfn_t gfn);
-int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes);
+int kvm_tdp_mmu_get_walk(struct kvm_vcpu *vcpu, u64 addr, u64 *sptes,
+ int *root_level);
+
#endif /* __KVM_X86_MMU_TDP_MMU_H */
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index b0b667456b2e..db30670dd8c4 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -199,6 +199,10 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+
+ if (WARN_ON(!is_guest_mode(vcpu)))
+ return true;
+
if (!nested_svm_vmrun_msrpm(svm)) {
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
vcpu->run->internal.suberror =
@@ -227,6 +231,7 @@ static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
{
+ struct kvm_vcpu *vcpu = &svm->vcpu;
bool vmcb12_lma;
if ((vmcb12->save.efer & EFER_SVME) == 0)
@@ -240,18 +245,10 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
vmcb12_lma = (vmcb12->save.efer & EFER_LME) && (vmcb12->save.cr0 & X86_CR0_PG);
- if (!vmcb12_lma) {
- if (vmcb12->save.cr4 & X86_CR4_PAE) {
- if (vmcb12->save.cr3 & MSR_CR3_LEGACY_PAE_RESERVED_MASK)
- return false;
- } else {
- if (vmcb12->save.cr3 & MSR_CR3_LEGACY_RESERVED_MASK)
- return false;
- }
- } else {
+ if (vmcb12_lma) {
if (!(vmcb12->save.cr4 & X86_CR4_PAE) ||
!(vmcb12->save.cr0 & X86_CR0_PE) ||
- (vmcb12->save.cr3 & MSR_CR3_LONG_MBZ_MASK))
+ (vmcb12->save.cr3 & vcpu->arch.cr3_lm_rsvd_bits))
return false;
}
if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
@@ -595,6 +592,8 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
svm->nested.vmcb12_gpa = 0;
WARN_ON_ONCE(svm->nested.nested_run_pending);
+ kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
+
/* in case we halted in L2 */
svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@ -754,6 +753,7 @@ void svm_leave_nested(struct vcpu_svm *svm)
leave_guest_mode(&svm->vcpu);
copy_vmcb_control_area(&vmcb->control, &hsave->control);
nested_svm_uninit_mmu_context(&svm->vcpu);
+ vmcb_mark_all_dirty(svm->vmcb);
}
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
@@ -1194,6 +1194,10 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
* in the registers, the save area of the nested state instead
* contains saved L1 state.
*/
+
+ svm->nested.nested_run_pending =
+ !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
+
copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
hsave->save = *save;
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 9858d5ae9ddd..48017fef1cd9 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -342,6 +342,8 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
unsigned long first, last;
int ret;
+ lockdep_assert_held(&kvm->lock);
+
if (ulen == 0 || uaddr + ulen < uaddr)
return ERR_PTR(-EINVAL);
@@ -1119,12 +1121,20 @@ int svm_register_enc_region(struct kvm *kvm,
if (!region)
return -ENOMEM;
+ mutex_lock(&kvm->lock);
region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
if (IS_ERR(region->pages)) {
ret = PTR_ERR(region->pages);
+ mutex_unlock(&kvm->lock);
goto e_free;
}
+ region->uaddr = range->addr;
+ region->size = range->size;
+
+ list_add_tail(&region->list, &sev->regions_list);
+ mutex_unlock(&kvm->lock);
+
/*
* The guest may change the memory encryption attribute from C=0 -> C=1
* or vice versa for this memory range. Lets make sure caches are
@@ -1133,13 +1143,6 @@ int svm_register_enc_region(struct kvm *kvm,
*/
sev_clflush_pages(region->pages, region->npages);
- region->uaddr = range->addr;
- region->size = range->size;
-
- mutex_lock(&kvm->lock);
- list_add_tail(&region->list, &sev->regions_list);
- mutex_unlock(&kvm->lock);
-
return ret;
e_free:
@@ -1415,16 +1418,13 @@ static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
* to be returned:
* GPRs RAX, RBX, RCX, RDX
*
- * Copy their values to the GHCB if they are dirty.
+ * Copy their values, even if they may not have been written during the
+ * VM-Exit. It's the guest's responsibility to not consume random data.
*/
- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RAX))
- ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RBX))
- ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RCX))
- ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
- if (kvm_register_is_dirty(vcpu, VCPU_REGS_RDX))
- ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
+ ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
+ ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
+ ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
+ ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
}
static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
@@ -1563,6 +1563,7 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
goto vmgexit_err;
break;
case SVM_VMGEXIT_NMI_COMPLETE:
+ case SVM_VMGEXIT_AP_HLT_LOOP:
case SVM_VMGEXIT_AP_JUMP_TABLE:
case SVM_VMGEXIT_UNSUPPORTED_EVENT:
break;
@@ -1888,6 +1889,9 @@ int sev_handle_vmgexit(struct vcpu_svm *svm)
case SVM_VMGEXIT_NMI_COMPLETE:
ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
break;
+ case SVM_VMGEXIT_AP_HLT_LOOP:
+ ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
+ break;
case SVM_VMGEXIT_AP_JUMP_TABLE: {
struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
@@ -2001,7 +2005,7 @@ void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu)
* of which one step is to perform a VMLOAD. Since hardware does not
* perform a VMSAVE on VMRUN, the host savearea must be updated.
*/
- asm volatile(__ex("vmsave") : : "a" (__sme_page_pa(sd->save_area)) : "memory");
+ asm volatile(__ex("vmsave %0") : : "a" (__sme_page_pa(sd->save_area)) : "memory");
/*
* Certain MSRs are restored on VMEXIT, only save ones that aren't
@@ -2040,3 +2044,21 @@ void sev_es_vcpu_put(struct vcpu_svm *svm)
wrmsrl(host_save_user_msrs[i].index, svm->host_user_msrs[i]);
}
}
+
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+ struct vcpu_svm *svm = to_svm(vcpu);
+
+ /* First SIPI: Use the values as initially set by the VMM */
+ if (!svm->received_first_sipi) {
+ svm->received_first_sipi = true;
+ return;
+ }
+
+ /*
+ * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
+ * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
+ * non-zero value.
+ */
+ ghcb_set_sw_exit_info_2(svm->ghcb, 1);
+}
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index cce0143a6f80..3442d44ca53b 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -454,6 +454,11 @@ static int has_svm(void)
return 0;
}
+ if (sev_active()) {
+ pr_info("KVM is unsupported when running as an SEV guest\n");
+ return 0;
+ }
+
return 1;
}
@@ -3677,8 +3682,6 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
return EXIT_FASTPATH_NONE;
}
-void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs);
-
static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu,
struct vcpu_svm *svm)
{
@@ -3741,6 +3744,8 @@ static __no_kcsan fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
+ trace_kvm_entry(vcpu);
+
svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX];
svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP];
svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP];
@@ -4384,6 +4389,14 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu)
(vmcb_is_intercept(&svm->vmcb->control, INTERCEPT_INIT));
}
+static void svm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
+{
+ if (!sev_es_guest(vcpu->kvm))
+ return kvm_vcpu_deliver_sipi_vector(vcpu, vector);
+
+ sev_vcpu_deliver_sipi_vector(vcpu, vector);
+}
+
static void svm_vm_destroy(struct kvm *kvm)
{
avic_vm_destroy(kvm);
@@ -4526,6 +4539,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
.msr_filter_changed = svm_msr_filter_changed,
.complete_emulated_msr = svm_complete_emulated_msr,
+
+ .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
};
static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 5431e6335e2e..6e7d070f8b86 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -185,6 +185,7 @@ struct vcpu_svm {
struct vmcb_save_area *vmsa;
struct ghcb *ghcb;
struct kvm_host_map ghcb_map;
+ bool received_first_sipi;
/* SEV-ES scratch area support */
void *ghcb_sa;
@@ -402,9 +403,6 @@ static inline bool gif_set(struct vcpu_svm *svm)
}
/* svm.c */
-#define MSR_CR3_LEGACY_RESERVED_MASK 0xfe7U
-#define MSR_CR3_LEGACY_PAE_RESERVED_MASK 0x7U
-#define MSR_CR3_LONG_MBZ_MASK 0xfff0000000000000U
#define MSR_INVALID 0xffffffffU
extern int sev;
@@ -591,6 +589,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
void sev_es_create_vcpu(struct vcpu_svm *svm);
void sev_es_vcpu_load(struct vcpu_svm *svm, int cpu);
void sev_es_vcpu_put(struct vcpu_svm *svm);
+void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
/* vmenter.S */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index e2f26564a12d..f2b9bfb58206 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -3124,13 +3124,9 @@ static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
return 0;
}
-static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
+static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
{
- struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct vcpu_vmx *vmx = to_vmx(vcpu);
- struct kvm_host_map *map;
- struct page *page;
- u64 hpa;
/*
* hv_evmcs may end up being not mapped after migration (when
@@ -3153,6 +3149,17 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
}
}
+ return true;
+}
+
+static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
+{
+ struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
+ struct kvm_host_map *map;
+ struct page *page;
+ u64 hpa;
+
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
/*
* Translate L1 physical address to host physical
@@ -3221,6 +3228,18 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
else
exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
+
+ return true;
+}
+
+static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
+{
+ if (!nested_get_evmcs_page(vcpu))
+ return false;
+
+ if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
+ return false;
+
return true;
}
@@ -4442,6 +4461,8 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
/* trying to cancel vmlaunch/vmresume is a bug */
WARN_ON_ONCE(vmx->nested.nested_run_pending);
+ kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+
/* Service the TLB flush request for L2 before switching to L1. */
if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
kvm_vcpu_flush_tlb_current(vcpu);
@@ -6075,11 +6096,14 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
if (is_guest_mode(vcpu)) {
sync_vmcs02_to_vmcs12(vcpu, vmcs12);
sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
- } else if (!vmx->nested.need_vmcs12_to_shadow_sync) {
- if (vmx->nested.hv_evmcs)
- copy_enlightened_to_vmcs12(vmx);
- else if (enable_shadow_vmcs)
- copy_shadow_to_vmcs12(vmx);
+ } else {
+ copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
+ if (!vmx->nested.need_vmcs12_to_shadow_sync) {
+ if (vmx->nested.hv_evmcs)
+ copy_enlightened_to_vmcs12(vmx);
+ else if (enable_shadow_vmcs)
+ copy_shadow_to_vmcs12(vmx);
+ }
}
BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
@@ -6600,7 +6624,7 @@ struct kvm_x86_nested_ops vmx_nested_ops = {
.hv_timer_pending = nested_vmx_preemption_timer_pending,
.get_state = vmx_get_nested_state,
.set_state = vmx_set_nested_state,
- .get_nested_state_pages = nested_get_vmcs12_pages,
+ .get_nested_state_pages = vmx_get_nested_state_pages,
.write_log_dirty = nested_vmx_write_pml_buffer,
.enable_evmcs = nested_enable_evmcs,
.get_evmcs_version = nested_get_evmcs_version,
diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
index a886a47daebd..cdf5f34518f4 100644
--- a/arch/x86/kvm/vmx/pmu_intel.c
+++ b/arch/x86/kvm/vmx/pmu_intel.c
@@ -29,7 +29,7 @@ static struct kvm_event_hw_type_mapping intel_arch_events[] = {
[4] = { 0x2e, 0x41, PERF_COUNT_HW_CACHE_MISSES },
[5] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
[6] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
- [7] = { 0x00, 0x30, PERF_COUNT_HW_REF_CPU_CYCLES },
+ [7] = { 0x00, 0x03, PERF_COUNT_HW_REF_CPU_CYCLES },
};
/* mapping between fixed pmc index and intel_arch_events array */
@@ -345,7 +345,9 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
x86_pmu.num_counters_gp);
+ eax.split.bit_width = min_t(int, eax.split.bit_width, x86_pmu.bit_width_gp);
pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << eax.split.bit_width) - 1;
+ eax.split.mask_length = min_t(int, eax.split.mask_length, x86_pmu.events_mask_len);
pmu->available_event_types = ~entry->ebx &
((1ull << eax.split.mask_length) - 1);
@@ -355,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
pmu->nr_arch_fixed_counters =
min_t(int, edx.split.num_counters_fixed,
x86_pmu.num_counters_fixed);
+ edx.split.bit_width_fixed = min_t(int,
+ edx.split.bit_width_fixed, x86_pmu.bit_width_fixed);
pmu->counter_bitmask[KVM_PMC_FIXED] =
((u64)1 << edx.split.bit_width_fixed) - 1;
}
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 75c9c6a0a3a4..eb69fef57485 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -6653,6 +6653,8 @@ reenter_guest:
if (vmx->emulation_required)
return EXIT_FASTPATH_NONE;
+ trace_kvm_entry(vcpu);
+
if (vmx->ple_window_dirty) {
vmx->ple_window_dirty = false;
vmcs_write32(PLE_WINDOW, vmx->ple_window);
@@ -6858,11 +6860,20 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
switch (index) {
case MSR_IA32_TSX_CTRL:
/*
- * No need to pass TSX_CTRL_CPUID_CLEAR through, so
- * let's avoid changing CPUID bits under the host
- * kernel's feet.
+ * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
+ * interception. Keep the host value unchanged to avoid
+ * changing CPUID bits under the host kernel's feet.
+ *
+ * hle=0, rtm=0, tsx_ctrl=1 can be found with some
+ * combinations of new kernel and old userspace. If
+ * those guests run on a tsx=off host, do allow guests
+ * to use TSX_CTRL, but do not change the value on the
+ * host so that TSX remains always disabled.
*/
- vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+ if (boot_cpu_has(X86_FEATURE_RTM))
+ vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
+ else
+ vmx->guest_uret_msrs[j].mask = 0;
break;
default:
vmx->guest_uret_msrs[j].mask = -1ull;
@@ -7707,6 +7718,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
.msr_filter_changed = vmx_msr_filter_changed,
.complete_emulated_msr = kvm_complete_insn_gp,
.cpu_dirty_log_size = vmx_cpu_dirty_log_size,
+
+ .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
};
static __init int hardware_setup(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f7c1fc7a3ce..1b404e4d7dd8 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -105,6 +105,7 @@ static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS;
static void update_cr8_intercept(struct kvm_vcpu *vcpu);
static void process_nmi(struct kvm_vcpu *vcpu);
+static void process_smi(struct kvm_vcpu *vcpu);
static void enter_smm(struct kvm_vcpu *vcpu);
static void __kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
static void store_regs(struct kvm_vcpu *vcpu);
@@ -1393,16 +1394,24 @@ static u64 kvm_get_arch_capabilities(void)
if (!boot_cpu_has_bug(X86_BUG_MDS))
data |= ARCH_CAP_MDS_NO;
- /*
- * On TAA affected systems:
- * - nothing to do if TSX is disabled on the host.
- * - we emulate TSX_CTRL if present on the host.
- * This lets the guest use VERW to clear CPU buffers.
- */
- if (!boot_cpu_has(X86_FEATURE_RTM))
- data &= ~(ARCH_CAP_TAA_NO | ARCH_CAP_TSX_CTRL_MSR);
- else if (!boot_cpu_has_bug(X86_BUG_TAA))
+ if (!boot_cpu_has(X86_FEATURE_RTM)) {
+ /*
+ * If RTM=0 because the kernel has disabled TSX, the host might
+ * have TAA_NO or TSX_CTRL. Clear TAA_NO (the guest sees RTM=0
+ * and therefore knows that there cannot be TAA) but keep
+ * TSX_CTRL: some buggy userspaces leave it set on tsx=on hosts,
+ * and we want to allow migrating those guests to tsx=off hosts.
+ */
+ data &= ~ARCH_CAP_TAA_NO;
+ } else if (!boot_cpu_has_bug(X86_BUG_TAA)) {
data |= ARCH_CAP_TAA_NO;
+ } else {
+ /*
+ * Nothing to do here; we emulate TSX_CTRL if present on the
+ * host so the guest can choose between disabling TSX or
+ * using VERW to clear CPU buffers.
+ */
+ }
return data;
}
@@ -4230,6 +4239,9 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu,
{
process_nmi(vcpu);
+ if (kvm_check_request(KVM_REQ_SMI, vcpu))
+ process_smi(vcpu);
+
/*
* In guest mode, payload delivery should be deferred,
* so that the L1 hypervisor can intercept #PF before
@@ -7976,17 +7988,22 @@ void kvm_arch_exit(void)
kmem_cache_destroy(x86_fpu_cache);
}
-int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
{
++vcpu->stat.halt_exits;
if (lapic_in_kernel(vcpu)) {
- vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
+ vcpu->arch.mp_state = state;
return 1;
} else {
- vcpu->run->exit_reason = KVM_EXIT_HLT;
+ vcpu->run->exit_reason = reason;
return 0;
}
}
+
+int kvm_vcpu_halt(struct kvm_vcpu *vcpu)
+{
+ return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_HALTED, KVM_EXIT_HLT);
+}
EXPORT_SYMBOL_GPL(kvm_vcpu_halt);
int kvm_emulate_halt(struct kvm_vcpu *vcpu)
@@ -8000,6 +8017,14 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_emulate_halt);
+int kvm_emulate_ap_reset_hold(struct kvm_vcpu *vcpu)
+{
+ int ret = kvm_skip_emulated_instruction(vcpu);
+
+ return __kvm_vcpu_halt(vcpu, KVM_MP_STATE_AP_RESET_HOLD, KVM_EXIT_AP_RESET_HOLD) && ret;
+}
+EXPORT_SYMBOL_GPL(kvm_emulate_ap_reset_hold);
+
#ifdef CONFIG_X86_64
static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr,
unsigned long clock_type)
@@ -8973,8 +8998,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
kvm_x86_ops.request_immediate_exit(vcpu);
}
- trace_kvm_entry(vcpu);
-
fpregs_assert_state_consistent();
if (test_thread_flag(TIF_NEED_FPU_LOAD))
switch_fpu_return();
@@ -9094,6 +9117,7 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu)
kvm_apic_accept_events(vcpu);
switch(vcpu->arch.mp_state) {
case KVM_MP_STATE_HALTED:
+ case KVM_MP_STATE_AP_RESET_HOLD:
vcpu->arch.pv.pv_unhalted = false;
vcpu->arch.mp_state =
KVM_MP_STATE_RUNNABLE;
@@ -9520,8 +9544,9 @@ int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
kvm_load_guest_fpu(vcpu);
kvm_apic_accept_events(vcpu);
- if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED &&
- vcpu->arch.pv.pv_unhalted)
+ if ((vcpu->arch.mp_state == KVM_MP_STATE_HALTED ||
+ vcpu->arch.mp_state == KVM_MP_STATE_AP_RESET_HOLD) &&
+ vcpu->arch.pv.pv_unhalted)
mp_state->mp_state = KVM_MP_STATE_RUNNABLE;
else
mp_state->mp_state = vcpu->arch.mp_state;
@@ -9599,6 +9624,8 @@ static bool kvm_is_valid_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
*/
if (!(sregs->cr4 & X86_CR4_PAE) || !(sregs->efer & EFER_LMA))
return false;
+ if (sregs->cr3 & vcpu->arch.cr3_lm_rsvd_bits)
+ return false;
} else {
/*
* Not in 64-bit mode: EFER.LMA is clear and the code
@@ -9976,6 +10003,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
fx_init(vcpu);
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
+ vcpu->arch.cr3_lm_rsvd_bits = rsvd_bits(cpuid_maxphyaddr(vcpu), 63);
vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT;
@@ -10152,6 +10180,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
kvm_rip_write(vcpu, 0);
}
+EXPORT_SYMBOL_GPL(kvm_vcpu_deliver_sipi_vector);
int kvm_arch_hardware_enable(void)
{
@@ -10476,7 +10505,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
return 0;
old_npages = slot->npages;
- hva = 0;
+ hva = slot->userspace_addr;
}
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
@@ -11538,6 +11567,7 @@ int kvm_sev_es_string_io(struct kvm_vcpu *vcpu, unsigned int size,
}
EXPORT_SYMBOL_GPL(kvm_sev_es_string_io);
+EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_entry);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index c5ee0f5ce0f1..0f727b50bd3d 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -425,6 +425,8 @@ bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type);
__reserved_bits |= X86_CR4_UMIP; \
if (!__cpu_has(__c, X86_FEATURE_VMX)) \
__reserved_bits |= X86_CR4_VMXE; \
+ if (!__cpu_has(__c, X86_FEATURE_PCID)) \
+ __reserved_bits |= X86_CR4_PCIDE; \
__reserved_bits; \
})