diff options
-rw-r--r-- | arch/x86/include/asm/kvm_host.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/cpuid.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/mmu/mmu.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/pmu.c | 21 | ||||
-rw-r--r-- | arch/x86/kvm/pmu.h | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/pmu.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/svm/svm.c | 2 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/pmu_intel.c | 135 | ||||
-rw-r--r-- | arch/x86/kvm/vmx/vmx.c | 16 | ||||
-rw-r--r-- | arch/x86/kvm/x86.c | 102 | ||||
-rw-r--r-- | arch/x86/kvm/x86.h | 13 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/include/x86_64/processor.h | 45 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c | 252 | ||||
-rw-r--r-- | tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c | 230 |
14 files changed, 558 insertions, 268 deletions
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ffdb0c3010e7..d197c54dcaa6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -513,6 +513,7 @@ struct kvm_pmc { #define MSR_ARCH_PERFMON_FIXED_CTR_MAX (MSR_ARCH_PERFMON_FIXED_CTR0 + KVM_PMC_MAX_FIXED - 1) #define KVM_AMD_PMC_MAX_GENERIC 6 struct kvm_pmu { + u8 version; unsigned nr_arch_gp_counters; unsigned nr_arch_fixed_counters; unsigned available_event_types; @@ -525,7 +526,6 @@ struct kvm_pmu { u64 global_ovf_ctrl_mask; u64 reserved_bits; u64 raw_event_mask; - u8 version; struct kvm_pmc gp_counters[KVM_INTEL_PMC_MAX_GENERIC]; struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; struct irq_work irq_work; diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 6972e0be60fa..b944492faefa 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -414,7 +414,7 @@ static int kvm_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 *e2, * KVM_SET_CPUID{,2} again. To support this legacy behavior, check * whether the supplied CPUID data is equal to what's already set. */ - if (vcpu->arch.last_vmentry_cpu != -1) { + if (kvm_vcpu_has_run(vcpu)) { r = kvm_cpuid_check_equal(vcpu, e2, nent); if (r) return r; diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index e3d02f059437..c8961f45e3b1 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -5476,7 +5476,7 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) * Changing guest CPUID after KVM_RUN is forbidden, see the comment in * kvm_arch_vcpu_ioctl(). */ - KVM_BUG_ON(vcpu->arch.last_vmentry_cpu != -1, vcpu->kvm); + KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm); } void kvm_mmu_reset_context(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index bb1733bb5bf9..1690d41c1830 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -93,7 +93,7 @@ void kvm_pmu_ops_update(const struct kvm_pmu_ops *pmu_ops) #undef __KVM_X86_PMU_OP } -static inline bool pmc_is_enabled(struct kvm_pmc *pmc) +static inline bool pmc_is_globally_enabled(struct kvm_pmc *pmc) { return static_call(kvm_x86_pmu_pmc_is_enabled)(pmc); } @@ -400,6 +400,12 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc) return is_fixed_event_allowed(filter, pmc->idx); } +static bool pmc_event_is_allowed(struct kvm_pmc *pmc) +{ + return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) && + check_pmu_event_filter(pmc); +} + static void reprogram_counter(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); @@ -409,10 +415,7 @@ static void reprogram_counter(struct kvm_pmc *pmc) pmc_pause_counter(pmc); - if (!pmc_speculative_in_use(pmc) || !pmc_is_enabled(pmc)) - goto reprogram_complete; - - if (!check_pmu_event_filter(pmc)) + if (!pmc_event_is_allowed(pmc)) goto reprogram_complete; if (pmc->counter < pmc->prev_counter) @@ -589,6 +592,10 @@ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) */ void kvm_pmu_refresh(struct kvm_vcpu *vcpu) { + if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm)) + return; + + bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX); static_call(kvm_x86_pmu_refresh)(vcpu); } @@ -646,7 +653,7 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc) { pmc->prev_counter = pmc->counter; pmc->counter = (pmc->counter + 1) & pmc_bitmask(pmc); - kvm_pmu_request_counter_reprogam(pmc); + kvm_pmu_request_counter_reprogram(pmc); } static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc, @@ -684,7 +691,7 @@ void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id) for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) { pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i); - if (!pmc || !pmc_is_enabled(pmc) || !pmc_speculative_in_use(pmc)) + if (!pmc || !pmc_event_is_allowed(pmc)) continue; /* Ignore checks for edge detect, pin control, invert and CMASK bits */ diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index be62c16f2265..5c7bbf03b599 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -195,7 +195,7 @@ static inline void kvm_init_pmu_capability(const struct kvm_pmu_ops *pmu_ops) KVM_PMC_MAX_FIXED); } -static inline void kvm_pmu_request_counter_reprogam(struct kvm_pmc *pmc) +static inline void kvm_pmu_request_counter_reprogram(struct kvm_pmc *pmc) { set_bit(pmc->idx, pmc_to_pmu(pmc)->reprogram_pmi); kvm_make_request(KVM_REQ_PMU, pmc->vcpu); diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index cc77a0681800..5fa939e411d8 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -161,7 +161,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) data &= ~pmu->reserved_bits; if (data != pmc->eventsel) { pmc->eventsel = data; - kvm_pmu_request_counter_reprogam(pmc); + kvm_pmu_request_counter_reprogram(pmc); } return 0; } diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 0827e0c25309..a64ede4f1d8a 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -4093,7 +4093,7 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index) { switch (index) { case MSR_IA32_MCG_EXT_CTL: - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: return false; case MSR_IA32_SMBASE: if (!IS_ENABLED(CONFIG_KVM_SMM)) diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index e8a3be0b9df9..741efe2c497b 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -57,7 +57,7 @@ static void reprogram_fixed_counters(struct kvm_pmu *pmu, u64 data) pmc = get_fixed_pmc(pmu, MSR_CORE_PERF_FIXED_CTR0 + i); __set_bit(INTEL_PMC_IDX_FIXED + i, pmu->pmc_in_use); - kvm_pmu_request_counter_reprogam(pmc); + kvm_pmu_request_counter_reprogram(pmc); } } @@ -76,13 +76,13 @@ static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) static void reprogram_counters(struct kvm_pmu *pmu, u64 diff) { int bit; - struct kvm_pmc *pmc; - for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) { - pmc = intel_pmc_idx_to_pmc(pmu, bit); - if (pmc) - kvm_pmu_request_counter_reprogam(pmc); - } + if (!diff) + return; + + for_each_set_bit(bit, (unsigned long *)&diff, X86_PMC_IDX_MAX) + set_bit(bit, pmu->reprogram_pmi); + kvm_make_request(KVM_REQ_PMU, pmu_to_vcpu(pmu)); } static bool intel_hw_event_available(struct kvm_pmc *pmc) @@ -351,45 +351,47 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: msr_info->data = pmu->fixed_ctr_ctrl; - return 0; + break; case MSR_CORE_PERF_GLOBAL_STATUS: msr_info->data = pmu->global_status; - return 0; + break; case MSR_CORE_PERF_GLOBAL_CTRL: msr_info->data = pmu->global_ctrl; - return 0; + break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: msr_info->data = 0; - return 0; + break; case MSR_IA32_PEBS_ENABLE: msr_info->data = pmu->pebs_enable; - return 0; + break; case MSR_IA32_DS_AREA: msr_info->data = pmu->ds_area; - return 0; + break; case MSR_PEBS_DATA_CFG: msr_info->data = pmu->pebs_data_cfg; - return 0; + break; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { u64 val = pmc_read_counter(pmc); msr_info->data = val & pmu->counter_bitmask[KVM_PMC_GP]; - return 0; + break; } else if ((pmc = get_fixed_pmc(pmu, msr))) { u64 val = pmc_read_counter(pmc); msr_info->data = val & pmu->counter_bitmask[KVM_PMC_FIXED]; - return 0; + break; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { msr_info->data = pmc->eventsel; - return 0; - } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) - return 0; + break; + } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, true)) { + break; + } + return 1; } - return 1; + return 0; } static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) @@ -402,44 +404,43 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: - if (pmu->fixed_ctr_ctrl == data) - return 0; - if (!(data & pmu->fixed_ctr_ctrl_mask)) { + if (data & pmu->fixed_ctr_ctrl_mask) + return 1; + + if (pmu->fixed_ctr_ctrl != data) reprogram_fixed_counters(pmu, data); - return 0; - } break; case MSR_CORE_PERF_GLOBAL_STATUS: - if (msr_info->host_initiated) { - pmu->global_status = data; - return 0; - } - break; /* RO MSR */ + if (!msr_info->host_initiated) + return 1; /* RO MSR */ + + pmu->global_status = data; + break; case MSR_CORE_PERF_GLOBAL_CTRL: - if (pmu->global_ctrl == data) - return 0; - if (kvm_valid_perf_global_ctrl(pmu, data)) { + if (!kvm_valid_perf_global_ctrl(pmu, data)) + return 1; + + if (pmu->global_ctrl != data) { diff = pmu->global_ctrl ^ data; pmu->global_ctrl = data; reprogram_counters(pmu, diff); - return 0; } break; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - if (!(data & pmu->global_ovf_ctrl_mask)) { - if (!msr_info->host_initiated) - pmu->global_status &= ~data; - return 0; - } + if (data & pmu->global_ovf_ctrl_mask) + return 1; + + if (!msr_info->host_initiated) + pmu->global_status &= ~data; break; case MSR_IA32_PEBS_ENABLE: - if (pmu->pebs_enable == data) - return 0; - if (!(data & pmu->pebs_enable_mask)) { + if (data & pmu->pebs_enable_mask) + return 1; + + if (pmu->pebs_enable != data) { diff = pmu->pebs_enable ^ data; pmu->pebs_enable = data; reprogram_counters(pmu, diff); - return 0; } break; case MSR_IA32_DS_AREA: @@ -447,15 +448,14 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; if (is_noncanonical_address(data, vcpu)) return 1; + pmu->ds_area = data; - return 0; + break; case MSR_PEBS_DATA_CFG: - if (pmu->pebs_data_cfg == data) - return 0; - if (!(data & pmu->pebs_data_cfg_mask)) { - pmu->pebs_data_cfg = data; - return 0; - } + if (data & pmu->pebs_data_cfg_mask) + return 1; + + pmu->pebs_data_cfg = data; break; default: if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || @@ -463,33 +463,38 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if ((msr & MSR_PMC_FULL_WIDTH_BIT) && (data & ~pmu->counter_bitmask[KVM_PMC_GP])) return 1; + if (!msr_info->host_initiated && !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; pmc->counter += data - pmc_read_counter(pmc); pmc_update_sample_period(pmc); - return 0; + break; } else if ((pmc = get_fixed_pmc(pmu, msr))) { pmc->counter += data - pmc_read_counter(pmc); pmc_update_sample_period(pmc); - return 0; + break; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { - if (data == pmc->eventsel) - return 0; reserved_bits = pmu->reserved_bits; if ((pmc->idx == 2) && (pmu->raw_event_mask & HSW_IN_TX_CHECKPOINTED)) reserved_bits ^= HSW_IN_TX_CHECKPOINTED; - if (!(data & reserved_bits)) { + if (data & reserved_bits) + return 1; + + if (data != pmc->eventsel) { pmc->eventsel = data; - kvm_pmu_request_counter_reprogam(pmc); - return 0; + kvm_pmu_request_counter_reprogram(pmc); } - } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) - return 0; + break; + } else if (intel_pmu_handle_lbr_msrs_access(vcpu, msr_info, false)) { + break; + } + /* Not a known PMU MSR. */ + return 1; } - return 1; + return 0; } static void setup_fixed_pmc_eventsel(struct kvm_pmu *pmu) @@ -531,6 +536,16 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->pebs_enable_mask = ~0ull; pmu->pebs_data_cfg_mask = ~0ull; + memset(&lbr_desc->records, 0, sizeof(lbr_desc->records)); + + /* + * Setting passthrough of LBR MSRs is done only in the VM-Entry loop, + * and PMU refresh is disallowed after the vCPU has run, i.e. this code + * should never be reached while KVM is passing through MSRs. + */ + if (KVM_BUG_ON(lbr_desc->msr_passthrough, vcpu->kvm)) + return; + entry = kvm_find_cpuid_entry(vcpu, 0xa); if (!entry || !vcpu->kvm->arch.enable_pmu) return; diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 536b7d688851..599dd62eeb0e 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -1946,7 +1946,7 @@ static inline bool is_vmx_feature_control_msr_valid(struct vcpu_vmx *vmx, static int vmx_get_msr_feature(struct kvm_msr_entry *msr) { switch (msr->index) { - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: if (!nested) return 1; return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); @@ -2031,7 +2031,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = to_vmx(vcpu)->msr_ia32_sgxlepubkeyhash [msr_info->index - MSR_IA32_SGXLEPUBKEYHASH0]; break; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: if (!nested_vmx_allowed(vcpu)) return 1; if (vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index, @@ -2340,7 +2340,7 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmx->msr_ia32_sgxlepubkeyhash [msr_index - MSR_IA32_SGXLEPUBKEYHASH0] = data; break; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: if (!msr_info->host_initiated) return 1; /* they are read-only */ if (!nested_vmx_allowed(vcpu)) @@ -6930,7 +6930,7 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) * real mode. */ return enable_unrestricted_guest || emulate_invalid_guest_state; - case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + case KVM_FIRST_EMULATED_VMX_MSR ... KVM_LAST_EMULATED_VMX_MSR: return nested; case MSR_AMD64_VIRT_SPEC_CTRL: case MSR_AMD64_TSC_RATIO: @@ -7756,9 +7756,11 @@ static u64 vmx_get_perf_capabilities(void) if (boot_cpu_has(X86_FEATURE_PDCM)) rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); - x86_perf_get_lbr(&lbr); - if (lbr.nr) - perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; + if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) { + x86_perf_get_lbr(&lbr); + if (lbr.nr) + perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; + } if (vmx_pebs_supported()) { perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ad4a45d8975b..095a41c6f346 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1556,39 +1556,41 @@ static u32 emulated_msrs[ARRAY_SIZE(emulated_msrs_all)]; static unsigned num_emulated_msrs; /* - * List of msr numbers which are used to expose MSR-based features that - * can be used by a hypervisor to validate requested CPU features. + * List of MSRs that control the existence of MSR-based features, i.e. MSRs + * that are effectively CPUID leafs. VMX MSRs are also included in the set of + * feature MSRs, but are handled separately to allow expedited lookups. */ -static const u32 msr_based_features_all[] = { - MSR_IA32_VMX_BASIC, - MSR_IA32_VMX_TRUE_PINBASED_CTLS, - MSR_IA32_VMX_PINBASED_CTLS, - MSR_IA32_VMX_TRUE_PROCBASED_CTLS, - MSR_IA32_VMX_PROCBASED_CTLS, - MSR_IA32_VMX_TRUE_EXIT_CTLS, - MSR_IA32_VMX_EXIT_CTLS, - MSR_IA32_VMX_TRUE_ENTRY_CTLS, - MSR_IA32_VMX_ENTRY_CTLS, - MSR_IA32_VMX_MISC, - MSR_IA32_VMX_CR0_FIXED0, - MSR_IA32_VMX_CR0_FIXED1, - MSR_IA32_VMX_CR4_FIXED0, - MSR_IA32_VMX_CR4_FIXED1, - MSR_IA32_VMX_VMCS_ENUM, - MSR_IA32_VMX_PROCBASED_CTLS2, - MSR_IA32_VMX_EPT_VPID_CAP, - MSR_IA32_VMX_VMFUNC, - +static const u32 msr_based_features_all_except_vmx[] = { MSR_AMD64_DE_CFG, MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, MSR_IA32_PERF_CAPABILITIES, }; -static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)]; +static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all_except_vmx) + + (KVM_LAST_EMULATED_VMX_MSR - KVM_FIRST_EMULATED_VMX_MSR + 1)]; static unsigned int num_msr_based_features; /* + * All feature MSRs except uCode revID, which tracks the currently loaded uCode + * patch, are immutable once the vCPU model is defined. + */ +static bool kvm_is_immutable_feature_msr(u32 msr) +{ + int i; + + if (msr >= KVM_FIRST_EMULATED_VMX_MSR && msr <= KVM_LAST_EMULATED_VMX_MSR) + return true; + + for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) { + if (msr == msr_based_features_all_except_vmx[i]) + return msr != MSR_IA32_UCODE_REV; + } + + return false; +} + +/* * Some IA32_ARCH_CAPABILITIES bits have dependencies on MSRs that KVM * does not yet virtualize. These include: * 10 - MISC_PACKAGE_CTRLS @@ -2205,6 +2207,22 @@ static int do_get_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data) { + u64 val; + + /* + * Disallow writes to immutable feature MSRs after KVM_RUN. KVM does + * not support modifying the guest vCPU model on the fly, e.g. changing + * the nVMX capabilities while L2 is running is nonsensical. Ignore + * writes of the same value, e.g. to allow userspace to blindly stuff + * all MSRs when emulating RESET. + */ + if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) { + if (do_get_msr(vcpu, index, &val) || *data != val) + return -EINVAL; + + return 0; + } + return kvm_set_msr_ignored_check(vcpu, index, *data, true); } @@ -3627,9 +3645,17 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & ~kvm_caps.supported_perf_cap) return 1; + /* + * Note, this is not just a performance optimization! KVM + * disallows changing feature MSRs after the vCPU has run; PMU + * refresh will bug the VM if called after the vCPU has run. + */ + if (vcpu->arch.perf_capabilities == data) + break; + vcpu->arch.perf_capabilities = data; kvm_pmu_refresh(vcpu); - return 0; + break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && !guest_has_pred_cmd_msr(vcpu)) return 1; @@ -7045,6 +7071,18 @@ out: return r; } +static void kvm_probe_feature_msr(u32 msr_index) +{ + struct kvm_msr_entry msr = { + .index = msr_index, + }; + + if (kvm_get_msr_feature(&msr)) + return; + + msr_based_features[num_msr_based_features++] = msr_index; +} + static void kvm_probe_msr_to_save(u32 msr_index) { u32 dummy[2]; @@ -7120,7 +7158,7 @@ static void kvm_probe_msr_to_save(u32 msr_index) msrs_to_save[num_msrs_to_save++] = msr_index; } -static void kvm_init_msr_list(void) +static void kvm_init_msr_lists(void) { unsigned i; @@ -7146,15 +7184,11 @@ static void kvm_init_msr_list(void) emulated_msrs[num_emulated_msrs++] = emulated_msrs_all[i]; } - for (i = 0; i < ARRAY_SIZE(msr_based_features_all); i++) { - struct kvm_msr_entry msr; + for (i = KVM_FIRST_EMULATED_VMX_MSR; i <= KVM_LAST_EMULATED_VMX_MSR; i++) + kvm_probe_feature_msr(i); - msr.index = msr_based_features_all[i]; - if (kvm_get_msr_feature(&msr)) - continue; - - msr_based_features[num_msr_based_features++] = msr_based_features_all[i]; - } + for (i = 0; i < ARRAY_SIZE(msr_based_features_all_except_vmx); i++) + kvm_probe_feature_msr(msr_based_features_all_except_vmx[i]); } static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, @@ -9488,7 +9522,7 @@ static int __kvm_x86_vendor_init(struct kvm_x86_init_ops *ops) kvm_caps.max_guest_tsc_khz = max; } kvm_caps.default_tsc_scaling_ratio = 1ULL << kvm_caps.tsc_scaling_ratio_frac_bits; - kvm_init_msr_list(); + kvm_init_msr_lists(); return 0; out_unwind_ops: diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 203fb6640b5b..fbef05c0bdeb 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -40,6 +40,14 @@ void kvm_spurious_fault(void); failed; \ }) +/* + * The first...last VMX feature MSRs that are emulated by KVM. This may or may + * not cover all known VMX MSRs, as KVM doesn't emulate an MSR until there's an + * associated feature that KVM supports for nested virtualization. + */ +#define KVM_FIRST_EMULATED_VMX_MSR MSR_IA32_VMX_BASIC +#define KVM_LAST_EMULATED_VMX_MSR MSR_IA32_VMX_VMFUNC + #define KVM_DEFAULT_PLE_GAP 128 #define KVM_VMX_DEFAULT_PLE_WINDOW 4096 #define KVM_DEFAULT_PLE_WINDOW_GROW 2 @@ -83,6 +91,11 @@ static inline unsigned int __shrink_ple_window(unsigned int val, void kvm_service_local_tlb_flush_requests(struct kvm_vcpu *vcpu); int kvm_check_nested_events(struct kvm_vcpu *vcpu); +static inline bool kvm_vcpu_has_run(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.last_vmentry_cpu != -1; +} + static inline bool kvm_is_exception_pending(struct kvm_vcpu *vcpu) { return vcpu->arch.exception.pending || diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h index 90387ddcb2a9..e1d65d933310 100644 --- a/tools/testing/selftests/kvm/include/x86_64/processor.h +++ b/tools/testing/selftests/kvm/include/x86_64/processor.h @@ -928,14 +928,45 @@ static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index); int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value); -static inline void vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, - uint64_t msr_value) -{ - int r = _vcpu_set_msr(vcpu, msr_index, msr_value); - - TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r)); -} +/* + * Assert on an MSR access(es) and pretty print the MSR name when possible. + * Note, the caller provides the stringified name so that the name of macro is + * printed, not the value the macro resolves to (due to macro expansion). + */ +#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \ +do { \ + if (__builtin_constant_p(msr)) { \ + TEST_ASSERT(cond, fmt, str, args); \ + } else if (!(cond)) { \ + char buf[16]; \ + \ + snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \ + TEST_ASSERT(cond, fmt, buf, args); \ + } \ +} while (0) +/* + * Returns true if KVM should return the last written value when reading an MSR + * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that + * is changing, etc. This is NOT an exhaustive list! The intent is to filter + * out MSRs that are not durable _and_ that a selftest wants to write. + */ +static inline bool is_durable_msr(uint32_t msr) +{ + return msr != MSR_IA32_TSC; +} + +#define vcpu_set_msr(vcpu, msr, val) \ +do { \ + uint64_t r, v = val; \ + \ + TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \ + "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \ + if (!is_durable_msr(msr)) \ + break; \ + r = vcpu_get_msr(vcpu, msr); \ + TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\ +} while (0) void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits); bool vm_is_unrestricted_guest(struct kvm_vm *vm); diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c index 2feef25ba691..8cec5c8aca8a 100644 --- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c +++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c @@ -54,6 +54,21 @@ #define AMD_ZEN_BR_RETIRED EVENT(0xc2, 0) + +/* + * "Retired instructions", from Processor Programming Reference + * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors, + * Preliminary Processor Programming Reference (PPR) for AMD Family + * 17h Model 31h, Revision B0 Processors, and Preliminary Processor + * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision + * B1 Processors Volume 1 of 2. + * --- and --- + * "Instructions retired", from the Intel SDM, volume 3, + * "Pre-defined Architectural Performance Events." + */ + +#define INST_RETIRED EVENT(0xc0, 0) + /* * This event list comprises Intel's eight architectural events plus * AMD's "retired branch instructions" for Zen[123] (and possibly @@ -61,7 +76,7 @@ */ static const uint64_t event_list[] = { EVENT(0x3c, 0), - EVENT(0xc0, 0), + INST_RETIRED, EVENT(0x3c, 1), EVENT(0x2e, 0x4f), EVENT(0x2e, 0x41), @@ -71,13 +86,21 @@ static const uint64_t event_list[] = { AMD_ZEN_BR_RETIRED, }; +struct { + uint64_t loads; + uint64_t stores; + uint64_t loads_stores; + uint64_t branches_retired; + uint64_t instructions_retired; +} pmc_results; + /* * If we encounter a #GP during the guest PMU sanity check, then the guest * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0). */ static void guest_gp_handler(struct ex_regs *regs) { - GUEST_SYNC(0); + GUEST_SYNC(-EFAULT); } /* @@ -92,12 +115,23 @@ static void check_msr(uint32_t msr, uint64_t bits_to_flip) wrmsr(msr, v); if (rdmsr(msr) != v) - GUEST_SYNC(0); + GUEST_SYNC(-EIO); v ^= bits_to_flip; wrmsr(msr, v); if (rdmsr(msr) != v) - GUEST_SYNC(0); + GUEST_SYNC(-EIO); +} + +static void run_and_measure_loop(uint32_t msr_base) +{ + const uint64_t branches_retired = rdmsr(msr_base + 0); + const uint64_t insn_retired = rdmsr(msr_base + 1); + + __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); + + pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired; + pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired; } static void intel_guest_code(void) @@ -105,19 +139,18 @@ static void intel_guest_code(void) check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1); check_msr(MSR_P6_EVNTSEL0, 0xffff); check_msr(MSR_IA32_PMC0, 0xffff); - GUEST_SYNC(1); + GUEST_SYNC(0); for (;;) { - uint64_t br0, br1; - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | ARCH_PERFMON_EVENTSEL_OS | INTEL_BR_RETIRED); - wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 1); - br0 = rdmsr(MSR_IA32_PMC0); - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); - br1 = rdmsr(MSR_IA32_PMC0); - GUEST_SYNC(br1 - br0); + wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED); + wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); + + run_and_measure_loop(MSR_IA32_PMC0); + GUEST_SYNC(0); } } @@ -130,18 +163,17 @@ static void amd_guest_code(void) { check_msr(MSR_K7_EVNTSEL0, 0xffff); check_msr(MSR_K7_PERFCTR0, 0xffff); - GUEST_SYNC(1); + GUEST_SYNC(0); for (;;) { - uint64_t br0, br1; - wrmsr(MSR_K7_EVNTSEL0, 0); wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE | ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BR_RETIRED); - br0 = rdmsr(MSR_K7_PERFCTR0); - __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES})); - br1 = rdmsr(MSR_K7_PERFCTR0); - GUEST_SYNC(br1 - br0); + wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE | + ARCH_PERFMON_EVENTSEL_OS | INST_RETIRED); + + run_and_measure_loop(MSR_K7_PERFCTR0); + GUEST_SYNC(0); } } @@ -161,6 +193,19 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) return uc.args[1]; } +static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu) +{ + uint64_t r; + + memset(&pmc_results, 0, sizeof(pmc_results)); + sync_global_to_guest(vcpu->vm, pmc_results); + + r = run_vcpu_to_sync(vcpu); + TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r); + + sync_global_from_guest(vcpu->vm, pmc_results); +} + /* * In a nested environment or if the vPMU is disabled, the guest PMU * might not work as architected (accessing the PMU MSRs may raise @@ -171,13 +216,13 @@ static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu) */ static bool sanity_check_pmu(struct kvm_vcpu *vcpu) { - bool success; + uint64_t r; vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler); - success = run_vcpu_to_sync(vcpu); + r = run_vcpu_to_sync(vcpu); vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL); - return success; + return !r; } static struct kvm_pmu_event_filter *alloc_pmu_event_filter(uint32_t nevents) @@ -237,91 +282,101 @@ static struct kvm_pmu_event_filter *remove_event(struct kvm_pmu_event_filter *f, return f; } +#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \ +do { \ + uint64_t br = pmc_results.branches_retired; \ + uint64_t ir = pmc_results.instructions_retired; \ + \ + if (br && br != NUM_BRANCHES) \ + pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \ + __func__, br, NUM_BRANCHES); \ + TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \ + __func__, br); \ + TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \ + __func__, ir); \ +} while (0) + +#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \ +do { \ + uint64_t br = pmc_results.branches_retired; \ + uint64_t ir = pmc_results.instructions_retired; \ + \ + TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \ + __func__, br); \ + TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", \ + __func__, ir); \ +} while (0) + static void test_without_filter(struct kvm_vcpu *vcpu) { - uint64_t count = run_vcpu_to_sync(vcpu); + run_vcpu_and_sync_pmc_results(vcpu); - if (count != NUM_BRANCHES) - pr_info("%s: Branch instructions retired = %lu (expected %u)\n", - __func__, count, NUM_BRANCHES); - TEST_ASSERT(count, "Allowed PMU event is not counting"); + ASSERT_PMC_COUNTING_INSTRUCTIONS(); } -static uint64_t test_with_filter(struct kvm_vcpu *vcpu, - struct kvm_pmu_event_filter *f) +static void test_with_filter(struct kvm_vcpu *vcpu, + struct kvm_pmu_event_filter *f) { vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f); - return run_vcpu_to_sync(vcpu); + run_vcpu_and_sync_pmc_results(vcpu); } static void test_amd_deny_list(struct kvm_vcpu *vcpu) { uint64_t event = EVENT(0x1C2, 0); struct kvm_pmu_event_filter *f; - uint64_t count; f = create_pmu_event_filter(&event, 1, KVM_PMU_EVENT_DENY, 0); - count = test_with_filter(vcpu, f); - + test_with_filter(vcpu, f); free(f); - if (count != NUM_BRANCHES) - pr_info("%s: Branch instructions retired = %lu (expected %u)\n", - __func__, count, NUM_BRANCHES); - TEST_ASSERT(count, "Allowed PMU event is not counting"); + + ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_member_deny_list(struct kvm_vcpu *vcpu) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); - uint64_t count = test_with_filter(vcpu, f); + test_with_filter(vcpu, f); free(f); - if (count) - pr_info("%s: Branch instructions retired = %lu (expected 0)\n", - __func__, count); - TEST_ASSERT(!count, "Disallowed PMU Event is counting"); + + ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); } static void test_member_allow_list(struct kvm_vcpu *vcpu) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); - uint64_t count = test_with_filter(vcpu, f); + test_with_filter(vcpu, f); free(f); - if (count != NUM_BRANCHES) - pr_info("%s: Branch instructions retired = %lu (expected %u)\n", - __func__, count, NUM_BRANCHES); - TEST_ASSERT(count, "Allowed PMU event is not counting"); + + ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_not_member_deny_list(struct kvm_vcpu *vcpu) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_DENY); - uint64_t count; + remove_event(f, INST_RETIRED); remove_event(f, INTEL_BR_RETIRED); remove_event(f, AMD_ZEN_BR_RETIRED); - count = test_with_filter(vcpu, f); + test_with_filter(vcpu, f); free(f); - if (count != NUM_BRANCHES) - pr_info("%s: Branch instructions retired = %lu (expected %u)\n", - __func__, count, NUM_BRANCHES); - TEST_ASSERT(count, "Allowed PMU event is not counting"); + + ASSERT_PMC_COUNTING_INSTRUCTIONS(); } static void test_not_member_allow_list(struct kvm_vcpu *vcpu) { struct kvm_pmu_event_filter *f = event_filter(KVM_PMU_EVENT_ALLOW); - uint64_t count; + remove_event(f, INST_RETIRED); remove_event(f, INTEL_BR_RETIRED); remove_event(f, AMD_ZEN_BR_RETIRED); - count = test_with_filter(vcpu, f); + test_with_filter(vcpu, f); free(f); - if (count) - pr_info("%s: Branch instructions retired = %lu (expected 0)\n", - __func__, count); - TEST_ASSERT(!count, "Disallowed PMU Event is counting"); + + ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS(); } /* @@ -450,51 +505,30 @@ static bool supports_event_mem_inst_retired(void) #define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true) -struct perf_counter { - union { - uint64_t raw; - struct { - uint64_t loads:22; - uint64_t stores:22; - uint64_t loads_stores:20; - }; - }; -}; - -static uint64_t masked_events_guest_test(uint32_t msr_base) +static void masked_events_guest_test(uint32_t msr_base) { - uint64_t ld0, ld1, st0, st1, ls0, ls1; - struct perf_counter c; - int val; - /* - * The acutal value of the counters don't determine the outcome of + * The actual value of the counters don't determine the outcome of * the test. Only that they are zero or non-zero. */ - ld0 = rdmsr(msr_base + 0); - st0 = rdmsr(msr_base + 1); - ls0 = rdmsr(msr_base + 2); + const uint64_t loads = rdmsr(msr_base + 0); + const uint64_t stores = rdmsr(msr_base + 1); + const uint64_t loads_stores = rdmsr(msr_base + 2); + int val; + __asm__ __volatile__("movl $0, %[v];" "movl %[v], %%eax;" "incl %[v];" : [v]"+m"(val) :: "eax"); - ld1 = rdmsr(msr_base + 0); - st1 = rdmsr(msr_base + 1); - ls1 = rdmsr(msr_base + 2); - - c.loads = ld1 - ld0; - c.stores = st1 - st0; - c.loads_stores = ls1 - ls0; - - return c.raw; + pmc_results.loads = rdmsr(msr_base + 0) - loads; + pmc_results.stores = rdmsr(msr_base + 1) - stores; + pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores; } static void intel_masked_events_guest_code(void) { - uint64_t r; - for (;;) { wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0); @@ -507,16 +541,13 @@ static void intel_masked_events_guest_code(void) wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7); - r = masked_events_guest_test(MSR_IA32_PMC0); - - GUEST_SYNC(r); + masked_events_guest_test(MSR_IA32_PMC0); + GUEST_SYNC(0); } } static void amd_masked_events_guest_code(void) { - uint64_t r; - for (;;) { wrmsr(MSR_K7_EVNTSEL0, 0); wrmsr(MSR_K7_EVNTSEL1, 0); @@ -529,26 +560,22 @@ static void amd_masked_events_guest_code(void) wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE | ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE); - r = masked_events_guest_test(MSR_K7_PERFCTR0); - - GUEST_SYNC(r); + masked_events_guest_test(MSR_K7_PERFCTR0); + GUEST_SYNC(0); } } -static struct perf_counter run_masked_events_test(struct kvm_vcpu *vcpu, - const uint64_t masked_events[], - const int nmasked_events) +static void run_masked_events_test(struct kvm_vcpu *vcpu, + const uint64_t masked_events[], + const int nmasked_events) { struct kvm_pmu_event_filter *f; - struct perf_counter r; f = create_pmu_event_filter(masked_events, nmasked_events, KVM_PMU_EVENT_ALLOW, KVM_PMU_EVENT_FLAG_MASKED_EVENTS); - r.raw = test_with_filter(vcpu, f); + test_with_filter(vcpu, f); free(f); - - return r; } /* Matches KVM_PMU_EVENT_FILTER_MAX_EVENTS in pmu.c */ @@ -673,7 +700,6 @@ static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events, int nevents) { int ntests = ARRAY_SIZE(test_cases); - struct perf_counter c; int i, n; for (i = 0; i < ntests; i++) { @@ -685,13 +711,15 @@ static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events, n = append_test_events(test, events, nevents); - c = run_masked_events_test(vcpu, events, n); - TEST_ASSERT(bool_eq(c.loads, test->flags & ALLOW_LOADS) && - bool_eq(c.stores, test->flags & ALLOW_STORES) && - bool_eq(c.loads_stores, + run_masked_events_test(vcpu, events, n); + + TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) && + bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) && + bool_eq(pmc_results.loads_stores, test->flags & ALLOW_LOADS_STORES), - "%s loads: %u, stores: %u, loads + stores: %u", - test->msg, c.loads, c.stores, c.loads_stores); + "%s loads: %lu, stores: %lu, loads + stores: %lu", + test->msg, pmc_results.loads, pmc_results.stores, + pmc_results.loads_stores); } } diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c index c280ba1e6572..3009b3e5254d 100644 --- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c +++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c @@ -14,12 +14,11 @@ #define _GNU_SOURCE /* for program_invocation_short_name */ #include <sys/ioctl.h> +#include <linux/bitmap.h> + #include "kvm_util.h" #include "vmx.h" -#define PMU_CAP_FW_WRITES (1ULL << 13) -#define PMU_CAP_LBR_FMT 0x3f - union perf_capabilities { struct { u64 lbr_format:6; @@ -36,59 +35,220 @@ union perf_capabilities { u64 capabilities; }; -static void guest_code(void) +/* + * The LBR format and most PEBS features are immutable, all other features are + * fungible (if supported by the host and KVM). + */ +static const union perf_capabilities immutable_caps = { + .lbr_format = -1, + .pebs_trap = 1, + .pebs_arch_reg = 1, + .pebs_format = -1, + .pebs_baseline = 1, +}; + +static const union perf_capabilities format_caps = { + .lbr_format = -1, + .pebs_format = -1, +}; + +static void guest_code(uint64_t current_val) { - wrmsr(MSR_IA32_PERF_CAPABILITIES, PMU_CAP_LBR_FMT); + uint8_t vector; + int i; + + vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, current_val); + GUEST_ASSERT_2(vector == GP_VECTOR, current_val, vector); + + vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, 0); + GUEST_ASSERT_2(vector == GP_VECTOR, 0, vector); + + for (i = 0; i < 64; i++) { + vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, + current_val ^ BIT_ULL(i)); + GUEST_ASSERT_2(vector == GP_VECTOR, + current_val ^ BIT_ULL(i), vector); + } + + GUEST_DONE(); } -int main(int argc, char *argv[]) +/* + * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value + * written, that the guest always sees the userspace controlled value, and that + * PERF_CAPABILITIES is immutable after KVM_RUN. + */ +static void test_guest_wrmsr_perf_capabilities(union perf_capabilities host_cap) { - struct kvm_vm *vm; struct kvm_vcpu *vcpu; - int ret; - union perf_capabilities host_cap; - uint64_t val; + struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, guest_code); + struct ucall uc; + int r, i; - host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES); - host_cap.capabilities &= (PMU_CAP_FW_WRITES | PMU_CAP_LBR_FMT); + vm_init_descriptor_tables(vm); + vcpu_init_descriptor_tables(vcpu); - /* Create VM */ - vm = vm_create_with_one_vcpu(&vcpu, guest_code); + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); - TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM)); + vcpu_args_set(vcpu, 1, host_cap.capabilities); + vcpu_run(vcpu); - TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); - TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); + switch (get_ucall(vcpu, &uc)) { + case UCALL_ABORT: + REPORT_GUEST_ASSERT_2(uc, "val = 0x%lx, vector = %lu"); + break; + case UCALL_DONE: + break; + default: + TEST_FAIL("Unexpected ucall: %lu", uc.cmd); + } - /* testcase 1, set capabilities when we have PDCM bit */ - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, PMU_CAP_FW_WRITES); + ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), host_cap.capabilities); - /* check capabilities can be retrieved with KVM_GET_MSR */ - ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES); + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); - /* check whatever we write with KVM_SET_MSR is _not_ modified */ - vcpu_run(vcpu); - ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), PMU_CAP_FW_WRITES); + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); + TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail"); + + for (i = 0; i < 64; i++) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, + host_cap.capabilities ^ BIT_ULL(i)); + TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail", + host_cap.capabilities ^ BIT_ULL(i)); + } + + kvm_vm_free(vm); +} + +/* + * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features + * enabled, as well as '0' (to disable all features). + */ +static void test_basic_perf_capabilities(union perf_capabilities host_cap) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL); - /* testcase 2, check valid LBR formats are accepted */ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0); - ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), 0); + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + + kvm_vm_free(vm); +} - vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.lbr_format); - ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES), (u64)host_cap.lbr_format); +static void test_fungible_perf_capabilities(union perf_capabilities host_cap) +{ + const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities; + + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL); + int bit; + + for_each_set_bit(bit, &fungible_caps, 64) { + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit)); + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, + host_cap.capabilities & ~BIT_ULL(bit)); + } + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + + kvm_vm_free(vm); +} + +/* + * Verify KVM rejects attempts to set unsupported and/or immutable features in + * PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated + * separately as they are multi-bit values, e.g. toggling or setting a single + * bit can generate a false positive without dedicated safeguards. + */ +static void test_immutable_perf_capabilities(union perf_capabilities host_cap) +{ + const uint64_t reserved_caps = (~host_cap.capabilities | + immutable_caps.capabilities) & + ~format_caps.capabilities; + + struct kvm_vcpu *vcpu; + struct kvm_vm *vm = vm_create_with_one_vcpu(&vcpu, NULL); + union perf_capabilities val = host_cap; + int r, bit; + + for_each_set_bit(bit, &reserved_caps, 64) { + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, + host_cap.capabilities ^ BIT_ULL(bit)); + TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail", + host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing", + BIT_ULL(bit), bit); + } /* - * Testcase 3, check that an "invalid" LBR format is rejected. Only an - * exact match of the host's format (and 0/disabled) is allowed. + * KVM only supports the host's native LBR format, as well as '0' (to + * disable LBR support). Verify KVM rejects all other LBR formats. */ - for (val = 1; val <= PMU_CAP_LBR_FMT; val++) { - if (val == (host_cap.capabilities & PMU_CAP_LBR_FMT)) + for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) { + if (val.lbr_format == host_cap.lbr_format) continue; - ret = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val); - TEST_ASSERT(!ret, "Bad LBR FMT = 0x%lx didn't fail", val); + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities); + TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x", + val.lbr_format, host_cap.lbr_format); } - printf("Completed perf capability tests.\n"); + /* Ditto for the PEBS format. */ + for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) { + if (val.pebs_format == host_cap.pebs_format) + continue; + + r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities); + TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x", + val.pebs_format, host_cap.pebs_format); + } + + kvm_vm_free(vm); +} + +/* + * Test that LBR MSRs are writable when LBRs are enabled, and then verify that + * disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of + * LBR_TOS as those bits are writable across all uarch implementations (arch + * LBRs will need to poke a different MSR). + */ +static void test_lbr_perf_capabilities(union perf_capabilities host_cap) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + int r; + + if (!host_cap.lbr_format) + return; + + vm = vm_create_with_one_vcpu(&vcpu, NULL); + + vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities); + vcpu_set_msr(vcpu, MSR_LBR_TOS, 7); + + vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function); + + r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7); + TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU"); + kvm_vm_free(vm); } + +int main(int argc, char *argv[]) +{ + union perf_capabilities host_cap; + + TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM)); + + TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION)); + TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0); + + host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES); + + TEST_ASSERT(host_cap.full_width_write, + "Full-width writes should always be supported"); + + test_basic_perf_capabilities(host_cap); + test_fungible_perf_capabilities(host_cap); + test_immutable_perf_capabilities(host_cap); + test_guest_wrmsr_perf_capabilities(host_cap); + test_lbr_perf_capabilities(host_cap); +} |