diff options
-rw-r--r-- | MAINTAINERS | 3 | ||||
-rw-r--r-- | arch/arm64/Kbuild | 1 | ||||
-rw-r--r-- | arch/arm64/hyperv/Makefile | 2 | ||||
-rw-r--r-- | arch/arm64/hyperv/hv_core.c | 181 | ||||
-rw-r--r-- | arch/arm64/hyperv/mshyperv.c | 87 | ||||
-rw-r--r-- | arch/arm64/include/asm/hyperv-tlfs.h | 69 | ||||
-rw-r--r-- | arch/arm64/include/asm/mshyperv.h | 54 | ||||
-rw-r--r-- | arch/arm64/kernel/efi.c | 1 | ||||
-rw-r--r-- | arch/x86/hyperv/hv_init.c | 165 | ||||
-rw-r--r-- | arch/x86/include/asm/hyperv-tlfs.h | 9 | ||||
-rw-r--r-- | arch/x86/include/asm/mshyperv.h | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mshyperv.c | 38 | ||||
-rw-r--r-- | drivers/clocksource/hyperv_timer.c | 3 | ||||
-rw-r--r-- | drivers/hv/Kconfig | 7 | ||||
-rw-r--r-- | drivers/hv/hv_common.c | 219 | ||||
-rw-r--r-- | drivers/hv/hv_snapshot.c | 1 | ||||
-rw-r--r-- | drivers/hv/hv_util.c | 5 | ||||
-rw-r--r-- | include/asm-generic/mshyperv.h | 13 | ||||
-rw-r--r-- | include/clocksource/hyperv_timer.h | 11 | ||||
-rw-r--r-- | include/linux/hyperv.h | 16 |
20 files changed, 724 insertions, 165 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 2170528c05a7..7c2fe5f34b7e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -8647,6 +8647,9 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git F: Documentation/ABI/stable/sysfs-bus-vmbus F: Documentation/ABI/testing/debugfs-hyperv F: Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst +F: arch/arm64/hyperv +F: arch/arm64/include/asm/hyperv-tlfs.h +F: arch/arm64/include/asm/mshyperv.h F: arch/x86/hyperv F: arch/x86/include/asm/hyperv-tlfs.h F: arch/x86/include/asm/mshyperv.h diff --git a/arch/arm64/Kbuild b/arch/arm64/Kbuild index 7b393cfec071..ea7ab4ca81f9 100644 --- a/arch/arm64/Kbuild +++ b/arch/arm64/Kbuild @@ -2,4 +2,5 @@ obj-y += kernel/ mm/ net/ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_XEN) += xen/ +obj-$(subst m,y,$(CONFIG_HYPERV)) += hyperv/ obj-$(CONFIG_CRYPTO) += crypto/ diff --git a/arch/arm64/hyperv/Makefile b/arch/arm64/hyperv/Makefile new file mode 100644 index 000000000000..87c31c001da9 --- /dev/null +++ b/arch/arm64/hyperv/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-y := hv_core.o mshyperv.o diff --git a/arch/arm64/hyperv/hv_core.c b/arch/arm64/hyperv/hv_core.c new file mode 100644 index 000000000000..b54c34793701 --- /dev/null +++ b/arch/arm64/hyperv/hv_core.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Low level utility routines for interacting with Hyper-V. + * + * Copyright (C) 2021, Microsoft, Inc. + * + * Author : Michael Kelley <mikelley@microsoft.com> + */ + +#include <linux/types.h> +#include <linux/export.h> +#include <linux/mm.h> +#include <linux/hyperv.h> +#include <linux/arm-smccc.h> +#include <linux/module.h> +#include <asm-generic/bug.h> +#include <asm/hyperv-tlfs.h> +#include <asm/mshyperv.h> + +/* + * hv_do_hypercall- Invoke the specified hypercall + */ +u64 hv_do_hypercall(u64 control, void *input, void *output) +{ + struct arm_smccc_res res; + u64 input_address; + u64 output_address; + + input_address = input ? virt_to_phys(input) : 0; + output_address = output ? virt_to_phys(output) : 0; + + arm_smccc_1_1_hvc(HV_FUNC_ID, control, + input_address, output_address, &res); + return res.a0; +} +EXPORT_SYMBOL_GPL(hv_do_hypercall); + +/* + * hv_do_fast_hypercall8 -- Invoke the specified hypercall + * with arguments in registers instead of physical memory. + * Avoids the overhead of virt_to_phys for simple hypercalls. + */ + +u64 hv_do_fast_hypercall8(u16 code, u64 input) +{ + struct arm_smccc_res res; + u64 control; + + control = (u64)code | HV_HYPERCALL_FAST_BIT; + + arm_smccc_1_1_hvc(HV_FUNC_ID, control, input, &res); + return res.a0; +} +EXPORT_SYMBOL_GPL(hv_do_fast_hypercall8); + +/* + * Set a single VP register to a 64-bit value. + */ +void hv_set_vpreg(u32 msr, u64 value) +{ + struct arm_smccc_res res; + + arm_smccc_1_1_hvc(HV_FUNC_ID, + HVCALL_SET_VP_REGISTERS | HV_HYPERCALL_FAST_BIT | + HV_HYPERCALL_REP_COMP_1, + HV_PARTITION_ID_SELF, + HV_VP_INDEX_SELF, + msr, + 0, + value, + 0, + &res); + + /* + * Something is fundamentally broken in the hypervisor if + * setting a VP register fails. There's really no way to + * continue as a guest VM, so panic. + */ + BUG_ON(!hv_result_success(res.a0)); +} +EXPORT_SYMBOL_GPL(hv_set_vpreg); + +/* + * Get the value of a single VP register. One version + * returns just 64 bits and another returns the full 128 bits. + * The two versions are separate to avoid complicating the + * calling sequence for the more frequently used 64 bit version. + */ + +void hv_get_vpreg_128(u32 msr, struct hv_get_vp_registers_output *result) +{ + struct arm_smccc_1_2_regs args; + struct arm_smccc_1_2_regs res; + + args.a0 = HV_FUNC_ID; + args.a1 = HVCALL_GET_VP_REGISTERS | HV_HYPERCALL_FAST_BIT | + HV_HYPERCALL_REP_COMP_1; + args.a2 = HV_PARTITION_ID_SELF; + args.a3 = HV_VP_INDEX_SELF; + args.a4 = msr; + + /* + * Use the SMCCC 1.2 interface because the results are in registers + * beyond X0-X3. + */ + arm_smccc_1_2_hvc(&args, &res); + + /* + * Something is fundamentally broken in the hypervisor if + * getting a VP register fails. There's really no way to + * continue as a guest VM, so panic. + */ + BUG_ON(!hv_result_success(res.a0)); + + result->as64.low = res.a6; + result->as64.high = res.a7; +} +EXPORT_SYMBOL_GPL(hv_get_vpreg_128); + +u64 hv_get_vpreg(u32 msr) +{ + struct hv_get_vp_registers_output output; + + hv_get_vpreg_128(msr, &output); + + return output.as64.low; +} +EXPORT_SYMBOL_GPL(hv_get_vpreg); + +/* + * hyperv_report_panic - report a panic to Hyper-V. This function uses + * the older version of the Hyper-V interface that admittedly doesn't + * pass enough information to be useful beyond just recording the + * occurrence of a panic. The parallel hv_kmsg_dump() uses the + * new interface that allows reporting 4 Kbytes of data, which is much + * more useful. Hyper-V on ARM64 always supports the newer interface, but + * we retain support for the older version because the sysadmin is allowed + * to disable the newer version via sysctl in case of information security + * concerns about the more verbose version. + */ +void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) +{ + static bool panic_reported; + u64 guest_id; + + /* Don't report a panic to Hyper-V if we're not going to panic */ + if (in_die && !panic_on_oops) + return; + + /* + * We prefer to report panic on 'die' chain as we have proper + * registers to report, but if we miss it (e.g. on BUG()) we need + * to report it on 'panic'. + * + * Calling code in the 'die' and 'panic' paths ensures that only + * one CPU is running this code, so no atomicity is needed. + */ + if (panic_reported) + return; + panic_reported = true; + + guest_id = hv_get_vpreg(HV_REGISTER_GUEST_OSID); + + /* + * Hyper-V provides the ability to store only 5 values. + * Pick the passed in error value, the guest_id, the PC, + * and the SP. + */ + hv_set_vpreg(HV_REGISTER_CRASH_P0, err); + hv_set_vpreg(HV_REGISTER_CRASH_P1, guest_id); + hv_set_vpreg(HV_REGISTER_CRASH_P2, regs->pc); + hv_set_vpreg(HV_REGISTER_CRASH_P3, regs->sp); + hv_set_vpreg(HV_REGISTER_CRASH_P4, 0); + + /* + * Let Hyper-V know there is crash data available + */ + hv_set_vpreg(HV_REGISTER_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); +} +EXPORT_SYMBOL_GPL(hyperv_report_panic); diff --git a/arch/arm64/hyperv/mshyperv.c b/arch/arm64/hyperv/mshyperv.c new file mode 100644 index 000000000000..bbbe351e9045 --- /dev/null +++ b/arch/arm64/hyperv/mshyperv.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Core routines for interacting with Microsoft's Hyper-V hypervisor, + * including hypervisor initialization. + * + * Copyright (C) 2021, Microsoft, Inc. + * + * Author : Michael Kelley <mikelley@microsoft.com> + */ + +#include <linux/types.h> +#include <linux/acpi.h> +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/version.h> +#include <linux/cpuhotplug.h> +#include <asm/mshyperv.h> + +static bool hyperv_initialized; + +static int __init hyperv_init(void) +{ + struct hv_get_vp_registers_output result; + u32 a, b, c, d; + u64 guest_id; + int ret; + + /* + * Allow for a kernel built with CONFIG_HYPERV to be running in + * a non-Hyper-V environment, including on DT instead of ACPI. + * In such cases, do nothing and return success. + */ + if (acpi_disabled) + return 0; + + if (strncmp((char *)&acpi_gbl_FADT.hypervisor_id, "MsHyperV", 8)) + return 0; + + /* Setup the guest ID */ + guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); + hv_set_vpreg(HV_REGISTER_GUEST_OSID, guest_id); + + /* Get the features and hints from Hyper-V */ + hv_get_vpreg_128(HV_REGISTER_FEATURES, &result); + ms_hyperv.features = result.as32.a; + ms_hyperv.priv_high = result.as32.b; + ms_hyperv.misc_features = result.as32.c; + + hv_get_vpreg_128(HV_REGISTER_ENLIGHTENMENTS, &result); + ms_hyperv.hints = result.as32.a; + + pr_info("Hyper-V: privilege flags low 0x%x, high 0x%x, hints 0x%x, misc 0x%x\n", + ms_hyperv.features, ms_hyperv.priv_high, ms_hyperv.hints, + ms_hyperv.misc_features); + + /* Get information about the Hyper-V host version */ + hv_get_vpreg_128(HV_REGISTER_HYPERVISOR_VERSION, &result); + a = result.as32.a; + b = result.as32.b; + c = result.as32.c; + d = result.as32.d; + pr_info("Hyper-V: Host Build %d.%d.%d.%d-%d-%d\n", + b >> 16, b & 0xFFFF, a, d & 0xFFFFFF, c, d >> 24); + + ret = hv_common_init(); + if (ret) + return ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm64/hyperv_init:online", + hv_common_cpu_init, hv_common_cpu_die); + if (ret < 0) { + hv_common_free(); + return ret; + } + + hyperv_initialized = true; + return 0; +} + +early_initcall(hyperv_init); + +bool hv_is_hyperv_initialized(void) +{ + return hyperv_initialized; +} +EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); diff --git a/arch/arm64/include/asm/hyperv-tlfs.h b/arch/arm64/include/asm/hyperv-tlfs.h new file mode 100644 index 000000000000..4d964a7f02ee --- /dev/null +++ b/arch/arm64/include/asm/hyperv-tlfs.h @@ -0,0 +1,69 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * This file contains definitions from the Hyper-V Hypervisor Top-Level + * Functional Specification (TLFS): + * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/reference/tlfs + * + * Copyright (C) 2021, Microsoft, Inc. + * + * Author : Michael Kelley <mikelley@microsoft.com> + */ + +#ifndef _ASM_HYPERV_TLFS_H +#define _ASM_HYPERV_TLFS_H + +#include <linux/types.h> + +/* + * All data structures defined in the TLFS that are shared between Hyper-V + * and a guest VM use Little Endian byte ordering. This matches the default + * byte ordering of Linux running on ARM64, so no special handling is required. + */ + +/* + * These Hyper-V registers provide information equivalent to the CPUID + * instruction on x86/x64. + */ +#define HV_REGISTER_HYPERVISOR_VERSION 0x00000100 /*CPUID 0x40000002 */ +#define HV_REGISTER_FEATURES 0x00000200 /*CPUID 0x40000003 */ +#define HV_REGISTER_ENLIGHTENMENTS 0x00000201 /*CPUID 0x40000004 */ + +/* + * Group C Features. See the asm-generic version of hyperv-tlfs.h + * for a description of Feature Groups. + */ + +/* Crash MSRs available */ +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(8) + +/* STIMER direct mode is available */ +#define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(13) + +/* + * Synthetic register definitions equivalent to MSRs on x86/x64 + */ +#define HV_REGISTER_CRASH_P0 0x00000210 +#define HV_REGISTER_CRASH_P1 0x00000211 +#define HV_REGISTER_CRASH_P2 0x00000212 +#define HV_REGISTER_CRASH_P3 0x00000213 +#define HV_REGISTER_CRASH_P4 0x00000214 +#define HV_REGISTER_CRASH_CTL 0x00000215 + +#define HV_REGISTER_GUEST_OSID 0x00090002 +#define HV_REGISTER_VP_INDEX 0x00090003 +#define HV_REGISTER_TIME_REF_COUNT 0x00090004 +#define HV_REGISTER_REFERENCE_TSC 0x00090017 + +#define HV_REGISTER_SINT0 0x000A0000 +#define HV_REGISTER_SCONTROL 0x000A0010 +#define HV_REGISTER_SIEFP 0x000A0012 +#define HV_REGISTER_SIMP 0x000A0013 +#define HV_REGISTER_EOM 0x000A0014 + +#define HV_REGISTER_STIMER0_CONFIG 0x000B0000 +#define HV_REGISTER_STIMER0_COUNT 0x000B0001 + +#include <asm-generic/hyperv-tlfs.h> + +#endif diff --git a/arch/arm64/include/asm/mshyperv.h b/arch/arm64/include/asm/mshyperv.h new file mode 100644 index 000000000000..20070a847304 --- /dev/null +++ b/arch/arm64/include/asm/mshyperv.h @@ -0,0 +1,54 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Linux-specific definitions for managing interactions with Microsoft's + * Hyper-V hypervisor. The definitions in this file are specific to + * the ARM64 architecture. See include/asm-generic/mshyperv.h for + * definitions are that architecture independent. + * + * Definitions that are specified in the Hyper-V Top Level Functional + * Spec (TLFS) should not go in this file, but should instead go in + * hyperv-tlfs.h. + * + * Copyright (C) 2021, Microsoft, Inc. + * + * Author : Michael Kelley <mikelley@microsoft.com> + */ + +#ifndef _ASM_MSHYPERV_H +#define _ASM_MSHYPERV_H + +#include <linux/types.h> +#include <linux/arm-smccc.h> +#include <asm/hyperv-tlfs.h> + +/* + * Declare calls to get and set Hyper-V VP register values on ARM64, which + * requires a hypercall. + */ + +void hv_set_vpreg(u32 reg, u64 value); +u64 hv_get_vpreg(u32 reg); +void hv_get_vpreg_128(u32 reg, struct hv_get_vp_registers_output *result); + +static inline void hv_set_register(unsigned int reg, u64 value) +{ + hv_set_vpreg(reg, value); +} + +static inline u64 hv_get_register(unsigned int reg) +{ + return hv_get_vpreg(reg); +} + +/* SMCCC hypercall parameters */ +#define HV_SMCCC_FUNC_NUMBER 1 +#define HV_FUNC_ID ARM_SMCCC_CALL_VAL( \ + ARM_SMCCC_STD_CALL, \ + ARM_SMCCC_SMC_64, \ + ARM_SMCCC_OWNER_VENDOR_HYP, \ + HV_SMCCC_FUNC_NUMBER) + +#include <asm-generic/mshyperv.h> + +#endif diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index fa02efb28e88..e1be6c429810 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -55,6 +55,7 @@ static __init pteval_t create_mapping_protection(efi_memory_desc_t *md) /* we will fill this structure from the stub, so don't put it in .bss */ struct screen_info screen_info __section(".data"); +EXPORT_SYMBOL(screen_info); int __init efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md) { diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6952e219cba3..708a2712a516 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -7,10 +7,10 @@ * Author : K. Y. Srinivasan <kys@microsoft.com> */ -#include <linux/acpi.h> #include <linux/efi.h> #include <linux/types.h> #include <linux/bitfield.h> +#include <linux/io.h> #include <asm/apic.h> #include <asm/desc.h> #include <asm/hypervisor.h> @@ -39,71 +39,50 @@ EXPORT_SYMBOL_GPL(hv_hypercall_pg); /* Storage to save the hypercall page temporarily for hibernation */ static void *hv_hypercall_pg_saved; -u32 *hv_vp_index; -EXPORT_SYMBOL_GPL(hv_vp_index); - struct hv_vp_assist_page **hv_vp_assist_page; EXPORT_SYMBOL_GPL(hv_vp_assist_page); -void __percpu **hyperv_pcpu_input_arg; -EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); - -void __percpu **hyperv_pcpu_output_arg; -EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); - -u32 hv_max_vp_index; -EXPORT_SYMBOL_GPL(hv_max_vp_index); - static int hv_cpu_init(unsigned int cpu) { - u64 msr_vp_index; + union hv_vp_assist_msr_contents msr = { 0 }; struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; - void **input_arg; - struct page *pg; - - /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ - pg = alloc_pages(irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL, hv_root_partition ? 1 : 0); - if (unlikely(!pg)) - return -ENOMEM; - - input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - *input_arg = page_address(pg); - if (hv_root_partition) { - void **output_arg; - - output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); - *output_arg = page_address(pg + 1); - } - - msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); - - hv_vp_index[smp_processor_id()] = msr_vp_index; + int ret; - if (msr_vp_index > hv_max_vp_index) - hv_max_vp_index = msr_vp_index; + ret = hv_common_cpu_init(cpu); + if (ret) + return ret; if (!hv_vp_assist_page) return 0; - /* - * The VP ASSIST PAGE is an "overlay" page (see Hyper-V TLFS's Section - * 5.2.1 "GPA Overlay Pages"). Here it must be zeroed out to make sure - * we always write the EOI MSR in hv_apic_eoi_write() *after* the - * EOI optimization is disabled in hv_cpu_die(), otherwise a CPU may - * not be stopped in the case of CPU offlining and the VM will hang. - */ if (!*hvp) { - *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); - } - - if (*hvp) { - u64 val; - - val = vmalloc_to_pfn(*hvp); - val = (val << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) | - HV_X64_MSR_VP_ASSIST_PAGE_ENABLE; - - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, val); + if (hv_root_partition) { + /* + * For root partition we get the hypervisor provided VP assist + * page, instead of allocating a new page. + */ + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + *hvp = memremap(msr.pfn << + HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT, + PAGE_SIZE, MEMREMAP_WB); + } else { + /* + * The VP assist page is an "overlay" page (see Hyper-V TLFS's + * Section 5.2.1 "GPA Overlay Pages"). Here it must be zeroed + * out to make sure we always write the EOI MSR in + * hv_apic_eoi_write() *after* the EOI optimization is disabled + * in hv_cpu_die(), otherwise a CPU may not be stopped in the + * case of CPU offlining and the VM will hang. + */ + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); + if (*hvp) + msr.pfn = vmalloc_to_pfn(*hvp); + } + WARN_ON(!(*hvp)); + if (*hvp) { + msr.enable = 1; + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + } } return 0; @@ -198,29 +177,26 @@ static int hv_cpu_die(unsigned int cpu) { struct hv_reenlightenment_control re_ctrl; unsigned int new_cpu; - unsigned long flags; - void **input_arg; - void *pg; - local_irq_save(flags); - input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); - pg = *input_arg; - *input_arg = NULL; - - if (hv_root_partition) { - void **output_arg; - - output_arg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); - *output_arg = NULL; + hv_common_cpu_die(cpu); + + if (hv_vp_assist_page && hv_vp_assist_page[cpu]) { + union hv_vp_assist_msr_contents msr = { 0 }; + if (hv_root_partition) { + /* + * For root partition the VP assist page is mapped to + * hypervisor provided page, and thus we unmap the + * page here and nullify it, so that in future we have + * correct page address mapped in hv_cpu_init. + */ + memunmap(hv_vp_assist_page[cpu]); + hv_vp_assist_page[cpu] = NULL; + rdmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); + msr.enable = 0; + } + wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, msr.as_uint64); } - local_irq_restore(flags); - - free_pages((unsigned long)pg, hv_root_partition ? 1 : 0); - - if (hv_vp_assist_page && hv_vp_assist_page[cpu]) - wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); - if (hv_reenlightenment_cb == NULL) return 0; @@ -368,7 +344,7 @@ void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; - int cpuhp, i; + int cpuhp; if (x86_hyper_type != X86_HYPER_MS_HYPERV) return; @@ -380,36 +356,14 @@ void __init hyperv_init(void) if ((ms_hyperv.features & required_msrs) != required_msrs) return; - /* - * Allocate the per-CPU state for the hypercall input arg. - * If this allocation fails, we will not be able to setup - * (per-CPU) hypercall input page and thus this failure is - * fatal on Hyper-V. - */ - hyperv_pcpu_input_arg = alloc_percpu(void *); - - BUG_ON(hyperv_pcpu_input_arg == NULL); - - /* Allocate the per-CPU state for output arg for root */ - if (hv_root_partition) { - hyperv_pcpu_output_arg = alloc_percpu(void *); - BUG_ON(hyperv_pcpu_output_arg == NULL); - } - - /* Allocate percpu VP index */ - hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), - GFP_KERNEL); - if (!hv_vp_index) + if (hv_common_init()) return; - for (i = 0; i < num_possible_cpus(); i++) - hv_vp_index[i] = VP_INVAL; - hv_vp_assist_page = kcalloc(num_possible_cpus(), sizeof(*hv_vp_assist_page), GFP_KERNEL); if (!hv_vp_assist_page) { ms_hyperv.hints &= ~HV_X64_ENLIGHTENED_VMCS_RECOMMENDED; - goto free_vp_index; + goto common_free; } cpuhp = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/hyperv_init:online", @@ -507,9 +461,8 @@ remove_cpuhp_state: free_vp_assist_page: kfree(hv_vp_assist_page); hv_vp_assist_page = NULL; -free_vp_index: - kfree(hv_vp_index); - hv_vp_index = NULL; +common_free: + hv_common_free(); } /* @@ -539,7 +492,6 @@ void hyperv_cleanup(void) hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); } -EXPORT_SYMBOL_GPL(hyperv_cleanup); void hyperv_report_panic(struct pt_regs *regs, long err, bool in_die) { @@ -595,12 +547,6 @@ bool hv_is_hyperv_initialized(void) } EXPORT_SYMBOL_GPL(hv_is_hyperv_initialized); -bool hv_is_hibernation_supported(void) -{ - return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); -} -EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); - enum hv_isolation_type hv_get_isolation_type(void) { if (!(ms_hyperv.priv_high & HV_ISOLATION)) @@ -613,4 +559,3 @@ bool hv_is_isolation_supported(void) { return hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE; } -EXPORT_SYMBOL_GPL(hv_is_isolation_supported); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index f1366ce609e3..2322d6bd5883 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -288,6 +288,15 @@ union hv_x64_msr_hypercall_contents { } __packed; }; +union hv_vp_assist_msr_contents { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:11; + u64 pfn:52; + } __packed; +}; + struct hv_reenlightenment_control { __u64 vector:8; __u64 reserved1:8; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 67ff0d637e55..adccbc209169 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -36,8 +36,6 @@ void hyperv_vector_handler(struct pt_regs *regs); extern int hyperv_init_cpuhp; extern void *hv_hypercall_pg; -extern void __percpu **hyperv_pcpu_input_arg; -extern void __percpu **hyperv_pcpu_output_arg; extern u64 hv_current_partition_id; @@ -170,8 +168,6 @@ int hyperv_fill_flush_guest_mapping_list( struct hv_guest_mapping_flush_list *flush, u64 start_gfn, u64 end_gfn); -extern bool hv_root_partition; - #ifdef CONFIG_X86_64 void hv_apic_init(void); void __init hv_init_spinlocks(void); diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c890d67a64ad..e095c28d27ae 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -17,7 +17,6 @@ #include <linux/irq.h> #include <linux/kexec.h> #include <linux/i8253.h> -#include <linux/panic_notifier.h> #include <linux/random.h> #include <asm/processor.h> #include <asm/hypervisor.h> @@ -36,10 +35,7 @@ /* Is Linux running as the root partition? */ bool hv_root_partition; -EXPORT_SYMBOL_GPL(hv_root_partition); - struct ms_hyperv_info ms_hyperv; -EXPORT_SYMBOL_GPL(ms_hyperv); #if IS_ENABLED(CONFIG_HYPERV) static void (*vmbus_handler)(void); @@ -65,14 +61,12 @@ void hv_setup_vmbus_handler(void (*handler)(void)) { vmbus_handler = handler; } -EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler); void hv_remove_vmbus_handler(void) { /* We have no way to deallocate the interrupt gate */ vmbus_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); /* * Routines to do per-architecture handling of stimer0 @@ -107,25 +101,21 @@ void hv_setup_kexec_handler(void (*handler)(void)) { hv_kexec_handler = handler; } -EXPORT_SYMBOL_GPL(hv_setup_kexec_handler); void hv_remove_kexec_handler(void) { hv_kexec_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_remove_kexec_handler); void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) { hv_crash_handler = handler; } -EXPORT_SYMBOL_GPL(hv_setup_crash_handler); void hv_remove_crash_handler(void) { hv_crash_handler = NULL; } -EXPORT_SYMBOL_GPL(hv_remove_crash_handler); #ifdef CONFIG_KEXEC_CORE static void hv_machine_shutdown(void) @@ -335,16 +325,6 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.nested_features); } - /* - * Hyper-V expects to get crash register data or kmsg when - * crash enlightment is available and system crashes. Set - * crash_kexec_post_notifiers to be true to make sure that - * calling crash enlightment interface before running kdump - * kernel. - */ - if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) - crash_kexec_post_notifiers = true; - #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_ACCESS_FREQUENCY_MSRS && ms_hyperv.misc_features & HV_FEATURE_FREQUENCY_MSRS_AVAILABLE) { @@ -373,10 +353,17 @@ static void __init ms_hyperv_init_platform(void) machine_ops.crash_shutdown = hv_machine_crash_shutdown; #endif if (ms_hyperv.features & HV_ACCESS_TSC_INVARIANT) { + /* + * Writing to synthetic MSR 0x40000118 updates/changes the + * guest visible CPUIDs. Setting bit 0 of this MSR enables + * guests to report invariant TSC feature through CPUID + * instruction, CPUID 0x800000007/EDX, bit 8. See code in + * early_init_intel() where this bit is examined. The + * setting of this MSR bit should happen before init_intel() + * is called. + */ wrmsrl(HV_X64_MSR_TSC_INVARIANT_CONTROL, 0x1); setup_force_cpu_cap(X86_FEATURE_TSC_RELIABLE); - } else { - mark_tsc_unstable("running on Hyper-V"); } /* @@ -437,6 +424,13 @@ static void __init ms_hyperv_init_platform(void) /* Register Hyper-V specific clocksource */ hv_init_clocksource(); #endif + /* + * TSC should be marked as unstable only after Hyper-V + * clocksource has been initialized. This ensures that the + * stability of the sched_clock is not altered. + */ + if (!(ms_hyperv.features & HV_ACCESS_TSC_INVARIANT)) + mark_tsc_unstable("running on Hyper-V"); } static bool __init ms_hyperv_x2apic_available(void) diff --git a/drivers/clocksource/hyperv_timer.c b/drivers/clocksource/hyperv_timer.c index d6ece7bbce89..ff188ab68496 100644 --- a/drivers/clocksource/hyperv_timer.c +++ b/drivers/clocksource/hyperv_timer.c @@ -361,9 +361,6 @@ EXPORT_SYMBOL_GPL(hv_stimer_global_cleanup); * Hyper-V and 32-bit x86. The TSC reference page version is preferred. */ -u64 (*hv_read_reference_counter)(void); -EXPORT_SYMBOL_GPL(hv_read_reference_counter); - static union { struct ms_hyperv_tsc_page page; u8 reserved[PAGE_SIZE]; diff --git a/drivers/hv/Kconfig b/drivers/hv/Kconfig index 66c794d92391..d1123ceb38f3 100644 --- a/drivers/hv/Kconfig +++ b/drivers/hv/Kconfig @@ -4,15 +4,16 @@ menu "Microsoft Hyper-V guest support" config HYPERV tristate "Microsoft Hyper-V client drivers" - depends on X86 && ACPI && X86_LOCAL_APIC && HYPERVISOR_GUEST + depends on ACPI && ((X86 && X86_LOCAL_APIC && HYPERVISOR_GUEST) \ + || (ARM64 && !CPU_BIG_ENDIAN)) select PARAVIRT - select X86_HV_CALLBACK_VECTOR + select X86_HV_CALLBACK_VECTOR if X86 help Select this option to run Linux as a Hyper-V client operating system. config HYPERV_TIMER - def_bool HYPERV + def_bool HYPERV && X86 config HYPERV_UTILS tristate "Microsoft Hyper-V Utilities driver" diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 7f42da98d377..c0d9048a4112 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -13,11 +13,162 @@ */ #include <linux/types.h> +#include <linux/acpi.h> #include <linux/export.h> #include <linux/bitfield.h> +#include <linux/cpumask.h> +#include <linux/panic_notifier.h> +#include <linux/ptrace.h> +#include <linux/slab.h> #include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> +/* + * hv_root_partition and ms_hyperv are defined here with other Hyper-V + * specific globals so they are shared across all architectures and are + * built only when CONFIG_HYPERV is defined. But on x86, + * ms_hyperv_init_platform() is built even when CONFIG_HYPERV is not + * defined, and it uses these two variables. So mark them as __weak + * here, allowing for an overriding definition in the module containing + * ms_hyperv_init_platform(). + */ +bool __weak hv_root_partition; +EXPORT_SYMBOL_GPL(hv_root_partition); + +struct ms_hyperv_info __weak ms_hyperv; +EXPORT_SYMBOL_GPL(ms_hyperv); + +u32 *hv_vp_index; +EXPORT_SYMBOL_GPL(hv_vp_index); + +u32 hv_max_vp_index; +EXPORT_SYMBOL_GPL(hv_max_vp_index); + +void __percpu **hyperv_pcpu_input_arg; +EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); + +void __percpu **hyperv_pcpu_output_arg; +EXPORT_SYMBOL_GPL(hyperv_pcpu_output_arg); + +/* + * Hyper-V specific initialization and shutdown code that is + * common across all architectures. Called from architecture + * specific initialization functions. + */ + +void __init hv_common_free(void) +{ + kfree(hv_vp_index); + hv_vp_index = NULL; + + free_percpu(hyperv_pcpu_output_arg); + hyperv_pcpu_output_arg = NULL; + + free_percpu(hyperv_pcpu_input_arg); + hyperv_pcpu_input_arg = NULL; +} + +int __init hv_common_init(void) +{ + int i; + + /* + * Hyper-V expects to get crash register data or kmsg when + * crash enlightment is available and system crashes. Set + * crash_kexec_post_notifiers to be true to make sure that + * calling crash enlightment interface before running kdump + * kernel. + */ + if (ms_hyperv.misc_features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) + crash_kexec_post_notifiers = true; + + /* + * Allocate the per-CPU state for the hypercall input arg. + * If this allocation fails, we will not be able to setup + * (per-CPU) hypercall input page and thus this failure is + * fatal on Hyper-V. + */ + hyperv_pcpu_input_arg = alloc_percpu(void *); + BUG_ON(!hyperv_pcpu_input_arg); + + /* Allocate the per-CPU state for output arg for root */ + if (hv_root_partition) { + hyperv_pcpu_output_arg = alloc_percpu(void *); + BUG_ON(!hyperv_pcpu_output_arg); + } + + hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), + GFP_KERNEL); + if (!hv_vp_index) { + hv_common_free(); + return -ENOMEM; + } + + for (i = 0; i < num_possible_cpus(); i++) + hv_vp_index[i] = VP_INVAL; + + return 0; +} + +/* + * Hyper-V specific initialization and die code for + * individual CPUs that is common across all architectures. + * Called by the CPU hotplug mechanism. + */ + +int hv_common_cpu_init(unsigned int cpu) +{ + void **inputarg, **outputarg; + u64 msr_vp_index; + gfp_t flags; + int pgcount = hv_root_partition ? 2 : 1; + + /* hv_cpu_init() can be called with IRQs disabled from hv_resume() */ + flags = irqs_disabled() ? GFP_ATOMIC : GFP_KERNEL; + + inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + *inputarg = kmalloc(pgcount * HV_HYP_PAGE_SIZE, flags); + if (!(*inputarg)) + return -ENOMEM; + + if (hv_root_partition) { + outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); + *outputarg = (char *)(*inputarg) + HV_HYP_PAGE_SIZE; + } + + msr_vp_index = hv_get_register(HV_REGISTER_VP_INDEX); + + hv_vp_index[cpu] = msr_vp_index; + + if (msr_vp_index > hv_max_vp_index) + hv_max_vp_index = msr_vp_index; + + return 0; +} + +int hv_common_cpu_die(unsigned int cpu) +{ + unsigned long flags; + void **inputarg, **outputarg; + void *mem; + + local_irq_save(flags); + + inputarg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + mem = *inputarg; + *inputarg = NULL; + + if (hv_root_partition) { + outputarg = (void **)this_cpu_ptr(hyperv_pcpu_output_arg); + *outputarg = NULL; + } + + local_irq_restore(flags); + + kfree(mem); + + return 0; +} /* Bit mask of the extended capability to query: see HV_EXT_CAPABILITY_xxx */ bool hv_query_ext_cap(u64 cap_query) @@ -64,3 +215,71 @@ bool hv_query_ext_cap(u64 cap_query) return hv_extended_cap & cap_query; } EXPORT_SYMBOL_GPL(hv_query_ext_cap); + +bool hv_is_hibernation_supported(void) +{ + return !hv_root_partition && acpi_sleep_state_supported(ACPI_STATE_S4); +} +EXPORT_SYMBOL_GPL(hv_is_hibernation_supported); + +/* + * Default function to read the Hyper-V reference counter, independent + * of whether Hyper-V enlightened clocks/timers are being used. But on + * architectures where it is used, Hyper-V enlightenment code in + * hyperv_timer.c may override this function. + */ +static u64 __hv_read_ref_counter(void) +{ + return hv_get_register(HV_REGISTER_TIME_REF_COUNT); +} + +u64 (*hv_read_reference_counter)(void) = __hv_read_ref_counter; +EXPORT_SYMBOL_GPL(hv_read_reference_counter); + +/* These __weak functions provide default "no-op" behavior and + * may be overridden by architecture specific versions. Architectures + * for which the default "no-op" behavior is sufficient can leave + * them unimplemented and not be cluttered with a bunch of stub + * functions in arch-specific code. + */ + +bool __weak hv_is_isolation_supported(void) +{ + return false; +} +EXPORT_SYMBOL_GPL(hv_is_isolation_supported); + +void __weak hv_setup_vmbus_handler(void (*handler)(void)) +{ +} +EXPORT_SYMBOL_GPL(hv_setup_vmbus_handler); + +void __weak hv_remove_vmbus_handler(void) +{ +} +EXPORT_SYMBOL_GPL(hv_remove_vmbus_handler); + +void __weak hv_setup_kexec_handler(void (*handler)(void)) +{ +} +EXPORT_SYMBOL_GPL(hv_setup_kexec_handler); + +void __weak hv_remove_kexec_handler(void) +{ +} +EXPORT_SYMBOL_GPL(hv_remove_kexec_handler); + +void __weak hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)) +{ +} +EXPORT_SYMBOL_GPL(hv_setup_crash_handler); + +void __weak hv_remove_crash_handler(void) +{ +} +EXPORT_SYMBOL_GPL(hv_remove_crash_handler); + +void __weak hyperv_cleanup(void) +{ +} +EXPORT_SYMBOL_GPL(hyperv_cleanup); diff --git a/drivers/hv/hv_snapshot.c b/drivers/hv/hv_snapshot.c index 2267bd4c3472..6018b9d1b1fb 100644 --- a/drivers/hv/hv_snapshot.c +++ b/drivers/hv/hv_snapshot.c @@ -375,6 +375,7 @@ hv_vss_init(struct hv_util_service *srv) } recv_buffer = srv->recv_buffer; vss_transaction.recv_channel = srv->channel; + vss_transaction.recv_channel->max_pkt_size = HV_HYP_PAGE_SIZE * 2; /* * When this driver loads, the user level daemon that diff --git a/drivers/hv/hv_util.c b/drivers/hv/hv_util.c index 136576cba26f..835e6039c186 100644 --- a/drivers/hv/hv_util.c +++ b/drivers/hv/hv_util.c @@ -17,7 +17,6 @@ #include <linux/hyperv.h> #include <linux/clockchips.h> #include <linux/ptp_clock_kernel.h> -#include <clocksource/hyperv_timer.h> #include <asm/mshyperv.h> #include "hyperv_vmbus.h" @@ -735,10 +734,6 @@ static struct ptp_clock *hv_ptp_clock; static int hv_timesync_init(struct hv_util_service *srv) { - /* TimeSync requires Hyper-V clocksource. */ - if (!hv_read_reference_counter) - return -ENODEV; - spin_lock_init(&host_ts.lock); INIT_WORK(&adj_time_work, hv_set_host_time); diff --git a/include/asm-generic/mshyperv.h b/include/asm-generic/mshyperv.h index 9a000ba2bb75..c1ab6a6e72b5 100644 --- a/include/asm-generic/mshyperv.h +++ b/include/asm-generic/mshyperv.h @@ -22,6 +22,7 @@ #include <linux/atomic.h> #include <linux/bitops.h> #include <linux/cpumask.h> +#include <linux/nmi.h> #include <asm/ptrace.h> #include <asm/hyperv-tlfs.h> @@ -38,6 +39,9 @@ struct ms_hyperv_info { }; extern struct ms_hyperv_info ms_hyperv; +extern void __percpu **hyperv_pcpu_input_arg; +extern void __percpu **hyperv_pcpu_output_arg; + extern u64 hv_do_hypercall(u64 control, void *inputaddr, void *outputaddr); extern u64 hv_do_fast_hypercall8(u16 control, u64 input8); @@ -151,6 +155,8 @@ void hv_remove_crash_handler(void); extern int vmbus_interrupt; extern int vmbus_irq; +extern bool hv_root_partition; + #if IS_ENABLED(CONFIG_HYPERV) /* * Hypervisor's notion of virtual processor ID is different from @@ -161,9 +167,16 @@ extern int vmbus_irq; extern u32 *hv_vp_index; extern u32 hv_max_vp_index; +extern u64 (*hv_read_reference_counter)(void); + /* Sentinel value for an uninitialized entry in hv_vp_index array */ #define VP_INVAL U32_MAX +int __init hv_common_init(void); +void __init hv_common_free(void); +int hv_common_cpu_init(unsigned int cpu); +int hv_common_cpu_die(unsigned int cpu); + void *hv_alloc_hyperv_page(void); void *hv_alloc_hyperv_zeroed_page(void); void hv_free_hyperv_page(unsigned long addr); diff --git a/include/clocksource/hyperv_timer.h b/include/clocksource/hyperv_timer.h index b6774aa5a4b8..b3f5d73ae1d6 100644 --- a/include/clocksource/hyperv_timer.h +++ b/include/clocksource/hyperv_timer.h @@ -20,6 +20,8 @@ #define HV_MAX_MAX_DELTA_TICKS 0xffffffff #define HV_MIN_DELTA_TICKS 1 +#ifdef CONFIG_HYPERV_TIMER + /* Routines called by the VMbus driver */ extern int hv_stimer_alloc(bool have_percpu_irqs); extern int hv_stimer_cleanup(unsigned int cpu); @@ -28,8 +30,6 @@ extern void hv_stimer_legacy_cleanup(unsigned int cpu); extern void hv_stimer_global_cleanup(void); extern void hv_stimer0_isr(void); -#ifdef CONFIG_HYPERV_TIMER -extern u64 (*hv_read_reference_counter)(void); extern void hv_init_clocksource(void); extern struct ms_hyperv_tsc_page *hv_get_tsc_page(void); @@ -100,6 +100,13 @@ static inline u64 hv_read_tsc_page_tsc(const struct ms_hyperv_tsc_page *tsc_pg, { return U64_MAX; } + +static inline int hv_stimer_cleanup(unsigned int cpu) { return 0; } +static inline void hv_stimer_legacy_init(unsigned int cpu, int sint) {} +static inline void hv_stimer_legacy_cleanup(unsigned int cpu) {} +static inline void hv_stimer_global_cleanup(void) {} +static inline void hv_stimer0_isr(void) {} + #endif /* CONFIG_HYPERV_TIMER */ #endif diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2e859d2f9609..ddc8713ce57b 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -538,12 +538,6 @@ struct vmbus_channel_rescind_offer { u32 child_relid; } __packed; -static inline u32 -hv_ringbuffer_pending_size(const struct hv_ring_buffer_info *rbi) -{ - return rbi->ring_buffer->pending_send_sz; -} - /* * Request Offer -- no parameters, SynIC message contains the partition ID * Set Snoop -- no parameters, SynIC message contains the partition ID @@ -1092,16 +1086,6 @@ static inline void set_channel_pending_send_size(struct vmbus_channel *c, c->outbound.ring_buffer->pending_send_sz = size; } -static inline void set_low_latency_mode(struct vmbus_channel *c) -{ - c->low_latency = true; -} - -static inline void clear_low_latency_mode(struct vmbus_channel *c) -{ - c->low_latency = false; -} - void vmbus_onmessage(struct vmbus_channel_message_header *hdr); int vmbus_request_offers(void); |