diff options
author | Ingo Molnar <mingo@kernel.org> | 2021-04-02 12:33:16 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2021-04-02 12:33:16 +0200 |
commit | e855e80d001530ec0bbb1ee1ca6a16ac6bdf9acf (patch) | |
tree | 5f762f2992658891c565af4f923147b384bf0dd4 /arch | |
parent | f2ac256b9a8b7e79847efcd82bd10fd876369b9f (diff) | |
parent | a5e13c6df0e41702d2b2c77c8ad41677ebb065b3 (diff) |
Merge tag 'v5.12-rc5' into WIP.x86/core, to pick up recent NOP related changes
In particular we want to have this upstream commit:
b90829704780: ("bpf: Use NOP_ATOMIC5 instead of emit_nops(&prog, 5) for BPF_TRAMP_F_CALL_ORIG")
... before merging in x86/cpu changes and the removal of the NOP optimizations, and
applying PeterZ's !retpoline objtool series.
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
84 files changed, 586 insertions, 337 deletions
diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 5b213a1e68bb..5e33d0e88f5b 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -40,6 +40,9 @@ ethernet1 = &cpsw_emac1; spi0 = &spi0; spi1 = &spi1; + mmc0 = &mmc1; + mmc1 = &mmc2; + mmc2 = &mmc3; }; cpus { diff --git a/arch/arm/boot/dts/at91-sam9x60ek.dts b/arch/arm/boot/dts/at91-sam9x60ek.dts index 73b6b1f89de9..775ceb3acb6c 100644 --- a/arch/arm/boot/dts/at91-sam9x60ek.dts +++ b/arch/arm/boot/dts/at91-sam9x60ek.dts @@ -334,14 +334,6 @@ }; &pinctrl { - atmel,mux-mask = < - /* A B C */ - 0xFFFFFE7F 0xC0E0397F 0xEF00019D /* pioA */ - 0x03FFFFFF 0x02FC7E68 0x00780000 /* pioB */ - 0xffffffff 0xF83FFFFF 0xB800F3FC /* pioC */ - 0x003FFFFF 0x003F8000 0x00000000 /* pioD */ - >; - adc { pinctrl_adc_default: adc_default { atmel,pins = <AT91_PIOB 15 AT91_PERIPH_A AT91_PINCTRL_NONE>; diff --git a/arch/arm/boot/dts/at91-sama5d27_som1.dtsi b/arch/arm/boot/dts/at91-sama5d27_som1.dtsi index 1b1163858b1d..e3251f3e3eaa 100644 --- a/arch/arm/boot/dts/at91-sama5d27_som1.dtsi +++ b/arch/arm/boot/dts/at91-sama5d27_som1.dtsi @@ -84,8 +84,8 @@ pinctrl-0 = <&pinctrl_macb0_default>; phy-mode = "rmii"; - ethernet-phy@0 { - reg = <0x0>; + ethernet-phy@7 { + reg = <0x7>; interrupt-parent = <&pioA>; interrupts = <PIN_PD31 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi index c593597b2119..5a1e10def6ef 100644 --- a/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi +++ b/arch/arm/boot/dts/imx6ul-14x14-evk.dtsi @@ -210,9 +210,6 @@ micrel,led-mode = <1>; clocks = <&clks IMX6UL_CLK_ENET_REF>; clock-names = "rmii-ref"; - reset-gpios = <&gpio_spi 1 GPIO_ACTIVE_LOW>; - reset-assert-us = <10000>; - reset-deassert-us = <100>; }; @@ -222,9 +219,6 @@ micrel,led-mode = <1>; clocks = <&clks IMX6UL_CLK_ENET2_REF>; clock-names = "rmii-ref"; - reset-gpios = <&gpio_spi 2 GPIO_ACTIVE_LOW>; - reset-assert-us = <10000>; - reset-deassert-us = <100>; }; }; }; @@ -243,6 +237,22 @@ status = "okay"; }; +&gpio_spi { + eth0-phy-hog { + gpio-hog; + gpios = <1 GPIO_ACTIVE_HIGH>; + output-high; + line-name = "eth0-phy"; + }; + + eth1-phy-hog { + gpio-hog; + gpios = <2 GPIO_ACTIVE_HIGH>; + output-high; + line-name = "eth1-phy"; + }; +}; + &i2c1 { clock-frequency = <100000>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/imx6ull-myir-mys-6ulx-eval.dts b/arch/arm/boot/dts/imx6ull-myir-mys-6ulx-eval.dts index ecbb2cc5b9ab..79cc45728cd2 100644 --- a/arch/arm/boot/dts/imx6ull-myir-mys-6ulx-eval.dts +++ b/arch/arm/boot/dts/imx6ull-myir-mys-6ulx-eval.dts @@ -14,5 +14,6 @@ }; &gpmi { + fsl,use-minimum-ecc; status = "okay"; }; diff --git a/arch/arm/boot/dts/sam9x60.dtsi b/arch/arm/boot/dts/sam9x60.dtsi index 84066c1298df..ec45ced3cde6 100644 --- a/arch/arm/boot/dts/sam9x60.dtsi +++ b/arch/arm/boot/dts/sam9x60.dtsi @@ -606,6 +606,15 @@ compatible = "microchip,sam9x60-pinctrl", "atmel,at91sam9x5-pinctrl", "atmel,at91rm9200-pinctrl", "simple-bus"; ranges = <0xfffff400 0xfffff400 0x800>; + /* mux-mask corresponding to sam9x60 SoC in TFBGA228L package */ + atmel,mux-mask = < + /* A B C */ + 0xffffffff 0xffe03fff 0xef00019d /* pioA */ + 0x03ffffff 0x02fc7e7f 0x00780000 /* pioB */ + 0xffffffff 0xffffffff 0xf83fffff /* pioC */ + 0x003fffff 0x003f8000 0x00000000 /* pioD */ + >; + pioA: gpio@fffff400 { compatible = "microchip,sam9x60-gpio", "atmel,at91sam9x5-gpio", "atmel,at91rm9200-gpio"; reg = <0xfffff400 0x200>; diff --git a/arch/arm/mach-imx/avic.c b/arch/arm/mach-imx/avic.c index 322caa21bcb3..21bce4049cec 100644 --- a/arch/arm/mach-imx/avic.c +++ b/arch/arm/mach-imx/avic.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/irq.h> #include <linux/irqdomain.h> +#include <linux/irqchip.h> #include <linux/io.h> #include <linux/of.h> #include <linux/of_address.h> @@ -162,7 +163,7 @@ static void __exception_irq_entry avic_handle_irq(struct pt_regs *regs) * interrupts. It registers the interrupt enable and disable functions * to the kernel for each interrupt source. */ -void __init mxc_init_irq(void __iomem *irqbase) +static void __init mxc_init_irq(void __iomem *irqbase) { struct device_node *np; int irq_base; @@ -220,3 +221,16 @@ void __init mxc_init_irq(void __iomem *irqbase) printk(KERN_INFO "MXC IRQ initialized\n"); } + +static int __init imx_avic_init(struct device_node *node, + struct device_node *parent) +{ + void __iomem *avic_base; + + avic_base = of_iomap(node, 0); + BUG_ON(!avic_base); + mxc_init_irq(avic_base); + return 0; +} + +IRQCHIP_DECLARE(imx_avic, "fsl,avic", imx_avic_init); diff --git a/arch/arm/mach-imx/common.h b/arch/arm/mach-imx/common.h index 2b004cc4f95e..474dedb73bc7 100644 --- a/arch/arm/mach-imx/common.h +++ b/arch/arm/mach-imx/common.h @@ -22,7 +22,6 @@ void mx35_map_io(void); void imx21_init_early(void); void imx31_init_early(void); void imx35_init_early(void); -void mxc_init_irq(void __iomem *); void mx31_init_irq(void); void mx35_init_irq(void); void mxc_set_cpu_type(unsigned int type); diff --git a/arch/arm/mach-imx/mach-imx1.c b/arch/arm/mach-imx/mach-imx1.c index 32df3b8012f9..8eca92d66a2e 100644 --- a/arch/arm/mach-imx/mach-imx1.c +++ b/arch/arm/mach-imx/mach-imx1.c @@ -17,16 +17,6 @@ static void __init imx1_init_early(void) mxc_set_cpu_type(MXC_CPU_MX1); } -static void __init imx1_init_irq(void) -{ - void __iomem *avic_addr; - - avic_addr = ioremap(MX1_AVIC_ADDR, SZ_4K); - WARN_ON(!avic_addr); - - mxc_init_irq(avic_addr); -} - static const char * const imx1_dt_board_compat[] __initconst = { "fsl,imx1", NULL @@ -34,7 +24,6 @@ static const char * const imx1_dt_board_compat[] __initconst = { DT_MACHINE_START(IMX1_DT, "Freescale i.MX1 (Device Tree Support)") .init_early = imx1_init_early, - .init_irq = imx1_init_irq, .dt_compat = imx1_dt_board_compat, .restart = mxc_restart, MACHINE_END diff --git a/arch/arm/mach-imx/mach-imx25.c b/arch/arm/mach-imx/mach-imx25.c index 95de48a1aa7d..51927bd08aef 100644 --- a/arch/arm/mach-imx/mach-imx25.c +++ b/arch/arm/mach-imx/mach-imx25.c @@ -22,17 +22,6 @@ static void __init imx25_dt_init(void) imx_aips_allow_unprivileged_access("fsl,imx25-aips"); } -static void __init mx25_init_irq(void) -{ - struct device_node *np; - void __iomem *avic_base; - - np = of_find_compatible_node(NULL, NULL, "fsl,avic"); - avic_base = of_iomap(np, 0); - BUG_ON(!avic_base); - mxc_init_irq(avic_base); -} - static const char * const imx25_dt_board_compat[] __initconst = { "fsl,imx25", NULL @@ -42,6 +31,5 @@ DT_MACHINE_START(IMX25_DT, "Freescale i.MX25 (Device Tree Support)") .init_early = imx25_init_early, .init_machine = imx25_dt_init, .init_late = imx25_pm_init, - .init_irq = mx25_init_irq, .dt_compat = imx25_dt_board_compat, MACHINE_END diff --git a/arch/arm/mach-imx/mach-imx27.c b/arch/arm/mach-imx/mach-imx27.c index 262422a9c196..e325c9468105 100644 --- a/arch/arm/mach-imx/mach-imx27.c +++ b/arch/arm/mach-imx/mach-imx27.c @@ -56,17 +56,6 @@ static void __init imx27_init_early(void) mxc_set_cpu_type(MXC_CPU_MX27); } -static void __init mx27_init_irq(void) -{ - void __iomem *avic_base; - struct device_node *np; - - np = of_find_compatible_node(NULL, NULL, "fsl,avic"); - avic_base = of_iomap(np, 0); - BUG_ON(!avic_base); - mxc_init_irq(avic_base); -} - static const char * const imx27_dt_board_compat[] __initconst = { "fsl,imx27", NULL @@ -75,7 +64,6 @@ static const char * const imx27_dt_board_compat[] __initconst = { DT_MACHINE_START(IMX27_DT, "Freescale i.MX27 (Device Tree Support)") .map_io = mx27_map_io, .init_early = imx27_init_early, - .init_irq = mx27_init_irq, .init_late = imx27_pm_init, .dt_compat = imx27_dt_board_compat, MACHINE_END diff --git a/arch/arm/mach-imx/mach-imx31.c b/arch/arm/mach-imx/mach-imx31.c index dc69dfe600df..e9a1092b6093 100644 --- a/arch/arm/mach-imx/mach-imx31.c +++ b/arch/arm/mach-imx/mach-imx31.c @@ -14,6 +14,5 @@ static const char * const imx31_dt_board_compat[] __initconst = { DT_MACHINE_START(IMX31_DT, "Freescale i.MX31 (Device Tree Support)") .map_io = mx31_map_io, .init_early = imx31_init_early, - .init_irq = mx31_init_irq, .dt_compat = imx31_dt_board_compat, MACHINE_END diff --git a/arch/arm/mach-imx/mach-imx35.c b/arch/arm/mach-imx/mach-imx35.c index ec5c3068715c..0fc08218b77d 100644 --- a/arch/arm/mach-imx/mach-imx35.c +++ b/arch/arm/mach-imx/mach-imx35.c @@ -27,6 +27,5 @@ DT_MACHINE_START(IMX35_DT, "Freescale i.MX35 (Device Tree Support)") .l2c_aux_mask = ~0, .map_io = mx35_map_io, .init_early = imx35_init_early, - .init_irq = mx35_init_irq, .dt_compat = imx35_dt_board_compat, MACHINE_END diff --git a/arch/arm/mach-imx/mm-imx3.c b/arch/arm/mach-imx/mm-imx3.c index 5056438e5b42..28db97289ee8 100644 --- a/arch/arm/mach-imx/mm-imx3.c +++ b/arch/arm/mach-imx/mm-imx3.c @@ -109,18 +109,6 @@ void __init imx31_init_early(void) mx3_ccm_base = of_iomap(np, 0); BUG_ON(!mx3_ccm_base); } - -void __init mx31_init_irq(void) -{ - void __iomem *avic_base; - struct device_node *np; - - np = of_find_compatible_node(NULL, NULL, "fsl,imx31-avic"); - avic_base = of_iomap(np, 0); - BUG_ON(!avic_base); - - mxc_init_irq(avic_base); -} #endif /* ifdef CONFIG_SOC_IMX31 */ #ifdef CONFIG_SOC_IMX35 @@ -158,16 +146,4 @@ void __init imx35_init_early(void) mx3_ccm_base = of_iomap(np, 0); BUG_ON(!mx3_ccm_base); } - -void __init mx35_init_irq(void) -{ - void __iomem *avic_base; - struct device_node *np; - - np = of_find_compatible_node(NULL, NULL, "fsl,imx35-avic"); - avic_base = of_iomap(np, 0); - BUG_ON(!avic_base); - - mxc_init_irq(avic_base); -} #endif /* ifdef CONFIG_SOC_IMX35 */ diff --git a/arch/arm/mach-omap2/sr_device.c b/arch/arm/mach-omap2/sr_device.c index 62df666c2bd0..17b66f0d0dee 100644 --- a/arch/arm/mach-omap2/sr_device.c +++ b/arch/arm/mach-omap2/sr_device.c @@ -88,34 +88,26 @@ static void __init sr_set_nvalues(struct omap_volt_data *volt_data, extern struct omap_sr_data omap_sr_pdata[]; -static int __init sr_dev_init(struct omap_hwmod *oh, void *user) +static int __init sr_init_by_name(const char *name, const char *voltdm) { struct omap_sr_data *sr_data = NULL; struct omap_volt_data *volt_data; - struct omap_smartreflex_dev_attr *sr_dev_attr; static int i; - if (!strncmp(oh->name, "smartreflex_mpu_iva", 20) || - !strncmp(oh->name, "smartreflex_mpu", 16)) + if (!strncmp(name, "smartreflex_mpu_iva", 20) || + !strncmp(name, "smartreflex_mpu", 16)) sr_data = &omap_sr_pdata[OMAP_SR_MPU]; - else if (!strncmp(oh->name, "smartreflex_core", 17)) + else if (!strncmp(name, "smartreflex_core", 17)) sr_data = &omap_sr_pdata[OMAP_SR_CORE]; - else if (!strncmp(oh->name, "smartreflex_iva", 16)) + else if (!strncmp(name, "smartreflex_iva", 16)) sr_data = &omap_sr_pdata[OMAP_SR_IVA]; if (!sr_data) { - pr_err("%s: Unknown instance %s\n", __func__, oh->name); + pr_err("%s: Unknown instance %s\n", __func__, name); return -EINVAL; } - sr_dev_attr = (struct omap_smartreflex_dev_attr *)oh->dev_attr; - if (!sr_dev_attr || !sr_dev_attr->sensor_voltdm_name) { - pr_err("%s: No voltage domain specified for %s. Cannot initialize\n", - __func__, oh->name); - goto exit; - } - - sr_data->name = oh->name; + sr_data->name = name; if (cpu_is_omap343x()) sr_data->ip_type = 1; else @@ -136,10 +128,10 @@ static int __init sr_dev_init(struct omap_hwmod *oh, void *user) } } - sr_data->voltdm = voltdm_lookup(sr_dev_attr->sensor_voltdm_name); + sr_data->voltdm = voltdm_lookup(voltdm); if (!sr_data->voltdm) { pr_err("%s: Unable to get voltage domain pointer for VDD %s\n", - __func__, sr_dev_attr->sensor_voltdm_name); + __func__, voltdm); goto exit; } @@ -160,6 +152,20 @@ exit: return 0; } +static int __init sr_dev_init(struct omap_hwmod *oh, void *user) +{ + struct omap_smartreflex_dev_attr *sr_dev_attr; + + sr_dev_attr = (struct omap_smartreflex_dev_attr *)oh->dev_attr; + if (!sr_dev_attr || !sr_dev_attr->sensor_voltdm_name) { + pr_err("%s: No voltage domain specified for %s. Cannot initialize\n", + __func__, oh->name); + return 0; + } + + return sr_init_by_name(oh->name, sr_dev_attr->sensor_voltdm_name); +} + /* * API to be called from board files to enable smartreflex * autocompensation at init. @@ -169,7 +175,42 @@ void __init omap_enable_smartreflex_on_init(void) sr_enable_on_init = true; } +static const char * const omap4_sr_instances[] = { + "mpu", + "iva", + "core", +}; + +static const char * const dra7_sr_instances[] = { + "mpu", + "core", +}; + int __init omap_devinit_smartreflex(void) { + const char * const *sr_inst; + int i, nr_sr = 0; + + if (soc_is_omap44xx()) { + sr_inst = omap4_sr_instances; + nr_sr = ARRAY_SIZE(omap4_sr_instances); + + } else if (soc_is_dra7xx()) { + sr_inst = dra7_sr_instances; + nr_sr = ARRAY_SIZE(dra7_sr_instances); + } + + if (nr_sr) { + const char *name, *voltdm; + + for (i = 0; i < nr_sr; i++) { + name = kasprintf(GFP_KERNEL, "smartreflex_%s", sr_inst[i]); + voltdm = sr_inst[i]; + sr_init_by_name(name, voltdm); + } + + return 0; + } + return omap_hwmod_for_each_by_class("smartreflex", sr_dev_init, NULL); } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5656e7aacd69..e4e1b6550115 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -810,6 +810,16 @@ config QCOM_FALKOR_ERRATUM_E1041 If unsure, say Y. +config NVIDIA_CARMEL_CNP_ERRATUM + bool "NVIDIA Carmel CNP: CNP on Carmel semantically different than ARM cores" + default y + help + If CNP is enabled on Carmel cores, non-sharable TLBIs on a core will not + invalidate shared TLB entries installed by a different core, as it would + on standard ARM cores. + + If unsure, say Y. + config SOCIONEXT_SYNQUACER_PREITS bool "Socionext Synquacer: Workaround for GICv3 pre-ITS" default y diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi index 7de6b376d792..9058cfa4980f 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi @@ -198,6 +198,7 @@ ranges = <0x0 0x00 0x1700000 0x100000>; reg = <0x00 0x1700000 0x0 0x100000>; interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>; + dma-coherent; sec_jr0: jr@10000 { compatible = "fsl,sec-v5.4-job-ring", diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 5a8a1dc4262d..28c51e521cb2 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -348,6 +348,7 @@ ranges = <0x0 0x00 0x1700000 0x100000>; reg = <0x00 0x1700000 0x0 0x100000>; interrupts = <0 75 0x4>; + dma-coherent; sec_jr0: jr@10000 { compatible = "fsl,sec-v5.4-job-ring", diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi index 1d6dfd189c7f..39458305e333 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi @@ -354,6 +354,7 @@ ranges = <0x0 0x00 0x1700000 0x100000>; reg = <0x00 0x1700000 0x0 0x100000>; interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>; + dma-coherent; sec_jr0: jr@10000 { compatible = "fsl,sec-v5.4-job-ring", diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts index 0e1a6d953389..122c95ddad30 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts @@ -35,7 +35,7 @@ &i2c2 { clock-frequency = <400000>; - pinctrl-names = "default"; + pinctrl-names = "default", "gpio"; pinctrl-0 = <&pinctrl_i2c2>; pinctrl-1 = <&pinctrl_i2c2_gpio>; sda-gpios = <&gpio5 17 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi index 44a8c2337cee..f3965ec5b31d 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi @@ -67,7 +67,7 @@ &i2c1 { clock-frequency = <400000>; - pinctrl-names = "default"; + pinctrl-names = "default", "gpio"; pinctrl-0 = <&pinctrl_i2c1>; pinctrl-1 = <&pinctrl_i2c1_gpio>; sda-gpios = <&gpio5 15 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>; diff --git a/arch/arm64/include/asm/checksum.h b/arch/arm64/include/asm/checksum.h index 93a161b3bf3f..dc52b733675d 100644 --- a/arch/arm64/include/asm/checksum.h +++ b/arch/arm64/include/asm/checksum.h @@ -37,7 +37,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl) } while (--n > 0); sum += ((sum >> 32) | (sum << 32)); - return csum_fold((__force u32)(sum >> 32)); + return csum_fold((__force __wsum)(sum >> 32)); } #define ip_fast_csum ip_fast_csum diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b77d997b173b..c40f2490cd7b 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -66,7 +66,8 @@ #define ARM64_WORKAROUND_1508412 58 #define ARM64_HAS_LDAPR 59 #define ARM64_KVM_PROTECTED_MODE 60 +#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP 61 -#define ARM64_NCAPS 61 +#define ARM64_NCAPS 62 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index ca2cd75d3286..efc10e9041a0 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -251,6 +251,8 @@ unsigned long get_wchan(struct task_struct *p); extern struct task_struct *cpu_switch_to(struct task_struct *prev, struct task_struct *next); +asmlinkage void arm64_preempt_schedule_irq(void); + #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 9f4e3b266f21..6623c99f0984 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -55,6 +55,8 @@ void arch_setup_new_exec(void); #define arch_setup_new_exec arch_setup_new_exec void arch_release_task_struct(struct task_struct *tsk); +int arch_dup_task_struct(struct task_struct *dst, + struct task_struct *src); #endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 506a1cd37973..e2c20c036442 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -526,6 +526,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { 1, 0), }, #endif +#ifdef CONFIG_NVIDIA_CARMEL_CNP_ERRATUM + { + /* NVIDIA Carmel */ + .desc = "NVIDIA Carmel CNP erratum", + .capability = ARM64_WORKAROUND_NVIDIA_CARMEL_CNP, + ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), + }, +#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 066030717a4c..2a5d9854d664 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1321,7 +1321,10 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope) * may share TLB entries with a CPU stuck in the crashed * kernel. */ - if (is_kdump_kernel()) + if (is_kdump_kernel()) + return false; + + if (cpus_have_const_cap(ARM64_WORKAROUND_NVIDIA_CARMEL_CNP)) return false; return has_cpuid_feature(entry, scope); diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 77605aec25fe..51fcf99d5351 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -353,7 +353,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) * with the CLIDR_EL1 fields to avoid triggering false warnings * when there is a mismatch across the CPUs. Keep track of the * effective value of the CTR_EL0 in our internal records for - * acurate sanity check and feature enablement. + * accurate sanity check and feature enablement. */ info->reg_ctr = read_cpuid_effective_cachetype(); info->reg_dczid = read_cpuid(DCZID_EL0); diff --git a/arch/arm64/kernel/crash_dump.c b/arch/arm64/kernel/crash_dump.c index e6e284265f19..58303a9ec32c 100644 --- a/arch/arm64/kernel/crash_dump.c +++ b/arch/arm64/kernel/crash_dump.c @@ -64,5 +64,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos) { memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count); + *ppos += count; + return count; } diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 325c83b1a24d..6e60aa3b5ea9 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -57,6 +57,8 @@ #include <asm/processor.h> #include <asm/pointer_auth.h> #include <asm/stacktrace.h> +#include <asm/switch_to.h> +#include <asm/system_misc.h> #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include <linux/stackprotector.h> diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index ad20981dfda4..d55bdfb7789c 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -194,8 +194,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl) #ifdef CONFIG_STACKTRACE -void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, - struct task_struct *task, struct pt_regs *regs) +noinline void arch_stack_walk(stack_trace_consume_fn consume_entry, + void *cookie, struct task_struct *task, + struct pt_regs *regs) { struct stackframe frame; @@ -203,8 +204,8 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, start_backtrace(&frame, regs->regs[29], regs->pc); else if (task == current) start_backtrace(&frame, - (unsigned long)__builtin_frame_address(0), - (unsigned long)arch_stack_walk); + (unsigned long)__builtin_frame_address(1), + (unsigned long)__builtin_return_address(0)); else start_backtrace(&frame, thread_saved_fp(task), thread_saved_pc(task)); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 7484ea4f6ba0..5d9550fdb9cf 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1448,6 +1448,22 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size) struct range arch_get_mappable_range(void) { struct range mhp_range; + u64 start_linear_pa = __pa(_PAGE_OFFSET(vabits_actual)); + u64 end_linear_pa = __pa(PAGE_END - 1); + + if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) { + /* + * Check for a wrap, it is possible because of randomized linear + * mapping the start physical address is actually bigger than + * the end physical address. In this case set start to zero + * because [0, end_linear_pa] range must still be able to cover + * all addressable physical addresses. + */ + if (start_linear_pa > end_linear_pa) + start_linear_pa = 0; + } + + WARN_ON(start_linear_pa > end_linear_pa); /* * Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)] @@ -1455,8 +1471,9 @@ struct range arch_get_mappable_range(void) * range which can be mapped inside this linear mapping range, must * also be derived from its end points. */ - mhp_range.start = __pa(_PAGE_OFFSET(vabits_actual)); - mhp_range.end = __pa(PAGE_END - 1); + mhp_range.start = start_linear_pa; + mhp_range.end = end_linear_pa; + return mhp_range; } diff --git a/arch/csky/kernel/probes/ftrace.c b/arch/csky/kernel/probes/ftrace.c index ae2b1c7b3b5c..ef2bb9bd9605 100644 --- a/arch/csky/kernel/probes/ftrace.c +++ b/arch/csky/kernel/probes/ftrace.c @@ -9,7 +9,7 @@ int arch_check_ftrace_location(struct kprobe *p) return 0; } -/* Ftrace callback handler for kprobes -- called under preepmt disabed */ +/* Ftrace callback handler for kprobes -- called under preepmt disabled */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 8b5b8e6bc9d9..dd5bfed52031 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -59,7 +59,7 @@ show_##name(struct device *dev, struct device_attribute *attr, \ char *buf) \ { \ u32 cpu=dev->id; \ - return sprintf(buf, "%lx\n", name[cpu]); \ + return sprintf(buf, "%llx\n", name[cpu]); \ } #define store(name) \ @@ -86,9 +86,9 @@ store_call_start(struct device *dev, struct device_attribute *attr, #ifdef ERR_INJ_DEBUG printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu); - printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]); - printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]); - printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n", + printk(KERN_DEBUG "err_type_info=%llx,\n", err_type_info[cpu]); + printk(KERN_DEBUG "err_struct_info=%llx,\n", err_struct_info[cpu]); + printk(KERN_DEBUG "err_data_buffer=%llx, %llx, %llx.\n", err_data_buffer[cpu].data1, err_data_buffer[cpu].data2, err_data_buffer[cpu].data3); @@ -117,8 +117,8 @@ store_call_start(struct device *dev, struct device_attribute *attr, #ifdef ERR_INJ_DEBUG printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]); - printk(KERN_DEBUG "capabilities=%lx,\n", capabilities[cpu]); - printk(KERN_DEBUG "resources=%lx\n", resources[cpu]); + printk(KERN_DEBUG "capabilities=%llx,\n", capabilities[cpu]); + printk(KERN_DEBUG "resources=%llx\n", resources[cpu]); #endif return size; } @@ -131,7 +131,7 @@ show_virtual_to_phys(struct device *dev, struct device_attribute *attr, char *buf) { unsigned int cpu=dev->id; - return sprintf(buf, "%lx\n", phys_addr[cpu]); + return sprintf(buf, "%llx\n", phys_addr[cpu]); } static ssize_t @@ -145,7 +145,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr, ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL); if (ret<=0) { #ifdef ERR_INJ_DEBUG - printk("Virtual address %lx is not existing.\n",virt_addr); + printk("Virtual address %llx is not existing.\n", virt_addr); #endif return -EINVAL; } @@ -163,7 +163,7 @@ show_err_data_buffer(struct device *dev, { unsigned int cpu=dev->id; - return sprintf(buf, "%lx, %lx, %lx\n", + return sprintf(buf, "%llx, %llx, %llx\n", err_data_buffer[cpu].data1, err_data_buffer[cpu].data2, err_data_buffer[cpu].data3); @@ -178,13 +178,13 @@ store_err_data_buffer(struct device *dev, int ret; #ifdef ERR_INJ_DEBUG - printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n", + printk("write err_data_buffer=[%llx,%llx,%llx] on cpu%d\n", err_data_buffer[cpu].data1, err_data_buffer[cpu].data2, err_data_buffer[cpu].data3, cpu); #endif - ret=sscanf(buf, "%lx, %lx, %lx", + ret = sscanf(buf, "%llx, %llx, %llx", &err_data_buffer[cpu].data1, &err_data_buffer[cpu].data2, &err_data_buffer[cpu].data3); diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index d4cae2fc69ca..adf6521525f4 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -1824,7 +1824,7 @@ ia64_mca_cpu_init(void *cpu_data) data = mca_bootmem(); first_time = 0; } else - data = (void *)__get_free_pages(GFP_KERNEL, + data = (void *)__get_free_pages(GFP_ATOMIC, get_order(sz)); if (!data) panic("Could not allocate MCA memory for cpu %d\n", diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 1234834cc4c4..1f98947fe715 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -176,7 +176,7 @@ SECTIONS .fill : { FILL(0); BYTE(0); - . = ALIGN(8); + STRUCT_ALIGN(); } __appended_dtb = .; /* leave space for appended DTB */ diff --git a/arch/powerpc/include/asm/cpu_has_feature.h b/arch/powerpc/include/asm/cpu_has_feature.h index 7897d16e0990..727d4b321937 100644 --- a/arch/powerpc/include/asm/cpu_has_feature.h +++ b/arch/powerpc/include/asm/cpu_has_feature.h @@ -7,7 +7,7 @@ #include <linux/bug.h> #include <asm/cputable.h> -static inline bool early_cpu_has_feature(unsigned long feature) +static __always_inline bool early_cpu_has_feature(unsigned long feature) { return !!((CPU_FTRS_ALWAYS & feature) || (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature)); @@ -46,7 +46,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature) return static_branch_likely(&cpu_feature_keys[i]); } #else -static inline bool cpu_has_feature(unsigned long feature) +static __always_inline bool cpu_has_feature(unsigned long feature) { return early_cpu_has_feature(feature); } diff --git a/arch/powerpc/kernel/vdso32/gettimeofday.S b/arch/powerpc/kernel/vdso32/gettimeofday.S index a6e29f880e0e..d21d08140a5e 100644 --- a/arch/powerpc/kernel/vdso32/gettimeofday.S +++ b/arch/powerpc/kernel/vdso32/gettimeofday.S @@ -65,3 +65,14 @@ V_FUNCTION_END(__kernel_clock_getres) V_FUNCTION_BEGIN(__kernel_time) cvdso_call_time __c_kernel_time V_FUNCTION_END(__kernel_time) + +/* Routines for restoring integer registers, called by the compiler. */ +/* Called with r11 pointing to the stack header word of the caller of the */ +/* function, just beyond the end of the integer restore area. */ +_GLOBAL(_restgpr_31_x) +_GLOBAL(_rest32gpr_31_x) + lwz r0,4(r11) + lwz r31,-4(r11) + mtlr r0 + mr r1,r11 + blr diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 85d626b8ce5e..87d7b52f278f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -93,7 +93,6 @@ config RISCV select PCI_MSI if PCI select RISCV_INTC select RISCV_TIMER if RISCV_SBI - select SPARSEMEM_STATIC if 32BIT select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK @@ -154,7 +153,8 @@ config ARCH_FLATMEM_ENABLE config ARCH_SPARSEMEM_ENABLE def_bool y depends on MMU - select SPARSEMEM_VMEMMAP_ENABLE + select SPARSEMEM_STATIC if 32BIT && SPARSMEM + select SPARSEMEM_VMEMMAP_ENABLE if 64BIT config ARCH_SELECT_MEMORY_MODEL def_bool ARCH_SPARSEMEM_ENABLE diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 7efcece8896c..e1b2690b6e45 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -31,6 +31,8 @@ config SOC_CANAAN select SIFIVE_PLIC select ARCH_HAS_RESET_CONTROLLER select PINCTRL + select COMMON_CLK + select COMMON_CLK_K210 help This enables support for Canaan Kendryte K210 SoC platform hardware. diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h index 27e005fca584..2a652b0c987d 100644 --- a/arch/riscv/include/asm/asm-prototypes.h +++ b/arch/riscv/include/asm/asm-prototypes.h @@ -9,4 +9,20 @@ long long __lshrti3(long long a, int b); long long __ashrti3(long long a, int b); long long __ashlti3(long long a, int b); + +#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs) + +DECLARE_DO_ERROR_INFO(do_trap_unknown); +DECLARE_DO_ERROR_INFO(do_trap_insn_misaligned); +DECLARE_DO_ERROR_INFO(do_trap_insn_fault); +DECLARE_DO_ERROR_INFO(do_trap_insn_illegal); +DECLARE_DO_ERROR_INFO(do_trap_load_fault); +DECLARE_DO_ERROR_INFO(do_trap_load_misaligned); +DECLARE_DO_ERROR_INFO(do_trap_store_misaligned); +DECLARE_DO_ERROR_INFO(do_trap_store_fault); +DECLARE_DO_ERROR_INFO(do_trap_ecall_u); +DECLARE_DO_ERROR_INFO(do_trap_ecall_s); +DECLARE_DO_ERROR_INFO(do_trap_ecall_m); +DECLARE_DO_ERROR_INFO(do_trap_break); + #endif /* _ASM_RISCV_PROTOTYPES_H */ diff --git a/arch/riscv/include/asm/irq.h b/arch/riscv/include/asm/irq.h index 9807ad164015..e4c435509983 100644 --- a/arch/riscv/include/asm/irq.h +++ b/arch/riscv/include/asm/irq.h @@ -12,4 +12,6 @@ #include <asm-generic/irq.h> +extern void __init init_IRQ(void); + #endif /* _ASM_RISCV_IRQ_H */ diff --git a/arch/riscv/include/asm/processor.h b/arch/riscv/include/asm/processor.h index 3a240037bde2..021ed64ee608 100644 --- a/arch/riscv/include/asm/processor.h +++ b/arch/riscv/include/asm/processor.h @@ -71,6 +71,7 @@ int riscv_of_processor_hartid(struct device_node *node); int riscv_of_parent_hartid(struct device_node *node); extern void riscv_fill_hwcap(void); +extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); #endif /* __ASSEMBLY__ */ diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index cb4abb639e8d..09ad4e923510 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -119,6 +119,11 @@ extern int regs_query_register_offset(const char *name); extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n); +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer); +int do_syscall_trace_enter(struct pt_regs *regs); +void do_syscall_trace_exit(struct pt_regs *regs); + /** * regs_get_register() - get register value from its offset * @regs: pt_regs from which register value is gotten diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 99895d9c3bdd..d7027411dde8 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -51,10 +51,10 @@ enum sbi_ext_rfence_fid { SBI_EXT_RFENCE_REMOTE_FENCE_I = 0, SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, - SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID, - SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, + SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID, + SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, }; enum sbi_ext_hsm_fid { diff --git a/arch/riscv/include/asm/timex.h b/arch/riscv/include/asm/timex.h index 81de51e6aa32..507cae273bc6 100644 --- a/arch/riscv/include/asm/timex.h +++ b/arch/riscv/include/asm/timex.h @@ -88,4 +88,6 @@ static inline int read_current_timer(unsigned long *timer_val) return 0; } +extern void time_init(void); + #endif /* _ASM_RISCV_TIMEX_H */ diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 3dc0abde988a..647a47f5484a 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -8,6 +8,7 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) endif +CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,) extra-y += head.o extra-y += vmlinux.lds diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index e6372490aa0b..17ca5e923bb0 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -2,39 +2,41 @@ #include <linux/kprobes.h> -/* Ftrace callback handler for kprobes -- called under preepmt disabed */ +/* Ftrace callback handler for kprobes -- called under preepmt disabled */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, - struct ftrace_ops *ops, struct ftrace_regs *regs) + struct ftrace_ops *ops, struct ftrace_regs *fregs) { struct kprobe *p; + struct pt_regs *regs; struct kprobe_ctlblk *kcb; p = get_kprobe((kprobe_opcode_t *)ip); if (unlikely(!p) || kprobe_disabled(p)) return; + regs = ftrace_get_regs(fregs); kcb = get_kprobe_ctlblk(); if (kprobe_running()) { kprobes_inc_nmissed_count(p); } else { - unsigned long orig_ip = instruction_pointer(&(regs->regs)); + unsigned long orig_ip = instruction_pointer(regs); - instruction_pointer_set(&(regs->regs), ip); + instruction_pointer_set(regs, ip); __this_cpu_write(current_kprobe, p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; - if (!p->pre_handler || !p->pre_handler(p, &(regs->regs))) { + if (!p->pre_handler || !p->pre_handler(p, regs)) { /* * Emulate singlestep (and also recover regs->pc) * as if there is a nop */ - instruction_pointer_set(&(regs->regs), + instruction_pointer_set(regs, (unsigned long)p->addr + MCOUNT_INSN_SIZE); if (unlikely(p->post_handler)) { kcb->kprobe_status = KPROBE_HIT_SSDONE; - p->post_handler(p, &(regs->regs), 0); + p->post_handler(p, regs, 0); } - instruction_pointer_set(&(regs->regs), orig_ip); + instruction_pointer_set(regs, orig_ip); } /* diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index a2ec18662fee..7e2c78e2ca6b 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -256,8 +256,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr) * normal page fault. */ regs->epc = (unsigned long) cur->addr; - if (!instruction_pointer(regs)) - BUG(); + BUG_ON(!instruction_pointer(regs)); if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 6f728e731bed..f9cd57c9c67d 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -10,6 +10,7 @@ #include <linux/cpu.h> #include <linux/kernel.h> #include <linux/sched.h> +#include <linux/sched/debug.h> #include <linux/sched/task_stack.h> #include <linux/tick.h> #include <linux/ptrace.h> diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f4a7db3d309e..d3bf756321a5 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -116,7 +116,7 @@ void sbi_clear_ipi(void) EXPORT_SYMBOL(sbi_clear_ipi); /** - * sbi_set_timer_v01() - Program the timer for next timer event. + * __sbi_set_timer_v01() - Program the timer for next timer event. * @stime_value: The value after which next timer event should fire. * * Return: None diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index e85bacff1b50..f8f15332caa2 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -147,7 +147,8 @@ static void __init init_resources(void) bss_res.end = __pa_symbol(__bss_stop) - 1; bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res); + /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */ + mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt + 1) * sizeof(*mem_res); mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); if (!mem_res) panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 8a5cf99c0776..1b432264f7ef 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -9,6 +9,7 @@ #include <linux/delay.h> #include <asm/sbi.h> #include <asm/processor.h> +#include <asm/timex.h> unsigned long riscv_timebase; EXPORT_SYMBOL_GPL(riscv_timebase); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 3ed2c23601a0..0879b5df11b9 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -17,6 +17,7 @@ #include <linux/module.h> #include <linux/irq.h> +#include <asm/asm-prototypes.h> #include <asm/bug.h> #include <asm/processor.h> #include <asm/ptrace.h> diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 3fc18f469efb..4f85c6d0ddf8 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -155,7 +155,7 @@ static void __init kasan_populate(void *start, void *end) memset(start, KASAN_SHADOW_INIT, end - start); } -void __init kasan_shallow_populate(void *start, void *end) +static void __init kasan_shallow_populate(void *start, void *end) { unsigned long vaddr = (unsigned long)start & PAGE_MASK; unsigned long vend = PAGE_ALIGN((unsigned long)end); @@ -187,6 +187,8 @@ void __init kasan_shallow_populate(void *start, void *end) } vaddr += PAGE_SIZE; } + + local_flush_tlb_all(); } void __init kasan_init(void) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 053fe8b8dec7..a75d94a9bcb2 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -202,7 +202,7 @@ extern unsigned int s390_pci_no_rid; ----------------------------------------------------------------------------- */ /* Base stuff */ int zpci_create_device(u32 fid, u32 fh, enum zpci_state state); -void zpci_remove_device(struct zpci_dev *zdev); +void zpci_remove_device(struct zpci_dev *zdev, bool set_error); int zpci_enable_device(struct zpci_dev *); int zpci_disable_device(struct zpci_dev *); int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64); diff --git a/arch/s390/kernel/perf_cpum_cf_diag.c b/arch/s390/kernel/perf_cpum_cf_diag.c index bc302b86ce28..2e3e7edbe3a0 100644 --- a/arch/s390/kernel/perf_cpum_cf_diag.c +++ b/arch/s390/kernel/perf_cpum_cf_diag.c @@ -968,7 +968,7 @@ static int cf_diag_all_start(void) */ static size_t cf_diag_needspace(unsigned int sets) { - struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events); + struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events); size_t bytes = 0; int i; @@ -984,6 +984,7 @@ static size_t cf_diag_needspace(unsigned int sets) sizeof(((struct s390_ctrset_cpudata *)0)->no_sets)); debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__, bytes); + put_cpu_ptr(&cpu_cf_events); return bytes; } diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 73c7afcc0527..f216a1b2f825 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -214,7 +214,7 @@ void vtime_flush(struct task_struct *tsk) avg_steal = S390_lowcore.avg_steal_timer / 2; if ((s64) steal > 0) { S390_lowcore.steal_timer = 0; - account_steal_time(steal); + account_steal_time(cputime_to_nsecs(steal)); avg_steal += steal; } S390_lowcore.avg_steal_timer = avg_steal; diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 600881d894dd..91064077526d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -682,16 +682,36 @@ int zpci_disable_device(struct zpci_dev *zdev) } EXPORT_SYMBOL_GPL(zpci_disable_device); -void zpci_remove_device(struct zpci_dev *zdev) +/* zpci_remove_device - Removes the given zdev from the PCI core + * @zdev: the zdev to be removed from the PCI core + * @set_error: if true the device's error state is set to permanent failure + * + * Sets a zPCI device to a configured but offline state; the zPCI + * device is still accessible through its hotplug slot and the zPCI + * API but is removed from the common code PCI bus, making it + * no longer available to drivers. + */ +void zpci_remove_device(struct zpci_dev *zdev, bool set_error) { struct zpci_bus *zbus = zdev->zbus; struct pci_dev *pdev; + if (!zdev->zbus->bus) + return; + pdev = pci_get_slot(zbus->bus, zdev->devfn); if (pdev) { - if (pdev->is_virtfn) - return zpci_iov_remove_virtfn(pdev, zdev->vfn); + if (set_error) + pdev->error_state = pci_channel_io_perm_failure; + if (pdev->is_virtfn) { + zpci_iov_remove_virtfn(pdev, zdev->vfn); + /* balance pci_get_slot */ + pci_dev_put(pdev); + return; + } pci_stop_and_remove_bus_device_locked(pdev); + /* balance pci_get_slot */ + pci_dev_put(pdev); } } @@ -765,7 +785,7 @@ void zpci_release_device(struct kref *kref) struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref); if (zdev->zbus->bus) - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); switch (zdev->state) { case ZPCI_FN_STATE_ONLINE: diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b4162da4e8a2..ac0c65cdd69d 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -76,13 +76,10 @@ void zpci_event_error(void *data) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = NULL; enum zpci_state state; + struct pci_dev *pdev; int ret; - if (zdev && zdev->zbus->bus) - pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn); - zpci_err("avail CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); @@ -124,8 +121,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0303: /* Deconfiguration requested */ if (!zdev) break; - if (pdev) - zpci_remove_device(zdev); + zpci_remove_device(zdev, false); ret = zpci_disable_device(zdev); if (ret) @@ -140,12 +136,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) case 0x0304: /* Configured -> Standby|Reserved */ if (!zdev) break; - if (pdev) { - /* Give the driver a hint that the function is - * already unusable. */ - pdev->error_state = pci_channel_io_perm_failure; - zpci_remove_device(zdev); - } + /* Give the driver a hint that the function is + * already unusable. + */ + zpci_remove_device(zdev, true); zdev->fh = ccdf->fh; zpci_disable_device(zdev); diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 952f534ad7b7..c84d0dc492cb 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -27,7 +27,7 @@ endif REALMODE_CFLAGS := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \ -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \ -fno-strict-aliasing -fomit-frame-pointer -fno-pic \ - -mno-mmx -mno-sse + -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none) REALMODE_CFLAGS += -ffreestanding REALMODE_CFLAGS += -fno-stack-protector diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 7bbb5bb98d8c..37ce38403cb8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3659,6 +3659,9 @@ static int intel_pmu_hw_config(struct perf_event *event) return ret; if (event->attr.precise_ip) { + if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT) + return -EINVAL; + if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) { event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD; if (!(event->attr.sample_type & diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index cdd195a3804e..16f226f51482 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2009,7 +2009,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d */ if (!pebs_status && cpuc->pebs_enabled && !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1))) - pebs_status = cpuc->pebs_enabled; + pebs_status = p->status = cpuc->pebs_enabled; bit = find_first_bit((unsigned long *)&pebs_status, x86_pmu.max_pebs_events); diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9bc091ecaaeb..3768819693e5 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -884,12 +884,29 @@ struct kvm_hv_syndbg { u64 options; }; +/* Current state of Hyper-V TSC page clocksource */ +enum hv_tsc_page_status { + /* TSC page was not set up or disabled */ + HV_TSC_PAGE_UNSET = 0, + /* TSC page MSR was written by the guest, update pending */ + HV_TSC_PAGE_GUEST_CHANGED, + /* TSC page MSR was written by KVM userspace, update pending */ + HV_TSC_PAGE_HOST_CHANGED, + /* TSC page was properly set up and is currently active */ + HV_TSC_PAGE_SET, + /* TSC page is currently being updated and therefore is inactive */ + HV_TSC_PAGE_UPDATING, + /* TSC page was set up with an inaccessible GPA */ + HV_TSC_PAGE_BROKEN, +}; + /* Hyper-V emulation context */ struct kvm_hv { struct mutex hv_lock; u64 hv_guest_os_id; u64 hv_hypercall; u64 hv_tsc_page; + enum hv_tsc_page_status hv_tsc_page_status; /* Hyper-v based guest crash (NT kernel bugcheck) parameters */ u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; @@ -931,6 +948,12 @@ enum kvm_irqchip_mode { KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ }; +struct kvm_x86_msr_filter { + u8 count; + bool default_allow:1; + struct msr_bitmap_range ranges[16]; +}; + #define APICV_INHIBIT_REASON_DISABLE 0 #define APICV_INHIBIT_REASON_HYPERV 1 #define APICV_INHIBIT_REASON_NESTED 2 @@ -1025,16 +1048,11 @@ struct kvm_arch { bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + bool bus_lock_detection_enabled; + /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */ u32 user_space_msr_mask; - - struct { - u8 count; - bool default_allow:1; - struct msr_bitmap_range ranges[16]; - } msr_filter; - - bool bus_lock_detection_enabled; + struct kvm_x86_msr_filter __rcu *msr_filter; struct kvm_pmu_event_filter __rcu *pmu_event_filter; struct task_struct *nx_lpage_recovery_thread; diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index bac2a42796c4..242e942b8ba8 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -544,15 +544,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset, *size = fpu_kernel_xstate_size; } -/* - * Thread-synchronous status. - * - * This is different from the flags in that nobody else - * ever touches our thread-synchronous status, so we don't - * have to worry about atomic accesses. - */ -#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ - static inline void native_load_sp0(unsigned long sp0) { diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 0d751d5da702..06b740bae431 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -205,10 +205,23 @@ static inline int arch_within_stack_frames(const void * const stack, #endif +/* + * Thread-synchronous status. + * + * This is different from the flags in that nobody else + * ever touches our thread-synchronous status, so we don't + * have to worry about atomic accesses. + */ +#define TS_COMPAT 0x0002 /* 32bit syscall active (64BIT)*/ + +#ifndef __ASSEMBLY__ #ifdef CONFIG_COMPAT #define TS_I386_REGS_POKED 0x0004 /* regs poked by 32-bit ptracer */ + +#define arch_set_restart_data(restart) \ + do { restart->arch_data = current_thread_info()->status; } while (0) + #endif -#ifndef __ASSEMBLY__ #ifdef CONFIG_X86_32 #define in_ia32_syscall() true diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index 7068e4bb057d..1a162e559753 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -87,18 +87,6 @@ clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops, #endif /* - * The maximum amount of extra memory compared to the base size. The - * main scaling factor is the size of struct page. At extreme ratios - * of base:extra, all the base memory can be filled with page - * structures for the extra memory, leaving no space for anything - * else. - * - * 10x seems like a reasonable balance between scaling flexibility and - * leaving a practically usable system. - */ -#define XEN_EXTRA_MEM_RATIO (10) - -/* * Helper functions to write or read unsigned long values to/from * memory, when the access may fault. */ diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index bda4f2a36868..4f26700f314d 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2342,6 +2342,11 @@ static int cpuid_to_apicid[] = { [0 ... NR_CPUS - 1] = -1, }; +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpuid_to_apicid[cpu]; +} + #ifdef CONFIG_SMP /** * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index c3b60c37c728..73ff4dd426a8 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1032,6 +1032,16 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin, if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) { irq = mp_irqs[idx].srcbusirq; legacy = mp_is_legacy_irq(irq); + /* + * IRQ2 is unusable for historical reasons on systems which + * have a legacy PIC. See the comment vs. IRQ2 further down. + * + * If this gets removed at some point then the related code + * in lapic_assign_system_vectors() needs to be adjusted as + * well. + */ + if (legacy && irq == PIC_CASCADE_IR) + return -EINVAL; } mutex_lock(&ioapic_mutex); diff --git a/arch/x86/kernel/kprobes/ftrace.c b/arch/x86/kernel/kprobes/ftrace.c index 373e5fa3ce1f..51c7f5271aee 100644 --- a/arch/x86/kernel/kprobes/ftrace.c +++ b/arch/x86/kernel/kprobes/ftrace.c @@ -12,7 +12,7 @@ #include "common.h" -/* Ftrace callback handler for kprobes -- called under preepmt disabed */ +/* Ftrace callback handler for kprobes -- called under preepmt disabled */ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *ops, struct ftrace_regs *fregs) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 351ba99f6009..172c947240b9 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -836,28 +836,25 @@ static void kvm_kick_cpu(int cpu) static void kvm_wait(u8 *ptr, u8 val) { - unsigned long flags; - if (in_nmi()) return; - local_irq_save(flags); - - if (READ_ONCE(*ptr) != val) - goto out; - /* * halt until it's our turn and kicked. Note that we do safe halt * for irq enabled case to avoid hang when lock info is overwritten * in irq spinlock slowpath and no spurious interrupt occur to save us. */ - if (arch_irqs_disabled_flags(flags)) - halt(); - else - safe_halt(); + if (irqs_disabled()) { + if (READ_ONCE(*ptr) == val) + halt(); + } else { + local_irq_disable(); -out: - local_irq_restore(flags); + if (READ_ONCE(*ptr) == val) + safe_halt(); + + local_irq_enable(); + } } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index ea794a083c44..f306e85a08a6 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -766,30 +766,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs) { - /* - * This function is fundamentally broken as currently - * implemented. - * - * The idea is that we want to trigger a call to the - * restart_block() syscall and that we want in_ia32_syscall(), - * in_x32_syscall(), etc. to match whatever they were in the - * syscall being restarted. We assume that the syscall - * instruction at (regs->ip - 2) matches whatever syscall - * instruction we used to enter in the first place. - * - * The problem is that we can get here when ptrace pokes - * syscall-like values into regs even if we're not in a syscall - * at all. - * - * For now, we maintain historical behavior and guess based on - * stored state. We could do better by saving the actual - * syscall arch in restart_block or (with caveats on x32) by - * checking if regs->ip points to 'int $0x80'. The current - * behavior is incorrect if a tracer has a different bitness - * than the tracee. - */ #ifdef CONFIG_IA32_EMULATION - if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED)) + if (current->restart_block.arch_data & TS_COMPAT) return __NR_ia32_restart_syscall; #endif #ifdef CONFIG_X86_X32_ABI diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 58fa8c029867..f98370a39936 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -520,10 +520,10 @@ static u64 get_time_ref_counter(struct kvm *kvm) u64 tsc; /* - * The guest has not set up the TSC page or the clock isn't - * stable, fall back to get_kvmclock_ns. + * Fall back to get_kvmclock_ns() when TSC page hasn't been set up, + * is broken, disabled or being updated. */ - if (!hv->tsc_ref.tsc_sequence) + if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET) return div_u64(get_kvmclock_ns(kvm), 100); vcpu = kvm_get_vcpu(kvm, 0); @@ -1077,6 +1077,21 @@ static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, return true; } +/* + * Don't touch TSC page values if the guest has opted for TSC emulation after + * migration. KVM doesn't fully support reenlightenment notifications and TSC + * access emulation and Hyper-V is known to expect the values in TSC page to + * stay constant before TSC access emulation is disabled from guest side + * (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC + * frequency and guest visible TSC value across migration (and prevent it when + * TSC scaling is unsupported). + */ +static inline bool tsc_page_update_unsafe(struct kvm_hv *hv) +{ + return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) && + hv->hv_tsc_emulation_control; +} + void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock) { @@ -1087,7 +1102,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); - if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || + hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET) return; mutex_lock(&hv->hv_lock); @@ -1101,7 +1117,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, */ if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn), &tsc_seq, sizeof(tsc_seq)))) + goto out_err; + + if (tsc_seq && tsc_page_update_unsafe(hv)) { + if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) + goto out_err; + + hv->hv_tsc_page_status = HV_TSC_PAGE_SET; goto out_unlock; + } /* * While we're computing and writing the parameters, force the @@ -1110,15 +1134,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, hv->tsc_ref.tsc_sequence = 0; if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) - goto out_unlock; + goto out_err; if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref)) - goto out_unlock; + goto out_err; /* Ensure sequence is zero before writing the rest of the struct. */ smp_wmb(); if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref))) - goto out_unlock; + goto out_err; /* * Now switch to the TSC page mechanism by writing the sequence. @@ -1131,8 +1155,45 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, smp_wmb(); hv->tsc_ref.tsc_sequence = tsc_seq; - kvm_write_guest(kvm, gfn_to_gpa(gfn), - &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)); + if (kvm_write_guest(kvm, gfn_to_gpa(gfn), + &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) + goto out_err; + + hv->hv_tsc_page_status = HV_TSC_PAGE_SET; + goto out_unlock; + +out_err: + hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; +out_unlock: + mutex_unlock(&hv->hv_lock); +} + +void kvm_hv_invalidate_tsc_page(struct kvm *kvm) +{ + struct kvm_hv *hv = to_kvm_hv(kvm); + u64 gfn; + + if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN || + hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET || + tsc_page_update_unsafe(hv)) + return; + + mutex_lock(&hv->hv_lock); + + if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + goto out_unlock; + + /* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */ + if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET) + hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING; + + gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + + hv->tsc_ref.tsc_sequence = 0; + if (kvm_write_guest(kvm, gfn_to_gpa(gfn), + &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence))) + hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN; + out_unlock: mutex_unlock(&hv->hv_lock); } @@ -1193,8 +1254,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, } case HV_X64_MSR_REFERENCE_TSC: hv->hv_tsc_page = data; - if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) + if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) { + if (!host) + hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED; + else + hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED; kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); + } else { + hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET; + } break; case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: return kvm_hv_msr_set_crash_data(kvm, @@ -1229,6 +1297,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, hv->hv_tsc_emulation_control = data; break; case HV_X64_MSR_TSC_EMULATION_STATUS: + if (data && !host) + return 1; + hv->hv_tsc_emulation_status = data; break; case HV_X64_MSR_TIME_REF_COUNT: diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index e951af1fcb2c..60547d5cb6d7 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -133,6 +133,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu); void kvm_hv_setup_tsc_page(struct kvm *kvm, struct pvclock_vcpu_time_info *hv_clock); +void kvm_hv_invalidate_tsc_page(struct kvm *kvm); void kvm_hv_init_vm(struct kvm *kvm); void kvm_hv_destroy_vm(struct kvm *kvm); diff --git a/arch/x86/kvm/mmu/mmu_internal.h b/arch/x86/kvm/mmu/mmu_internal.h index ec4fc28b325a..1f6f98c76bdf 100644 --- a/arch/x86/kvm/mmu/mmu_internal.h +++ b/arch/x86/kvm/mmu/mmu_internal.h @@ -78,6 +78,11 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep) return to_shadow_page(__pa(sptep)); } +static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp) +{ + return sp->role.smm ? 1 : 0; +} + static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu) { /* diff --git a/arch/x86/kvm/mmu/tdp_iter.c b/arch/x86/kvm/mmu/tdp_iter.c index e5f148106e20..b3ed302c1a35 100644 --- a/arch/x86/kvm/mmu/tdp_iter.c +++ b/arch/x86/kvm/mmu/tdp_iter.c @@ -21,6 +21,21 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level) } /* + * Return the TDP iterator to the root PT and allow it to continue its + * traversal over the paging structure from there. + */ +void tdp_iter_restart(struct tdp_iter *iter) +{ + iter->yielded_gfn = iter->next_last_level_gfn; + iter->level = iter->root_level; + + iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); + tdp_iter_refresh_sptep(iter); + + iter->valid = true; +} + +/* * Sets a TDP iterator to walk a pre-order traversal of the paging structure * rooted at root_pt, starting with the walk to translate next_last_level_gfn. */ @@ -31,16 +46,12 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, WARN_ON(root_level > PT64_ROOT_MAX_LEVEL); iter->next_last_level_gfn = next_last_level_gfn; - iter->yielded_gfn = iter->next_last_level_gfn; iter->root_level = root_level; iter->min_level = min_level; - iter->level = root_level; - iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt; - - iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level); - tdp_iter_refresh_sptep(iter); + iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root_pt; + iter->as_id = kvm_mmu_page_as_id(sptep_to_sp(root_pt)); - iter->valid = true; + tdp_iter_restart(iter); } /* @@ -159,8 +170,3 @@ void tdp_iter_next(struct tdp_iter *iter) iter->valid = false; } -tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter) -{ - return iter->pt_path[iter->root_level - 1]; -} - diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h index 4cc177d75c4a..b1748b988d3a 100644 --- a/arch/x86/kvm/mmu/tdp_iter.h +++ b/arch/x86/kvm/mmu/tdp_iter.h @@ -36,6 +36,8 @@ struct tdp_iter { int min_level; /* The iterator's current level within the paging structure */ int level; + /* The address space ID, i.e. SMM vs. regular. */ + int as_id; /* A snapshot of the value at sptep */ u64 old_spte; /* @@ -62,6 +64,6 @@ tdp_ptep_t spte_to_child_pt(u64 pte, int level); void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level, int min_level, gfn_t next_last_level_gfn); void tdp_iter_next(struct tdp_iter *iter); -tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter); +void tdp_iter_restart(struct tdp_iter *iter); #endif /* __KVM_X86_MMU_TDP_ITER_H */ diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d78915019b08..462b1f71c77f 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -203,11 +203,6 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, u64 old_spte, u64 new_spte, int level, bool shared); -static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp) -{ - return sp->role.smm ? 1 : 0; -} - static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level) { bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte); @@ -301,11 +296,16 @@ static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp, * * Given a page table that has been removed from the TDP paging structure, * iterates through the page table to clear SPTEs and free child page tables. + * + * Note that pt is passed in as a tdp_ptep_t, but it does not need RCU + * protection. Since this thread removed it from the paging structure, + * this thread will be responsible for ensuring the page is freed. Hence the + * early rcu_dereferences in the function. */ -static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt, +static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt, bool shared) { - struct kvm_mmu_page *sp = sptep_to_sp(pt); + struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt)); int level = sp->role.level; gfn_t base_gfn = sp->gfn; u64 old_child_spte; @@ -318,7 +318,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt, tdp_mmu_unlink_page(kvm, sp, shared); for (i = 0; i < PT64_ENT_PER_PAGE; i++) { - sptep = pt + i; + sptep = rcu_dereference(pt) + i; gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)); if (shared) { @@ -492,10 +492,6 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte) { - u64 *root_pt = tdp_iter_root_pt(iter); - struct kvm_mmu_page *root = sptep_to_sp(root_pt); - int as_id = kvm_mmu_page_as_id(root); - lockdep_assert_held_read(&kvm->mmu_lock); /* @@ -509,8 +505,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, new_spte) != iter->old_spte) return false; - handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, - iter->level, true); + handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, + new_spte, iter->level, true); return true; } @@ -538,7 +534,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm, * here since the SPTE is going from non-present * to non-present. */ - WRITE_ONCE(*iter->sptep, 0); + WRITE_ONCE(*rcu_dereference(iter->sptep), 0); return true; } @@ -564,10 +560,6 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, u64 new_spte, bool record_acc_track, bool record_dirty_log) { - tdp_ptep_t root_pt = tdp_iter_root_pt(iter); - struct kvm_mmu_page *root = sptep_to_sp(root_pt); - int as_id = kvm_mmu_page_as_id(root); - lockdep_assert_held_write(&kvm->mmu_lock); /* @@ -581,13 +573,13 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte); - __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, - iter->level, false); + __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte, + new_spte, iter->level, false); if (record_acc_track) handle_changed_spte_acc_track(iter->old_spte, new_spte, iter->level); if (record_dirty_log) - handle_changed_spte_dirty_log(kvm, as_id, iter->gfn, + handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn, iter->old_spte, new_spte, iter->level); } @@ -659,9 +651,7 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm, WARN_ON(iter->gfn > iter->next_last_level_gfn); - tdp_iter_start(iter, iter->pt_path[iter->root_level - 1], - iter->root_level, iter->min_level, - iter->next_last_level_gfn); + tdp_iter_restart(iter); return true; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 47e021bdcc94..fe806e894212 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1526,35 +1526,44 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits); bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type) { + struct kvm_x86_msr_filter *msr_filter; + struct msr_bitmap_range *ranges; struct kvm *kvm = vcpu->kvm; - struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges; - u32 count = kvm->arch.msr_filter.count; - u32 i; - bool r = kvm->arch.msr_filter.default_allow; + bool allowed; int idx; + u32 i; - /* MSR filtering not set up or x2APIC enabled, allow everything */ - if (!count || (index >= 0x800 && index <= 0x8ff)) + /* x2APIC MSRs do not support filtering. */ + if (index >= 0x800 && index <= 0x8ff) return true; - /* Prevent collision with set_msr_filter */ idx = srcu_read_lock(&kvm->srcu); - for (i = 0; i < count; i++) { + msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu); + if (!msr_filter) { + allowed = true; + goto out; + } + + allowed = msr_filter->default_allow; + ranges = msr_filter->ranges; + + for (i = 0; i < msr_filter->count; i++) { u32 start = ranges[i].base; u32 end = start + ranges[i].nmsrs; u32 flags = ranges[i].flags; unsigned long *bitmap = ranges[i].bitmap; if ((index >= start) && (index < end) && (flags & type)) { - r = !!test_bit(index - start, bitmap); + allowed = !!test_bit(index - start, bitmap); break; } } +out: srcu_read_unlock(&kvm->srcu, idx); - return r; + return allowed; } EXPORT_SYMBOL_GPL(kvm_msr_allowed); @@ -2551,6 +2560,8 @@ static void kvm_gen_update_masterclock(struct kvm *kvm) struct kvm_vcpu *vcpu; struct kvm_arch *ka = &kvm->arch; + kvm_hv_invalidate_tsc_page(kvm); + spin_lock(&ka->pvclock_gtod_sync_lock); kvm_make_mclock_inprogress_request(kvm); /* no guest entries from this point */ @@ -5352,25 +5363,34 @@ split_irqchip_unlock: return r; } -static void kvm_clear_msr_filter(struct kvm *kvm) +static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow) +{ + struct kvm_x86_msr_filter *msr_filter; + + msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT); + if (!msr_filter) + return NULL; + + msr_filter->default_allow = default_allow; + return msr_filter; +} + +static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter) { u32 i; - u32 count = kvm->arch.msr_filter.count; - struct msr_bitmap_range ranges[16]; - mutex_lock(&kvm->lock); - kvm->arch.msr_filter.count = 0; - memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0])); - mutex_unlock(&kvm->lock); - synchronize_srcu(&kvm->srcu); + if (!msr_filter) + return; - for (i = 0; i < count; i++) - kfree(ranges[i].bitmap); + for (i = 0; i < msr_filter->count; i++) + kfree(msr_filter->ranges[i].bitmap); + + kfree(msr_filter); } -static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range) +static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter, + struct kvm_msr_filter_range *user_range) { - struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges; struct msr_bitmap_range range; unsigned long *bitmap = NULL; size_t bitmap_size; @@ -5404,11 +5424,9 @@ static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user goto err; } - /* Everything ok, add this range identifier to our global pool */ - ranges[kvm->arch.msr_filter.count] = range; - /* Make sure we filled the array before we tell anyone to walk it */ - smp_wmb(); - kvm->arch.msr_filter.count++; + /* Everything ok, add this range identifier. */ + msr_filter->ranges[msr_filter->count] = range; + msr_filter->count++; return 0; err: @@ -5419,10 +5437,11 @@ err: static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) { struct kvm_msr_filter __user *user_msr_filter = argp; + struct kvm_x86_msr_filter *new_filter, *old_filter; struct kvm_msr_filter filter; bool default_allow; - int r = 0; bool empty = true; + int r = 0; u32 i; if (copy_from_user(&filter, user_msr_filter, sizeof(filter))) @@ -5435,25 +5454,32 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp) if (empty && !default_allow) return -EINVAL; - kvm_clear_msr_filter(kvm); - - kvm->arch.msr_filter.default_allow = default_allow; + new_filter = kvm_alloc_msr_filter(default_allow); + if (!new_filter) + return -ENOMEM; - /* - * Protect from concurrent calls to this function that could trigger - * a TOCTOU violation on kvm->arch.msr_filter.count. - */ - mutex_lock(&kvm->lock); for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) { - r = kvm_add_msr_filter(kvm, &filter.ranges[i]); - if (r) - break; + r = kvm_add_msr_filter(new_filter, &filter.ranges[i]); + if (r) { + kvm_free_msr_filter(new_filter); + return r; + } } + mutex_lock(&kvm->lock); + + /* The per-VM filter is protected by kvm->lock... */ + old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1); + + rcu_assign_pointer(kvm->arch.msr_filter, new_filter); + synchronize_srcu(&kvm->srcu); + + kvm_free_msr_filter(old_filter); + kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED); mutex_unlock(&kvm->lock); - return r; + return 0; } long kvm_arch_vm_ioctl(struct file *filp, @@ -6603,7 +6629,7 @@ static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu) int cpu = get_cpu(); cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask); - smp_call_function_many(vcpu->arch.wbinvd_dirty_mask, + on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask, wbinvd_ipi, NULL, 1); put_cpu(); cpumask_clear(vcpu->arch.wbinvd_dirty_mask); @@ -10634,8 +10660,6 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm) void kvm_arch_destroy_vm(struct kvm *kvm) { - u32 i; - if (current->mm == kvm->mm) { /* * Free memory regions allocated on behalf of userspace, @@ -10651,8 +10675,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) mutex_unlock(&kvm->slots_lock); } static_call_cond(kvm_x86_vm_destroy)(kvm); - for (i = 0; i < kvm->arch.msr_filter.count; i++) - kfree(kvm->arch.msr_filter.ranges[i].bitmap); + kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1)); kvm_pic_destroy(kvm); kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index f3eb53fe0215..f633f9e23b8f 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -263,7 +263,7 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) if (pgprot_val(old_prot) == pgprot_val(new_prot)) return; - pa = pfn << page_level_shift(level); + pa = pfn << PAGE_SHIFT; size = page_level_size(level); /* diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 6926d0ca6c71..b35fc8023884 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1936,7 +1936,7 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, * add rsp, 8 // skip eth_type_trans's frame * ret // return to its caller */ -int arch_prepare_bpf_trampoline(void *image, void *image_end, +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_progs *tprogs, void *orig_call) @@ -1975,6 +1975,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, save_regs(m, &prog, nr_args, stack_size); + if (flags & BPF_TRAMP_F_CALL_ORIG) { + /* arg1: mov rdi, im */ + emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); + if (emit_call(&prog, __bpf_tramp_enter, prog)) { + ret = -EINVAL; + goto cleanup; + } + } + if (fentry->nr_progs) if (invoke_bpf(m, &prog, fentry, stack_size)) return -EINVAL; @@ -1993,8 +2002,7 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, } if (flags & BPF_TRAMP_F_CALL_ORIG) { - if (fentry->nr_progs || fmod_ret->nr_progs) - restore_regs(m, &prog, nr_args, stack_size); + restore_regs(m, &prog, nr_args, stack_size); /* call original function */ if (emit_call(&prog, orig_call, prog)) { @@ -2003,6 +2011,9 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, } /* remember return value in a stack for bpf prog to access */ emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); + im->ip_after_call = prog; + memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE); + prog += X86_PATCH_SIZE; } if (fmod_ret->nr_progs) { @@ -2033,9 +2044,17 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end, * the return value is only updated on the stack and still needs to be * restored to R0. */ - if (flags & BPF_TRAMP_F_CALL_ORIG) + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = prog; + /* arg1: mov rdi, im */ + emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); + if (emit_call(&prog, __bpf_tramp_exit, prog)) { + ret = -EINVAL; + goto cleanup; + } /* restore original return value back into RAX */ emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); + } EMIT1(0x5B); /* pop rbx */ EMIT1(0xC9); /* leave */ @@ -2225,7 +2244,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) padding = true; goto skip_init_addrs; } - addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL); + addrs = kvmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL); if (!addrs) { prog = orig_prog; goto out_addrs; @@ -2317,7 +2336,7 @@ out_image: if (image) bpf_prog_fill_jited_linfo(prog, addrs + 1); out_addrs: - kfree(addrs); + kvfree(addrs); kfree(jit_data); prog->aux->jit_data = NULL; } diff --git a/arch/x86/platform/iris/iris.c b/arch/x86/platform/iris/iris.c index 1ac8578258af..b42bfdab01a9 100644 --- a/arch/x86/platform/iris/iris.c +++ b/arch/x86/platform/iris/iris.c @@ -27,7 +27,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>"); MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille"); -MODULE_SUPPORTED_DEVICE("Eurobraille/Iris"); static bool force; diff --git a/arch/x86/xen/p2m.c b/arch/x86/xen/p2m.c index 17d80f751fcb..ac06ca32e9ef 100644 --- a/arch/x86/xen/p2m.c +++ b/arch/x86/xen/p2m.c @@ -98,8 +98,8 @@ EXPORT_SYMBOL_GPL(xen_p2m_size); unsigned long xen_max_p2m_pfn __read_mostly; EXPORT_SYMBOL_GPL(xen_max_p2m_pfn); -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT -#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT +#ifdef CONFIG_XEN_MEMORY_HOTPLUG_LIMIT +#define P2M_LIMIT CONFIG_XEN_MEMORY_HOTPLUG_LIMIT #else #define P2M_LIMIT 0 #endif @@ -416,9 +416,6 @@ void __init xen_vmalloc_p2m_tree(void) xen_p2m_last_pfn = xen_max_p2m_pfn; p2m_limit = (phys_addr_t)P2M_LIMIT * 1024 * 1024 * 1024 / PAGE_SIZE; - if (!p2m_limit && IS_ENABLED(CONFIG_XEN_UNPOPULATED_ALLOC)) - p2m_limit = xen_start_info->nr_pages * XEN_EXTRA_MEM_RATIO; - vm.flags = VM_ALLOC; vm.size = ALIGN(sizeof(unsigned long) * max(xen_max_p2m_pfn, p2m_limit), PMD_SIZE * PMDS_PER_MID_PAGE); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1a3b75652fa4..8bfc10330107 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -59,6 +59,18 @@ static struct { } xen_remap_buf __initdata __aligned(PAGE_SIZE); static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY; +/* + * The maximum amount of extra memory compared to the base size. The + * main scaling factor is the size of struct page. At extreme ratios + * of base:extra, all the base memory can be filled with page + * structures for the extra memory, leaving no space for anything + * else. + * + * 10x seems like a reasonable balance between scaling flexibility and + * leaving a practically usable system. + */ +#define EXTRA_MEM_RATIO (10) + static bool xen_512gb_limit __initdata = IS_ENABLED(CONFIG_XEN_512GB); static void __init xen_parse_512gb(void) @@ -778,13 +790,13 @@ char * __init xen_memory_setup(void) extra_pages += max_pages - max_pfn; /* - * Clamp the amount of extra memory to a XEN_EXTRA_MEM_RATIO + * Clamp the amount of extra memory to a EXTRA_MEM_RATIO * factor the base size. * * Make sure we have no memory above max_pages, as this area * isn't handled by the p2m management. */ - extra_pages = min3(XEN_EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), + extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)), extra_pages, max_pages - max_pfn); i = 0; addr = xen_e820_table.entries[0].addr; |