diff options
Diffstat (limited to 'arch')
154 files changed, 1625 insertions, 781 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 9f066785bb71..65afb1de48b3 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1172,12 +1172,12 @@ config PAGE_SIZE_LESS_THAN_256KB config PAGE_SHIFT int - default 12 if PAGE_SIZE_4KB - default 13 if PAGE_SIZE_8KB - default 14 if PAGE_SIZE_16KB - default 15 if PAGE_SIZE_32KB - default 16 if PAGE_SIZE_64KB - default 18 if PAGE_SIZE_256KB + default 12 if PAGE_SIZE_4KB + default 13 if PAGE_SIZE_8KB + default 14 if PAGE_SIZE_16KB + default 15 if PAGE_SIZE_32KB + default 16 if PAGE_SIZE_64KB + default 18 if PAGE_SIZE_256KB # This allows to use a set of generic functions to determine mmap base # address by giving priority to top-down scheme only if the process diff --git a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi index 1235a71c6abe..52869e68f833 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7-mba7.dtsi @@ -666,7 +666,7 @@ bus-width = <4>; no-1-8-v; no-sdio; - no-emmc; + no-mmc; status = "okay"; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts index ba7231b364bb..7bab113ca6da 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts +++ b/arch/arm/boot/dts/nxp/imx/imx7s-warp.dts @@ -210,6 +210,7 @@ remote-endpoint = <&mipi_from_sensor>; clock-lanes = <0>; data-lanes = <1>; + link-frequencies = /bits/ 64 <330000000>; }; }; }; diff --git a/arch/arm/include/asm/mman.h b/arch/arm/include/asm/mman.h new file mode 100644 index 000000000000..2189e507c8e0 --- /dev/null +++ b/arch/arm/include/asm/mman.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include <asm/system_info.h> +#include <uapi/asm/mman.h> + +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return cpu_architecture() >= CPU_ARCH_ARMv6; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +#endif /* __ASM_MMAN_H__ */ diff --git a/arch/arm/mach-omap2/board-n8x0.c b/arch/arm/mach-omap2/board-n8x0.c index 31755a378c73..ff2a4a4d8220 100644 --- a/arch/arm/mach-omap2/board-n8x0.c +++ b/arch/arm/mach-omap2/board-n8x0.c @@ -79,10 +79,8 @@ static struct musb_hdrc_platform_data tusb_data = { static struct gpiod_lookup_table tusb_gpio_table = { .dev_id = "musb-tusb", .table = { - GPIO_LOOKUP("gpio-0-15", 0, "enable", - GPIO_ACTIVE_HIGH), - GPIO_LOOKUP("gpio-48-63", 10, "int", - GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-0-31", 0, "enable", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-32-63", 26, "int", GPIO_ACTIVE_HIGH), { } }, }; @@ -140,12 +138,11 @@ static int slot1_cover_open; static int slot2_cover_open; static struct device *mmc_device; -static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = { +static struct gpiod_lookup_table nokia800_mmc_gpio_table = { .dev_id = "mmci-omap.0", .table = { /* Slot switch, GPIO 96 */ - GPIO_LOOKUP("gpio-80-111", 16, - "switch", GPIO_ACTIVE_HIGH), + GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH), { } }, }; @@ -153,12 +150,12 @@ static struct gpiod_lookup_table nokia8xx_mmc_gpio_table = { static struct gpiod_lookup_table nokia810_mmc_gpio_table = { .dev_id = "mmci-omap.0", .table = { + /* Slot switch, GPIO 96 */ + GPIO_LOOKUP("gpio-96-127", 0, "switch", GPIO_ACTIVE_HIGH), /* Slot index 1, VSD power, GPIO 23 */ - GPIO_LOOKUP_IDX("gpio-16-31", 7, - "vsd", 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio-0-31", 23, "vsd", 1, GPIO_ACTIVE_HIGH), /* Slot index 1, VIO power, GPIO 9 */ - GPIO_LOOKUP_IDX("gpio-0-15", 9, - "vio", 1, GPIO_ACTIVE_HIGH), + GPIO_LOOKUP_IDX("gpio-0-31", 9, "vio", 1, GPIO_ACTIVE_HIGH), { } }, }; @@ -415,8 +412,6 @@ static struct omap_mmc_platform_data *mmc_data[OMAP24XX_NR_MMC]; static void __init n8x0_mmc_init(void) { - gpiod_add_lookup_table(&nokia8xx_mmc_gpio_table); - if (board_is_n810()) { mmc1_data.slots[0].name = "external"; @@ -429,6 +424,8 @@ static void __init n8x0_mmc_init(void) mmc1_data.slots[1].name = "internal"; mmc1_data.slots[1].ban_openended = 1; gpiod_add_lookup_table(&nokia810_mmc_gpio_table); + } else { + gpiod_add_lookup_table(&nokia800_mmc_gpio_table); } mmc1_data.nr_slots = 2; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi index 3c42240e78e2..4aaf5a0c1ed8 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-conn.dtsi @@ -41,7 +41,7 @@ conn_subsys: bus@5b000000 { interrupts = <GIC_SPI 267 IRQ_TYPE_LEVEL_HIGH>; fsl,usbphy = <&usbphy1>; fsl,usbmisc = <&usbmisc1 0>; - clocks = <&usb2_lpcg 0>; + clocks = <&usb2_lpcg IMX_LPCG_CLK_6>; ahb-burst-config = <0x0>; tx-burst-size-dword = <0x10>; rx-burst-size-dword = <0x10>; @@ -58,7 +58,7 @@ conn_subsys: bus@5b000000 { usbphy1: usbphy@5b100000 { compatible = "fsl,imx7ulp-usbphy"; reg = <0x5b100000 0x1000>; - clocks = <&usb2_lpcg 1>; + clocks = <&usb2_lpcg IMX_LPCG_CLK_7>; power-domains = <&pd IMX_SC_R_USB_0_PHY>; status = "disabled"; }; @@ -67,8 +67,8 @@ conn_subsys: bus@5b000000 { interrupts = <GIC_SPI 232 IRQ_TYPE_LEVEL_HIGH>; reg = <0x5b010000 0x10000>; clocks = <&sdhc0_lpcg IMX_LPCG_CLK_4>, - <&sdhc0_lpcg IMX_LPCG_CLK_0>, - <&sdhc0_lpcg IMX_LPCG_CLK_5>; + <&sdhc0_lpcg IMX_LPCG_CLK_5>, + <&sdhc0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_0>; status = "disabled"; @@ -78,8 +78,8 @@ conn_subsys: bus@5b000000 { interrupts = <GIC_SPI 233 IRQ_TYPE_LEVEL_HIGH>; reg = <0x5b020000 0x10000>; clocks = <&sdhc1_lpcg IMX_LPCG_CLK_4>, - <&sdhc1_lpcg IMX_LPCG_CLK_0>, - <&sdhc1_lpcg IMX_LPCG_CLK_5>; + <&sdhc1_lpcg IMX_LPCG_CLK_5>, + <&sdhc1_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_1>; fsl,tuning-start-tap = <20>; @@ -91,8 +91,8 @@ conn_subsys: bus@5b000000 { interrupts = <GIC_SPI 234 IRQ_TYPE_LEVEL_HIGH>; reg = <0x5b030000 0x10000>; clocks = <&sdhc2_lpcg IMX_LPCG_CLK_4>, - <&sdhc2_lpcg IMX_LPCG_CLK_0>, - <&sdhc2_lpcg IMX_LPCG_CLK_5>; + <&sdhc2_lpcg IMX_LPCG_CLK_5>, + <&sdhc2_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "ahb", "per"; power-domains = <&pd IMX_SC_R_SDHC_2>; status = "disabled"; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi index cab3468b1875..f7a91d43a0ff 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi @@ -28,8 +28,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = <GIC_SPI 336 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&spi0_lpcg 0>, - <&spi0_lpcg 1>; + clocks = <&spi0_lpcg IMX_LPCG_CLK_0>, + <&spi0_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -44,8 +44,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = <GIC_SPI 337 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&spi1_lpcg 0>, - <&spi1_lpcg 1>; + clocks = <&spi1_lpcg IMX_LPCG_CLK_0>, + <&spi1_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -60,8 +60,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = <GIC_SPI 338 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&spi2_lpcg 0>, - <&spi2_lpcg 1>; + clocks = <&spi2_lpcg IMX_LPCG_CLK_0>, + <&spi2_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -76,8 +76,8 @@ dma_subsys: bus@5a000000 { #size-cells = <0>; interrupts = <GIC_SPI 339 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&spi3_lpcg 0>, - <&spi3_lpcg 1>; + clocks = <&spi3_lpcg IMX_LPCG_CLK_0>, + <&spi3_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_SPI_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <60000000>; @@ -145,8 +145,8 @@ dma_subsys: bus@5a000000 { compatible = "fsl,imx8qxp-pwm", "fsl,imx27-pwm"; reg = <0x5a190000 0x1000>; interrupts = <GIC_SPI 127 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&adma_pwm_lpcg 1>, - <&adma_pwm_lpcg 0>; + clocks = <&adma_pwm_lpcg IMX_LPCG_CLK_4>, + <&adma_pwm_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; @@ -355,8 +355,8 @@ dma_subsys: bus@5a000000 { reg = <0x5a880000 0x10000>; interrupts = <GIC_SPI 240 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&adc0_lpcg 0>, - <&adc0_lpcg 1>; + clocks = <&adc0_lpcg IMX_LPCG_CLK_0>, + <&adc0_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_ADC_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; @@ -370,8 +370,8 @@ dma_subsys: bus@5a000000 { reg = <0x5a890000 0x10000>; interrupts = <GIC_SPI 241 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&adc1_lpcg 0>, - <&adc1_lpcg 1>; + clocks = <&adc1_lpcg IMX_LPCG_CLK_0>, + <&adc1_lpcg IMX_LPCG_CLK_4>; clock-names = "per", "ipg"; assigned-clocks = <&clk IMX_SC_R_ADC_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; @@ -384,8 +384,8 @@ dma_subsys: bus@5a000000 { reg = <0x5a8d0000 0x10000>; interrupts = <GIC_SPI 235 IRQ_TYPE_LEVEL_HIGH>; interrupt-parent = <&gic>; - clocks = <&can0_lpcg 1>, - <&can0_lpcg 0>; + clocks = <&can0_lpcg IMX_LPCG_CLK_4>, + <&can0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <40000000>; @@ -405,8 +405,8 @@ dma_subsys: bus@5a000000 { * CAN1 shares CAN0's clock and to enable CAN0's clock it * has to be powered on. */ - clocks = <&can0_lpcg 1>, - <&can0_lpcg 0>; + clocks = <&can0_lpcg IMX_LPCG_CLK_4>, + <&can0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <40000000>; @@ -426,8 +426,8 @@ dma_subsys: bus@5a000000 { * CAN2 shares CAN0's clock and to enable CAN0's clock it * has to be powered on. */ - clocks = <&can0_lpcg 1>, - <&can0_lpcg 0>; + clocks = <&can0_lpcg IMX_LPCG_CLK_4>, + <&can0_lpcg IMX_LPCG_CLK_0>; clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_CAN_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <40000000>; diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index 7e510b21bbac..764c1a08e3b1 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -25,8 +25,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d000000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm0_lpcg 4>, - <&pwm0_lpcg 1>; + clocks = <&pwm0_lpcg IMX_LPCG_CLK_6>, + <&pwm0_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; @@ -38,8 +38,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d010000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm1_lpcg 4>, - <&pwm1_lpcg 1>; + clocks = <&pwm1_lpcg IMX_LPCG_CLK_6>, + <&pwm1_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; @@ -51,8 +51,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d020000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm2_lpcg 4>, - <&pwm2_lpcg 1>; + clocks = <&pwm2_lpcg IMX_LPCG_CLK_6>, + <&pwm2_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; @@ -64,8 +64,8 @@ lsio_subsys: bus@5d000000 { compatible = "fsl,imx27-pwm"; reg = <0x5d030000 0x10000>; clock-names = "ipg", "per"; - clocks = <&pwm3_lpcg 4>, - <&pwm3_lpcg 1>; + clocks = <&pwm3_lpcg IMX_LPCG_CLK_6>, + <&pwm3_lpcg IMX_LPCG_CLK_1>; assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; #pwm-cells = <3>; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi index 41c79d2ebdd6..f24b14744799 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw72xx.dtsi @@ -14,6 +14,7 @@ pinctrl-0 = <&pinctrl_usbcon1>; type = "micro"; label = "otg"; + vbus-supply = <®_usb1_vbus>; id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; port { @@ -183,7 +184,6 @@ }; &usb3_phy0 { - vbus-supply = <®_usb1_vbus>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi index d5c400b355af..f5491a608b2f 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp-venice-gw73xx.dtsi @@ -14,6 +14,7 @@ pinctrl-0 = <&pinctrl_usbcon1>; type = "micro"; label = "otg"; + vbus-supply = <®_usb1_vbus>; id-gpios = <&gpio3 21 GPIO_ACTIVE_HIGH>; port { @@ -202,7 +203,6 @@ }; &usb3_phy0 { - vbus-supply = <®_usb1_vbus>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi index 11626fae5f97..aa9f28c4431d 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi @@ -153,15 +153,15 @@ }; &flexcan2 { - clocks = <&can1_lpcg 1>, - <&can1_lpcg 0>; + clocks = <&can1_lpcg IMX_LPCG_CLK_4>, + <&can1_lpcg IMX_LPCG_CLK_0>; assigned-clocks = <&clk IMX_SC_R_CAN_1 IMX_SC_PM_CLK_PER>; fsl,clk-source = /bits/ 8 <1>; }; &flexcan3 { - clocks = <&can2_lpcg 1>, - <&can2_lpcg 0>; + clocks = <&can2_lpcg IMX_LPCG_CLK_4>, + <&can2_lpcg IMX_LPCG_CLK_0>; assigned-clocks = <&clk IMX_SC_R_CAN_2 IMX_SC_PM_CLK_PER>; fsl,clk-source = /bits/ 8 <1>; }; diff --git a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi index f3a6da8b2890..5260c63db007 100644 --- a/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180-trogdor.dtsi @@ -944,6 +944,8 @@ ap_spi_fp: &spi10 { vddrf-supply = <&pp1300_l2c>; vddch0-supply = <&pp3300_l10c>; max-speed = <3200000>; + + qcom,local-bd-address-broken; }; }; diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 3b0e8248e1a4..a75de2665d84 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -161,12 +161,18 @@ static inline unsigned long get_trans_granule(void) #define MAX_TLBI_RANGE_PAGES __TLBI_RANGE_PAGES(31, 3) /* - * Generate 'num' values from -1 to 30 with -1 rejected by the - * __flush_tlb_range() loop below. + * Generate 'num' values from -1 to 31 with -1 rejected by the + * __flush_tlb_range() loop below. Its return value is only + * significant for a maximum of MAX_TLBI_RANGE_PAGES pages. If + * 'pages' is more than that, you must iterate over the overall + * range. */ -#define TLBI_RANGE_MASK GENMASK_ULL(4, 0) -#define __TLBI_RANGE_NUM(pages, scale) \ - ((((pages) >> (5 * (scale) + 1)) & TLBI_RANGE_MASK) - 1) +#define __TLBI_RANGE_NUM(pages, scale) \ + ({ \ + int __pages = min((pages), \ + __TLBI_RANGE_PAGES(31, (scale))); \ + (__pages >> (5 * (scale) + 1)) - 1; \ + }) /* * TLB Invalidation @@ -379,10 +385,6 @@ static inline void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch) * 3. If there is 1 page remaining, flush it through non-range operations. Range * operations can only span an even number of pages. We save this for last to * ensure 64KB start alignment is maintained for the LPA2 case. - * - * Note that certain ranges can be represented by either num = 31 and - * scale or num = 0 and scale + 1. The loop below favours the latter - * since num is limited to 30 by the __TLBI_RANGE_NUM() macro. */ #define __flush_tlb_range_op(op, start, pages, stride, \ asid, tlb_level, tlbi_user, lpa2) \ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index ce08b744aaab..cb68adcabe07 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -289,8 +289,28 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) adr_l x1, __hyp_text_end adr_l x2, dcache_clean_poc blr x2 + + mov_q x0, INIT_SCTLR_EL2_MMU_OFF + pre_disable_mmu_workaround + msr sctlr_el2, x0 + isb 0: mov_q x0, HCR_HOST_NVHE_FLAGS + + /* + * Compliant CPUs advertise their VHE-onlyness with + * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be + * RES1 in that case. Publish the E2H bit early so that + * it can be picked up by the init_el2_state macro. + * + * Fruity CPUs seem to have HCR_EL2.E2H set to RAO/WI, but + * don't advertise it (they predate this relaxation). + */ + mrs_s x1, SYS_ID_AA64MMFR4_EL1 + tbz x1, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f + + orr x0, x0, #HCR_E2H +1: msr hcr_el2, x0 isb @@ -303,30 +323,16 @@ SYM_INNER_LABEL(init_el2, SYM_L_LOCAL) mov_q x1, INIT_SCTLR_EL1_MMU_OFF - /* - * Compliant CPUs advertise their VHE-onlyness with - * ID_AA64MMFR4_EL1.E2H0 < 0. HCR_EL2.E2H can be - * RES1 in that case. - * - * Fruity CPUs seem to have HCR_EL2.E2H set to RES1, but - * don't advertise it (they predate this relaxation). - */ - mrs_s x0, SYS_ID_AA64MMFR4_EL1 - ubfx x0, x0, #ID_AA64MMFR4_EL1_E2H0_SHIFT, #ID_AA64MMFR4_EL1_E2H0_WIDTH - tbnz x0, #(ID_AA64MMFR4_EL1_E2H0_SHIFT + ID_AA64MMFR4_EL1_E2H0_WIDTH - 1), 1f - mrs x0, hcr_el2 and x0, x0, #HCR_E2H cbz x0, 2f -1: + /* Set a sane SCTLR_EL1, the VHE way */ - pre_disable_mmu_workaround msr_s SYS_SCTLR_EL12, x1 mov x2, #BOOT_CPU_FLAG_E2H b 3f 2: - pre_disable_mmu_workaround msr sctlr_el1, x1 mov x2, xzr 3: diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 162b030ab9da..0d022599eb61 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -761,7 +761,6 @@ static void sve_init_header_from_task(struct user_sve_header *header, { unsigned int vq; bool active; - bool fpsimd_only; enum vec_type task_type; memset(header, 0, sizeof(*header)); @@ -777,12 +776,10 @@ static void sve_init_header_from_task(struct user_sve_header *header, case ARM64_VEC_SVE: if (test_tsk_thread_flag(target, TIF_SVE_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - fpsimd_only = !test_tsk_thread_flag(target, TIF_SVE); break; case ARM64_VEC_SME: if (test_tsk_thread_flag(target, TIF_SME_VL_INHERIT)) header->flags |= SVE_PT_VL_INHERIT; - fpsimd_only = false; break; default: WARN_ON_ONCE(1); @@ -790,7 +787,7 @@ static void sve_init_header_from_task(struct user_sve_header *header, } if (active) { - if (fpsimd_only) { + if (target->thread.fp_type == FP_STATE_FPSIMD) { header->flags |= SVE_PT_REGS_FPSIMD; } else { header->flags |= SVE_PT_REGS_SVE; diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 3dee5490eea9..c4a0a35e02c7 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -2597,14 +2597,11 @@ static __init int kvm_arm_init(void) if (err) goto out_hyp; - if (is_protected_kvm_enabled()) { - kvm_info("Protected nVHE mode initialized successfully\n"); - } else if (in_hyp_mode) { - kvm_info("VHE mode initialized successfully\n"); - } else { - char mode = cpus_have_final_cap(ARM64_KVM_HVHE) ? 'h' : 'n'; - kvm_info("Hyp mode (%cVHE) initialized successfully\n", mode); - } + kvm_info("%s%sVHE mode initialized successfully\n", + in_hyp_mode ? "" : (is_protected_kvm_enabled() ? + "Protected " : "Hyp "), + in_hyp_mode ? "" : (cpus_have_final_cap(ARM64_KVM_HVHE) ? + "h" : "n")); /* * FIXME: Do something reasonable if kvm_init() fails after pKVM diff --git a/arch/arm64/kvm/hyp/nvhe/tlb.c b/arch/arm64/kvm/hyp/nvhe/tlb.c index a60fb13e2192..2fc68da4036d 100644 --- a/arch/arm64/kvm/hyp/nvhe/tlb.c +++ b/arch/arm64/kvm/hyp/nvhe/tlb.c @@ -154,7 +154,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, /* Switch to requested VMID */ __tlb_switch_to_guest(mmu, &cxt, false); - __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, + TLBI_TTL_UNKNOWN); dsb(ish); __tlbi(vmalle1is); diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c index 3fae5830f8d2..5a59ef88b646 100644 --- a/arch/arm64/kvm/hyp/pgtable.c +++ b/arch/arm64/kvm/hyp/pgtable.c @@ -528,7 +528,7 @@ static int hyp_unmap_walker(const struct kvm_pgtable_visit_ctx *ctx, kvm_clear_pte(ctx->ptep); dsb(ishst); - __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), ctx->level); + __tlbi_level(vae2is, __TLBI_VADDR(ctx->addr, 0), TLBI_TTL_UNKNOWN); } else { if (ctx->end - ctx->addr < granule) return -EINVAL; @@ -843,12 +843,15 @@ static bool stage2_try_break_pte(const struct kvm_pgtable_visit_ctx *ctx, * Perform the appropriate TLB invalidation based on the * evicted pte value (if any). */ - if (kvm_pte_table(ctx->old, ctx->level)) - kvm_tlb_flush_vmid_range(mmu, ctx->addr, - kvm_granule_size(ctx->level)); - else if (kvm_pte_valid(ctx->old)) + if (kvm_pte_table(ctx->old, ctx->level)) { + u64 size = kvm_granule_size(ctx->level); + u64 addr = ALIGN_DOWN(ctx->addr, size); + + kvm_tlb_flush_vmid_range(mmu, addr, size); + } else if (kvm_pte_valid(ctx->old)) { kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level); + } } if (stage2_pte_is_counted(ctx->old)) @@ -896,9 +899,13 @@ static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx, if (kvm_pte_valid(ctx->old)) { kvm_clear_pte(ctx->ptep); - if (!stage2_unmap_defer_tlb_flush(pgt)) - kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, - ctx->addr, ctx->level); + if (kvm_pte_table(ctx->old, ctx->level)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, + TLBI_TTL_UNKNOWN); + } else if (!stage2_unmap_defer_tlb_flush(pgt)) { + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, + ctx->level); + } } mm_ops->put_page(ctx->ptep); diff --git a/arch/arm64/kvm/hyp/vhe/tlb.c b/arch/arm64/kvm/hyp/vhe/tlb.c index b32e2940df7d..1a60b95381e8 100644 --- a/arch/arm64/kvm/hyp/vhe/tlb.c +++ b/arch/arm64/kvm/hyp/vhe/tlb.c @@ -171,7 +171,8 @@ void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu, /* Switch to requested VMID */ __tlb_switch_to_guest(mmu, &cxt); - __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, 0); + __flush_s2_tlb_range_op(ipas2e1is, start, pages, stride, + TLBI_TTL_UNKNOWN); dsb(ish); __tlbi(vmalle1is); diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c index 18680771cdb0..dc04bc767865 100644 --- a/arch/arm64/kvm/mmu.c +++ b/arch/arm64/kvm/mmu.c @@ -1637,7 +1637,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu) fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); is_iabt = kvm_vcpu_trap_is_iabt(vcpu); - if (esr_fsc_is_permission_fault(esr)) { + if (esr_fsc_is_translation_fault(esr)) { /* Beyond sanitised PARange (which is the IPA limit) */ if (fault_ipa >= BIT_ULL(get_kvm_ipa_limit())) { kvm_inject_size_fault(vcpu); diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 0f0e10bb0a95..b872b003a55f 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -276,7 +276,10 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, pte_t *ptep = NULL; pgdp = pgd_offset(mm, addr); - p4dp = p4d_offset(pgdp, addr); + p4dp = p4d_alloc(mm, pgdp, addr); + if (!p4dp) + return NULL; + pudp = pud_alloc(mm, p4dp, addr); if (!pudp) return NULL; diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 0c4e3ecf989d..0e270a1c51e6 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -219,9 +219,6 @@ bool kernel_page_present(struct page *page) pte_t *ptep; unsigned long addr = (unsigned long)page_address(page); - if (!can_set_direct_map()) - return true; - pgdp = pgd_offset_k(addr); if (pgd_none(READ_ONCE(*pgdp))) return false; diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c5b461dda438..122021f9bdfc 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -943,7 +943,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, emit(A64_UXTH(is64, dst, dst), ctx); break; case 32: - emit(A64_REV32(is64, dst, dst), ctx); + emit(A64_REV32(0, dst, dst), ctx); /* upper 32 bits already cleared */ break; case 64: @@ -1256,7 +1256,7 @@ emit_cond_jmp: } else { emit_a64_mov_i(1, tmp, off, ctx); if (sign_extend) - emit(A64_LDRSW(dst, src_adj, off_adj), ctx); + emit(A64_LDRSW(dst, src, tmp), ctx); else emit(A64_LDR32(dst, src, tmp), ctx); } diff --git a/arch/hexagon/kernel/vmlinux.lds.S b/arch/hexagon/kernel/vmlinux.lds.S index 1140051a0c45..1150b77fa281 100644 --- a/arch/hexagon/kernel/vmlinux.lds.S +++ b/arch/hexagon/kernel/vmlinux.lds.S @@ -63,6 +63,7 @@ SECTIONS STABS_DEBUG DWARF_DEBUG ELF_DETAILS + .hexagon.attributes 0 : { *(.hexagon.attributes) } DISCARDS } diff --git a/arch/loongarch/boot/dts/loongson-2k1000.dtsi b/arch/loongarch/boot/dts/loongson-2k1000.dtsi index 49a70f8c3cab..b6aeb1f70e2a 100644 --- a/arch/loongarch/boot/dts/loongson-2k1000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k1000.dtsi @@ -100,6 +100,13 @@ #size-cells = <2>; dma-coherent; + isa@18000000 { + compatible = "isa"; + #size-cells = <1>; + #address-cells = <2>; + ranges = <1 0x0 0x0 0x18000000 0x4000>; + }; + liointc0: interrupt-controller@1fe01400 { compatible = "loongson,liointc-2.0"; reg = <0x0 0x1fe01400 0x0 0x40>, diff --git a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts index dca91caf895e..74b99bd234cc 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000-ref.dts +++ b/arch/loongarch/boot/dts/loongson-2k2000-ref.dts @@ -61,12 +61,45 @@ &gmac0 { status = "okay"; + + phy-mode = "gmii"; + phy-handle = <&phy0>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + phy0: ethernet-phy@0 { + reg = <2>; + }; + }; }; &gmac1 { status = "okay"; + + phy-mode = "gmii"; + phy-handle = <&phy1>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + phy1: ethernet-phy@1 { + reg = <2>; + }; + }; }; &gmac2 { status = "okay"; + + phy-mode = "rgmii"; + phy-handle = <&phy2>; + mdio { + compatible = "snps,dwmac-mdio"; + #address-cells = <1>; + #size-cells = <0>; + phy2: ethernet-phy@2 { + reg = <0>; + }; + }; }; diff --git a/arch/loongarch/boot/dts/loongson-2k2000.dtsi b/arch/loongarch/boot/dts/loongson-2k2000.dtsi index a231949b5f55..9eab2d02cbe8 100644 --- a/arch/loongarch/boot/dts/loongson-2k2000.dtsi +++ b/arch/loongarch/boot/dts/loongson-2k2000.dtsi @@ -51,6 +51,13 @@ #address-cells = <2>; #size-cells = <2>; + isa@18400000 { + compatible = "isa"; + #size-cells = <1>; + #address-cells = <2>; + ranges = <1 0x0 0x0 0x18400000 0x4000>; + }; + pmc: power-management@100d0000 { compatible = "loongson,ls2k2000-pmc", "loongson,ls2k0500-pmc", "syscon"; reg = <0x0 0x100d0000 0x0 0x58>; @@ -109,6 +116,8 @@ msi: msi-controller@1fe01140 { compatible = "loongson,pch-msi-1.0"; reg = <0x0 0x1fe01140 0x0 0x8>; + interrupt-controller; + #interrupt-cells = <1>; msi-controller; loongson,msi-base-vec = <64>; loongson,msi-num-vecs = <192>; @@ -140,27 +149,34 @@ #address-cells = <3>; #size-cells = <2>; device_type = "pci"; + msi-parent = <&msi>; bus-range = <0x0 0xff>; - ranges = <0x01000000 0x0 0x00008000 0x0 0x18400000 0x0 0x00008000>, + ranges = <0x01000000 0x0 0x00008000 0x0 0x18408000 0x0 0x00008000>, <0x02000000 0x0 0x60000000 0x0 0x60000000 0x0 0x20000000>; gmac0: ethernet@3,0 { reg = <0x1800 0x0 0x0 0x0 0x0>; - interrupts = <12 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, + <13 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&pic>; status = "disabled"; }; gmac1: ethernet@3,1 { reg = <0x1900 0x0 0x0 0x0 0x0>; - interrupts = <14 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <14 IRQ_TYPE_LEVEL_HIGH>, + <15 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&pic>; status = "disabled"; }; gmac2: ethernet@3,2 { reg = <0x1a00 0x0 0x0 0x0 0x0>; - interrupts = <17 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <17 IRQ_TYPE_LEVEL_HIGH>, + <18 IRQ_TYPE_LEVEL_HIGH>; + interrupt-names = "macirq", "eth_lpi"; interrupt-parent = <&pic>; status = "disabled"; }; diff --git a/arch/loongarch/include/asm/addrspace.h b/arch/loongarch/include/asm/addrspace.h index b24437e28c6e..7bd47d65bf7a 100644 --- a/arch/loongarch/include/asm/addrspace.h +++ b/arch/loongarch/include/asm/addrspace.h @@ -11,6 +11,7 @@ #define _ASM_ADDRSPACE_H #include <linux/const.h> +#include <linux/sizes.h> #include <asm/loongarch.h> diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h index 4a8adcca329b..c2f9979b2979 100644 --- a/arch/loongarch/include/asm/io.h +++ b/arch/loongarch/include/asm/io.h @@ -14,11 +14,6 @@ #include <asm/pgtable-bits.h> #include <asm/string.h> -/* - * Change "struct page" to physical address. - */ -#define page_to_phys(page) ((phys_addr_t)page_to_pfn(page) << PAGE_SHIFT) - extern void __init __iomem *early_ioremap(u64 phys_addr, unsigned long size); extern void __init early_iounmap(void __iomem *addr, unsigned long size); @@ -73,6 +68,21 @@ extern void __memcpy_fromio(void *to, const volatile void __iomem *from, size_t #define __io_aw() mmiowb() +#ifdef CONFIG_KFENCE +#define virt_to_phys(kaddr) \ +({ \ + (likely((unsigned long)kaddr < vm_map_base)) ? __pa((unsigned long)kaddr) : \ + page_to_phys(tlb_virt_to_page((unsigned long)kaddr)) + offset_in_page((unsigned long)kaddr);\ +}) + +#define phys_to_virt(paddr) \ +({ \ + extern char *__kfence_pool; \ + (unlikely(__kfence_pool == NULL)) ? __va((unsigned long)paddr) : \ + page_address(phys_to_page((unsigned long)paddr)) + offset_in_page((unsigned long)paddr);\ +}) +#endif + #include <asm-generic/io.h> #define ARCH_HAS_VALID_PHYS_ADDR_RANGE diff --git a/arch/loongarch/include/asm/kfence.h b/arch/loongarch/include/asm/kfence.h index 6c82aea1c993..a6a5760da3a3 100644 --- a/arch/loongarch/include/asm/kfence.h +++ b/arch/loongarch/include/asm/kfence.h @@ -16,6 +16,7 @@ static inline bool arch_kfence_init_pool(void) { int err; + char *kaddr, *vaddr; char *kfence_pool = __kfence_pool; struct vm_struct *area; @@ -35,6 +36,14 @@ static inline bool arch_kfence_init_pool(void) return false; } + kaddr = kfence_pool; + vaddr = __kfence_pool; + while (kaddr < kfence_pool + KFENCE_POOL_SIZE) { + set_page_address(virt_to_page(kaddr), vaddr); + kaddr += PAGE_SIZE; + vaddr += PAGE_SIZE; + } + return true; } diff --git a/arch/loongarch/include/asm/page.h b/arch/loongarch/include/asm/page.h index 44027060c54a..e85df33f11c7 100644 --- a/arch/loongarch/include/asm/page.h +++ b/arch/loongarch/include/asm/page.h @@ -78,7 +78,26 @@ typedef struct { unsigned long pgprot; } pgprot_t; struct page *dmw_virt_to_page(unsigned long kaddr); struct page *tlb_virt_to_page(unsigned long kaddr); -#define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr)) +#define pfn_to_phys(pfn) __pfn_to_phys(pfn) +#define phys_to_pfn(paddr) __phys_to_pfn(paddr) + +#define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) +#define phys_to_page(paddr) pfn_to_page(phys_to_pfn(paddr)) + +#ifndef CONFIG_KFENCE + +#define page_to_virt(page) __va(page_to_phys(page)) +#define virt_to_page(kaddr) phys_to_page(__pa(kaddr)) + +#else + +#define WANT_PAGE_VIRTUAL + +#define page_to_virt(page) \ +({ \ + extern char *__kfence_pool; \ + (__kfence_pool == NULL) ? __va(page_to_phys(page)) : page_address(page); \ +}) #define virt_to_page(kaddr) \ ({ \ @@ -86,6 +105,11 @@ struct page *tlb_virt_to_page(unsigned long kaddr); dmw_virt_to_page((unsigned long)kaddr) : tlb_virt_to_page((unsigned long)kaddr);\ }) +#endif + +#define pfn_to_virt(pfn) page_to_virt(pfn_to_page(pfn)) +#define virt_to_pfn(kaddr) page_to_pfn(virt_to_page(kaddr)) + extern int __virt_addr_valid(volatile void *kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((volatile void *)(kaddr)) diff --git a/arch/loongarch/mm/mmap.c b/arch/loongarch/mm/mmap.c index a9630a81b38a..89af7c12e8c0 100644 --- a/arch/loongarch/mm/mmap.c +++ b/arch/loongarch/mm/mmap.c @@ -4,6 +4,7 @@ */ #include <linux/export.h> #include <linux/io.h> +#include <linux/kfence.h> #include <linux/memblock.h> #include <linux/mm.h> #include <linux/mman.h> @@ -111,6 +112,9 @@ int __virt_addr_valid(volatile void *kaddr) { unsigned long vaddr = (unsigned long)kaddr; + if (is_kfence_address((void *)kaddr)) + return 1; + if ((vaddr < PAGE_OFFSET) || (vaddr >= vm_map_base)) return 0; diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 2aae72e63871..bda018150000 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -11,13 +11,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr) { - return pfn_to_page(virt_to_pfn(kaddr)); + return phys_to_page(__pa(kaddr)); } EXPORT_SYMBOL(dmw_virt_to_page); struct page *tlb_virt_to_page(unsigned long kaddr) { - return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); + return phys_to_page(pfn_to_phys(pte_pfn(*virt_to_kpte(kaddr)))); } EXPORT_SYMBOL(tlb_virt_to_page); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 06ef440d16ce..516dc7022bd7 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -619,15 +619,6 @@ config MACH_EYEQ5 bool -config FIT_IMAGE_FDT_EPM5 - bool "Include FDT for Mobileye EyeQ5 development platforms" - depends on MACH_EYEQ5 - default n - help - Enable this to include the FDT for the EyeQ5 development platforms - from Mobileye in the FIT kernel image. - This requires u-boot on the platform. - config MACH_NINTENDO64 bool "Nintendo 64 console" select CEVT_R4K @@ -1011,6 +1002,15 @@ config CAVIUM_OCTEON_SOC endchoice +config FIT_IMAGE_FDT_EPM5 + bool "Include FDT for Mobileye EyeQ5 development platforms" + depends on MACH_EYEQ5 + default n + help + Enable this to include the FDT for the EyeQ5 development platforms + from Mobileye in the FIT kernel image. + This requires u-boot on the platform. + source "arch/mips/alchemy/Kconfig" source "arch/mips/ath25/Kconfig" source "arch/mips/ath79/Kconfig" diff --git a/arch/mips/include/asm/ptrace.h b/arch/mips/include/asm/ptrace.h index d14d0e37ad02..4a2b40ce39e0 100644 --- a/arch/mips/include/asm/ptrace.h +++ b/arch/mips/include/asm/ptrace.h @@ -159,7 +159,7 @@ extern unsigned long exception_ip(struct pt_regs *regs); #define exception_ip(regs) exception_ip(regs) #define profile_pc(regs) instruction_pointer(regs) -extern asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall); +extern asmlinkage long syscall_trace_enter(struct pt_regs *regs); extern asmlinkage void syscall_trace_leave(struct pt_regs *regs); extern void die(const char *, struct pt_regs *) __noreturn; diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index d1b11f66f748..cb1045ebab06 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -101,6 +101,7 @@ void output_thread_info_defines(void) OFFSET(TI_CPU, thread_info, cpu); OFFSET(TI_PRE_COUNT, thread_info, preempt_count); OFFSET(TI_REGS, thread_info, regs); + OFFSET(TI_SYSCALL, thread_info, syscall); DEFINE(_THREAD_SIZE, THREAD_SIZE); DEFINE(_THREAD_MASK, THREAD_MASK); DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 59288c13b581..61503a36067e 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -1317,16 +1317,13 @@ long arch_ptrace(struct task_struct *child, long request, * Notification of system call entry/exit * - triggered by current->work.syscall_trace */ -asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) +asmlinkage long syscall_trace_enter(struct pt_regs *regs) { user_exit(); - current_thread_info()->syscall = syscall; - if (test_thread_flag(TIF_SYSCALL_TRACE)) { if (ptrace_report_syscall_entry(regs)) return -1; - syscall = current_thread_info()->syscall; } #ifdef CONFIG_SECCOMP @@ -1335,7 +1332,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) struct seccomp_data sd; unsigned long args[6]; - sd.nr = syscall; + sd.nr = current_thread_info()->syscall; sd.arch = syscall_get_arch(current); syscall_get_arguments(current, regs, args); for (i = 0; i < 6; i++) @@ -1345,23 +1342,23 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) ret = __secure_computing(&sd); if (ret == -1) return ret; - syscall = current_thread_info()->syscall; } #endif if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->regs[2]); - audit_syscall_entry(syscall, regs->regs[4], regs->regs[5], + audit_syscall_entry(current_thread_info()->syscall, + regs->regs[4], regs->regs[5], regs->regs[6], regs->regs[7]); /* * Negative syscall numbers are mistaken for rejected syscalls, but * won't have had the return value set appropriately, so we do so now. */ - if (syscall < 0) + if (current_thread_info()->syscall < 0) syscall_set_return_value(current, regs, -ENOSYS, 0); - return syscall; + return current_thread_info()->syscall; } /* diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 18dc9b345056..2c604717e630 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -77,6 +77,18 @@ loads_done: PTR_WD load_a7, bad_stack_a7 .previous + /* + * syscall number is in v0 unless we called syscall(__NR_###) + * where the real syscall number is in a0 + */ + subu t2, v0, __NR_O32_Linux + bnez t2, 1f /* __NR_syscall at offset 0 */ + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number + b 2f +1: + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number +2: + lw t0, TI_FLAGS($28) # syscall tracing enabled? li t1, _TIF_WORK_SYSCALL_ENTRY and t0, t1 @@ -114,16 +126,7 @@ syscall_trace_entry: SAVE_STATIC move a0, sp - /* - * syscall number is in v0 unless we called syscall(__NR_###) - * where the real syscall number is in a0 - */ - move a1, v0 - subu t2, v0, __NR_O32_Linux - bnez t2, 1f /* __NR_syscall at offset 0 */ - lw a1, PT_R4(sp) - -1: jal syscall_trace_enter + jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 97456b2ca7dc..97788859238c 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -44,6 +44,8 @@ NESTED(handle_sysn32, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting + LONG_S v0, TI_SYSCALL($28) # Store syscall number + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -72,7 +74,6 @@ syscall_common: n32_syscall_trace_entry: SAVE_STATIC move a0, sp - move a1, v0 jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-n64.S b/arch/mips/kernel/scall64-n64.S index e6264aa62e45..be11ea5cc67e 100644 --- a/arch/mips/kernel/scall64-n64.S +++ b/arch/mips/kernel/scall64-n64.S @@ -46,6 +46,8 @@ NESTED(handle_sys64, PT_SIZE, sp) sd a3, PT_R26(sp) # save a3 for syscall restarting + LONG_S v0, TI_SYSCALL($28) # Store syscall number + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -82,7 +84,6 @@ n64_syscall_exit: syscall_trace_entry: SAVE_STATIC move a0, sp - move a1, v0 jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index d3c2616cba22..7a5abb73e531 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -79,6 +79,22 @@ loads_done: PTR_WD load_a7, bad_stack_a7 .previous + /* + * absolute syscall number is in v0 unless we called syscall(__NR_###) + * where the real syscall number is in a0 + * note: NR_syscall is the first O32 syscall but the macro is + * only defined when compiling with -mabi=32 (CONFIG_32BIT) + * therefore __NR_O32_Linux is used (4000) + */ + + subu t2, v0, __NR_O32_Linux + bnez t2, 1f /* __NR_syscall at offset 0 */ + LONG_S a0, TI_SYSCALL($28) # Save a0 as syscall number + b 2f +1: + LONG_S v0, TI_SYSCALL($28) # Save v0 as syscall number +2: + li t1, _TIF_WORK_SYSCALL_ENTRY LONG_L t0, TI_FLAGS($28) # syscall tracing enabled? and t0, t1, t0 @@ -113,22 +129,7 @@ trace_a_syscall: sd a7, PT_R11(sp) # For indirect syscalls move a0, sp - /* - * absolute syscall number is in v0 unless we called syscall(__NR_###) - * where the real syscall number is in a0 - * note: NR_syscall is the first O32 syscall but the macro is - * only defined when compiling with -mabi=32 (CONFIG_32BIT) - * therefore __NR_O32_Linux is used (4000) - */ - .set push - .set reorder - subu t1, v0, __NR_O32_Linux - move a1, v0 - bnez t1, 1f /* __NR_syscall at offset 0 */ - ld a1, PT_R4(sp) /* Arg1 for __NR_syscall case */ - .set pop - -1: jal syscall_trace_enter + jal syscall_trace_enter bltz v0, 1f # seccomp failed? Skip syscall diff --git a/arch/nios2/kernel/prom.c b/arch/nios2/kernel/prom.c index 8d98af5c7201..9a8393e6b4a8 100644 --- a/arch/nios2/kernel/prom.c +++ b/arch/nios2/kernel/prom.c @@ -21,7 +21,8 @@ void __init early_init_devtree(void *params) { - __be32 *dtb = (u32 *)__dtb_start; + __be32 __maybe_unused *dtb = (u32 *)__dtb_start; + #if defined(CONFIG_NIOS2_DTB_AT_PHYS_ADDR) if (be32_to_cpup((__be32 *)CONFIG_NIOS2_DTB_PHYS_ADDR) == OF_DT_HEADER) { @@ -30,8 +31,11 @@ void __init early_init_devtree(void *params) return; } #endif + +#ifdef CONFIG_NIOS2_DTB_SOURCE_BOOL if (be32_to_cpu((__be32) *dtb) == OF_DT_HEADER) params = (void *)__dtb_start; +#endif early_init_dt_scan(params); } diff --git a/arch/parisc/include/asm/mman.h b/arch/parisc/include/asm/mman.h new file mode 100644 index 000000000000..47c5a1991d10 --- /dev/null +++ b/arch/parisc/include/asm/mman.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include <uapi/asm/mman.h> + +/* PARISC cannot allow mdwe as it needs writable stacks */ +static inline bool arch_memory_deny_write_exec_supported(void) +{ + return false; +} +#define arch_memory_deny_write_exec_supported arch_memory_deny_write_exec_supported + +#endif /* __ASM_MMAN_H__ */ diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c index 74fb86b0d209..7c728755852e 100644 --- a/arch/powerpc/crypto/chacha-p10-glue.c +++ b/arch/powerpc/crypto/chacha-p10-glue.c @@ -197,6 +197,9 @@ static struct skcipher_alg algs[] = { static int __init chacha_p10_init(void) { + if (!cpu_has_feature(CPU_FTR_ARCH_31)) + return 0; + static_branch_enable(&have_p10); return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); @@ -204,10 +207,13 @@ static int __init chacha_p10_init(void) static void __exit chacha_p10_exit(void) { + if (!static_branch_likely(&have_p10)) + return; + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); } -module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init); +module_init(chacha_p10_init); module_exit(chacha_p10_exit); MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)"); diff --git a/arch/powerpc/include/asm/vdso/gettimeofday.h b/arch/powerpc/include/asm/vdso/gettimeofday.h index f0a4cf01e85c..78302f6c2580 100644 --- a/arch/powerpc/include/asm/vdso/gettimeofday.h +++ b/arch/powerpc/include/asm/vdso/gettimeofday.h @@ -4,7 +4,6 @@ #ifndef __ASSEMBLY__ -#include <asm/page.h> #include <asm/vdso/timebase.h> #include <asm/barrier.h> #include <asm/unistd.h> @@ -95,7 +94,7 @@ const struct vdso_data *__arch_get_vdso_data(void); static __always_inline const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd) { - return (void *)vd + PAGE_SIZE; + return (void *)vd + (1U << CONFIG_PAGE_SHIFT); } #endif diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index 1185efebf032..29a8c8e18585 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -1285,15 +1285,14 @@ spapr_tce_platform_iommu_attach_dev(struct iommu_domain *platform_domain, struct device *dev) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iommu_group *grp = iommu_group_get(dev); struct iommu_table_group *table_group; + struct iommu_group *grp; /* At first attach the ownership is already set */ - if (!domain) { - iommu_group_put(grp); + if (!domain) return 0; - } + grp = iommu_group_get(dev); table_group = iommu_group_get_iommudata(grp); /* * The domain being set to PLATFORM from earlier diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 252d63942f34..5b3115a19852 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -151,7 +151,7 @@ endif endif vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg -vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg:../compat_vdso/compat_vdso.so +vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 97fcde30e247..9f8ea0e33eb1 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -593,6 +593,12 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return ptep_test_and_clear_young(vma, address, ptep); } +#define pgprot_nx pgprot_nx +static inline pgprot_t pgprot_nx(pgprot_t _prot) +{ + return __pgprot(pgprot_val(_prot) & ~_PAGE_EXEC); +} + #define pgprot_noncached pgprot_noncached static inline pgprot_t pgprot_noncached(pgprot_t _prot) { diff --git a/arch/riscv/include/asm/syscall_wrapper.h b/arch/riscv/include/asm/syscall_wrapper.h index 980094c2e976..ac80216549ff 100644 --- a/arch/riscv/include/asm/syscall_wrapper.h +++ b/arch/riscv/include/asm/syscall_wrapper.h @@ -36,7 +36,8 @@ asmlinkage long __riscv_sys_ni_syscall(const struct pt_regs *); ulong) \ __attribute__((alias(__stringify(___se_##prefix##name)))); \ __diag_pop(); \ - static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)); \ + static long noinline ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) \ + __used; \ static long ___se_##prefix##name(__MAP(x,__SC_LONG,__VA_ARGS__)) #define SC_RISCV_REGS_TO_ARGS(x, ...) \ diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index ec0cab9fbddd..72ec1d9bd3f3 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -319,7 +319,7 @@ unsigned long __must_check clear_user(void __user *to, unsigned long n) #define __get_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __get_user_nocheck(*((type *)(dst)), (type *)(src), __kr_err); \ if (unlikely(__kr_err)) \ @@ -328,7 +328,7 @@ do { \ #define __put_kernel_nofault(dst, src, type, err_label) \ do { \ - long __kr_err; \ + long __kr_err = 0; \ \ __put_user_nocheck(*((type *)(src)), (type *)(dst), __kr_err); \ if (unlikely(__kr_err)) \ diff --git a/arch/riscv/include/uapi/asm/auxvec.h b/arch/riscv/include/uapi/asm/auxvec.h index 10aaa83db89e..95050ebe9ad0 100644 --- a/arch/riscv/include/uapi/asm/auxvec.h +++ b/arch/riscv/include/uapi/asm/auxvec.h @@ -34,7 +34,7 @@ #define AT_L3_CACHEGEOMETRY 47 /* entries in ARCH_DLINFO */ -#define AT_VECTOR_SIZE_ARCH 9 +#define AT_VECTOR_SIZE_ARCH 10 #define AT_MINSIGSTKSZ 51 #endif /* _UAPI_ASM_RISCV_AUXVEC_H */ diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 62fa393b2eb2..3df4cb788c1f 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -74,5 +74,5 @@ quiet_cmd_compat_vdsold = VDSOLD $@ rm $@.tmp # actual build commands -quiet_cmd_compat_vdsoas = VDSOAS $@ +quiet_cmd_compat_vdsoas = VDSOAS $@ cmd_compat_vdsoas = $(COMPAT_CC) $(a_flags) $(COMPAT_CC_FLAGS) -c -o $@ $< diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 37e87fdcf6a0..30e12b310cab 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -80,6 +80,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) */ lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -92,6 +94,8 @@ static int __patch_insn_set(void *addr, u8 c, size_t len) if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return 0; } NOKPROBE_SYMBOL(__patch_insn_set); @@ -122,6 +126,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (!riscv_patch_in_stop_machine) lockdep_assert_held(&text_mutex); + preempt_disable(); + if (across_pages) patch_map(addr + PAGE_SIZE, FIX_TEXT_POKE1); @@ -134,6 +140,8 @@ static int __patch_insn_write(void *addr, const void *insn, size_t len) if (across_pages) patch_unmap(FIX_TEXT_POKE1); + preempt_enable(); + return ret; } NOKPROBE_SYMBOL(__patch_insn_write); diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 92922dbd5b5c..e4bc61c4e58a 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -27,8 +27,6 @@ #include <asm/vector.h> #include <asm/cpufeature.h> -register unsigned long gp_in_global __asm__("gp"); - #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK) #include <linux/stackprotector.h> unsigned long __stack_chk_guard __read_mostly; @@ -37,7 +35,7 @@ EXPORT_SYMBOL(__stack_chk_guard); extern asmlinkage void ret_from_fork(void); -void arch_cpu_idle(void) +void noinstr arch_cpu_idle(void) { cpu_do_idle(); } @@ -207,7 +205,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (unlikely(args->fn)) { /* Kernel thread */ memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index 501e66debf69..5a2edd7f027e 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -119,6 +119,13 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) struct __sc_riscv_v_state __user *state = sc_vec; void __user *datap; + /* + * Mark the vstate as clean prior performing the actual copy, + * to avoid getting the vstate incorrectly clobbered by the + * discarded vector state. + */ + riscv_v_vstate_set_restore(current, regs); + /* Copy everything of __sc_riscv_v_state except datap. */ err = __copy_from_user(¤t->thread.vstate, &state->v_state, offsetof(struct __riscv_v_ext_state, datap)); @@ -133,13 +140,7 @@ static long __restore_v_state(struct pt_regs *regs, void __user *sc_vec) * Copy the whole vector content from user space datap. Use * copy_from_user to prevent information leak. */ - err = copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); - if (unlikely(err)) - return err; - - riscv_v_vstate_set_restore(current, regs); - - return err; + return copy_from_user(current->thread.vstate.datap, datap, riscv_v_vsize); } #else #define save_v_state(task, regs) (0) diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 868d6280cf66..05a16b1f0aee 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -122,7 +122,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr) print_vma_addr(KERN_CONT " in ", instruction_pointer(regs)); pr_cont("\n"); __show_regs(regs); - dump_instr(KERN_EMERG, regs); + dump_instr(KERN_INFO, regs); } force_sig_fault(signo, code, (void __user *)addr); diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 9b517fe1b8a8..272c431ac5b9 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -37,6 +37,7 @@ endif # Disable -pg to prevent insert call site CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) +CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n diff --git a/arch/riscv/kvm/aia_aplic.c b/arch/riscv/kvm/aia_aplic.c index 39e72aa016a4..b467ba5ed910 100644 --- a/arch/riscv/kvm/aia_aplic.c +++ b/arch/riscv/kvm/aia_aplic.c @@ -137,11 +137,21 @@ static void aplic_write_pending(struct aplic *aplic, u32 irq, bool pending) raw_spin_lock_irqsave(&irqd->lock, flags); sm = irqd->sourcecfg & APLIC_SOURCECFG_SM_MASK; - if (!pending && - ((sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) || - (sm == APLIC_SOURCECFG_SM_LEVEL_LOW))) + if (sm == APLIC_SOURCECFG_SM_INACTIVE) goto skip_write_pending; + if (sm == APLIC_SOURCECFG_SM_LEVEL_HIGH || + sm == APLIC_SOURCECFG_SM_LEVEL_LOW) { + if (!pending) + goto skip_write_pending; + if ((irqd->state & APLIC_IRQ_STATE_INPUT) && + sm == APLIC_SOURCECFG_SM_LEVEL_LOW) + goto skip_write_pending; + if (!(irqd->state & APLIC_IRQ_STATE_INPUT) && + sm == APLIC_SOURCECFG_SM_LEVEL_HIGH) + goto skip_write_pending; + } + if (pending) irqd->state |= APLIC_IRQ_STATE_PENDING; else @@ -187,16 +197,31 @@ static void aplic_write_enabled(struct aplic *aplic, u32 irq, bool enabled) static bool aplic_read_input(struct aplic *aplic, u32 irq) { - bool ret; - unsigned long flags; + u32 sourcecfg, sm, raw_input, irq_inverted; struct aplic_irq *irqd; + unsigned long flags; + bool ret = false; if (!irq || aplic->nr_irqs <= irq) return false; irqd = &aplic->irqs[irq]; raw_spin_lock_irqsave(&irqd->lock, flags); - ret = (irqd->state & APLIC_IRQ_STATE_INPUT) ? true : false; + + sourcecfg = irqd->sourcecfg; + if (sourcecfg & APLIC_SOURCECFG_D) + goto skip; + + sm = sourcecfg & APLIC_SOURCECFG_SM_MASK; + if (sm == APLIC_SOURCECFG_SM_INACTIVE) + goto skip; + + raw_input = (irqd->state & APLIC_IRQ_STATE_INPUT) ? 1 : 0; + irq_inverted = (sm == APLIC_SOURCECFG_SM_LEVEL_LOW || + sm == APLIC_SOURCECFG_SM_EDGE_FALL) ? 1 : 0; + ret = !!(raw_input ^ irq_inverted); + +skip: raw_spin_unlock_irqrestore(&irqd->lock, flags); return ret; diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index f4a6124d25c9..994adc26db4b 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -986,7 +986,7 @@ static int copy_isa_ext_reg_indices(const struct kvm_vcpu *vcpu, static inline unsigned long num_isa_ext_regs(const struct kvm_vcpu *vcpu) { - return copy_isa_ext_reg_indices(vcpu, NULL);; + return copy_isa_ext_reg_indices(vcpu, NULL); } static int copy_sbi_ext_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) diff --git a/arch/riscv/mm/tlbflush.c b/arch/riscv/mm/tlbflush.c index 893566e004b7..07d743f87b3f 100644 --- a/arch/riscv/mm/tlbflush.c +++ b/arch/riscv/mm/tlbflush.c @@ -99,7 +99,7 @@ static void __ipi_flush_tlb_range_asid(void *info) local_flush_tlb_range_asid(d->start, d->size, d->stride, d->asid); } -static void __flush_tlb_range(struct cpumask *cmask, unsigned long asid, +static void __flush_tlb_range(const struct cpumask *cmask, unsigned long asid, unsigned long start, unsigned long size, unsigned long stride) { @@ -200,7 +200,7 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start, void flush_tlb_kernel_range(unsigned long start, unsigned long end) { - __flush_tlb_range((struct cpumask *)cpu_online_mask, FLUSH_TLB_NO_ASID, + __flush_tlb_range(cpu_online_mask, FLUSH_TLB_NO_ASID, start, end - start, PAGE_SIZE); } diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index aac190085472..1adf2f39ce59 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1463,6 +1463,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, if (ret < 0) return ret; + if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) { + const struct btf_func_model *fm; + int idx; + + fm = bpf_jit_find_kfunc_model(ctx->prog, insn); + if (!fm) + return -EINVAL; + + for (idx = 0; idx < fm->nr_args; idx++) { + u8 reg = bpf_to_rv_reg(BPF_REG_1 + idx, ctx); + + if (fm->arg_size[idx] == sizeof(int)) + emit_sextw(reg, reg, ctx); + } + } + ret = emit_call(addr, fixed_addr, ctx); if (ret) return ret; diff --git a/arch/s390/include/asm/atomic.h b/arch/s390/include/asm/atomic.h index 7138d189cc42..0c4cad7d5a5b 100644 --- a/arch/s390/include/asm/atomic.h +++ b/arch/s390/include/asm/atomic.h @@ -15,31 +15,31 @@ #include <asm/barrier.h> #include <asm/cmpxchg.h> -static inline int arch_atomic_read(const atomic_t *v) +static __always_inline int arch_atomic_read(const atomic_t *v) { return __atomic_read(v); } #define arch_atomic_read arch_atomic_read -static inline void arch_atomic_set(atomic_t *v, int i) +static __always_inline void arch_atomic_set(atomic_t *v, int i) { __atomic_set(v, i); } #define arch_atomic_set arch_atomic_set -static inline int arch_atomic_add_return(int i, atomic_t *v) +static __always_inline int arch_atomic_add_return(int i, atomic_t *v) { return __atomic_add_barrier(i, &v->counter) + i; } #define arch_atomic_add_return arch_atomic_add_return -static inline int arch_atomic_fetch_add(int i, atomic_t *v) +static __always_inline int arch_atomic_fetch_add(int i, atomic_t *v) { return __atomic_add_barrier(i, &v->counter); } #define arch_atomic_fetch_add arch_atomic_fetch_add -static inline void arch_atomic_add(int i, atomic_t *v) +static __always_inline void arch_atomic_add(int i, atomic_t *v) { __atomic_add(i, &v->counter); } @@ -50,11 +50,11 @@ static inline void arch_atomic_add(int i, atomic_t *v) #define arch_atomic_fetch_sub(_i, _v) arch_atomic_fetch_add(-(int)(_i), _v) #define ATOMIC_OPS(op) \ -static inline void arch_atomic_##op(int i, atomic_t *v) \ +static __always_inline void arch_atomic_##op(int i, atomic_t *v) \ { \ __atomic_##op(i, &v->counter); \ } \ -static inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ +static __always_inline int arch_atomic_fetch_##op(int i, atomic_t *v) \ { \ return __atomic_##op##_barrier(i, &v->counter); \ } @@ -74,7 +74,7 @@ ATOMIC_OPS(xor) #define arch_atomic_xchg(v, new) (arch_xchg(&((v)->counter), new)) -static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) +static __always_inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) { return __atomic_cmpxchg(&v->counter, old, new); } @@ -82,31 +82,31 @@ static inline int arch_atomic_cmpxchg(atomic_t *v, int old, int new) #define ATOMIC64_INIT(i) { (i) } -static inline s64 arch_atomic64_read(const atomic64_t *v) +static __always_inline s64 arch_atomic64_read(const atomic64_t *v) { return __atomic64_read(v); } #define arch_atomic64_read arch_atomic64_read -static inline void arch_atomic64_set(atomic64_t *v, s64 i) +static __always_inline void arch_atomic64_set(atomic64_t *v, s64 i) { __atomic64_set(v, i); } #define arch_atomic64_set arch_atomic64_set -static inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_add_return(s64 i, atomic64_t *v) { return __atomic64_add_barrier(i, (long *)&v->counter) + i; } #define arch_atomic64_add_return arch_atomic64_add_return -static inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) +static __always_inline s64 arch_atomic64_fetch_add(s64 i, atomic64_t *v) { return __atomic64_add_barrier(i, (long *)&v->counter); } #define arch_atomic64_fetch_add arch_atomic64_fetch_add -static inline void arch_atomic64_add(s64 i, atomic64_t *v) +static __always_inline void arch_atomic64_add(s64 i, atomic64_t *v) { __atomic64_add(i, (long *)&v->counter); } @@ -114,20 +114,20 @@ static inline void arch_atomic64_add(s64 i, atomic64_t *v) #define arch_atomic64_xchg(v, new) (arch_xchg(&((v)->counter), new)) -static inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +static __always_inline s64 arch_atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) { return __atomic64_cmpxchg((long *)&v->counter, old, new); } #define arch_atomic64_cmpxchg arch_atomic64_cmpxchg -#define ATOMIC64_OPS(op) \ -static inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ -{ \ - __atomic64_##op(i, (long *)&v->counter); \ -} \ -static inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ -{ \ - return __atomic64_##op##_barrier(i, (long *)&v->counter); \ +#define ATOMIC64_OPS(op) \ +static __always_inline void arch_atomic64_##op(s64 i, atomic64_t *v) \ +{ \ + __atomic64_##op(i, (long *)&v->counter); \ +} \ +static __always_inline long arch_atomic64_fetch_##op(s64 i, atomic64_t *v) \ +{ \ + return __atomic64_##op##_barrier(i, (long *)&v->counter); \ } ATOMIC64_OPS(and) diff --git a/arch/s390/include/asm/atomic_ops.h b/arch/s390/include/asm/atomic_ops.h index 50510e08b893..7fa5f96a553a 100644 --- a/arch/s390/include/asm/atomic_ops.h +++ b/arch/s390/include/asm/atomic_ops.h @@ -8,7 +8,7 @@ #ifndef __ARCH_S390_ATOMIC_OPS__ #define __ARCH_S390_ATOMIC_OPS__ -static inline int __atomic_read(const atomic_t *v) +static __always_inline int __atomic_read(const atomic_t *v) { int c; @@ -18,14 +18,14 @@ static inline int __atomic_read(const atomic_t *v) return c; } -static inline void __atomic_set(atomic_t *v, int i) +static __always_inline void __atomic_set(atomic_t *v, int i) { asm volatile( " st %1,%0\n" : "=R" (v->counter) : "d" (i)); } -static inline s64 __atomic64_read(const atomic64_t *v) +static __always_inline s64 __atomic64_read(const atomic64_t *v) { s64 c; @@ -35,7 +35,7 @@ static inline s64 __atomic64_read(const atomic64_t *v) return c; } -static inline void __atomic64_set(atomic64_t *v, s64 i) +static __always_inline void __atomic64_set(atomic64_t *v, s64 i) { asm volatile( " stg %1,%0\n" @@ -45,7 +45,7 @@ static inline void __atomic64_set(atomic64_t *v, s64 i) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES #define __ATOMIC_OP(op_name, op_type, op_string, op_barrier) \ -static inline op_type op_name(op_type val, op_type *ptr) \ +static __always_inline op_type op_name(op_type val, op_type *ptr) \ { \ op_type old; \ \ @@ -96,7 +96,7 @@ __ATOMIC_CONST_OPS(__atomic64_add_const, long, "agsi") #else /* CONFIG_HAVE_MARCH_Z196_FEATURES */ #define __ATOMIC_OP(op_name, op_string) \ -static inline int op_name(int val, int *ptr) \ +static __always_inline int op_name(int val, int *ptr) \ { \ int old, new; \ \ @@ -122,7 +122,7 @@ __ATOMIC_OPS(__atomic_xor, "xr") #undef __ATOMIC_OPS #define __ATOMIC64_OP(op_name, op_string) \ -static inline long op_name(long val, long *ptr) \ +static __always_inline long op_name(long val, long *ptr) \ { \ long old, new; \ \ @@ -154,7 +154,7 @@ __ATOMIC64_OPS(__atomic64_xor, "xgr") #endif /* CONFIG_HAVE_MARCH_Z196_FEATURES */ -static inline int __atomic_cmpxchg(int *ptr, int old, int new) +static __always_inline int __atomic_cmpxchg(int *ptr, int old, int new) { asm volatile( " cs %[old],%[new],%[ptr]" @@ -164,7 +164,7 @@ static inline int __atomic_cmpxchg(int *ptr, int old, int new) return old; } -static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) +static __always_inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) { int old_expected = old; @@ -176,7 +176,7 @@ static inline bool __atomic_cmpxchg_bool(int *ptr, int old, int new) return old == old_expected; } -static inline long __atomic64_cmpxchg(long *ptr, long old, long new) +static __always_inline long __atomic64_cmpxchg(long *ptr, long old, long new) { asm volatile( " csg %[old],%[new],%[ptr]" @@ -186,7 +186,7 @@ static inline long __atomic64_cmpxchg(long *ptr, long old, long new) return old; } -static inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) +static __always_inline bool __atomic64_cmpxchg_bool(long *ptr, long old, long new) { long old_expected = old; diff --git a/arch/s390/include/asm/preempt.h b/arch/s390/include/asm/preempt.h index bf15da0fedbc..0e3da500e98c 100644 --- a/arch/s390/include/asm/preempt.h +++ b/arch/s390/include/asm/preempt.h @@ -12,12 +12,12 @@ #define PREEMPT_NEED_RESCHED 0x80000000 #define PREEMPT_ENABLED (0 + PREEMPT_NEED_RESCHED) -static inline int preempt_count(void) +static __always_inline int preempt_count(void) { return READ_ONCE(S390_lowcore.preempt_count) & ~PREEMPT_NEED_RESCHED; } -static inline void preempt_count_set(int pc) +static __always_inline void preempt_count_set(int pc) { int old, new; @@ -29,22 +29,22 @@ static inline void preempt_count_set(int pc) old, new) != old); } -static inline void set_preempt_need_resched(void) +static __always_inline void set_preempt_need_resched(void) { __atomic_and(~PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); } -static inline void clear_preempt_need_resched(void) +static __always_inline void clear_preempt_need_resched(void) { __atomic_or(PREEMPT_NEED_RESCHED, &S390_lowcore.preempt_count); } -static inline bool test_preempt_need_resched(void) +static __always_inline bool test_preempt_need_resched(void) { return !(READ_ONCE(S390_lowcore.preempt_count) & PREEMPT_NEED_RESCHED); } -static inline void __preempt_count_add(int val) +static __always_inline void __preempt_count_add(int val) { /* * With some obscure config options and CONFIG_PROFILE_ALL_BRANCHES @@ -59,17 +59,17 @@ static inline void __preempt_count_add(int val) __atomic_add(val, &S390_lowcore.preempt_count); } -static inline void __preempt_count_sub(int val) +static __always_inline void __preempt_count_sub(int val) { __preempt_count_add(-val); } -static inline bool __preempt_count_dec_and_test(void) +static __always_inline bool __preempt_count_dec_and_test(void) { return __atomic_add(-1, &S390_lowcore.preempt_count) == 1; } -static inline bool should_resched(int preempt_offset) +static __always_inline bool should_resched(int preempt_offset) { return unlikely(READ_ONCE(S390_lowcore.preempt_count) == preempt_offset); @@ -79,45 +79,45 @@ static inline bool should_resched(int preempt_offset) #define PREEMPT_ENABLED (0) -static inline int preempt_count(void) +static __always_inline int preempt_count(void) { return READ_ONCE(S390_lowcore.preempt_count); } -static inline void preempt_count_set(int pc) +static __always_inline void preempt_count_set(int pc) { S390_lowcore.preempt_count = pc; } -static inline void set_preempt_need_resched(void) +static __always_inline void set_preempt_need_resched(void) { } -static inline void clear_preempt_need_resched(void) +static __always_inline void clear_preempt_need_resched(void) { } -static inline bool test_preempt_need_resched(void) +static __always_inline bool test_preempt_need_resched(void) { return false; } -static inline void __preempt_count_add(int val) +static __always_inline void __preempt_count_add(int val) { S390_lowcore.preempt_count += val; } -static inline void __preempt_count_sub(int val) +static __always_inline void __preempt_count_sub(int val) { S390_lowcore.preempt_count -= val; } -static inline bool __preempt_count_dec_and_test(void) +static __always_inline bool __preempt_count_dec_and_test(void) { return !--S390_lowcore.preempt_count && tif_need_resched(); } -static inline bool should_resched(int preempt_offset) +static __always_inline bool should_resched(int preempt_offset) { return unlikely(preempt_count() == preempt_offset && tif_need_resched()); diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 787394978bc0..6a1e0fbbaa15 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -340,7 +340,8 @@ SYM_CODE_START(pgm_check_handler) mvc __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK stctg %c1,%c1,__PT_CR1(%r11) #if IS_ENABLED(CONFIG_KVM) - lg %r12,__LC_GMAP + ltg %r12,__LC_GMAP + jz 5f clc __GMAP_ASCE(8,%r12), __PT_CR1(%r11) jne 5f BPENTER __SF_SIE_FLAGS(%r10),_TIF_ISOLATE_BP_GUEST @@ -635,6 +636,7 @@ SYM_DATA_START_LOCAL(daton_psw) SYM_DATA_END(daton_psw) .section .rodata, "a" + .balign 8 #define SYSCALL(esame,emu) .quad __s390x_ ## esame SYM_DATA_START(sys_call_table) #include "asm/syscall_table.h" diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 823d652e3917..4ad472d130a3 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -90,7 +90,6 @@ static void paicrypt_event_destroy(struct perf_event *event) event->cpu); struct paicrypt_map *cpump = mp->mapptr; - cpump->event = NULL; static_branch_dec(&pai_key); mutex_lock(&pai_reserve_mutex); debug_sprintf_event(cfm_dbg, 5, "%s event %#llx cpu %d users %d" @@ -356,10 +355,15 @@ static int paicrypt_add(struct perf_event *event, int flags) static void paicrypt_stop(struct perf_event *event, int flags) { - if (!event->attr.sample_period) /* Counting */ + struct paicrypt_mapptr *mp = this_cpu_ptr(paicrypt_root.mapptr); + struct paicrypt_map *cpump = mp->mapptr; + + if (!event->attr.sample_period) { /* Counting */ paicrypt_read(event); - else /* Sampling */ + } else { /* Sampling */ perf_sched_cb_dec(event->pmu); + cpump->event = NULL; + } event->hw.state = PERF_HES_STOPPED; } diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 616a25606cd6..a6da7e0cc7a6 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -122,7 +122,6 @@ static void paiext_event_destroy(struct perf_event *event) free_page(PAI_SAVE_AREA(event)); mutex_lock(&paiext_reserve_mutex); - cpump->event = NULL; if (refcount_dec_and_test(&cpump->refcnt)) /* Last reference gone */ paiext_free(mp); paiext_root_free(); @@ -362,10 +361,15 @@ static int paiext_add(struct perf_event *event, int flags) static void paiext_stop(struct perf_event *event, int flags) { - if (!event->attr.sample_period) /* Counting */ + struct paiext_mapptr *mp = this_cpu_ptr(paiext_root.mapptr); + struct paiext_map *cpump = mp->mapptr; + + if (!event->attr.sample_period) { /* Counting */ paiext_read(event); - else /* Sampling */ + } else { /* Sampling */ perf_sched_cb_dec(event->pmu); + cpump->event = NULL; + } event->hw.state = PERF_HES_STOPPED; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index c421dd44ffbe..0c66b32e0f9f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -75,7 +75,7 @@ static enum fault_type get_fault_type(struct pt_regs *regs) if (!IS_ENABLED(CONFIG_PGSTE)) return KERNEL_FAULT; gmap = (struct gmap *)S390_lowcore.gmap; - if (regs->cr1 == gmap->asce) + if (gmap && gmap->asce == regs->cr1) return GMAP_FAULT; return KERNEL_FAULT; } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index b418333bb086..5af0402e94b8 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -516,11 +516,12 @@ static void bpf_skip(struct bpf_jit *jit, int size) * PLT for hotpatchable calls. The calling convention is the same as for the * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered. */ -extern const char bpf_plt[]; -extern const char bpf_plt_ret[]; -extern const char bpf_plt_target[]; -extern const char bpf_plt_end[]; -#define BPF_PLT_SIZE 32 +struct bpf_plt { + char code[16]; + void *ret; + void *target; +} __packed; +extern const struct bpf_plt bpf_plt; asm( ".pushsection .rodata\n" " .balign 8\n" @@ -531,15 +532,14 @@ asm( " .balign 8\n" "bpf_plt_ret: .quad 0\n" "bpf_plt_target: .quad 0\n" - "bpf_plt_end:\n" " .popsection\n" ); -static void bpf_jit_plt(void *plt, void *ret, void *target) +static void bpf_jit_plt(struct bpf_plt *plt, void *ret, void *target) { - memcpy(plt, bpf_plt, BPF_PLT_SIZE); - *(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret; - *(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret; + memcpy(plt, &bpf_plt, sizeof(*plt)); + plt->ret = ret; + plt->target = target; } /* @@ -662,9 +662,9 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) jit->prg = ALIGN(jit->prg, 8); jit->prologue_plt = jit->prg; if (jit->prg_buf) - bpf_jit_plt(jit->prg_buf + jit->prg, + bpf_jit_plt((struct bpf_plt *)(jit->prg_buf + jit->prg), jit->prg_buf + jit->prologue_plt_ret, NULL); - jit->prg += BPF_PLT_SIZE; + jit->prg += sizeof(struct bpf_plt); } static int get_probe_mem_regno(const u8 *insn) @@ -2040,9 +2040,6 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_jit jit; int pass; - if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE)) - return orig_fp; - if (!fp->jit_requested) return orig_fp; @@ -2148,14 +2145,11 @@ bool bpf_jit_supports_far_kfunc_call(void) int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *old_addr, void *new_addr) { + struct bpf_plt expected_plt, current_plt, new_plt, *plt; struct { u16 opc; s32 disp; } __packed insn; - char expected_plt[BPF_PLT_SIZE]; - char current_plt[BPF_PLT_SIZE]; - char new_plt[BPF_PLT_SIZE]; - char *plt; char *ret; int err; @@ -2174,18 +2168,18 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, */ } else { /* Verify the PLT. */ - plt = (char *)ip + (insn.disp << 1); - err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE); + plt = ip + (insn.disp << 1); + err = copy_from_kernel_nofault(¤t_plt, plt, + sizeof(current_plt)); if (err < 0) return err; ret = (char *)ip + 6; - bpf_jit_plt(expected_plt, ret, old_addr); - if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE)) + bpf_jit_plt(&expected_plt, ret, old_addr); + if (memcmp(¤t_plt, &expected_plt, sizeof(current_plt))) return -EINVAL; /* Adjust the call address. */ - bpf_jit_plt(new_plt, ret, new_addr); - s390_kernel_write(plt + (bpf_plt_target - bpf_plt), - new_plt + (bpf_plt_target - bpf_plt), + bpf_jit_plt(&new_plt, ret, new_addr); + s390_kernel_write(&plt->target, &new_plt.target, sizeof(void *)); } diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index 6a1f36df6a18..cf0ad89f5639 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -28,7 +28,7 @@ obj-y += net/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ -obj-y += virt/svm/ +obj-y += virt/ # for cleaning subdir- += boot tools diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 39886bab943a..4474bf32d0a4 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2439,6 +2439,8 @@ config USE_X86_SEG_SUPPORT # with named address spaces - see GCC PR sanitizer/111736. # depends on !KASAN + # -fsanitize=thread (KCSAN) is also incompatible. + depends on !KCSAN config CC_HAS_SLS def_bool $(cc-option,-mharden-sls=all) @@ -2631,6 +2633,16 @@ config MITIGATION_RFDS stored in floating point, vector and integer registers. See also <file:Documentation/admin-guide/hw-vuln/reg-file-data-sampling.rst> +config MITIGATION_SPECTRE_BHI + bool "Mitigate Spectre-BHB (Branch History Injection)" + depends on CPU_SUP_INTEL + default y + help + Enable BHI mitigations. BHI attacks are a form of Spectre V2 attacks + where the branch history buffer is poisoned to speculatively steer + indirect branches. + See <file:Documentation/admin-guide/hw-vuln/spectre.rst> + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 662d9d4033e6..5ab93fcdd691 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -251,8 +251,6 @@ archheaders: libs-y += arch/x86/lib/ -core-y += arch/x86/virt/ - # drivers-y are linked after core-y drivers-$(CONFIG_MATH_EMULATION) += arch/x86/math-emu/ drivers-$(CONFIG_PCI) += arch/x86/pci/ diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 719e939050cb..876fc6d46a13 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -15,10 +15,12 @@ */ #include <linux/linkage.h> +#include <asm/asm-offsets.h> #include <asm/msr.h> #include <asm/page_types.h> #include <asm/processor-flags.h> #include <asm/segment.h> +#include <asm/setup.h> .code64 .text @@ -149,6 +151,7 @@ SYM_FUNC_END(__efi64_thunk) SYM_FUNC_START(efi32_stub_entry) call 1f 1: popl %ecx + leal (efi32_boot_args - 1b)(%ecx), %ebx /* Clear BSS */ xorl %eax, %eax @@ -163,6 +166,7 @@ SYM_FUNC_START(efi32_stub_entry) popl %ecx popl %edx popl %esi + movl %esi, 8(%ebx) jmp efi32_entry SYM_FUNC_END(efi32_stub_entry) #endif @@ -239,8 +243,6 @@ SYM_FUNC_END(efi_enter32) * * Arguments: %ecx image handle * %edx EFI system table pointer - * %esi struct bootparams pointer (or NULL when not using - * the EFI handover protocol) * * Since this is the point of no return for ordinary execution, no registers * are considered live except for the function parameters. [Note that the EFI @@ -266,9 +268,18 @@ SYM_FUNC_START_LOCAL(efi32_entry) leal (efi32_boot_args - 1b)(%ebx), %ebx movl %ecx, 0(%ebx) movl %edx, 4(%ebx) - movl %esi, 8(%ebx) movb $0x0, 12(%ebx) // efi_is64 + /* + * Allocate some memory for a temporary struct boot_params, which only + * needs the minimal pieces that startup_32() relies on. + */ + subl $PARAM_SIZE, %esp + movl %esp, %esi + movl $PAGE_SIZE, BP_kernel_alignment(%esi) + movl $_end - 1b, BP_init_size(%esi) + subl $startup_32 - 1b, BP_init_size(%esi) + /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax @@ -294,8 +305,7 @@ SYM_FUNC_START(efi32_pe_entry) movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - xorl %esi, %esi - jmp efi32_entry // pass %ecx, %edx, %esi + jmp efi32_entry // pass %ecx, %edx // no other registers remain live 2: popl %edi // restore callee-save registers diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index d07be9d05cd0..b31ef2424d19 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -3,19 +3,28 @@ * Confidential Computing Platform Capability checks * * Copyright (C) 2021 Advanced Micro Devices, Inc. + * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. * * Author: Tom Lendacky <thomas.lendacky@amd.com> */ #include <linux/export.h> #include <linux/cc_platform.h> +#include <linux/string.h> +#include <linux/random.h> +#include <asm/archrandom.h> #include <asm/coco.h> #include <asm/processor.h> enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; u64 cc_mask __ro_after_init; +static struct cc_attr_flags { + __u64 host_sev_snp : 1, + __resv : 63; +} cc_flags; + static bool noinstr intel_cc_platform_has(enum cc_attr attr) { switch (attr) { @@ -89,6 +98,9 @@ static bool noinstr amd_cc_platform_has(enum cc_attr attr) case CC_ATTR_GUEST_SEV_SNP: return sev_status & MSR_AMD64_SEV_SNP_ENABLED; + case CC_ATTR_HOST_SEV_SNP: + return cc_flags.host_sev_snp; + default: return false; } @@ -148,3 +160,84 @@ u64 cc_mkdec(u64 val) } } EXPORT_SYMBOL_GPL(cc_mkdec); + +static void amd_cc_platform_clear(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 0; + break; + default: + break; + } +} + +void cc_platform_clear(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_clear(attr); + break; + default: + break; + } +} + +static void amd_cc_platform_set(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_HOST_SEV_SNP: + cc_flags.host_sev_snp = 1; + break; + default: + break; + } +} + +void cc_platform_set(enum cc_attr attr) +{ + switch (cc_vendor) { + case CC_VENDOR_AMD: + amd_cc_platform_set(attr); + break; + default: + break; + } +} + +__init void cc_random_init(void) +{ + /* + * The seed is 32 bytes (in units of longs), which is 256 bits, which + * is the security level that the RNG is targeting. + */ + unsigned long rng_seed[32 / sizeof(long)]; + size_t i, longs; + + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + return; + + /* + * Since the CoCo threat model includes the host, the only reliable + * source of entropy that can be neither observed nor manipulated is + * RDRAND. Usually, RDRAND failure is considered tolerable, but since + * CoCo guests have no other unobservable source of entropy, it's + * important to at least ensure the RNG gets some initial random seeds. + */ + for (i = 0; i < ARRAY_SIZE(rng_seed); i += longs) { + longs = arch_get_random_longs(&rng_seed[i], ARRAY_SIZE(rng_seed) - i); + + /* + * A zero return value means that the guest doesn't have RDRAND + * or the CPU is physically broken, and in both cases that + * means most crypto inside of the CoCo instance will be + * broken, defeating the purpose of CoCo in the first place. So + * just panic here because it's absolutely unsafe to continue + * executing. + */ + if (longs == 0) + panic("RDRAND is defective."); + } + add_device_randomness(rng_seed, sizeof(rng_seed)); + memzero_explicit(rng_seed, sizeof(rng_seed)); +} diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6356060caaf3..51cc9c7cb9bd 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -49,7 +49,7 @@ static __always_inline bool do_syscall_x64(struct pt_regs *regs, int nr) if (likely(unr < NR_syscalls)) { unr = array_index_nospec(unr, NR_syscalls); - regs->ax = sys_call_table[unr](regs); + regs->ax = x64_sys_call(regs, unr); return true; } return false; @@ -66,7 +66,7 @@ static __always_inline bool do_syscall_x32(struct pt_regs *regs, int nr) if (IS_ENABLED(CONFIG_X86_X32_ABI) && likely(xnr < X32_NR_syscalls)) { xnr = array_index_nospec(xnr, X32_NR_syscalls); - regs->ax = x32_sys_call_table[xnr](regs); + regs->ax = x32_sys_call(regs, xnr); return true; } return false; @@ -162,7 +162,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) if (likely(unr < IA32_NR_syscalls)) { unr = array_index_nospec(unr, IA32_NR_syscalls); - regs->ax = ia32_sys_call_table[unr](regs); + regs->ax = ia32_sys_call(regs, unr); } else if (nr != -1) { regs->ax = __ia32_sys_ni_syscall(regs); } @@ -189,7 +189,7 @@ static __always_inline bool int80_is_external(void) } /** - * int80_emulation - 32-bit legacy syscall entry + * do_int80_emulation - 32-bit legacy syscall C entry from asm * * This entry point can be used by 32-bit and 64-bit programs to perform * 32-bit system calls. Instances of INT $0x80 can be found inline in @@ -207,7 +207,7 @@ static __always_inline bool int80_is_external(void) * eax: system call number * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 */ -DEFINE_IDTENTRY_RAW(int80_emulation) +__visible noinstr void do_int80_emulation(struct pt_regs *regs) { int nr; @@ -255,6 +255,71 @@ DEFINE_IDTENTRY_RAW(int80_emulation) instrumentation_end(); syscall_exit_to_user_mode(regs); } + +#ifdef CONFIG_X86_FRED +/* + * A FRED-specific INT80 handler is warranted for the follwing reasons: + * + * 1) As INT instructions and hardware interrupts are separate event + * types, FRED does not preclude the use of vector 0x80 for external + * interrupts. As a result, the FRED setup code does not reserve + * vector 0x80 and calling int80_is_external() is not merely + * suboptimal but actively incorrect: it could cause a system call + * to be incorrectly ignored. + * + * 2) It is called only for handling vector 0x80 of event type + * EVENT_TYPE_SWINT and will never be called to handle any external + * interrupt (event type EVENT_TYPE_EXTINT). + * + * 3) FRED has separate entry flows depending on if the event came from + * user space or kernel space, and because the kernel does not use + * INT insns, the FRED kernel entry handler fred_entry_from_kernel() + * falls through to fred_bad_type() if the event type is + * EVENT_TYPE_SWINT, i.e., INT insns. So if the kernel is handling + * an INT insn, it can only be from a user level. + * + * 4) int80_emulation() does a CLEAR_BRANCH_HISTORY. While FRED will + * likely take a different approach if it is ever needed: it + * probably belongs in either fred_intx()/ fred_other() or + * asm_fred_entrypoint_user(), depending on if this ought to be done + * for all entries from userspace or only system + * calls. + * + * 5) INT $0x80 is the fast path for 32-bit system calls under FRED. + */ +DEFINE_FREDENTRY_RAW(int80_emulation) +{ + int nr; + + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* + * FRED pushed 0 into regs::orig_ax and regs::ax contains the + * syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#endif #else /* CONFIG_IA32_EMULATION */ /* Handles int $0x80 on a 32bit kernel */ diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 8af2a26b24f6..1b5be07f8669 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -116,6 +116,7 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL) /* clobbers %rax, make sure it is after saving the syscall nr */ IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY call do_syscall_64 /* returns with IRQs disabled */ @@ -1491,3 +1492,63 @@ SYM_CODE_START_NOALIGN(rewind_stack_and_make_dead) call make_task_dead SYM_CODE_END(rewind_stack_and_make_dead) .popsection + +/* + * This sequence executes branches in order to remove user branch information + * from the branch history tracker in the Branch Predictor, therefore removing + * user influence on subsequent BTB lookups. + * + * It should be used on parts prior to Alder Lake. Newer parts should use the + * BHI_DIS_S hardware control instead. If a pre-Alder Lake part is being + * virtualized on newer hardware the VMM should protect against BHI attacks by + * setting BHI_DIS_S for the guests. + * + * CALLs/RETs are necessary to prevent Loop Stream Detector(LSD) from engaging + * and not clearing the branch history. The call tree looks like: + * + * call 1 + * call 2 + * call 2 + * call 2 + * call 2 + * call 2 + * ret + * ret + * ret + * ret + * ret + * ret + * + * This means that the stack is non-constant and ORC can't unwind it with %rsp + * alone. Therefore we unconditionally set up the frame pointer, which allows + * ORC to unwind properly. + * + * The alignment is for performance and not for safety, and may be safely + * refactored in the future if needed. + */ +SYM_FUNC_START(clear_bhb_loop) + push %rbp + mov %rsp, %rbp + movl $5, %ecx + ANNOTATE_INTRA_FUNCTION_CALL + call 1f + jmp 5f + .align 64, 0xcc + ANNOTATE_INTRA_FUNCTION_CALL +1: call 2f + RET + .align 64, 0xcc +2: movl $5, %eax +3: jmp 4f + nop +4: sub $1, %eax + jnz 3b + sub $1, %ecx + jnz 1b + RET +5: lfence + pop %rbp + RET +SYM_FUNC_END(clear_bhb_loop) +EXPORT_SYMBOL_GPL(clear_bhb_loop) +STACK_FRAME_NON_STANDARD(clear_bhb_loop) diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index eabf48c4d4b4..c779046cc3fe 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -92,6 +92,7 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY /* * SYSENTER doesn't filter flags, so we need to clear NT and AC @@ -206,6 +207,7 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) IBRS_ENTER UNTRAIN_RET + CLEAR_BRANCH_HISTORY movq %rsp, %rdi call do_fast_syscall_32 @@ -276,3 +278,17 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) + +/* + * int 0x80 is used by 32 bit mode as a system call entry. Normally idt entries + * point to C routines, however since this is a system call interface the branch + * history needs to be scrubbed to protect against BHI attacks, and that + * scrubbing needs to take place in assembly code prior to entering any C + * routines. + */ +SYM_CODE_START(int80_emulation) + ANNOTATE_NOENDBR + UNWIND_HINT_FUNC + CLEAR_BRANCH_HISTORY + jmp do_int80_emulation +SYM_CODE_END(int80_emulation) diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c index ac120cbdaaf2..89c1476fcdd9 100644 --- a/arch/x86/entry/entry_fred.c +++ b/arch/x86/entry/entry_fred.c @@ -28,9 +28,9 @@ static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code if (regs->fred_cs.sl > 0) { pr_emerg("PANIC: invalid or fatal FRED event; event type %u " "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n", - regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax, + regs->fred_ss.type, regs->fred_ss.vector, error_code, fred_event_data(regs), regs->cs, regs->ip); - die("invalid or fatal FRED event", regs, regs->orig_ax); + die("invalid or fatal FRED event", regs, error_code); panic("invalid or fatal FRED event"); } else { unsigned long flags = oops_begin(); @@ -38,10 +38,10 @@ static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code pr_alert("BUG: invalid or fatal FRED event; event type %u " "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n", - regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax, + regs->fred_ss.type, regs->fred_ss.vector, error_code, fred_event_data(regs), regs->cs, regs->ip); - if (__die("Invalid or fatal FRED event", regs, regs->orig_ax)) + if (__die("Invalid or fatal FRED event", regs, error_code)) sig = 0; oops_end(flags, regs, sig); @@ -66,7 +66,7 @@ static noinstr void fred_intx(struct pt_regs *regs) /* INT80 */ case IA32_SYSCALL_VECTOR: if (ia32_enabled()) - return int80_emulation(regs); + return fred_int80_emulation(regs); fallthrough; #endif diff --git a/arch/x86/entry/syscall_32.c b/arch/x86/entry/syscall_32.c index 8cfc9bc73e7f..c2235bae17ef 100644 --- a/arch/x86/entry/syscall_32.c +++ b/arch/x86/entry/syscall_32.c @@ -18,8 +18,25 @@ #include <asm/syscalls_32.h> #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ +#ifdef CONFIG_X86_32 #define __SYSCALL(nr, sym) __ia32_##sym, - -__visible const sys_call_ptr_t ia32_sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include <asm/syscalls_32.h> }; +#undef __SYSCALL +#endif + +#define __SYSCALL(nr, sym) case nr: return __ia32_##sym(regs); + +long ia32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_32.h> + default: return __ia32_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_64.c b/arch/x86/entry/syscall_64.c index be120eec1fc9..33b3f09e6f15 100644 --- a/arch/x86/entry/syscall_64.c +++ b/arch/x86/entry/syscall_64.c @@ -11,8 +11,23 @@ #include <asm/syscalls_64.h> #undef __SYSCALL +/* + * The sys_call_table[] is no longer used for system calls, but + * kernel/trace/trace_syscalls.c still wants to know the system + * call address. + */ #define __SYSCALL(nr, sym) __x64_##sym, - -asmlinkage const sys_call_ptr_t sys_call_table[] = { +const sys_call_ptr_t sys_call_table[] = { #include <asm/syscalls_64.h> }; +#undef __SYSCALL + +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); + +long x64_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_64.h> + default: return __x64_sys_ni_syscall(regs); + } +}; diff --git a/arch/x86/entry/syscall_x32.c b/arch/x86/entry/syscall_x32.c index bdd0e03a1265..03de4a932131 100644 --- a/arch/x86/entry/syscall_x32.c +++ b/arch/x86/entry/syscall_x32.c @@ -11,8 +11,12 @@ #include <asm/syscalls_x32.h> #undef __SYSCALL -#define __SYSCALL(nr, sym) __x64_##sym, +#define __SYSCALL(nr, sym) case nr: return __x64_##sym(regs); -asmlinkage const sys_call_ptr_t x32_sys_call_table[] = { -#include <asm/syscalls_x32.h> +long x32_sys_call(const struct pt_regs *regs, unsigned int nr) +{ + switch (nr) { + #include <asm/syscalls_x32.h> + default: return __x64_sys_ni_syscall(regs); + } }; diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index fd63051bbbbb..3d64bcc403cf 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -41,6 +41,7 @@ obj-$(CONFIG_X86_X32_ABI) += vdso-image-x32.o obj-$(CONFIG_COMPAT_32) += vdso-image-32.o vdso32-setup.o OBJECT_FILES_NON_STANDARD_vdso-image-32.o := n +OBJECT_FILES_NON_STANDARD_vdso-image-x32.o := n OBJECT_FILES_NON_STANDARD_vdso-image-64.o := n OBJECT_FILES_NON_STANDARD_vdso32-setup.o := n diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index aec16e581f5b..985ef3b47919 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -250,7 +250,7 @@ static const u64 amd_perfmon_event_map[PERF_COUNT_HW_MAX] = /* * AMD Performance Monitor Family 17h and later: */ -static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = +static const u64 amd_zen1_perfmon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, @@ -262,10 +262,39 @@ static const u64 amd_f17h_perfmon_event_map[PERF_COUNT_HW_MAX] = [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x0187, }; +static const u64 amd_zen2_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, +}; + +static const u64 amd_zen4_perfmon_event_map[PERF_COUNT_HW_MAX] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0xff60, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0964, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c2, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c3, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = 0x00a9, + [PERF_COUNT_HW_REF_CPU_CYCLES] = 0x100000120, +}; + static u64 amd_pmu_event_map(int hw_event) { - if (boot_cpu_data.x86 >= 0x17) - return amd_f17h_perfmon_event_map[hw_event]; + if (cpu_feature_enabled(X86_FEATURE_ZEN4) || boot_cpu_data.x86 >= 0x1a) + return amd_zen4_perfmon_event_map[hw_event]; + + if (cpu_feature_enabled(X86_FEATURE_ZEN2) || boot_cpu_data.x86 >= 0x19) + return amd_zen2_perfmon_event_map[hw_event]; + + if (cpu_feature_enabled(X86_FEATURE_ZEN1)) + return amd_zen1_perfmon_event_map[hw_event]; return amd_perfmon_event_map[hw_event]; } @@ -904,8 +933,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs) if (!status) goto done; - /* Read branch records before unfreezing */ - if (status & GLOBAL_STATUS_LBRS_FROZEN) { + /* Read branch records */ + if (x86_pmu.lbr_nr) { amd_pmu_lbr_read(); status &= ~GLOBAL_STATUS_LBRS_FROZEN; } diff --git a/arch/x86/events/amd/lbr.c b/arch/x86/events/amd/lbr.c index 4a1e600314d5..5149830c7c4f 100644 --- a/arch/x86/events/amd/lbr.c +++ b/arch/x86/events/amd/lbr.c @@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void) wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select); } - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN); } @@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void) return; rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg); - rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); - wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN); - wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + + if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) { + rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl); + wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); + } } __init int amd_pmu_lbr_init(void) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 09050641ce5d..5b0dd07b1ef1 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1644,6 +1644,7 @@ static void x86_pmu_del(struct perf_event *event, int flags) while (++i < cpuc->n_events) { cpuc->event_list[i-1] = cpuc->event_list[i]; cpuc->event_constraint[i-1] = cpuc->event_constraint[i]; + cpuc->assign[i-1] = cpuc->assign[i]; } cpuc->event_constraint[i-1] = NULL; --cpuc->n_events; diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 2641ba620f12..e010bfed8417 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1237,11 +1237,11 @@ pebs_update_state(bool needed_cb, struct cpu_hw_events *cpuc, struct pmu *pmu = event->pmu; /* - * Make sure we get updated with the first PEBS - * event. It will trigger also during removal, but - * that does not hurt: + * Make sure we get updated with the first PEBS event. + * During removal, ->pebs_data_cfg is still valid for + * the last PEBS event. Don't clear it. */ - if (cpuc->n_pebs == 1) + if ((cpuc->n_pebs == 1) && add) cpuc->pebs_data_cfg = PEBS_UPDATE_DS_SW; if (needed_cb != pebs_needs_sched_cb(cpuc)) { diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 78cd5084104e..4367aa77cb8d 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1693,6 +1693,7 @@ void x86_perf_get_lbr(struct x86_pmu_lbr *lbr) lbr->from = x86_pmu.lbr_from; lbr->to = x86_pmu.lbr_to; lbr->info = x86_pmu.lbr_info; + lbr->has_callstack = x86_pmu_has_lbr_callstack(); } EXPORT_SYMBOL_GPL(x86_perf_get_lbr); diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 5fc45543e955..0569f579338b 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -105,7 +105,7 @@ static bool cpu_is_self(int cpu) * IPI implementation on Hyper-V. */ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, - bool exclude_self) + bool exclude_self) { struct hv_send_ipi_ex *ipi_arg; unsigned long flags; @@ -132,8 +132,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, if (!cpumask_equal(mask, cpu_present_mask) || exclude_self) { ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; - nr_bank = cpumask_to_vpset_skip(&(ipi_arg->vp_set), mask, - exclude_self ? cpu_is_self : NULL); + nr_bank = cpumask_to_vpset_skip(&ipi_arg->vp_set, mask, + exclude_self ? cpu_is_self : NULL); /* * 'nr_bank <= 0' means some CPUs in cpumask can't be @@ -147,7 +147,7 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, } status = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, - ipi_arg, NULL); + ipi_arg, NULL); ipi_mask_ex_done: local_irq_restore(flags); @@ -155,7 +155,7 @@ ipi_mask_ex_done: } static bool __send_ipi_mask(const struct cpumask *mask, int vector, - bool exclude_self) + bool exclude_self) { int cur_cpu, vcpu, this_cpu = smp_processor_id(); struct hv_send_ipi ipi_arg; @@ -181,7 +181,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, return false; } - if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR) return false; /* @@ -218,7 +218,7 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector, } status = hv_do_fast_hypercall16(HVCALL_SEND_IPI, ipi_arg.vector, - ipi_arg.cpu_mask); + ipi_arg.cpu_mask); return hv_result_success(status); do_ex_hypercall: @@ -241,7 +241,7 @@ static bool __send_ipi_one(int cpu, int vector) return false; } - if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + if (vector < HV_IPI_LOW_VECTOR || vector > HV_IPI_HIGH_VECTOR) return false; if (vp >= 64) diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c index 68a0843d4750..3fa1f2ee7b0d 100644 --- a/arch/x86/hyperv/hv_proc.c +++ b/arch/x86/hyperv/hv_proc.c @@ -3,7 +3,6 @@ #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/clockchips.h> -#include <linux/acpi.h> #include <linux/hyperv.h> #include <linux/slab.h> #include <linux/cpuhotplug.h> @@ -116,12 +115,11 @@ free_buf: int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) { - struct hv_add_logical_processor_in *input; - struct hv_add_logical_processor_out *output; + struct hv_input_add_logical_processor *input; + struct hv_output_add_logical_processor *output; u64 status; unsigned long flags; int ret = HV_STATUS_SUCCESS; - int pxm = node_to_pxm(node); /* * When adding a logical processor, the hypervisor may return @@ -137,11 +135,7 @@ int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id) input->lp_index = lp_index; input->apic_id = apic_id; - input->flags = 0; - input->proximity_domain_info.domain_id = pxm; - input->proximity_domain_info.flags.reserved = 0; - input->proximity_domain_info.flags.proximity_info_valid = 1; - input->proximity_domain_info.flags.proximity_preferred = 1; + input->proximity_domain_info = hv_numa_node_to_pxm_info(node); status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR, input, output); local_irq_restore(flags); @@ -166,7 +160,6 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) u64 status; unsigned long irq_flags; int ret = HV_STATUS_SUCCESS; - int pxm = node_to_pxm(node); /* Root VPs don't seem to need pages deposited */ if (partition_id != hv_current_partition_id) { @@ -185,14 +178,7 @@ int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags) input->vp_index = vp_index; input->flags = flags; input->subnode_type = HvSubnodeAny; - if (node != NUMA_NO_NODE) { - input->proximity_domain_info.domain_id = pxm; - input->proximity_domain_info.flags.reserved = 0; - input->proximity_domain_info.flags.proximity_info_valid = 1; - input->proximity_domain_info.flags.proximity_preferred = 1; - } else { - input->proximity_domain_info.as_uint64 = 0; - } + input->proximity_domain_info = hv_numa_node_to_pxm_info(node); status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL); local_irq_restore(irq_flags); diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index fcd20c6dc7f9..67b68d0d17d1 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -117,7 +117,7 @@ extern void callthunks_patch_builtin_calls(void); extern void callthunks_patch_module_calls(struct callthunk_sites *sites, struct module *mod); extern void *callthunks_translate_call_dest(void *dest); -extern int x86_call_depth_emit_accounting(u8 **pprog, void *func); +extern int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip); #else static __always_inline void callthunks_patch_builtin_calls(void) {} static __always_inline void @@ -128,7 +128,7 @@ static __always_inline void *callthunks_translate_call_dest(void *dest) return dest; } static __always_inline int x86_call_depth_emit_accounting(u8 **pprog, - void *func) + void *func, void *ip) { return 0; } diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 94ce0f7c9d3a..e6ab0cf15ed5 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -13,6 +13,7 @@ #include <asm/mpspec.h> #include <asm/msr.h> #include <asm/hardirq.h> +#include <asm/io.h> #define ARCH_APICTIMER_STOPS_ON_C3 1 @@ -98,7 +99,7 @@ static inline void native_apic_mem_write(u32 reg, u32 v) static inline u32 native_apic_mem_read(u32 reg) { - return *((volatile u32 *)(APIC_BASE + reg)); + return readl((void __iomem *)(APIC_BASE + reg)); } static inline void native_apic_mem_eoi(void) diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index 076bf8dee702..25466c4d2134 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -14,6 +14,7 @@ #include <asm/asm.h> #include <asm/fred.h> #include <asm/gsseg.h> +#include <asm/nospec-branch.h> #ifndef CONFIG_X86_CMPXCHG64 extern void cmpxchg8b_emu(void); diff --git a/arch/x86/include/asm/barrier.h b/arch/x86/include/asm/barrier.h index fe1e7e3cc844..63bdc6b85219 100644 --- a/arch/x86/include/asm/barrier.h +++ b/arch/x86/include/asm/barrier.h @@ -79,6 +79,9 @@ do { \ #define __smp_mb__before_atomic() do { } while (0) #define __smp_mb__after_atomic() do { } while (0) +/* Writing to CR3 provides a full memory barrier in switch_mm(). */ +#define smp_mb__after_switch_mm() do { } while (0) + #include <asm-generic/barrier.h> #endif /* _ASM_X86_BARRIER_H */ diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h index fb7388bbc212..c086699b0d0c 100644 --- a/arch/x86/include/asm/coco.h +++ b/arch/x86/include/asm/coco.h @@ -22,6 +22,7 @@ static inline void cc_set_mask(u64 mask) u64 cc_mkenc(u64 val); u64 cc_mkdec(u64 val); +void cc_random_init(void); #else #define cc_vendor (CC_VENDOR_NONE) @@ -34,6 +35,7 @@ static inline u64 cc_mkdec(u64 val) { return val; } +static inline void cc_random_init(void) { } #endif #endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index a1273698fc43..686e92d2663e 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -33,6 +33,8 @@ enum cpuid_leafs CPUID_7_EDX, CPUID_8000_001F_EAX, CPUID_8000_0021_EAX, + CPUID_LNX_5, + NR_CPUID_WORDS, }; #define X86_CAP_FMT_NUM "%d:%d" @@ -91,8 +93,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(REQUIRED_MASK, 21, feature_bit) || \ REQUIRED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define DISABLED_MASK_BIT_SET(feature_bit) \ ( CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 0, feature_bit) || \ @@ -116,8 +119,9 @@ extern const char * const x86_bug_flags[NBUGINTS*32]; CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 18, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 19, feature_bit) || \ CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 20, feature_bit) || \ + CHECK_BIT_IN_MASK_WORD(DISABLED_MASK, 21, feature_bit) || \ DISABLED_MASK_CHECK || \ - BUILD_BUG_ON_ZERO(NCAPINTS != 21)) + BUILD_BUG_ON_ZERO(NCAPINTS != 22)) #define cpu_has(c, bit) \ (__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f0337f7bcf16..3c7434329661 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -13,7 +13,7 @@ /* * Defines x86 CPU feature bits */ -#define NCAPINTS 21 /* N 32-bit words worth of info */ +#define NCAPINTS 22 /* N 32-bit words worth of info */ #define NBUGINTS 2 /* N 32-bit bug flags */ /* @@ -460,6 +460,18 @@ #define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ /* + * Extended auxiliary flags: Linux defined - for features scattered in various + * CPUID levels like 0x80000022, etc and Linux defined features. + * + * Reuse free bits when adding new feature flags! + */ +#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */ +#define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* "" Clear branch history at syscall entry using SW loop */ +#define X86_FEATURE_BHI_CTRL (21*32+ 2) /* "" BHI_DIS_S HW control available */ +#define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* "" BHI_DIS_S HW control enabled */ +#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* "" Clear branch history at vmexit using SW loop */ + +/* * BUG word(s) */ #define X86_BUG(x) (NCAPINTS*32 + (x)) @@ -507,4 +519,5 @@ #define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ #define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG(1*32 + 3) /* CPU is affected by Branch History Injection */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/crash_reserve.h b/arch/x86/include/asm/crash_reserve.h index 152239f95541..7835b2cdff04 100644 --- a/arch/x86/include/asm/crash_reserve.h +++ b/arch/x86/include/asm/crash_reserve.h @@ -39,4 +39,6 @@ static inline unsigned long crash_low_size_default(void) #endif } +#define HAVE_ARCH_ADD_CRASH_RES_TO_IOMEM_EARLY + #endif /* _X86_CRASH_RESERVE_H */ diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index da4054fbf533..c492bdc97b05 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -155,6 +155,7 @@ #define DISABLED_MASK18 (DISABLE_IBT) #define DISABLED_MASK19 (DISABLE_SEV_SNP) #define DISABLED_MASK20 0 -#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define DISABLED_MASK21 0 +#define DISABLED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_DISABLED_FEATURES_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 16e07a2eee19..6efd1497b026 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -855,6 +855,7 @@ struct kvm_vcpu_arch { int cpuid_nent; struct kvm_cpuid_entry2 *cpuid_entries; struct kvm_hypervisor_cpuid kvm_cpuid; + bool is_amd_compatible; /* * FIXME: Drop this macro and use KVM_NR_GOVERNED_FEATURES directly diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 05956bd8bacf..e72c2b872957 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -61,10 +61,13 @@ #define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */ #define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT) +#define SPEC_CTRL_BHI_DIS_S_SHIFT 10 /* Disable Branch History Injection behavior */ +#define SPEC_CTRL_BHI_DIS_S BIT(SPEC_CTRL_BHI_DIS_S_SHIFT) /* A mask for bits which the kernel toggles when controlling mitigations */ #define SPEC_CTRL_MITIGATIONS_MASK (SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD \ - | SPEC_CTRL_RRSBA_DIS_S) + | SPEC_CTRL_RRSBA_DIS_S \ + | SPEC_CTRL_BHI_DIS_S) #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ @@ -163,6 +166,10 @@ * are restricted to targets in * kernel. */ +#define ARCH_CAP_BHI_NO BIT(20) /* + * CPU is not affected by Branch + * History Injection. + */ #define ARCH_CAP_PBRSB_NO BIT(24) /* * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index fc3a8a3c7ffe..ff5f1ecc7d1e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -262,11 +262,20 @@ .Lskip_rsb_\@: .endm +/* + * The CALL to srso_alias_untrain_ret() must be patched in directly at + * the spot where untraining must be done, ie., srso_alias_untrain_ret() + * must be the target of a CALL instruction instead of indirectly + * jumping to a wrapper which then calls it. Therefore, this macro is + * called outside of __UNTRAIN_RET below, for the time being, before the + * kernel can support nested alternatives with arbitrary nesting. + */ +.macro CALL_UNTRAIN_RET #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) -#define CALL_UNTRAIN_RET "call entry_untrain_ret" -#else -#define CALL_UNTRAIN_RET "" + ALTERNATIVE_2 "", "call entry_untrain_ret", X86_FEATURE_UNRET, \ + "call srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS #endif +.endm /* * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the @@ -282,8 +291,8 @@ .macro __UNTRAIN_RET ibpb_feature, call_depth_insns #if defined(CONFIG_MITIGATION_RETHUNK) || defined(CONFIG_MITIGATION_IBPB_ENTRY) VALIDATE_UNRET_END - ALTERNATIVE_3 "", \ - CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ + CALL_UNTRAIN_RET + ALTERNATIVE_2 "", \ "call entry_ibpb", \ibpb_feature, \ __stringify(\call_depth_insns), X86_FEATURE_CALL_DEPTH #endif @@ -317,6 +326,19 @@ ALTERNATIVE "", __stringify(verw _ASM_RIP(mds_verw_sel)), X86_FEATURE_CLEAR_CPU_BUF .endm +#ifdef CONFIG_X86_64 +.macro CLEAR_BRANCH_HISTORY + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP +.endm + +.macro CLEAR_BRANCH_HISTORY_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT +.endm +#else +#define CLEAR_BRANCH_HISTORY +#define CLEAR_BRANCH_HISTORY_VMEXIT +#endif + #else /* __ASSEMBLY__ */ #define ANNOTATE_RETPOLINE_SAFE \ @@ -342,6 +364,8 @@ extern void retbleed_return_thunk(void); static inline void retbleed_return_thunk(void) {} #endif +extern void srso_alias_untrain_ret(void); + #ifdef CONFIG_MITIGATION_SRSO extern void srso_return_thunk(void); extern void srso_alias_return_thunk(void); @@ -357,6 +381,10 @@ extern void srso_alias_return_thunk(void); extern void entry_untrain_ret(void); extern void entry_ibpb(void); +#ifdef CONFIG_X86_64 +extern void clear_bhb_loop(void); +#endif + extern void (*x86_return_thunk)(void); extern void __warn_thunk(void); diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 3736b8a46c04..7f1e17250546 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -555,6 +555,7 @@ struct x86_pmu_lbr { unsigned int from; unsigned int to; unsigned int info; + bool has_callstack; }; extern void perf_get_x86_pmu_capability(struct x86_pmu_capability *cap); diff --git a/arch/x86/include/asm/required-features.h b/arch/x86/include/asm/required-features.h index 7ba1726b71c7..e9187ddd3d1f 100644 --- a/arch/x86/include/asm/required-features.h +++ b/arch/x86/include/asm/required-features.h @@ -99,6 +99,7 @@ #define REQUIRED_MASK18 0 #define REQUIRED_MASK19 0 #define REQUIRED_MASK20 0 -#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 21) +#define REQUIRED_MASK21 0 +#define REQUIRED_MASK_CHECK BUILD_BUG_ON_ZERO(NCAPINTS != 22) #endif /* _ASM_X86_REQUIRED_FEATURES_H */ diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 9477b4053bce..7f57382afee4 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -218,17 +218,16 @@ void early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages); void early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages); -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op); void snp_set_memory_shared(unsigned long vaddr, unsigned long npages); void snp_set_memory_private(unsigned long vaddr, unsigned long npages); void snp_set_wakeup_secondary_cpu(void); bool snp_init(struct boot_params *bp); void __noreturn snp_abort(void); +void snp_dmi_setup(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); void snp_accept_memory(phys_addr_t start, phys_addr_t end); u64 snp_get_unsupported_features(u64 status); u64 sev_get_status(void); -void kdump_sev_callback(void); void sev_show_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } @@ -244,12 +243,12 @@ static inline void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } static inline void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, unsigned long npages) { } -static inline void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) { } static inline void snp_set_memory_shared(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_memory_private(unsigned long vaddr, unsigned long npages) { } static inline void snp_set_wakeup_secondary_cpu(void) { } static inline bool snp_init(struct boot_params *bp) { return false; } static inline void snp_abort(void) { } +static inline void snp_dmi_setup(void) { } static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio) { return -ENOTTY; @@ -258,7 +257,6 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } static inline u64 snp_get_unsupported_features(u64 status) { return 0; } static inline u64 sev_get_status(void) { return 0; } -static inline void kdump_sev_callback(void) { } static inline void sev_show_status(void) { } #endif @@ -270,6 +268,7 @@ int psmash(u64 pfn); int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immutable); int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); +void kdump_sev_callback(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } @@ -282,6 +281,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as } static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; } static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} +static inline void kdump_sev_callback(void) { } #endif #endif diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index f44e2f9ab65d..2fc7bc3863ff 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -16,19 +16,17 @@ #include <asm/thread_info.h> /* for TS_COMPAT */ #include <asm/unistd.h> +/* This is used purely for kernel/trace/trace_syscalls.c */ typedef long (*sys_call_ptr_t)(const struct pt_regs *); extern const sys_call_ptr_t sys_call_table[]; -#if defined(CONFIG_X86_32) -#define ia32_sys_call_table sys_call_table -#else /* * These may not exist, but still put the prototypes in so we * can use IS_ENABLED(). */ -extern const sys_call_ptr_t ia32_sys_call_table[]; -extern const sys_call_ptr_t x32_sys_call_table[]; -#endif +extern long ia32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x32_sys_call(const struct pt_regs *, unsigned int nr); +extern long x64_sys_call(const struct pt_regs *, unsigned int nr); /* * Only the low 32 bits of orig_ax are meaningful, so we return int. @@ -127,6 +125,7 @@ static inline int syscall_get_arch(struct task_struct *task) } bool do_syscall_64(struct pt_regs *regs, int nr); +void do_int80_emulation(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index b89b40f250e6..6149eabe200f 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -30,12 +30,13 @@ struct x86_init_mpparse { * @reserve_resources: reserve the standard resources for the * platform * @memory_setup: platform specific memory setup - * + * @dmi_setup: platform specific DMI setup */ struct x86_init_resources { void (*probe_roms)(void); void (*reserve_resources)(void); char *(*memory_setup)(void); + void (*dmi_setup)(void); }; /** diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index ad29984d5e39..ef11aa4cab42 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -694,6 +694,7 @@ enum sev_cmd_id { struct kvm_sev_cmd { __u32 id; + __u32 pad0; __u64 data; __u32 error; __u32 sev_fd; @@ -704,28 +705,35 @@ struct kvm_sev_launch_start { __u32 policy; __u64 dh_uaddr; __u32 dh_len; + __u32 pad0; __u64 session_uaddr; __u32 session_len; + __u32 pad1; }; struct kvm_sev_launch_update_data { __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_launch_secret { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; struct kvm_sev_launch_measure { __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_guest_status { @@ -738,33 +746,43 @@ struct kvm_sev_dbg { __u64 src_uaddr; __u64 dst_uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_attestation_report { __u8 mnonce[16]; __u64 uaddr; __u32 len; + __u32 pad0; }; struct kvm_sev_send_start { __u32 policy; + __u32 pad0; __u64 pdh_cert_uaddr; __u32 pdh_cert_len; + __u32 pad1; __u64 plat_certs_uaddr; __u32 plat_certs_len; + __u32 pad2; __u64 amd_certs_uaddr; __u32 amd_certs_len; + __u32 pad3; __u64 session_uaddr; __u32 session_len; + __u32 pad4; }; struct kvm_sev_send_update_data { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; struct kvm_sev_receive_start { @@ -772,17 +790,22 @@ struct kvm_sev_receive_start { __u32 policy; __u64 pdh_uaddr; __u32 pdh_len; + __u32 pad0; __u64 session_uaddr; __u32 session_len; + __u32 pad1; }; struct kvm_sev_receive_update_data { __u64 hdr_uaddr; __u32 hdr_len; + __u32 pad0; __u64 guest_uaddr; __u32 guest_len; + __u32 pad1; __u64 trans_uaddr; __u32 trans_len; + __u32 pad2; }; #define KVM_X2APIC_API_USE_32BIT_IDS (1ULL << 0) diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 6bc3456a8ebf..a1efa7907a0b 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -142,7 +142,6 @@ struct kvm_vcpu_pv_apf_data { __u32 token; __u8 pad[56]; - __u32 enabled; }; #define KVM_PV_EOI_BIT 0 diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index a42d8a6f7149..c342c4aa9c68 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1687,11 +1687,11 @@ static int x2apic_state; static bool x2apic_hw_locked(void) { - u64 ia32_cap; + u64 x86_arch_cap_msr; u64 msr; - ia32_cap = x86_read_arch_cap_msr(); - if (ia32_cap & ARCH_CAP_XAPIC_DISABLE) { + x86_arch_cap_msr = x86_read_arch_cap_msr(); + if (x86_arch_cap_msr & ARCH_CAP_XAPIC_DISABLE) { rdmsrl(MSR_IA32_XAPIC_DISABLE_STATUS, msr); return (msr & LEGACY_XAPIC_DISABLED); } diff --git a/arch/x86/kernel/callthunks.c b/arch/x86/kernel/callthunks.c index 30335182b6b0..e92ff0c11db8 100644 --- a/arch/x86/kernel/callthunks.c +++ b/arch/x86/kernel/callthunks.c @@ -314,7 +314,7 @@ static bool is_callthunk(void *addr) return !bcmp(pad, insn_buff, tmpl_size); } -int x86_call_depth_emit_accounting(u8 **pprog, void *func) +int x86_call_depth_emit_accounting(u8 **pprog, void *func, void *ip) { unsigned int tmpl_size = SKL_TMPL_SIZE; u8 insn_buff[MAX_PATCH_LEN]; @@ -327,7 +327,7 @@ int x86_call_depth_emit_accounting(u8 **pprog, void *func) return 0; memcpy(insn_buff, skl_call_thunk_template, tmpl_size); - apply_relocation(insn_buff, tmpl_size, *pprog, + apply_relocation(insn_buff, tmpl_size, ip, skl_call_thunk_template, tmpl_size); memcpy(*pprog, insn_buff, tmpl_size); diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6d8677e80ddb..cb9eece55904 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -345,6 +345,28 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } +static void bsp_determine_snp(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM + cc_vendor = CC_VENDOR_AMD; + + if (cpu_has(c, X86_FEATURE_SEV_SNP)) { + /* + * RMP table entry format is not architectural and is defined by the + * per-processor PPR. Restrict SNP support on the known CPU models + * for which the RMP table entry format is currently defined for. + */ + if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && + c->x86 >= 0x19 && snp_probe_rmptable_info()) { + cc_platform_set(CC_ATTR_HOST_SEV_SNP); + } else { + setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + } + } +#endif +} + static void bsp_init_amd(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) { @@ -452,21 +474,7 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) break; } - if (cpu_has(c, X86_FEATURE_SEV_SNP)) { - /* - * RMP table entry format is not architectural and it can vary by processor - * and is defined by the per-processor PPR. Restrict SNP support on the - * known CPU model and family for which the RMP table entry format is - * currently defined for. - */ - if (!boot_cpu_has(X86_FEATURE_ZEN3) && - !boot_cpu_has(X86_FEATURE_ZEN4) && - !boot_cpu_has(X86_FEATURE_ZEN5)) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - else if (!snp_probe_rmptable_info()) - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); - } - + bsp_determine_snp(c); return; warn: @@ -527,7 +535,6 @@ clear_sev: static void early_init_amd(struct cpuinfo_x86 *c) { - u64 value; u32 dummy; if (c->x86 >= 0xf) @@ -595,20 +602,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) early_detect_mem_encrypt(c); - /* Re-enable TopologyExtensions if switched off by BIOS */ - if (c->x86 == 0x15 && - (c->x86_model >= 0x10 && c->x86_model <= 0x6f) && - !cpu_has(c, X86_FEATURE_TOPOEXT)) { - - if (msr_set_bit(0xc0011005, 54) > 0) { - rdmsrl(0xc0011005, value); - if (value & BIT_64(54)) { - set_cpu_cap(c, X86_FEATURE_TOPOEXT); - pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); - } - } - } - if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_IBPB_BRTYPE)) { if (c->x86 == 0x17 && boot_cpu_has(X86_FEATURE_AMD_IBPB)) setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e7ba936d798b..ab18185894df 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -61,6 +61,8 @@ EXPORT_PER_CPU_SYMBOL_GPL(x86_spec_ctrl_current); u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; EXPORT_SYMBOL_GPL(x86_pred_cmd); +static u64 __ro_after_init x86_arch_cap_msr; + static DEFINE_MUTEX(spec_ctrl_mutex); void (*x86_return_thunk)(void) __ro_after_init = __x86_return_thunk; @@ -144,6 +146,8 @@ void __init cpu_select_mitigations(void) x86_spec_ctrl_base &= ~SPEC_CTRL_MITIGATIONS_MASK; } + x86_arch_cap_msr = x86_read_arch_cap_msr(); + /* Select the proper CPU mitigations before patching alternatives: */ spectre_v1_select_mitigation(); spectre_v2_select_mitigation(); @@ -301,8 +305,6 @@ static const char * const taa_strings[] = { static void __init taa_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_TAA)) { taa_mitigation = TAA_MITIGATION_OFF; return; @@ -341,9 +343,8 @@ static void __init taa_select_mitigation(void) * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode * update is required. */ - ia32_cap = x86_read_arch_cap_msr(); - if ( (ia32_cap & ARCH_CAP_MDS_NO) && - !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) + if ( (x86_arch_cap_msr & ARCH_CAP_MDS_NO) && + !(x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR)) taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; /* @@ -401,8 +402,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { @@ -413,8 +412,6 @@ static void __init mmio_select_mitigation(void) if (mmio_mitigation == MMIO_MITIGATION_OFF) return; - ia32_cap = x86_read_arch_cap_msr(); - /* * Enable CPU buffer clear mitigation for host and VMM, if also affected * by MDS or TAA. Otherwise, enable mitigation for VMM only. @@ -437,7 +434,7 @@ static void __init mmio_select_mitigation(void) * be propagated to uncore buffers, clearing the Fill buffers on idle * is required irrespective of SMT state. */ - if (!(ia32_cap & ARCH_CAP_FBSDP_NO)) + if (!(x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) static_branch_enable(&mds_idle_clear); /* @@ -447,10 +444,10 @@ static void __init mmio_select_mitigation(void) * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS * affected systems. */ - if ((ia32_cap & ARCH_CAP_FB_CLEAR) || + if ((x86_arch_cap_msr & ARCH_CAP_FB_CLEAR) || (boot_cpu_has(X86_FEATURE_MD_CLEAR) && boot_cpu_has(X86_FEATURE_FLUSH_L1D) && - !(ia32_cap & ARCH_CAP_MDS_NO))) + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO))) mmio_mitigation = MMIO_MITIGATION_VERW; else mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED; @@ -508,7 +505,7 @@ static void __init rfds_select_mitigation(void) if (rfds_mitigation == RFDS_MITIGATION_OFF) return; - if (x86_read_arch_cap_msr() & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) setup_force_cpu_cap(X86_FEATURE_CLEAR_CPU_BUF); else rfds_mitigation = RFDS_MITIGATION_UCODE_NEEDED; @@ -659,8 +656,6 @@ void update_srbds_msr(void) static void __init srbds_select_mitigation(void) { - u64 ia32_cap; - if (!boot_cpu_has_bug(X86_BUG_SRBDS)) return; @@ -669,8 +664,7 @@ static void __init srbds_select_mitigation(void) * are only exposed to SRBDS when TSX is enabled or when CPU is affected * by Processor MMIO Stale Data vulnerability. */ - ia32_cap = x86_read_arch_cap_msr(); - if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && + if ((x86_arch_cap_msr & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) && !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) srbds_mitigation = SRBDS_MITIGATION_TSX_OFF; else if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) @@ -813,7 +807,7 @@ static void __init gds_select_mitigation(void) /* Will verify below that mitigation _can_ be disabled */ /* No microcode */ - if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + if (!(x86_arch_cap_msr & ARCH_CAP_GDS_CTRL)) { if (gds_mitigation == GDS_MITIGATION_FORCE) { /* * This only needs to be done on the boot CPU so do it @@ -1544,20 +1538,25 @@ static enum spectre_v2_mitigation __init spectre_v2_select_retpoline(void) return SPECTRE_V2_RETPOLINE; } +static bool __ro_after_init rrsba_disabled; + /* Disable in-kernel use of non-RSB RET predictors */ static void __init spec_ctrl_disable_kernel_rrsba(void) { - u64 ia32_cap; + if (rrsba_disabled) + return; - if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + if (!(x86_arch_cap_msr & ARCH_CAP_RRSBA)) { + rrsba_disabled = true; return; + } - ia32_cap = x86_read_arch_cap_msr(); + if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL)) + return; - if (ia32_cap & ARCH_CAP_RRSBA) { - x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; - update_spec_ctrl(x86_spec_ctrl_base); - } + x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + rrsba_disabled = true; } static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_mitigation mode) @@ -1607,6 +1606,74 @@ static void __init spectre_v2_determine_rsb_fill_type_at_vmexit(enum spectre_v2_ dump_stack(); } +/* + * Set BHI_DIS_S to prevent indirect branches in kernel to be influenced by + * branch history in userspace. Not needed if BHI_NO is set. + */ +static bool __init spec_ctrl_bhi_dis(void) +{ + if (!boot_cpu_has(X86_FEATURE_BHI_CTRL)) + return false; + + x86_spec_ctrl_base |= SPEC_CTRL_BHI_DIS_S; + update_spec_ctrl(x86_spec_ctrl_base); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_HW); + + return true; +} + +enum bhi_mitigations { + BHI_MITIGATION_OFF, + BHI_MITIGATION_ON, +}; + +static enum bhi_mitigations bhi_mitigation __ro_after_init = + IS_ENABLED(CONFIG_MITIGATION_SPECTRE_BHI) ? BHI_MITIGATION_ON : BHI_MITIGATION_OFF; + +static int __init spectre_bhi_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + bhi_mitigation = BHI_MITIGATION_OFF; + else if (!strcmp(str, "on")) + bhi_mitigation = BHI_MITIGATION_ON; + else + pr_err("Ignoring unknown spectre_bhi option (%s)", str); + + return 0; +} +early_param("spectre_bhi", spectre_bhi_parse_cmdline); + +static void __init bhi_select_mitigation(void) +{ + if (bhi_mitigation == BHI_MITIGATION_OFF) + return; + + /* Retpoline mitigates against BHI unless the CPU has RRSBA behavior */ + if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE)) { + spec_ctrl_disable_kernel_rrsba(); + if (rrsba_disabled) + return; + } + + if (spec_ctrl_bhi_dis()) + return; + + if (!IS_ENABLED(CONFIG_X86_64)) + return; + + /* Mitigate KVM by default */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + pr_info("Spectre BHI mitigation: SW BHB clearing on vm exit\n"); + + /* Mitigate syscalls when the mitigation is forced =on */ + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); + pr_info("Spectre BHI mitigation: SW BHB clearing on syscall\n"); +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -1718,6 +1785,9 @@ static void __init spectre_v2_select_mitigation(void) mode == SPECTRE_V2_RETPOLINE) spec_ctrl_disable_kernel_rrsba(); + if (boot_cpu_has(X86_BUG_BHI)) + bhi_select_mitigation(); + spectre_v2_enabled = mode; pr_info("%s\n", spectre_v2_strings[mode]); @@ -1832,8 +1902,6 @@ static void update_indir_branch_cond(void) /* Update the static key controlling the MDS CPU buffer clear in idle */ static void update_mds_branch_idle(void) { - u64 ia32_cap = x86_read_arch_cap_msr(); - /* * Enable the idle clearing if SMT is active on CPUs which are * affected only by MSBDS and not any other MDS variant. @@ -1848,7 +1916,7 @@ static void update_mds_branch_idle(void) if (sched_smt_active()) { static_branch_enable(&mds_idle_clear); } else if (mmio_mitigation == MMIO_MITIGATION_OFF || - (ia32_cap & ARCH_CAP_FBSDP_NO)) { + (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO)) { static_branch_disable(&mds_idle_clear); } } @@ -2695,15 +2763,15 @@ static char *stibp_state(void) switch (spectre_v2_user_stibp) { case SPECTRE_V2_USER_NONE: - return ", STIBP: disabled"; + return "; STIBP: disabled"; case SPECTRE_V2_USER_STRICT: - return ", STIBP: forced"; + return "; STIBP: forced"; case SPECTRE_V2_USER_STRICT_PREFERRED: - return ", STIBP: always-on"; + return "; STIBP: always-on"; case SPECTRE_V2_USER_PRCTL: case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) - return ", STIBP: conditional"; + return "; STIBP: conditional"; } return ""; } @@ -2712,10 +2780,10 @@ static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { if (static_key_enabled(&switch_mm_always_ibpb)) - return ", IBPB: always-on"; + return "; IBPB: always-on"; if (static_key_enabled(&switch_mm_cond_ibpb)) - return ", IBPB: conditional"; - return ", IBPB: disabled"; + return "; IBPB: conditional"; + return "; IBPB: disabled"; } return ""; } @@ -2725,14 +2793,32 @@ static char *pbrsb_eibrs_state(void) if (boot_cpu_has_bug(X86_BUG_EIBRS_PBRSB)) { if (boot_cpu_has(X86_FEATURE_RSB_VMEXIT_LITE) || boot_cpu_has(X86_FEATURE_RSB_VMEXIT)) - return ", PBRSB-eIBRS: SW sequence"; + return "; PBRSB-eIBRS: SW sequence"; else - return ", PBRSB-eIBRS: Vulnerable"; + return "; PBRSB-eIBRS: Vulnerable"; } else { - return ", PBRSB-eIBRS: Not affected"; + return "; PBRSB-eIBRS: Not affected"; } } +static const char *spectre_bhi_state(void) +{ + if (!boot_cpu_has_bug(X86_BUG_BHI)) + return "; BHI: Not affected"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_HW)) + return "; BHI: BHI_DIS_S"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP)) + return "; BHI: SW loop, KVM: SW loop"; + else if (boot_cpu_has(X86_FEATURE_RETPOLINE) && + !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && + rrsba_disabled) + return "; BHI: Retpoline"; + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + return "; BHI: Vulnerable, KVM: SW loop"; + + return "; BHI: Vulnerable"; +} + static ssize_t spectre_v2_show_state(char *buf) { if (spectre_v2_enabled == SPECTRE_V2_LFENCE) @@ -2745,13 +2831,15 @@ static ssize_t spectre_v2_show_state(char *buf) spectre_v2_enabled == SPECTRE_V2_EIBRS_LFENCE) return sysfs_emit(buf, "Vulnerable: eIBRS+LFENCE with unprivileged eBPF and SMT\n"); - return sysfs_emit(buf, "%s%s%s%s%s%s%s\n", + return sysfs_emit(buf, "%s%s%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], ibpb_state(), - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? "; IBRS_FW" : "", stibp_state(), - boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", + boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? "; RSB filling" : "", pbrsb_eibrs_state(), + spectre_bhi_state(), + /* this should always be at the end */ spectre_v2_module_string()); } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5c1e6d6be267..605c26c009c8 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1120,6 +1120,7 @@ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) #define NO_SPECTRE_V2 BIT(8) #define NO_MMIO BIT(9) #define NO_EIBRS_PBRSB BIT(10) +#define NO_BHI BIT(11) #define VULNWL(vendor, family, model, whitelist) \ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, whitelist) @@ -1182,18 +1183,18 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { VULNWL_INTEL(ATOM_TREMONT_D, NO_ITLB_MULTIHIT | NO_EIBRS_PBRSB), /* AMD Family 0xf - 0x12 */ - VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), - VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO), + VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), + VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_BHI), /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ - VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), - VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB), + VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), + VULNWL_HYGON(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT | NO_MMIO | NO_EIBRS_PBRSB | NO_BHI), /* Zhaoxin Family 7 */ - VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), - VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO), + VULNWL(CENTAUR, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), + VULNWL(ZHAOXIN, 7, X86_MODEL_ANY, NO_SPECTRE_V2 | NO_SWAPGS | NO_MMIO | NO_BHI), {} }; @@ -1283,25 +1284,25 @@ static bool __init cpu_matches(const struct x86_cpu_id *table, unsigned long whi u64 x86_read_arch_cap_msr(void) { - u64 ia32_cap = 0; + u64 x86_arch_cap_msr = 0; if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) - rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + rdmsrl(MSR_IA32_ARCH_CAPABILITIES, x86_arch_cap_msr); - return ia32_cap; + return x86_arch_cap_msr; } -static bool arch_cap_mmio_immune(u64 ia32_cap) +static bool arch_cap_mmio_immune(u64 x86_arch_cap_msr) { - return (ia32_cap & ARCH_CAP_FBSDP_NO && - ia32_cap & ARCH_CAP_PSDP_NO && - ia32_cap & ARCH_CAP_SBDR_SSDP_NO); + return (x86_arch_cap_msr & ARCH_CAP_FBSDP_NO && + x86_arch_cap_msr & ARCH_CAP_PSDP_NO && + x86_arch_cap_msr & ARCH_CAP_SBDR_SSDP_NO); } -static bool __init vulnerable_to_rfds(u64 ia32_cap) +static bool __init vulnerable_to_rfds(u64 x86_arch_cap_msr) { /* The "immunity" bit trumps everything else: */ - if (ia32_cap & ARCH_CAP_RFDS_NO) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_NO) return false; /* @@ -1309,7 +1310,7 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) * indicate that mitigation is needed because guest is running on a * vulnerable hardware or may migrate to such hardware: */ - if (ia32_cap & ARCH_CAP_RFDS_CLEAR) + if (x86_arch_cap_msr & ARCH_CAP_RFDS_CLEAR) return true; /* Only consult the blacklist when there is no enumeration: */ @@ -1318,11 +1319,11 @@ static bool __init vulnerable_to_rfds(u64 ia32_cap) static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { - u64 ia32_cap = x86_read_arch_cap_msr(); + u64 x86_arch_cap_msr = x86_read_arch_cap_msr(); /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ if (!cpu_matches(cpu_vuln_whitelist, NO_ITLB_MULTIHIT) && - !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PSCHANGE_MC_NO)) setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); if (cpu_matches(cpu_vuln_whitelist, NO_SPECULATION)) @@ -1334,7 +1335,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (!cpu_matches(cpu_vuln_whitelist, NO_SSB) && - !(ia32_cap & ARCH_CAP_SSB_NO) && + !(x86_arch_cap_msr & ARCH_CAP_SSB_NO) && !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); @@ -1345,17 +1346,17 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Don't use AutoIBRS when SNP is enabled because it degrades host * userspace indirect branch performance. */ - if ((ia32_cap & ARCH_CAP_IBRS_ALL) || + if ((x86_arch_cap_msr & ARCH_CAP_IBRS_ALL) || (cpu_has(c, X86_FEATURE_AUTOIBRS) && !cpu_feature_enabled(X86_FEATURE_SEV_SNP))) { setup_force_cpu_cap(X86_FEATURE_IBRS_ENHANCED); if (!cpu_matches(cpu_vuln_whitelist, NO_EIBRS_PBRSB) && - !(ia32_cap & ARCH_CAP_PBRSB_NO)) + !(x86_arch_cap_msr & ARCH_CAP_PBRSB_NO)) setup_force_cpu_bug(X86_BUG_EIBRS_PBRSB); } if (!cpu_matches(cpu_vuln_whitelist, NO_MDS) && - !(ia32_cap & ARCH_CAP_MDS_NO)) { + !(x86_arch_cap_msr & ARCH_CAP_MDS_NO)) { setup_force_cpu_bug(X86_BUG_MDS); if (cpu_matches(cpu_vuln_whitelist, MSBDS_ONLY)) setup_force_cpu_bug(X86_BUG_MSBDS_ONLY); @@ -1374,9 +1375,9 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * TSX_CTRL check alone is not sufficient for cases when the microcode * update is not present or running as guest that don't get TSX_CTRL. */ - if (!(ia32_cap & ARCH_CAP_TAA_NO) && + if (!(x86_arch_cap_msr & ARCH_CAP_TAA_NO) && (cpu_has(c, X86_FEATURE_RTM) || - (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) + (x86_arch_cap_msr & ARCH_CAP_TSX_CTRL_MSR))) setup_force_cpu_bug(X86_BUG_TAA); /* @@ -1402,7 +1403,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ - if (!arch_cap_mmio_immune(ia32_cap)) { + if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) @@ -1410,7 +1411,7 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { - if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA)) + if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (x86_arch_cap_msr & ARCH_CAP_RSBA)) setup_force_cpu_bug(X86_BUG_RETBLEED); } @@ -1428,18 +1429,25 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * disabling AVX2. The only way to do this in HW is to clear XCR0[2], * which means that AVX will be disabled. */ - if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(x86_arch_cap_msr & ARCH_CAP_GDS_NO) && boot_cpu_has(X86_FEATURE_AVX)) setup_force_cpu_bug(X86_BUG_GDS); - if (vulnerable_to_rfds(ia32_cap)) + if (vulnerable_to_rfds(x86_arch_cap_msr)) setup_force_cpu_bug(X86_BUG_RFDS); + /* When virtualized, eIBRS could be hidden, assume vulnerable */ + if (!(x86_arch_cap_msr & ARCH_CAP_BHI_NO) && + !cpu_matches(cpu_vuln_whitelist, NO_BHI) && + (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED) || + boot_cpu_has(X86_FEATURE_HYPERVISOR))) + setup_force_cpu_bug(X86_BUG_BHI); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; /* Rogue Data Cache Load? No! */ - if (ia32_cap & ARCH_CAP_RDCL_NO) + if (x86_arch_cap_msr & ARCH_CAP_RDCL_NO) return; setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index b7174209d855..946813d816bf 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -44,7 +44,10 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_F16C, X86_FEATURE_XMM2, }, { X86_FEATURE_AES, X86_FEATURE_XMM2 }, { X86_FEATURE_SHA_NI, X86_FEATURE_XMM2 }, + { X86_FEATURE_GFNI, X86_FEATURE_XMM2 }, { X86_FEATURE_FMA, X86_FEATURE_AVX }, + { X86_FEATURE_VAES, X86_FEATURE_AVX }, + { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX }, { X86_FEATURE_AVX2, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512F, X86_FEATURE_AVX, }, { X86_FEATURE_AVX512IFMA, X86_FEATURE_AVX512F }, @@ -56,9 +59,6 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_AVX512VL, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512VBMI, X86_FEATURE_AVX512F }, { X86_FEATURE_AVX512_VBMI2, X86_FEATURE_AVX512VL }, - { X86_FEATURE_GFNI, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VAES, X86_FEATURE_AVX512VL }, - { X86_FEATURE_VPCLMULQDQ, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_VNNI, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_BITALG, X86_FEATURE_AVX512VL }, { X86_FEATURE_AVX512_4VNNIW, X86_FEATURE_AVX512F }, diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index b5cc557cfc37..84d41be6d06b 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -2500,12 +2500,14 @@ static ssize_t set_bank(struct device *s, struct device_attribute *attr, return -EINVAL; b = &per_cpu(mce_banks_array, s->id)[bank]; - if (!b->init) return -ENODEV; b->ctl = new; + + mutex_lock(&mce_sysfs_mutex); mce_restart(); + mutex_unlock(&mce_sysfs_mutex); return size; } diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 422a4ddc2ab7..7b29ebda024f 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c @@ -108,7 +108,7 @@ static inline void k8_check_syscfg_dram_mod_en(void) (boot_cpu_data.x86 >= 0x0f))) return; - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return; rdmsr(MSR_AMD64_SYSCFG, lo, hi); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index c99f26ebe7a6..1a8687f8073a 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -78,7 +78,8 @@ cpumask_any_housekeeping(const struct cpumask *mask, int exclude_cpu) else cpu = cpumask_any_but(mask, exclude_cpu); - if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) + /* Only continue if tick_nohz_full_mask has been initialized. */ + if (!tick_nohz_full_enabled()) return cpu; /* If the CPU picked isn't marked nohz_full nothing more needs doing. */ diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 0dad49a09b7a..af5aa2c754c2 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -28,6 +28,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 }, + { X86_FEATURE_BHI_CTRL, CPUID_EDX, 4, 0x00000007, 2 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, @@ -49,6 +50,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_BMEC, CPUID_EBX, 3, 0x80000020, 0 }, { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, + { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index aaca8d235dc2..d17c9b71eb4a 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -123,7 +123,6 @@ static void topo_set_cpuids(unsigned int cpu, u32 apic_id, u32 acpi_id) early_per_cpu(x86_cpu_to_apicid, cpu) = apic_id; early_per_cpu(x86_cpu_to_acpiid, cpu) = acpi_id; #endif - set_cpu_possible(cpu, true); set_cpu_present(cpu, true); } @@ -210,7 +209,11 @@ static __init void topo_register_apic(u32 apic_id, u32 acpi_id, bool present) topo_info.nr_disabled_cpus++; } - /* Register present and possible CPUs in the domain maps */ + /* + * Register present and possible CPUs in the domain + * maps. cpu_possible_map will be updated in + * topology_init_possible_cpus() after enumeration is done. + */ for (dom = TOPO_SMT_DOMAIN; dom < TOPO_MAX_DOMAIN; dom++) set_bit(topo_apicid(apic_id, dom), apic_maps[dom].map); } diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 1a8b3ad493af..a7aa6eff4ae5 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -29,11 +29,21 @@ static bool parse_8000_0008(struct topo_scan *tscan) if (!sft) sft = get_count_order(ecx.cpu_nthreads + 1); - topology_set_dom(tscan, TOPO_SMT_DOMAIN, sft, ecx.cpu_nthreads + 1); + /* + * cpu_nthreads describes the number of threads in the package + * sft is the number of APIC ID bits per package + * + * As the number of actual threads per core is not described in + * this leaf, just set the CORE domain shift and let the later + * parsers set SMT shift. Assume one thread per core by default + * which is correct if there are no other CPUID leafs to parse. + */ + topology_update_dom(tscan, TOPO_SMT_DOMAIN, 0, 1); + topology_set_dom(tscan, TOPO_CORE_DOMAIN, sft, ecx.cpu_nthreads + 1); return true; } -static void store_node(struct topo_scan *tscan, unsigned int nr_nodes, u16 node_id) +static void store_node(struct topo_scan *tscan, u16 nr_nodes, u16 node_id) { /* * Starting with Fam 17h the DIE domain could probably be used to @@ -73,12 +83,14 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) tscan->c->topo.initial_apicid = leaf.ext_apic_id; /* - * If leaf 0xb is available, then SMT shift is set already. If not - * take it from ecx.threads_per_core and use topo_update_dom() - - * topology_set_dom() would propagate and overwrite the already - * propagated CORE level. + * If leaf 0xb is available, then the domain shifts are set + * already and nothing to do here. */ if (!has_0xb) { + /* + * Leaf 0x80000008 set the CORE domain shift already. + * Update the SMT domain, but do not propagate it. + */ unsigned int nthreads = leaf.core_nthreads + 1; topology_update_dom(tscan, TOPO_SMT_DOMAIN, get_count_order(nthreads), nthreads); @@ -109,13 +121,13 @@ static bool parse_8000_001e(struct topo_scan *tscan, bool has_0xb) static bool parse_fam10h_node_id(struct topo_scan *tscan) { - struct { - union { + union { + struct { u64 node_id : 3, nodes_per_pkg : 3, unused : 58; - u64 msr; }; + u64 msr; } nid; if (!boot_cpu_has(X86_FEATURE_NODEID_MSR)) @@ -135,6 +147,26 @@ static void legacy_set_llc(struct topo_scan *tscan) tscan->c->topo.llc_id = apicid >> tscan->dom_shifts[TOPO_CORE_DOMAIN]; } +static void topoext_fixup(struct topo_scan *tscan) +{ + struct cpuinfo_x86 *c = tscan->c; + u64 msrval; + + /* Try to re-enable TopologyExtensions if switched off by BIOS */ + if (cpu_has(c, X86_FEATURE_TOPOEXT) || c->x86_vendor != X86_VENDOR_AMD || + c->x86 != 0x15 || c->x86_model < 0x10 || c->x86_model > 0x6f) + return; + + if (msr_set_bit(0xc0011005, 54) <= 0) + return; + + rdmsrl(0xc0011005, msrval); + if (msrval & BIT_64(54)) { + set_cpu_cap(c, X86_FEATURE_TOPOEXT); + pr_info_once(FW_INFO "CPU: Re-enabling disabled Topology Extensions Support.\n"); + } +} + static void parse_topology_amd(struct topo_scan *tscan) { bool has_0xb = false; @@ -164,6 +196,7 @@ static void parse_topology_amd(struct topo_scan *tscan) void cpu_parse_topology_amd(struct topo_scan *tscan) { tscan->amd_nodes_per_pkg = 1; + topoext_fixup(tscan); parse_topology_amd(tscan); if (tscan->amd_nodes_per_pkg > 1) diff --git a/arch/x86/kernel/eisa.c b/arch/x86/kernel/eisa.c index e963344b0449..53935b4d62e3 100644 --- a/arch/x86/kernel/eisa.c +++ b/arch/x86/kernel/eisa.c @@ -2,6 +2,7 @@ /* * EISA specific code */ +#include <linux/cc_platform.h> #include <linux/ioport.h> #include <linux/eisa.h> #include <linux/io.h> @@ -12,7 +13,7 @@ static __init int eisa_bus_probe(void) { void __iomem *p; - if (xen_pv_domain() && !xen_initial_domain()) + if ((xen_pv_domain() && !xen_initial_domain()) || cc_platform_has(CC_ATTR_GUEST_SEV_SNP)) return 0; p = ioremap(0x0FFFD9, 4); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 4cadfd606e8e..7f0732bc0ccd 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -65,6 +65,7 @@ static int __init parse_no_stealacc(char *arg) early_param("no-steal-acc", parse_no_stealacc); +static DEFINE_PER_CPU_READ_MOSTLY(bool, async_pf_enabled); static DEFINE_PER_CPU_DECRYPTED(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); DEFINE_PER_CPU_DECRYPTED(struct kvm_steal_time, steal_time) __aligned(64) __visible; static int has_steal_clock = 0; @@ -244,7 +245,7 @@ noinstr u32 kvm_read_and_reset_apf_flags(void) { u32 flags = 0; - if (__this_cpu_read(apf_reason.enabled)) { + if (__this_cpu_read(async_pf_enabled)) { flags = __this_cpu_read(apf_reason.flags); __this_cpu_write(apf_reason.flags, 0); } @@ -295,7 +296,7 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_kvm_asyncpf_interrupt) inc_irq_stat(irq_hv_callback_count); - if (__this_cpu_read(apf_reason.enabled)) { + if (__this_cpu_read(async_pf_enabled)) { token = __this_cpu_read(apf_reason.token); kvm_async_pf_task_wake(token); __this_cpu_write(apf_reason.token, 0); @@ -362,7 +363,7 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_INT, HYPERVISOR_CALLBACK_VECTOR); wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); - __this_cpu_write(apf_reason.enabled, 1); + __this_cpu_write(async_pf_enabled, true); pr_debug("setup async PF for cpu %d\n", smp_processor_id()); } @@ -383,11 +384,11 @@ static void kvm_guest_cpu_init(void) static void kvm_pv_disable_apf(void) { - if (!__this_cpu_read(apf_reason.enabled)) + if (!__this_cpu_read(async_pf_enabled)) return; wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); - __this_cpu_write(apf_reason.enabled, 0); + __this_cpu_write(async_pf_enabled, false); pr_debug("disable async PF for cpu %d\n", smp_processor_id()); } diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 9a5b372c706f..ed163c8c8604 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -580,7 +580,7 @@ EXPORT_SYMBOL_GPL(asm_exc_nmi_kvm_vmx); static char *nmi_check_stall_msg[] = { /* */ -/* +--------- nsp->idt_seq_snap & 0x1: CPU is in NMI handler. */ +/* +--------- nmi_seq & 0x1: CPU is currently in NMI handler. */ /* | +------ cpu_is_offline(cpu) */ /* | | +--- nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls): */ /* | | | NMI handler has been invoked. */ @@ -628,22 +628,26 @@ void nmi_backtrace_stall_check(const struct cpumask *btp) nmi_seq = READ_ONCE(nsp->idt_nmi_seq); if (nsp->idt_nmi_seq_snap + 1 == nmi_seq && (nmi_seq & 0x1)) { msgp = "CPU entered NMI handler function, but has not exited"; - } else if ((nsp->idt_nmi_seq_snap & 0x1) != (nmi_seq & 0x1)) { - msgp = "CPU is handling NMIs"; - } else { - idx = ((nsp->idt_seq_snap & 0x1) << 2) | + } else if (nsp->idt_nmi_seq_snap == nmi_seq || + nsp->idt_nmi_seq_snap + 1 == nmi_seq) { + idx = ((nmi_seq & 0x1) << 2) | (cpu_is_offline(cpu) << 1) | (nsp->idt_calls_snap != atomic_long_read(&nsp->idt_calls)); msgp = nmi_check_stall_msg[idx]; if (nsp->idt_ignored_snap != READ_ONCE(nsp->idt_ignored) && (idx & 0x1)) modp = ", but OK because ignore_nmis was set"; - if (nmi_seq & 0x1) - msghp = " (CPU currently in NMI handler function)"; - else if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) + if (nsp->idt_nmi_seq_snap + 1 == nmi_seq) msghp = " (CPU exited one NMI handler function)"; + else if (nmi_seq & 0x1) + msghp = " (CPU currently in NMI handler function)"; + else + msghp = " (CPU was never in an NMI handler function)"; + } else { + msgp = "CPU is handling NMIs"; } - pr_alert("%s: CPU %d: %s%s%s, last activity: %lu jiffies ago.\n", - __func__, cpu, msgp, modp, msghp, j - READ_ONCE(nsp->recv_jiffies)); + pr_alert("%s: CPU %d: %s%s%s\n", __func__, cpu, msgp, modp, msghp); + pr_alert("%s: last activity: %lu jiffies ago.\n", + __func__, j - READ_ONCE(nsp->recv_jiffies)); } } diff --git a/arch/x86/kernel/probe_roms.c b/arch/x86/kernel/probe_roms.c index 319fef37d9dc..cc2c34ba7228 100644 --- a/arch/x86/kernel/probe_roms.c +++ b/arch/x86/kernel/probe_roms.c @@ -203,16 +203,6 @@ void __init probe_roms(void) unsigned char c; int i; - /* - * The ROM memory range is not part of the e820 table and is therefore not - * pre-validated by BIOS. The kernel page table maps the ROM region as encrypted - * memory, and SNP requires encrypted memory to be validated before access. - * Do that here. - */ - snp_prep_memory(video_rom_resource.start, - ((system_rom_resource.end + 1) - video_rom_resource.start), - SNP_PAGE_STATE_PRIVATE); - /* video rom */ upper = adapter_rom_resources[0].start; for (start = video_rom_resource.start; start < upper; start += 2048) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index ef206500ed6f..e125e059e2c4 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -9,7 +9,6 @@ #include <linux/console.h> #include <linux/crash_dump.h> #include <linux/dma-map-ops.h> -#include <linux/dmi.h> #include <linux/efi.h> #include <linux/ima.h> #include <linux/init_ohci1394_dma.h> @@ -36,6 +35,7 @@ #include <asm/bios_ebda.h> #include <asm/bugs.h> #include <asm/cacheinfo.h> +#include <asm/coco.h> #include <asm/cpu.h> #include <asm/efi.h> #include <asm/gart.h> @@ -902,7 +902,7 @@ void __init setup_arch(char **cmdline_p) efi_init(); reserve_ibft_region(); - dmi_setup(); + x86_init.resources.dmi_setup(); /* * VMware detection requires dmi to be available, so this @@ -992,6 +992,7 @@ void __init setup_arch(char **cmdline_p) * memory size. */ mem_encrypt_setup_arch(); + cc_random_init(); efi_fake_memmap(); efi_find_mirror(); diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index b59b09c2f284..38ad066179d8 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -23,6 +23,7 @@ #include <linux/platform_device.h> #include <linux/io.h> #include <linux/psp-sev.h> +#include <linux/dmi.h> #include <uapi/linux/sev-guest.h> #include <asm/init.h> @@ -795,21 +796,6 @@ void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr early_set_pages_state(vaddr, paddr, npages, SNP_PAGE_STATE_SHARED); } -void __init snp_prep_memory(unsigned long paddr, unsigned int sz, enum psc_op op) -{ - unsigned long vaddr, npages; - - vaddr = (unsigned long)__va(paddr); - npages = PAGE_ALIGN(sz) >> PAGE_SHIFT; - - if (op == SNP_PAGE_STATE_PRIVATE) - early_snp_set_memory_private(vaddr, paddr, npages); - else if (op == SNP_PAGE_STATE_SHARED) - early_snp_set_memory_shared(vaddr, paddr, npages); - else - WARN(1, "invalid memory op %d\n", op); -} - static unsigned long __set_pages_state(struct snp_psc_desc *data, unsigned long vaddr, unsigned long vaddr_end, int op) { @@ -2136,6 +2122,17 @@ void __head __noreturn snp_abort(void) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); } +/* + * SEV-SNP guests should only execute dmi_setup() if EFI_CONFIG_TABLES are + * enabled, as the alternative (fallback) logic for DMI probing in the legacy + * ROM region can cause a crash since this region is not pre-validated. + */ +void __init snp_dmi_setup(void) +{ + if (efi_enabled(EFI_CONFIG_TABLES)) + dmi_setup(); +} + static void dump_cpuid_table(void) { const struct snp_cpuid_table *cpuid_table = snp_cpuid_get_table(); @@ -2287,16 +2284,6 @@ static int __init snp_init_platform_device(void) } device_initcall(snp_init_platform_device); -void kdump_sev_callback(void) -{ - /* - * Do wbinvd() on remote CPUs when SNP is enabled in order to - * safely do SNP_SHUTDOWN on the local CPU. - */ - if (cpu_feature_enabled(X86_FEATURE_SEV_SNP)) - wbinvd(); -} - void sev_show_status(void) { int i; diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index a42830dc151b..d5dc5a92635a 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -3,6 +3,7 @@ * * For licencing details see kernel-base/COPYING */ +#include <linux/dmi.h> #include <linux/init.h> #include <linux/ioport.h> #include <linux/export.h> @@ -66,6 +67,7 @@ struct x86_init_ops x86_init __initdata = { .probe_roms = probe_roms, .reserve_resources = reserve_standard_io_resources, .memory_setup = e820__memory_setup_default, + .dmi_setup = dmi_setup, }, .mpparse = { diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 3aaf7e86a859..0ebdd088f28b 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -122,6 +122,7 @@ config KVM_AMD_SEV default y depends on KVM_AMD && X86_64 depends on CRYPTO_DEV_SP_PSP && !(KVM_AMD=y && CRYPTO_DEV_CCP_DD=m) + select ARCH_HAS_CC_PLATFORM help Provides support for launching Encrypted VMs (SEV) and Encrypted VMs with Encrypted State (SEV-ES) on AMD processors. diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index a88bb14266b6..addc44fc7187 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -3,11 +3,6 @@ ccflags-y += -I $(srctree)/arch/x86/kvm ccflags-$(CONFIG_KVM_WERROR) += -Werror -ifeq ($(CONFIG_FRAME_POINTER),y) -OBJECT_FILES_NON_STANDARD_vmx/vmenter.o := y -OBJECT_FILES_NON_STANDARD_svm/vmenter.o := y -endif - include $(srctree)/virt/kvm/Makefile.kvm kvm-y += x86.o emulate.o i8259.o irq.o lapic.o \ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index adba49afb5fe..77352a4abd87 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -189,15 +189,15 @@ static int kvm_cpuid_check_equal(struct kvm_vcpu *vcpu, struct kvm_cpuid_entry2 return 0; } -static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, - const char *sig) +static struct kvm_hypervisor_cpuid __kvm_get_hypervisor_cpuid(struct kvm_cpuid_entry2 *entries, + int nent, const char *sig) { struct kvm_hypervisor_cpuid cpuid = {}; struct kvm_cpuid_entry2 *entry; u32 base; for_each_possible_hypervisor_cpuid_base(base) { - entry = kvm_find_cpuid_entry(vcpu, base); + entry = cpuid_entry2_find(entries, nent, base, KVM_CPUID_INDEX_NOT_SIGNIFICANT); if (entry) { u32 signature[3]; @@ -217,22 +217,29 @@ static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcp return cpuid; } -static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu, - struct kvm_cpuid_entry2 *entries, int nent) +static struct kvm_hypervisor_cpuid kvm_get_hypervisor_cpuid(struct kvm_vcpu *vcpu, + const char *sig) { - u32 base = vcpu->arch.kvm_cpuid.base; - - if (!base) - return NULL; + return __kvm_get_hypervisor_cpuid(vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent, sig); +} - return cpuid_entry2_find(entries, nent, base | KVM_CPUID_FEATURES, +static struct kvm_cpuid_entry2 *__kvm_find_kvm_cpuid_features(struct kvm_cpuid_entry2 *entries, + int nent, u32 kvm_cpuid_base) +{ + return cpuid_entry2_find(entries, nent, kvm_cpuid_base | KVM_CPUID_FEATURES, KVM_CPUID_INDEX_NOT_SIGNIFICANT); } static struct kvm_cpuid_entry2 *kvm_find_kvm_cpuid_features(struct kvm_vcpu *vcpu) { - return __kvm_find_kvm_cpuid_features(vcpu, vcpu->arch.cpuid_entries, - vcpu->arch.cpuid_nent); + u32 base = vcpu->arch.kvm_cpuid.base; + + if (!base) + return NULL; + + return __kvm_find_kvm_cpuid_features(vcpu->arch.cpuid_entries, + vcpu->arch.cpuid_nent, base); } void kvm_update_pv_runtime(struct kvm_vcpu *vcpu) @@ -266,6 +273,7 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e int nent) { struct kvm_cpuid_entry2 *best; + struct kvm_hypervisor_cpuid kvm_cpuid; best = cpuid_entry2_find(entries, nent, 1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); if (best) { @@ -292,10 +300,12 @@ static void __kvm_update_cpuid_runtime(struct kvm_vcpu *vcpu, struct kvm_cpuid_e cpuid_entry_has(best, X86_FEATURE_XSAVEC))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - best = __kvm_find_kvm_cpuid_features(vcpu, entries, nent); - if (kvm_hlt_in_guest(vcpu->kvm) && best && - (best->eax & (1 << KVM_FEATURE_PV_UNHALT))) - best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + kvm_cpuid = __kvm_get_hypervisor_cpuid(entries, nent, KVM_SIGNATURE); + if (kvm_cpuid.base) { + best = __kvm_find_kvm_cpuid_features(entries, nent, kvm_cpuid.base); + if (kvm_hlt_in_guest(vcpu->kvm) && best) + best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT); + } if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) { best = cpuid_entry2_find(entries, nent, 0x1, KVM_CPUID_INDEX_NOT_SIGNIFICANT); @@ -366,6 +376,7 @@ static void kvm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) kvm_update_pv_runtime(vcpu); + vcpu->arch.is_amd_compatible = guest_cpuid_is_amd_or_hygon(vcpu); vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); vcpu->arch.reserved_gpa_bits = kvm_vcpu_reserved_gpa_bits_raw(vcpu); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 856e3037e74f..23dbb9eb277c 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -120,6 +120,16 @@ static inline bool guest_cpuid_is_intel(struct kvm_vcpu *vcpu) return best && is_guest_vendor_intel(best->ebx, best->ecx, best->edx); } +static inline bool guest_cpuid_is_amd_compatible(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.is_amd_compatible; +} + +static inline bool guest_cpuid_is_intel_compatible(struct kvm_vcpu *vcpu) +{ + return !guest_cpuid_is_amd_compatible(vcpu); +} + static inline int guest_cpuid_family(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best; diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index cf37586f0466..ebf41023be38 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2776,7 +2776,8 @@ int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type) trig_mode = reg & APIC_LVT_LEVEL_TRIGGER; r = __apic_accept_irq(apic, mode, vector, 1, trig_mode, NULL); - if (r && lvt_type == APIC_LVTPC) + if (r && lvt_type == APIC_LVTPC && + guest_cpuid_is_intel_compatible(apic->vcpu)) kvm_lapic_set_reg(apic, APIC_LVTPC, reg | APIC_LVT_MASKED); return r; } diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 992e651540e8..db007a4dffa2 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -4935,7 +4935,7 @@ static void reset_guest_rsvds_bits_mask(struct kvm_vcpu *vcpu, context->cpu_role.base.level, is_efer_nx(context), guest_can_use(vcpu, X86_FEATURE_GBPAGES), is_cr4_pse(context), - guest_cpuid_is_amd_or_hygon(vcpu)); + guest_cpuid_is_amd_compatible(vcpu)); } static void __reset_rsvds_bits_mask_ept(struct rsvd_bits_validate *rsvd_check, @@ -5576,9 +5576,9 @@ void kvm_mmu_after_set_cpuid(struct kvm_vcpu *vcpu) * that problem is swept under the rug; KVM's CPUID API is horrific and * it's all but impossible to solve it without introducing a new API. */ - vcpu->arch.root_mmu.root_role.word = 0; - vcpu->arch.guest_mmu.root_role.word = 0; - vcpu->arch.nested_mmu.root_role.word = 0; + vcpu->arch.root_mmu.root_role.invalid = 1; + vcpu->arch.guest_mmu.root_role.invalid = 1; + vcpu->arch.nested_mmu.root_role.invalid = 1; vcpu->arch.root_mmu.cpu_role.ext.valid = 0; vcpu->arch.guest_mmu.cpu_role.ext.valid = 0; vcpu->arch.nested_mmu.cpu_role.ext.valid = 0; @@ -7399,7 +7399,8 @@ bool kvm_arch_post_set_memory_attributes(struct kvm *kvm, * by the memslot, KVM can't use a hugepage due to the * misaligned address regardless of memory attributes. */ - if (gfn >= slot->base_gfn) { + if (gfn >= slot->base_gfn && + gfn + nr_pages <= slot->base_gfn + slot->npages) { if (hugepage_has_attrs(kvm, slot, gfn, level, attrs)) hugepage_clear_mixed(slot, gfn, level); else diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index d078157e62aa..04c1f0957fea 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -1548,17 +1548,21 @@ void kvm_tdp_mmu_try_split_huge_pages(struct kvm *kvm, } } -/* - * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If - * AD bits are enabled, this will involve clearing the dirty bit on each SPTE. - * If AD bits are not enabled, this will require clearing the writable bit on - * each SPTE. Returns true if an SPTE has been changed and the TLBs need to - * be flushed. - */ +static bool tdp_mmu_need_write_protect(struct kvm_mmu_page *sp) +{ + /* + * All TDP MMU shadow pages share the same role as their root, aside + * from level, so it is valid to key off any shadow page to determine if + * write protection is needed for an entire tree. + */ + return kvm_mmu_page_ad_need_write_protect(sp) || !kvm_ad_enabled(); +} + static bool clear_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t start, gfn_t end) { - u64 dbit = kvm_ad_enabled() ? shadow_dirty_mask : PT_WRITABLE_MASK; + const u64 dbit = tdp_mmu_need_write_protect(root) ? PT_WRITABLE_MASK : + shadow_dirty_mask; struct tdp_iter iter; bool spte_set = false; @@ -1573,7 +1577,7 @@ retry: if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true)) continue; - KVM_MMU_WARN_ON(kvm_ad_enabled() && + KVM_MMU_WARN_ON(dbit == shadow_dirty_mask && spte_ad_need_write_protect(iter.old_spte)); if (!(iter.old_spte & dbit)) @@ -1590,11 +1594,9 @@ retry: } /* - * Clear the dirty status of all the SPTEs mapping GFNs in the memslot. If - * AD bits are enabled, this will involve clearing the dirty bit on each SPTE. - * If AD bits are not enabled, this will require clearing the writable bit on - * each SPTE. Returns true if an SPTE has been changed and the TLBs need to - * be flushed. + * Clear the dirty status (D-bit or W-bit) of all the SPTEs mapping GFNs in the + * memslot. Returns true if an SPTE has been changed and the TLBs need to be + * flushed. */ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, const struct kvm_memory_slot *slot) @@ -1610,18 +1612,11 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, return spte_set; } -/* - * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is - * set in mask, starting at gfn. The given memslot is expected to contain all - * the GFNs represented by set bits in the mask. If AD bits are enabled, - * clearing the dirty status will involve clearing the dirty bit on each SPTE - * or, if AD bits are not enabled, clearing the writable bit on each SPTE. - */ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, gfn_t gfn, unsigned long mask, bool wrprot) { - u64 dbit = (wrprot || !kvm_ad_enabled()) ? PT_WRITABLE_MASK : - shadow_dirty_mask; + const u64 dbit = (wrprot || tdp_mmu_need_write_protect(root)) ? PT_WRITABLE_MASK : + shadow_dirty_mask; struct tdp_iter iter; lockdep_assert_held_write(&kvm->mmu_lock); @@ -1633,7 +1628,7 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, if (!mask) break; - KVM_MMU_WARN_ON(kvm_ad_enabled() && + KVM_MMU_WARN_ON(dbit == shadow_dirty_mask && spte_ad_need_write_protect(iter.old_spte)); if (iter.level > PG_LEVEL_4K || @@ -1659,11 +1654,9 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root, } /* - * Clears the dirty status of all the 4k SPTEs mapping GFNs for which a bit is - * set in mask, starting at gfn. The given memslot is expected to contain all - * the GFNs represented by set bits in the mask. If AD bits are enabled, - * clearing the dirty status will involve clearing the dirty bit on each SPTE - * or, if AD bits are not enabled, clearing the writable bit on each SPTE. + * Clear the dirty status (D-bit or W-bit) of all the 4k SPTEs mapping GFNs for + * which a bit is set in mask, starting at gfn. The given memslot is expected to + * contain all the GFNs represented by set bits in the mask. */ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm, struct kvm_memory_slot *slot, diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index c397b28e3d1b..a593b03c9aed 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -775,8 +775,20 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu) pmu->pebs_data_cfg_mask = ~0ull; bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX); - if (vcpu->kvm->arch.enable_pmu) - static_call(kvm_x86_pmu_refresh)(vcpu); + if (!vcpu->kvm->arch.enable_pmu) + return; + + static_call(kvm_x86_pmu_refresh)(vcpu); + + /* + * At RESET, both Intel and AMD CPUs set all enable bits for general + * purpose counters in IA32_PERF_GLOBAL_CTRL (so that software that + * was written for v1 PMUs don't unknowingly leave GP counters disabled + * in the global controls). Emulate that behavior when refreshing the + * PMU so that userspace doesn't need to manually set PERF_GLOBAL_CTRL. + */ + if (kvm_pmu_has_perf_global_ctrl(pmu) && pmu->nr_arch_gp_counters) + pmu->global_ctrl = GENMASK_ULL(pmu->nr_arch_gp_counters - 1, 0); } void kvm_pmu_init(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/reverse_cpuid.h b/arch/x86/kvm/reverse_cpuid.h index aadefcaa9561..2f4e155080ba 100644 --- a/arch/x86/kvm/reverse_cpuid.h +++ b/arch/x86/kvm/reverse_cpuid.h @@ -52,7 +52,7 @@ enum kvm_only_cpuid_leafs { #define X86_FEATURE_IPRED_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 1) #define KVM_X86_FEATURE_RRSBA_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 2) #define X86_FEATURE_DDPD_U KVM_X86_FEATURE(CPUID_7_2_EDX, 3) -#define X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) +#define KVM_X86_FEATURE_BHI_CTRL KVM_X86_FEATURE(CPUID_7_2_EDX, 4) #define X86_FEATURE_MCDT_NO KVM_X86_FEATURE(CPUID_7_2_EDX, 5) /* CPUID level 0x80000007 (EDX). */ @@ -102,10 +102,12 @@ static const struct cpuid_reg reverse_cpuid[] = { */ static __always_inline void reverse_cpuid_check(unsigned int x86_leaf) { + BUILD_BUG_ON(NR_CPUID_WORDS != NCAPINTS); BUILD_BUG_ON(x86_leaf == CPUID_LNX_1); BUILD_BUG_ON(x86_leaf == CPUID_LNX_2); BUILD_BUG_ON(x86_leaf == CPUID_LNX_3); BUILD_BUG_ON(x86_leaf == CPUID_LNX_4); + BUILD_BUG_ON(x86_leaf == CPUID_LNX_5); BUILD_BUG_ON(x86_leaf >= ARRAY_SIZE(reverse_cpuid)); BUILD_BUG_ON(reverse_cpuid[x86_leaf].function == 0); } @@ -126,6 +128,7 @@ static __always_inline u32 __feature_translate(int x86_feature) KVM_X86_TRANSLATE_FEATURE(CONSTANT_TSC); KVM_X86_TRANSLATE_FEATURE(PERFMON_V2); KVM_X86_TRANSLATE_FEATURE(RRSBA_CTRL); + KVM_X86_TRANSLATE_FEATURE(BHI_CTRL); default: return x86_feature; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index ae0ac12382b9..759581bb2128 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -84,9 +84,10 @@ struct enc_region { }; /* Called with the sev_bitmap_lock held, or on shutdown */ -static int sev_flush_asids(int min_asid, int max_asid) +static int sev_flush_asids(unsigned int min_asid, unsigned int max_asid) { - int ret, asid, error = 0; + int ret, error = 0; + unsigned int asid; /* Check if there are any ASIDs to reclaim before performing a flush */ asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid); @@ -116,7 +117,7 @@ static inline bool is_mirroring_enc_context(struct kvm *kvm) } /* Must be called with the sev_bitmap_lock held */ -static bool __sev_recycle_asids(int min_asid, int max_asid) +static bool __sev_recycle_asids(unsigned int min_asid, unsigned int max_asid) { if (sev_flush_asids(min_asid, max_asid)) return false; @@ -143,8 +144,20 @@ static void sev_misc_cg_uncharge(struct kvm_sev_info *sev) static int sev_asid_new(struct kvm_sev_info *sev) { - int asid, min_asid, max_asid, ret; + /* + * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. + * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. + * Note: min ASID can end up larger than the max if basic SEV support is + * effectively disabled by disallowing use of ASIDs for SEV guests. + */ + unsigned int min_asid = sev->es_active ? 1 : min_sev_asid; + unsigned int max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; + unsigned int asid; bool retry = true; + int ret; + + if (min_asid > max_asid) + return -ENOTTY; WARN_ON(sev->misc_cg); sev->misc_cg = get_current_misc_cg(); @@ -157,12 +170,6 @@ static int sev_asid_new(struct kvm_sev_info *sev) mutex_lock(&sev_bitmap_lock); - /* - * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid. - * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1. - */ - min_asid = sev->es_active ? 1 : min_sev_asid; - max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid; again: asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid); if (asid > max_asid) { @@ -179,7 +186,8 @@ again: mutex_unlock(&sev_bitmap_lock); - return asid; + sev->asid = asid; + return 0; e_uncharge: sev_misc_cg_uncharge(sev); put_misc_cg(sev->misc_cg); @@ -187,7 +195,7 @@ e_uncharge: return ret; } -static int sev_get_asid(struct kvm *kvm) +static unsigned int sev_get_asid(struct kvm *kvm) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; @@ -247,21 +255,19 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp) { struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; struct sev_platform_init_args init_args = {0}; - int asid, ret; + int ret; if (kvm->created_vcpus) return -EINVAL; - ret = -EBUSY; if (unlikely(sev->active)) - return ret; + return -EINVAL; sev->active = true; sev->es_active = argp->id == KVM_SEV_ES_INIT; - asid = sev_asid_new(sev); - if (asid < 0) + ret = sev_asid_new(sev); + if (ret) goto e_no_asid; - sev->asid = asid; init_args.probe = false; ret = sev_platform_init(&init_args); @@ -287,8 +293,8 @@ e_no_asid: static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error) { + unsigned int asid = sev_get_asid(kvm); struct sev_data_activate activate; - int asid = sev_get_asid(kvm); int ret; /* activate ASID on the given handle */ @@ -428,7 +434,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, /* Avoid using vmalloc for smaller buffers. */ size = npages * sizeof(struct page *); if (size > PAGE_SIZE) - pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); + pages = __vmalloc(size, GFP_KERNEL_ACCOUNT); else pages = kmalloc(size, GFP_KERNEL_ACCOUNT); @@ -2240,8 +2246,10 @@ void __init sev_hardware_setup(void) goto out; } - sev_asid_count = max_sev_asid - min_sev_asid + 1; - WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + if (min_sev_asid <= max_sev_asid) { + sev_asid_count = max_sev_asid - min_sev_asid + 1; + WARN_ON_ONCE(misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count)); + } sev_supported = true; /* SEV-ES support requested? */ @@ -2272,7 +2280,9 @@ void __init sev_hardware_setup(void) out: if (boot_cpu_has(X86_FEATURE_SEV)) pr_info("SEV %s (ASIDs %u - %u)\n", - sev_supported ? "enabled" : "disabled", + sev_supported ? min_sev_asid <= max_sev_asid ? "enabled" : + "unusable" : + "disabled", min_sev_asid, max_sev_asid); if (boot_cpu_has(X86_FEATURE_SEV_ES)) pr_info("SEV-ES %s (ASIDs %u - %u)\n", @@ -2320,7 +2330,7 @@ int sev_cpu_init(struct svm_cpu_data *sd) */ static void sev_flush_encrypted_page(struct kvm_vcpu *vcpu, void *va) { - int asid = to_kvm_svm(vcpu->kvm)->sev_info.asid; + unsigned int asid = sev_get_asid(vcpu->kvm); /* * Note! The address must be a kernel address, as regular page walk @@ -2638,7 +2648,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) void pre_sev_run(struct vcpu_svm *svm, int cpu) { struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, cpu); - int asid = sev_get_asid(svm->vcpu.kvm); + unsigned int asid = sev_get_asid(svm->vcpu.kvm); /* Assign the asid allocated with this SEV guest */ svm->asid = asid; @@ -3174,7 +3184,7 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) unsigned long pfn; struct page *p; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); /* diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index d1a9f9951635..9aaf83c8d57d 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1503,6 +1503,11 @@ static void svm_vcpu_free(struct kvm_vcpu *vcpu) __free_pages(virt_to_page(svm->msrpm), get_order(MSRPM_SIZE)); } +static struct sev_es_save_area *sev_es_host_save_area(struct svm_cpu_data *sd) +{ + return page_address(sd->save_area) + 0x400; +} + static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1519,12 +1524,8 @@ static void svm_prepare_switch_to_guest(struct kvm_vcpu *vcpu) * or subsequent vmload of host save area. */ vmsave(sd->save_area_pa); - if (sev_es_guest(vcpu->kvm)) { - struct sev_es_save_area *hostsa; - hostsa = (struct sev_es_save_area *)(page_address(sd->save_area) + 0x400); - - sev_es_prepare_switch_to_guest(hostsa); - } + if (sev_es_guest(vcpu->kvm)) + sev_es_prepare_switch_to_guest(sev_es_host_save_area(sd)); if (tsc_scaling) __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio); @@ -4101,6 +4102,7 @@ static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_intercepted) { + struct svm_cpu_data *sd = per_cpu_ptr(&svm_data, vcpu->cpu); struct vcpu_svm *svm = to_svm(vcpu); guest_state_enter_irqoff(); @@ -4108,7 +4110,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in amd_clear_divider(); if (sev_es_guest(vcpu->kvm)) - __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); + __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted, + sev_es_host_save_area(sd)); else __svm_vcpu_run(svm, spec_ctrl_intercepted); diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 7f1fbd874c45..33878efdebc8 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -698,7 +698,8 @@ struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu); /* vmenter.S */ -void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); +void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted, + struct sev_es_save_area *hostsa); void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); #define DEFINE_KVM_GHCB_ACCESSORS(field) \ diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 187018c424bf..a0c8eb37d3e1 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -3,6 +3,7 @@ #include <asm/asm.h> #include <asm/asm-offsets.h> #include <asm/bitsperlong.h> +#include <asm/frame.h> #include <asm/kvm_vcpu_regs.h> #include <asm/nospec-branch.h> #include "kvm-asm-offsets.h" @@ -67,7 +68,7 @@ "", X86_FEATURE_V_SPEC_CTRL 901: .endm -.macro RESTORE_HOST_SPEC_CTRL_BODY +.macro RESTORE_HOST_SPEC_CTRL_BODY spec_ctrl_intercepted:req 900: /* Same for after vmexit. */ mov $MSR_IA32_SPEC_CTRL, %ecx @@ -76,7 +77,7 @@ * Load the value that the guest had written into MSR_IA32_SPEC_CTRL, * if it was not intercepted during guest execution. */ - cmpb $0, (%_ASM_SP) + cmpb $0, \spec_ctrl_intercepted jnz 998f rdmsr movl %eax, SVM_spec_ctrl(%_ASM_DI) @@ -99,6 +100,7 @@ */ SYM_FUNC_START(__svm_vcpu_run) push %_ASM_BP + mov %_ASM_SP, %_ASM_BP #ifdef CONFIG_X86_64 push %r15 push %r14 @@ -268,7 +270,7 @@ SYM_FUNC_START(__svm_vcpu_run) RET RESTORE_GUEST_SPEC_CTRL_BODY - RESTORE_HOST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY (%_ASM_SP) 10: cmpb $0, _ASM_RIP(kvm_rebooting) jne 2b @@ -290,66 +292,68 @@ SYM_FUNC_START(__svm_vcpu_run) SYM_FUNC_END(__svm_vcpu_run) +#ifdef CONFIG_KVM_AMD_SEV + + +#ifdef CONFIG_X86_64 +#define SEV_ES_GPRS_BASE 0x300 +#define SEV_ES_RBX (SEV_ES_GPRS_BASE + __VCPU_REGS_RBX * WORD_SIZE) +#define SEV_ES_RBP (SEV_ES_GPRS_BASE + __VCPU_REGS_RBP * WORD_SIZE) +#define SEV_ES_RSI (SEV_ES_GPRS_BASE + __VCPU_REGS_RSI * WORD_SIZE) +#define SEV_ES_RDI (SEV_ES_GPRS_BASE + __VCPU_REGS_RDI * WORD_SIZE) +#define SEV_ES_R12 (SEV_ES_GPRS_BASE + __VCPU_REGS_R12 * WORD_SIZE) +#define SEV_ES_R13 (SEV_ES_GPRS_BASE + __VCPU_REGS_R13 * WORD_SIZE) +#define SEV_ES_R14 (SEV_ES_GPRS_BASE + __VCPU_REGS_R14 * WORD_SIZE) +#define SEV_ES_R15 (SEV_ES_GPRS_BASE + __VCPU_REGS_R15 * WORD_SIZE) +#endif + /** * __svm_sev_es_vcpu_run - Run a SEV-ES vCPU via a transition to SVM guest mode * @svm: struct vcpu_svm * * @spec_ctrl_intercepted: bool */ SYM_FUNC_START(__svm_sev_es_vcpu_run) - push %_ASM_BP -#ifdef CONFIG_X86_64 - push %r15 - push %r14 - push %r13 - push %r12 -#else - push %edi - push %esi -#endif - push %_ASM_BX + FRAME_BEGIN /* - * Save variables needed after vmexit on the stack, in inverse - * order compared to when they are needed. + * Save non-volatile (callee-saved) registers to the host save area. + * Except for RAX and RSP, all GPRs are restored on #VMEXIT, but not + * saved on VMRUN. */ + mov %rbp, SEV_ES_RBP (%rdx) + mov %r15, SEV_ES_R15 (%rdx) + mov %r14, SEV_ES_R14 (%rdx) + mov %r13, SEV_ES_R13 (%rdx) + mov %r12, SEV_ES_R12 (%rdx) + mov %rbx, SEV_ES_RBX (%rdx) - /* Accessed directly from the stack in RESTORE_HOST_SPEC_CTRL. */ - push %_ASM_ARG2 - - /* Save @svm. */ - push %_ASM_ARG1 - -.ifnc _ASM_ARG1, _ASM_DI /* - * Stash @svm in RDI early. On 32-bit, arguments are in RAX, RCX - * and RDX which are clobbered by RESTORE_GUEST_SPEC_CTRL. + * Save volatile registers that hold arguments that are needed after + * #VMEXIT (RDI=@svm and RSI=@spec_ctrl_intercepted). */ - mov %_ASM_ARG1, %_ASM_DI -.endif + mov %rdi, SEV_ES_RDI (%rdx) + mov %rsi, SEV_ES_RSI (%rdx) - /* Clobbers RAX, RCX, RDX. */ + /* Clobbers RAX, RCX, RDX (@hostsa). */ RESTORE_GUEST_SPEC_CTRL /* Get svm->current_vmcb->pa into RAX. */ - mov SVM_current_vmcb(%_ASM_DI), %_ASM_AX - mov KVM_VMCB_pa(%_ASM_AX), %_ASM_AX + mov SVM_current_vmcb(%rdi), %rax + mov KVM_VMCB_pa(%rax), %rax /* Enter guest mode */ sti -1: vmrun %_ASM_AX +1: vmrun %rax 2: cli - /* Pop @svm to RDI, guest registers have been saved already. */ - pop %_ASM_DI - #ifdef CONFIG_MITIGATION_RETPOLINE /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */ - FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE + FILL_RETURN_BUFFER %rax, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE #endif - /* Clobbers RAX, RCX, RDX. */ + /* Clobbers RAX, RCX, RDX, consumes RDI (@svm) and RSI (@spec_ctrl_intercepted). */ RESTORE_HOST_SPEC_CTRL /* @@ -361,30 +365,17 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) */ UNTRAIN_RET_VM - /* "Pop" @spec_ctrl_intercepted. */ - pop %_ASM_BX - - pop %_ASM_BX - -#ifdef CONFIG_X86_64 - pop %r12 - pop %r13 - pop %r14 - pop %r15 -#else - pop %esi - pop %edi -#endif - pop %_ASM_BP + FRAME_END RET RESTORE_GUEST_SPEC_CTRL_BODY - RESTORE_HOST_SPEC_CTRL_BODY + RESTORE_HOST_SPEC_CTRL_BODY %sil -3: cmpb $0, _ASM_RIP(kvm_rebooting) +3: cmpb $0, kvm_rebooting(%rip) jne 2b ud2 _ASM_EXTABLE(1b, 3b) SYM_FUNC_END(__svm_sev_es_vcpu_run) +#endif /* CONFIG_KVM_AMD_SEV */ diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 88659de4d2a7..c6b4b1728006 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -735,13 +735,13 @@ TRACE_EVENT(kvm_nested_intr_vmexit, * Tracepoint for nested #vmexit because of interrupt pending */ TRACE_EVENT(kvm_invlpga, - TP_PROTO(__u64 rip, int asid, u64 address), + TP_PROTO(__u64 rip, unsigned int asid, u64 address), TP_ARGS(rip, asid, address), TP_STRUCT__entry( - __field( __u64, rip ) - __field( int, asid ) - __field( __u64, address ) + __field( __u64, rip ) + __field( unsigned int, asid ) + __field( __u64, address ) ), TP_fast_assign( @@ -750,7 +750,7 @@ TRACE_EVENT(kvm_invlpga, __entry->address = address; ), - TP_printk("rip: 0x%016llx asid: %d address: 0x%016llx", + TP_printk("rip: 0x%016llx asid: %u address: 0x%016llx", __entry->rip, __entry->asid, __entry->address) ); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 12ade343a17e..be40474de6e4 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -535,7 +535,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) perf_capabilities = vcpu_get_perf_capabilities(vcpu); if (cpuid_model_is_consistent(vcpu) && (perf_capabilities & PMU_CAP_LBR_FMT)) - x86_perf_get_lbr(&lbr_desc->records); + memcpy(&lbr_desc->records, &vmx_lbr_caps, sizeof(vmx_lbr_caps)); else lbr_desc->records.nr = 0; diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 2bfbf758d061..f6986dee6f8c 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -275,6 +275,8 @@ SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL) call vmx_spec_ctrl_restore_host + CLEAR_BRANCH_HISTORY_VMEXIT + /* Put return value in AX */ mov %_ASM_BX, %_ASM_AX diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index c37a89eda90f..22411f4aff53 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -218,6 +218,8 @@ module_param(ple_window_max, uint, 0444); int __read_mostly pt_mode = PT_MODE_SYSTEM; module_param(pt_mode, int, S_IRUGO); +struct x86_pmu_lbr __ro_after_init vmx_lbr_caps; + static DEFINE_STATIC_KEY_FALSE(vmx_l1d_should_flush); static DEFINE_STATIC_KEY_FALSE(vmx_l1d_flush_cond); static DEFINE_MUTEX(vmx_l1d_flush_mutex); @@ -7862,10 +7864,9 @@ static void vmx_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu) vmx_update_exception_bitmap(vcpu); } -static u64 vmx_get_perf_capabilities(void) +static __init u64 vmx_get_perf_capabilities(void) { u64 perf_cap = PMU_CAP_FW_WRITES; - struct x86_pmu_lbr lbr; u64 host_perf_cap = 0; if (!enable_pmu) @@ -7875,15 +7876,43 @@ static u64 vmx_get_perf_capabilities(void) rdmsrl(MSR_IA32_PERF_CAPABILITIES, host_perf_cap); if (!cpu_feature_enabled(X86_FEATURE_ARCH_LBR)) { - x86_perf_get_lbr(&lbr); - if (lbr.nr) + x86_perf_get_lbr(&vmx_lbr_caps); + + /* + * KVM requires LBR callstack support, as the overhead due to + * context switching LBRs without said support is too high. + * See intel_pmu_create_guest_lbr_event() for more info. + */ + if (!vmx_lbr_caps.has_callstack) + memset(&vmx_lbr_caps, 0, sizeof(vmx_lbr_caps)); + else if (vmx_lbr_caps.nr) perf_cap |= host_perf_cap & PMU_CAP_LBR_FMT; } if (vmx_pebs_supported()) { perf_cap |= host_perf_cap & PERF_CAP_PEBS_MASK; - if ((perf_cap & PERF_CAP_PEBS_FORMAT) < 4) - perf_cap &= ~PERF_CAP_PEBS_BASELINE; + + /* + * Disallow adaptive PEBS as it is functionally broken, can be + * used by the guest to read *host* LBRs, and can be used to + * bypass userspace event filters. To correctly and safely + * support adaptive PEBS, KVM needs to: + * + * 1. Account for the ADAPTIVE flag when (re)programming fixed + * counters. + * + * 2. Gain support from perf (or take direct control of counter + * programming) to support events without adaptive PEBS + * enabled for the hardware counter. + * + * 3. Ensure LBR MSRs cannot hold host data on VM-Entry with + * adaptive PEBS enabled and MSR_PEBS_DATA_CFG.LBRS=1. + * + * 4. Document which PMU events are effectively exposed to the + * guest via adaptive PEBS, and make adaptive PEBS mutually + * exclusive with KVM_SET_PMU_EVENT_FILTER if necessary. + */ + perf_cap &= ~PERF_CAP_PEBS_BASELINE; } return perf_cap; diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index 65786dbe7d60..90f9e4434646 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -15,6 +15,7 @@ #include "vmx_ops.h" #include "../cpuid.h" #include "run_flags.h" +#include "../mmu.h" #define MSR_TYPE_R 1 #define MSR_TYPE_W 2 @@ -109,6 +110,8 @@ struct lbr_desc { bool msr_passthrough; }; +extern struct x86_pmu_lbr vmx_lbr_caps; + /* * The nested_vmx structure is part of vcpu_vmx, and holds information we need * for correct emulation of VMX (i.e., nested VMX) on this vcpu. @@ -719,7 +722,8 @@ static inline bool vmx_need_pf_intercept(struct kvm_vcpu *vcpu) if (!enable_ept) return true; - return allow_smaller_maxphyaddr && cpuid_maxphyaddr(vcpu) < boot_cpu_data.x86_phys_bits; + return allow_smaller_maxphyaddr && + cpuid_maxphyaddr(vcpu) < kvm_get_shadow_phys_bits(); } static inline bool is_unrestricted_guest(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 47d9f03b7778..91478b769af0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1621,7 +1621,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr) ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO | \ - ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR) + ARCH_CAP_RFDS_NO | ARCH_CAP_RFDS_CLEAR | ARCH_CAP_BHI_NO) static u64 kvm_get_arch_capabilities(void) { @@ -3470,7 +3470,7 @@ static bool is_mci_status_msr(u32 msr) static bool can_set_mci_status(struct kvm_vcpu *vcpu) { /* McStatusWrEn enabled? */ - if (guest_cpuid_is_amd_or_hygon(vcpu)) + if (guest_cpuid_is_amd_compatible(vcpu)) return !!(vcpu->arch.msr_hwcr & BIT_ULL(18)); return false; diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 721b528da9ac..391059b2c6fb 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -163,6 +163,7 @@ SYM_CODE_START_NOALIGN(srso_alias_untrain_ret) lfence jmp srso_alias_return_thunk SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) .popsection .pushsection .text..__x86.rethunk_safe @@ -224,10 +225,16 @@ SYM_CODE_START(srso_return_thunk) SYM_CODE_END(srso_return_thunk) #define JMP_SRSO_UNTRAIN_RET "jmp srso_untrain_ret" -#define JMP_SRSO_ALIAS_UNTRAIN_RET "jmp srso_alias_untrain_ret" #else /* !CONFIG_MITIGATION_SRSO */ +/* Dummy for the alternative in CALL_UNTRAIN_RET. */ +SYM_CODE_START(srso_alias_untrain_ret) + ANNOTATE_UNRET_SAFE + ANNOTATE_NOENDBR + ret + int3 +SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) #define JMP_SRSO_UNTRAIN_RET "ud2" -#define JMP_SRSO_ALIAS_UNTRAIN_RET "ud2" #endif /* CONFIG_MITIGATION_SRSO */ #ifdef CONFIG_MITIGATION_UNRET_ENTRY @@ -319,9 +326,7 @@ SYM_FUNC_END(retbleed_untrain_ret) #if defined(CONFIG_MITIGATION_UNRET_ENTRY) || defined(CONFIG_MITIGATION_SRSO) SYM_FUNC_START(entry_untrain_ret) - ALTERNATIVE_2 JMP_RETBLEED_UNTRAIN_RET, \ - JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO, \ - JMP_SRSO_ALIAS_UNTRAIN_RET, X86_FEATURE_SRSO_ALIAS + ALTERNATIVE JMP_RETBLEED_UNTRAIN_RET, JMP_SRSO_UNTRAIN_RET, X86_FEATURE_SRSO SYM_FUNC_END(entry_untrain_ret) __EXPORT_THUNK(entry_untrain_ret) @@ -377,8 +382,15 @@ SYM_FUNC_END(call_depth_return_thunk) SYM_CODE_START(__x86_return_thunk) UNWIND_HINT_FUNC ANNOTATE_NOENDBR +#if defined(CONFIG_MITIGATION_UNRET_ENTRY) || \ + defined(CONFIG_MITIGATION_SRSO) || \ + defined(CONFIG_MITIGATION_CALL_DEPTH_TRACKING) ALTERNATIVE __stringify(ANNOTATE_UNRET_SAFE; ret), \ "jmp warn_thunk_thunk", X86_FEATURE_ALWAYS +#else + ANNOTATE_UNRET_SAFE + ret +#endif int3 SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index a204a332c71f..968d7005f4a7 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -26,31 +26,18 @@ static int ident_pud_init(struct x86_mapping_info *info, pud_t *pud_page, for (; addr < end; addr = next) { pud_t *pud = pud_page + pud_index(addr); pmd_t *pmd; - bool use_gbpage; next = (addr & PUD_MASK) + PUD_SIZE; if (next > end) next = end; - /* if this is already a gbpage, this portion is already mapped */ - if (pud_leaf(*pud)) - continue; - - /* Is using a gbpage allowed? */ - use_gbpage = info->direct_gbpages; - - /* Don't use gbpage if it maps more than the requested region. */ - /* at the begining: */ - use_gbpage &= ((addr & ~PUD_MASK) == 0); - /* ... or at the end: */ - use_gbpage &= ((next & ~PUD_MASK) == 0); - - /* Never overwrite existing mappings */ - use_gbpage &= !pud_present(*pud); - - if (use_gbpage) { + if (info->direct_gbpages) { pud_t pudval; + if (pud_present(*pud)) + continue; + + addr &= PUD_MASK; pudval = __pud((addr - info->offset) | info->page_flag); set_pud(pud, pudval); continue; diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 70b91de2e053..422602f6039b 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -492,6 +492,24 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ENABLED) ia32_disable(); + + /* + * Override init functions that scan the ROM region in SEV-SNP guests, + * as this memory is not pre-validated and would thus cause a crash. + */ + if (sev_status & MSR_AMD64_SEV_SNP_ENABLED) { + x86_init.mpparse.find_mptable = x86_init_noop; + x86_init.pci.init_irq = x86_init_noop; + x86_init.resources.probe_roms = x86_init_noop; + + /* + * DMI setup behavior for SEV-SNP guests depends on + * efi_enabled(EFI_CONFIG_TABLES), which hasn't been + * parsed yet. snp_dmi_setup() will run after that + * parsing has happened. + */ + x86_init.resources.dmi_setup = snp_dmi_setup; + } } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index 104544359d69..025fd7ea5d69 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -24,6 +24,7 @@ #include <linux/memblock.h> #include <linux/init.h> +#include <asm/pgtable_areas.h> #include "numa_internal.h" diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c index 0d72183b5dd0..36b603d0cdde 100644 --- a/arch/x86/mm/pat/memtype.c +++ b/arch/x86/mm/pat/memtype.c @@ -947,6 +947,38 @@ static void free_pfn_range(u64 paddr, unsigned long size) memtype_free(paddr, paddr + size); } +static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr, + pgprot_t *pgprot) +{ + unsigned long prot; + + VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT)); + + /* + * We need the starting PFN and cachemode used for track_pfn_remap() + * that covered the whole VMA. For most mappings, we can obtain that + * information from the page tables. For COW mappings, we might now + * suddenly have anon folios mapped and follow_phys() will fail. + * + * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to + * detect the PFN. If we need the cachemode as well, we're out of luck + * for now and have to fail fork(). + */ + if (!follow_phys(vma, vma->vm_start, 0, &prot, paddr)) { + if (pgprot) + *pgprot = __pgprot(prot); + return 0; + } + if (is_cow_mapping(vma->vm_flags)) { + if (pgprot) + return -EINVAL; + *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; + return 0; + } + WARN_ON_ONCE(1); + return -EINVAL; +} + /* * track_pfn_copy is called when vma that is covering the pfnmap gets * copied through copy_page_range(). @@ -957,20 +989,13 @@ static void free_pfn_range(u64 paddr, unsigned long size) int track_pfn_copy(struct vm_area_struct *vma) { resource_size_t paddr; - unsigned long prot; unsigned long vma_size = vma->vm_end - vma->vm_start; pgprot_t pgprot; if (vma->vm_flags & VM_PAT) { - /* - * reserve the whole chunk covered by vma. We need the - * starting address and protection from pte. - */ - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, &pgprot)) return -EINVAL; - } - pgprot = __pgprot(prot); + /* reserve the whole chunk covered by vma. */ return reserve_pfn_range(paddr, vma_size, &pgprot, 1); } @@ -1045,7 +1070,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, unsigned long size, bool mm_wr_locked) { resource_size_t paddr; - unsigned long prot; if (vma && !(vma->vm_flags & VM_PAT)) return; @@ -1053,11 +1077,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn, /* free the chunk starting from pfn or the whole chunk */ paddr = (resource_size_t)pfn << PAGE_SHIFT; if (!paddr && !size) { - if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) { - WARN_ON_ONCE(1); + if (get_pat_info(vma, &paddr, NULL)) return; - } - size = vma->vm_end - vma->vm_start; } free_pfn_range(paddr, size); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index a7ba8e178645..df5fac428408 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -480,7 +480,7 @@ static int emit_call(u8 **pprog, void *func, void *ip) static int emit_rsb_call(u8 **pprog, void *func, void *ip) { OPTIMIZER_HIDE_VAR(func); - x86_call_depth_emit_accounting(pprog, func); + ip += x86_call_depth_emit_accounting(pprog, func, ip); return emit_patch(pprog, func, ip, 0xE8); } @@ -1972,20 +1972,17 @@ populate_extable: /* call */ case BPF_JMP | BPF_CALL: { - int offs; + u8 *ip = image + addrs[i - 1]; func = (u8 *) __bpf_call_base + imm32; if (tail_call_reachable) { RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth); - if (!imm32) - return -EINVAL; - offs = 7 + x86_call_depth_emit_accounting(&prog, func); - } else { - if (!imm32) - return -EINVAL; - offs = x86_call_depth_emit_accounting(&prog, func); + ip += 7; } - if (emit_call(&prog, func, image + addrs[i - 1] + offs)) + if (!imm32) + return -EINVAL; + ip += x86_call_depth_emit_accounting(&prog, func, ip); + if (emit_call(&prog, func, ip)) return -EINVAL; break; } @@ -2835,7 +2832,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im * Direct-call fentry stub, as such it needs accounting for the * __fentry__ call. */ - x86_call_depth_emit_accounting(&prog, NULL); + x86_call_depth_emit_accounting(&prog, NULL, image); } EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ diff --git a/arch/x86/virt/Makefile b/arch/x86/virt/Makefile index 1e36502cd738..ea343fc392dc 100644 --- a/arch/x86/virt/Makefile +++ b/arch/x86/virt/Makefile @@ -1,2 +1,2 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += vmx/ +obj-y += svm/ vmx/ diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index cffe1157a90a..ab0e8448bb6e 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -77,7 +77,7 @@ static int __mfd_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -98,7 +98,7 @@ static int __snp_enable(unsigned int cpu) { u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; rdmsrl(MSR_AMD64_SYSCFG, val); @@ -174,11 +174,11 @@ static int __init snp_rmptable_init(void) u64 rmptable_size; u64 val; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return 0; if (!amd_iommu_snp_en) - return 0; + goto nosnp; if (!probed_rmp_size) goto nosnp; @@ -225,7 +225,7 @@ skip_enable: return 0; nosnp: - setup_clear_cpu_cap(X86_FEATURE_SEV_SNP); + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); return -ENOSYS; } @@ -246,7 +246,7 @@ static struct rmpentry *__snp_lookup_rmpentry(u64 pfn, int *level) { struct rmpentry *large_entry, *entry; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return ERR_PTR(-ENODEV); entry = get_rmpentry(pfn); @@ -363,7 +363,7 @@ int psmash(u64 pfn) unsigned long paddr = pfn << PAGE_SHIFT; int ret; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; if (!pfn_valid(pfn)) @@ -472,7 +472,7 @@ static int rmpupdate(u64 pfn, struct rmp_state *state) unsigned long paddr = pfn << PAGE_SHIFT; int ret, level; - if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) + if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) return -ENODEV; level = RMP_TO_PG_LEVEL(state->pagesize); @@ -558,3 +558,13 @@ void snp_leak_pages(u64 pfn, unsigned int npages) spin_unlock(&snp_leaked_pages_list_lock); } EXPORT_SYMBOL_GPL(snp_leak_pages); + +void kdump_sev_callback(void) +{ + /* + * Do wbinvd() on remote CPUs when SNP is enabled in order to + * safely do SNP_SHUTDOWN on the local CPU. + */ + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + wbinvd(); +} |